## PatentsView PatentSearch API Quickstart (키 기반)

이 노트북은 **PatentsView PatentSearch API Key**로 USPTO 특허 데이터를 검색/수집하는 절차를 확인하기 위한 예제입니다.

- 문서: [PatentsView PatentSearch Docs](https://search.patentsview.org/docs/)
- 키 신청/관리: [PatentsView Support Portal](https://patentsview-support.atlassian.net/servicedesk/customer/portals)

### 준비
- 프로젝트 루트에 `env`(권장) 또는 `.env` 파일을 만들고 아래 값을 채우세요.
  - `PATENTSVIEW_API_KEY=...`

> 주의: API 키를 노트북/레포에 하드코딩하지 마세요.


In [1]:
import os
import json
import time
from typing import Any, Dict, List, Optional, Tuple

import requests
from dotenv import load_dotenv

# 키 로드: 자동 탐색(load_dotenv())은 환경에 따라 AssertionError가 날 수 있어 명시 로드로 처리
load_dotenv(".env")
load_dotenv("env")

PATENTSVIEW_API_KEY = os.getenv("PATENTSVIEW_API_KEY", "")
BASE_URL = os.getenv("PATENTSVIEW_BASE_URL", "https://search.patentsview.org/api/v1")

if not PATENTSVIEW_API_KEY:
    raise ValueError("PATENTSVIEW_API_KEY가 비어 있습니다. 프로젝트 루트의 env/.env에 설정하세요.")

SESSION = requests.Session()
SESSION.headers.update(
    {
        # PatentsView PatentSearch 문서 기준: X-Api-Key 헤더 사용
        "X-Api-Key": PATENTSVIEW_API_KEY,
        "Accept": "application/json",
        "Content-Type": "application/json",
        "User-Agent": "ptab-dataset-notebook/0.1",
    }
)

print("BASE_URL:", BASE_URL)
print("API KEY loaded:", PATENTSVIEW_API_KEY[:4] + "..." + PATENTSVIEW_API_KEY[-4:])


BASE_URL: https://search.patentsview.org/api/v1
API KEY loaded: hzHy...D9eX


In [2]:
def pv_post(path: str, payload: Dict[str, Any], *, timeout: int = 60) -> Dict[str, Any]:
    """PatentsView PatentSearch API에 POST 요청."""
    url = f"{BASE_URL.rstrip('/')}/{path.lstrip('/')}"
    r = SESSION.post(url, data=json.dumps(payload), timeout=timeout)
    if r.status_code >= 400:
        # 디버깅을 위해 일부만 출력
        raise RuntimeError(f"HTTP {r.status_code}: {r.text[:500]}")
    return r.json()


def build_query_text_any(*, fields: List[str], terms: List[str]) -> Dict[str, Any]:
    """여러 필드에 대해 텍스트 any 매칭 OR 쿼리 생성."""
    ors = []
    for f in fields:
        for t in terms:
            ors.append({"_text_any": {f: t}})
    return {"_or": ors}


def patents_search(
    *,
    q: Dict[str, Any],
    f: List[str],
    per_page: int = 25,
    page: int = 1,
    sort: Optional[List[Dict[str, str]]] = None,
) -> Dict[str, Any]:
    """특허 검색(페이지 단위)."""
    payload: Dict[str, Any] = {
        "q": q,
        "f": f,
        "o": {
            "per_page": per_page,
            "page": page,
        },
    }
    if sort:
        payload["s"] = sort

    # PatentSearch 기본 예시 엔드포인트
    return pv_post("patent/", payload)


def fetch_all_pages(
    *,
    q: Dict[str, Any],
    f: List[str],
    per_page: int = 100,
    max_pages: int = 3,
    sleep_sec: float = 0.2,
) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]:
    """여러 페이지를 순회하며 특허 리스트를 누적 수집."""
    all_rows: List[Dict[str, Any]] = []
    last_meta: Dict[str, Any] = {}

    for page in range(1, max_pages + 1):
        data = patents_search(q=q, f=f, per_page=per_page, page=page)
        # 응답 키는 보통 patents / count / total_patent_count 등으로 제공될 수 있음
        patents = data.get("patents") or data.get("results") or []
        all_rows.extend(patents)
        last_meta = {k: v for k, v in data.items() if k != "patents"}

        # 더 이상 결과가 없으면 종료
        if not patents:
            break

        time.sleep(sleep_sec)

    return all_rows, last_meta


In [3]:
# 공통으로 받을 필드(문서에서 필요 시 추가/수정하세요)
# 중요: PatentSearch API에서 특허 번호는 `patent_number`가 아니라 `patent_id`입니다.
FIELDS_BASIC = [
    "patent_id",
    "patent_title",
    "patent_date",
    "patent_abstract",
]

# (샘플) LLM/Agentic AI 관련 특허: 제목/초록에 키워드가 등장하는 케이스를 폭넓게
q_llm_agentic = build_query_text_any(
    fields=["patent_title", "patent_abstract"],
    terms=[
        "LLM",
        "large language model",
        "agentic",
        "autonomous agent",
        "tool use",
        "retrieval augmented generation",
        "RAG",
    ],
)

rows_llm, meta_llm = fetch_all_pages(q=q_llm_agentic, f=FIELDS_BASIC, per_page=25, max_pages=2)
print("rows:", len(rows_llm))
print("meta keys:", list(meta_llm.keys())[:20])

# 결과 일부 확인
for r in rows_llm[:3]:
    print(r.get("patent_id"), r.get("patent_date"), r.get("patent_title"))


rows: 200
meta keys: ['error', 'count', 'total_hits']
10000003 2018-06-19 Method for producing a container from a thermoplastic
10000006 2018-06-19 Thermoforming mold device and a process for its manufacture and use
10000007 2018-06-19 PEX expanding tool


In [4]:
# (샘플) 반도체 장비 관련 특허: 장비/공정 키워드를 OR로 묶어서 폭넓게
q_semi_equipment = build_query_text_any(
    fields=["patent_title", "patent_abstract"],
    terms=[
        "semiconductor equipment",
        "lithography",
        "photoresist",
        "etch",
        "plasma etch",
        "CVD",
        "PVD",
        "atomic layer deposition",
        "ALD",
        "wafer handling",
        "vacuum chamber",
    ],
)

rows_semi, meta_semi = fetch_all_pages(q=q_semi_equipment, f=FIELDS_BASIC, per_page=25, max_pages=2)
print("rows:", len(rows_semi))

for r in rows_semi[:3]:
    print(r.get("patent_id"), r.get("patent_date"), r.get("patent_title"))


rows: 200
10000002 2018-06-19 Method for manufacturing polymer film and co-extruded film
10000005 2018-06-19 Article vacuum formation method and vacuum forming apparatus
10000009 2018-06-19 Sterile environment for additive manufacturing


In [None]:
import pandas as pd
from pathlib import Path


def find_repo_root(start: Path | None = None) -> Path:
    cur = (start or Path.cwd()).resolve()
    for p in (cur, *cur.parents):
        if (p / "pyproject.toml").exists() and (p / "data").exists():
            return p
    return cur


def to_df(rows: List[Dict[str, Any]]) -> pd.DataFrame:
    df = pd.DataFrame(rows)
    # 최소 필드가 없을 수도 있어 방어적으로 처리
    if "patent_date" in df.columns:
        df["patent_date"] = pd.to_datetime(df["patent_date"], errors="coerce")
    return df


df_llm = to_df(rows_llm).sort_values(by="patent_date", ascending=False, na_position="last")
df_semi = to_df(rows_semi).sort_values(by="patent_date", ascending=False, na_position="last")

display(df_llm.head(10))
display(df_semi.head(10))

# 저장 (개발 전 단계에서 로컬로 절차 확인용)
repo_root = find_repo_root()
out_dir = repo_root / "data" / "processed"
out_dir.mkdir(parents=True, exist_ok=True)

df_llm.to_json(out_dir / "patentsview_llm_agentic_sample.jsonl", orient="records", lines=True, force_ascii=False)
df_semi.to_json(out_dir / "patentsview_semiconductor_equipment_sample.jsonl", orient="records", lines=True, force_ascii=False)

print("saved:", out_dir / "patentsview_llm_agentic_sample.jsonl")
print("saved:", out_dir / "patentsview_semiconductor_equipment_sample.jsonl")


Unnamed: 0,patent_id,patent_title,patent_date,patent_abstract,assignees,cpc_current,inventors,wipo
0,10000003,Method for producing a container from a thermo...,2018-06-19,The invention relates to a method for producin...,[{'assignee': 'https://search.patentsview.org/...,"[{'cpc_sequence': 0, 'cpc_class': 'https://sea...",[{'inventor': 'https://search.patentsview.org/...,[{'wipo_field': 'https://search.patentsview.or...
137,10000320,Wine cork with built-in gas activated mini-cor...,2018-06-19,Some embodiments of the present disclosure inc...,,"[{'cpc_sequence': 0, 'cpc_class': 'https://sea...",[{'inventor': 'https://search.patentsview.org/...,[{'wipo_field': 'https://search.patentsview.or...
127,10000228,Steering apparatus,2018-06-19,A steering apparatus includes: a fixed bracket...,[{'assignee': 'https://search.patentsview.org/...,"[{'cpc_sequence': 0, 'cpc_class': 'https://sea...",[{'inventor': 'https://search.patentsview.org/...,[{'wipo_field': 'https://search.patentsview.or...
128,10000261,"System, method, and apparatus for stabilizing ...",2018-06-19,A stabilizer that extends from the hull of a w...,,"[{'cpc_sequence': 0, 'cpc_class': 'https://sea...",[{'inventor': 'https://search.patentsview.org/...,[{'wipo_field': 'https://search.patentsview.or...
129,10000262,"Data-processing device, program, recording med...",2018-06-19,A terminal device stores ship characteristic d...,[{'assignee': 'https://search.patentsview.org/...,"[{'cpc_sequence': 0, 'cpc_class': 'https://sea...",[{'inventor': 'https://search.patentsview.org/...,[{'wipo_field': 'https://search.patentsview.or...
130,10000263,Underwater system and method,2018-06-19,Systems and methods are provided for underwate...,[{'assignee': 'https://search.patentsview.org/...,"[{'cpc_sequence': 0, 'cpc_class': 'https://sea...",[{'inventor': 'https://search.patentsview.org/...,[{'wipo_field': 'https://search.patentsview.or...
131,10000265,Controlling buoyancy of an underwater vehicle ...,2018-06-19,An underwater vehicle may include a buoyancy c...,[{'assignee': 'https://search.patentsview.org/...,"[{'cpc_sequence': 0, 'cpc_class': 'https://sea...",[{'inventor': 'https://search.patentsview.org/...,[{'wipo_field': 'https://search.patentsview.or...
132,10000273,Passive load alleviation for a fiber reinforce...,2018-06-19,Awing box of an aircraft with a stiffened shel...,[{'assignee': 'https://search.patentsview.org/...,"[{'cpc_sequence': 0, 'cpc_class': 'https://sea...",[{'inventor': 'https://search.patentsview.org/...,[{'wipo_field': 'https://search.patentsview.or...
133,10000289,Temperature control gasper apparatus,2018-06-19,A temperature control gasper apparatus for veh...,[{'assignee': 'https://search.patentsview.org/...,"[{'cpc_sequence': 0, 'cpc_class': 'https://sea...",[{'inventor': 'https://search.patentsview.org/...,[{'wipo_field': 'https://search.patentsview.or...
134,10000299,Self-repair structures and methods for making ...,2018-06-19,Methods and apparatuses are disclosed relating...,[{'assignee': 'https://search.patentsview.org/...,"[{'cpc_sequence': 0, 'cpc_class': 'https://sea...",[{'inventor': 'https://search.patentsview.org/...,[{'wipo_field': 'https://search.patentsview.or...


Unnamed: 0,patent_id,patent_title,patent_date,patent_abstract,assignees,cpc_current,inventors,wipo
0,10000002,Method for manufacturing polymer film and co-e...,2018-06-19,The present invention relates to: a method for...,[{'assignee': 'https://search.patentsview.org/...,"[{'cpc_sequence': 0, 'cpc_class': 'https://sea...",[{'inventor': 'https://search.patentsview.org/...,[{'wipo_field': 'https://search.patentsview.or...
137,10000293,Gas-electric propulsion system for an aircraft,2018-06-19,In one aspect the present subject matter is di...,[{'assignee': 'https://search.patentsview.org/...,"[{'cpc_sequence': 0, 'cpc_class': 'https://sea...",[{'inventor': 'https://search.patentsview.org/...,[{'wipo_field': 'https://search.patentsview.or...
127,10000091,Tire comprising a tread made up of several ela...,2018-06-19,A tire with radial carcass reinforcement compr...,,"[{'cpc_sequence': 0, 'cpc_class': 'https://sea...",[{'inventor': 'https://search.patentsview.org/...,[{'wipo_field': 'https://search.patentsview.or...
128,10000178,Vehicle curtain airbag device,2018-06-19,A vehicle curtain airbag device that includes:...,[{'assignee': 'https://search.patentsview.org/...,"[{'cpc_sequence': 0, 'cpc_class': 'https://sea...",[{'inventor': 'https://search.patentsview.org/...,[{'wipo_field': 'https://search.patentsview.or...
129,10000224,Handling device for the maintenance of agitato...,2018-06-19,The invention relates to a method and handling...,[{'assignee': 'https://search.patentsview.org/...,"[{'cpc_sequence': 0, 'cpc_class': 'https://sea...",[{'inventor': 'https://search.patentsview.org/...,[{'wipo_field': 'https://search.patentsview.or...
130,10000240,Vehicle high voltage equipment mounting for re...,2018-06-19,"A vehicle includes an engine, a seat, an upper...",[{'assignee': 'https://search.patentsview.org/...,"[{'cpc_sequence': 0, 'cpc_class': 'https://sea...",[{'inventor': 'https://search.patentsview.org/...,[{'wipo_field': 'https://search.patentsview.or...
131,10000259,Suction anchor,2018-06-19,A suction anchor for a remotely operated vehic...,[{'assignee': 'https://search.patentsview.org/...,"[{'cpc_sequence': 0, 'cpc_class': 'https://sea...",[{'inventor': 'https://search.patentsview.org/...,[{'wipo_field': 'https://search.patentsview.or...
132,10000261,"System, method, and apparatus for stabilizing ...",2018-06-19,A stabilizer that extends from the hull of a w...,,"[{'cpc_sequence': 0, 'cpc_class': 'https://sea...",[{'inventor': 'https://search.patentsview.org/...,[{'wipo_field': 'https://search.patentsview.or...
133,10000265,Controlling buoyancy of an underwater vehicle ...,2018-06-19,An underwater vehicle may include a buoyancy c...,[{'assignee': 'https://search.patentsview.org/...,"[{'cpc_sequence': 0, 'cpc_class': 'https://sea...",[{'inventor': 'https://search.patentsview.org/...,[{'wipo_field': 'https://search.patentsview.or...
134,10000272,Thermal acoustic insulation blankets,2018-06-19,The present invention pertains to an insulatio...,[{'assignee': 'https://search.patentsview.org/...,"[{'cpc_sequence': 0, 'cpc_class': 'https://sea...",[{'inventor': 'https://search.patentsview.org/...,[{'wipo_field': 'https://search.patentsview.or...


saved: /home/arkwith/Dev/paper_data/data/processed/patentsview_llm_agentic_sample.jsonl
saved: /home/arkwith/Dev/paper_data/data/processed/patentsview_semiconductor_equipment_sample.jsonl


### 다음 단계(Agentic AI 검색 기능 개발을 위한 준비)

- **필드 확장**: 문서에서 필요한 필드를 골라 `FIELDS_BASIC`에 추가하세요(assignee, CPC, inventor 등).
- **쿼리 정교화**: 키워드 OR만으로는 노이즈가 커질 수 있어, IPC/CPC 필터를 병행하는 것을 권장합니다.
- **레이트리밋/재시도**: 대량 수집 시 `sleep_sec`, `max_pages`를 조절하고, 실패 응답을 로깅해 재시도 큐를 만드세요.

문서/쿼리 문법은 아래를 참고하세요.
- [PatentsView PatentSearch Docs](https://search.patentsview.org/docs/)
