In [1]:
import os
import requests
import pandas as pd
from dotenv import load_dotenv

In [5]:
# Read the variables declared in ../client/.env.local into the process
# environment; VITE_API_KEY is looked up with os.getenv further down.
load_dotenv("../client/.env.local")

True

In [60]:
# Search endpoint of the scanR patents index.
scanr_url = "https://cluster-production.elasticsearch.dataesr.ovh/scanr-patents/_search"

# Authorization header built from the VITE_API_KEY environment variable.
scanr_key = "Basic {}".format(os.getenv("VITE_API_KEY"))
scanr_headers = dict(Authorization=scanr_key)

# Patent fields requested in each hit's `_source`.
source_fields = [
    "id",
    "inpadocFamily",
    "applicants",
    "inventors",
    "title",
    "cpc",
    "year",
]


def scanr_patents(size=10, query=None, organizationId=None, timeout=60):
    """Query the scanR patents index and return the decoded JSON response.

    Args:
        size: Maximum number of hits requested from the index.
        query: ``query_string`` expression; ``"*"`` is used when it is empty
            or omitted. Ignored when ``organizationId`` is given.
        organizationId: When not ``None``, the search is a ``term`` match on
            ``applicants.ids.id.keyword`` instead of a ``query_string``.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without it a stalled connection blocks forever.

    Returns:
        The response body parsed as JSON when the status code is 200.
        Otherwise the status, reason and body are printed and ``None`` is
        returned, so callers must check for ``None`` before using the result.
    """
    if organizationId is None:
        must = {"query_string": {"query": query or "*"}}
    else:
        must = {"term": {"applicants.ids.id.keyword": organizationId}}

    body = {
        "size": size,
        "_source": source_fields,
        "query": {
            "bool": {
                "must": [must],
            },
        },
    }

    res = requests.post(scanr_url, json=body, headers=scanr_headers, timeout=timeout)
    if res.status_code == 200:
        return res.json()

    print(f"error {res.status_code}: {res.reason}")
    print(res.text)
    return None

In [61]:
# Request up to 10000 patents for one applicant organization id and keep
# only the `_source` document of every hit.
res = scanr_patents(size=10000, organizationId="632012100")
hits = res.get("hits").get("hits")
patents = [hit.get("_source") for hit in hits]
len(patents)

6595

In [62]:
from itertools import combinations


def persons_links(patents):
    """Build co-occurrence links between persons named on the same patent.

    For each patent, applicants and inventors whose ``type`` is ``"person"``
    are pooled and de-duplicated by name; every unordered pair of names
    produces one link. Names are compared as exact strings, so spelling
    variants of the same person are treated as different persons.

    Args:
        patents: Iterable of patent dicts. ``applicants`` and ``inventors``
            are optional lists of dicts read through their ``type`` and
            ``name`` keys.

    Returns:
        Dict keyed by ``"<name a>---<name b>"`` (names in sorted order) whose
        values are ``{"source_id", "target_id", "strength"}``; ``strength``
        is the number of patents on which the pair appears together.
    """
    links = {}
    for patent in patents:
        # `or []` also covers a key that is present but holds null.
        people = (patent.get("applicants") or []) + (patent.get("inventors") or [])

        # Entries without a name are skipped: None can neither be sorted
        # against strings nor joined into a link id. Sorting the set makes
        # the link order reproducible from one run to the next.
        names = sorted(
            {
                person.get("name")
                for person in people
                if person.get("type") == "person" and person.get("name")
            }
        )

        # `names` is sorted and unique, so each pair is already (low, high).
        for source_id, target_id in combinations(names, 2):
            link_id = "---".join((source_id, target_id))
            if link_id in links:
                links[link_id]["strength"] += 1
            else:
                links[link_id] = {"source_id": source_id, "target_id": target_id, "strength": 1}
    return links


def organizations_links(patents):
    """Build co-occurrence links between organisations applying on the same patent.

    For each patent, applicants whose ``type`` is ``"organisation"`` are
    de-duplicated by name; every unordered pair of names produces one link.
    Names are compared as exact strings.

    Args:
        patents: Iterable of patent dicts. ``applicants`` is an optional list
            of dicts read through their ``type`` and ``name`` keys.

    Returns:
        Dict keyed by ``"<name a>---<name b>"`` (names in sorted order) whose
        values are ``{"source_id", "target_id", "strength"}``; ``strength``
        is the number of patents on which the pair appears together.
    """
    links = {}
    for patent in patents:
        # `or []` also covers a key that is present but holds null.
        applicants = patent.get("applicants") or []

        # Entries without a name are skipped: None can neither be sorted
        # against strings nor joined into a link id. Sorting the set makes
        # the link order reproducible from one run to the next.
        names = sorted(
            {
                applicant.get("name")
                for applicant in applicants
                if applicant.get("type") == "organisation" and applicant.get("name")
            }
        )

        # `names` is sorted and unique, so each pair is already (low, high).
        for source_id, target_id in combinations(names, 2):
            link_id = "---".join((source_id, target_id))
            if link_id in links:
                links[link_id]["strength"] += 1
            else:
                links[link_id] = {"source_id": source_id, "target_id": target_id, "strength": 1}
    return links


def cpc_links(patents, group="ss_classe"):
    """Build co-occurrence links between CPC entries listed on the same patent.

    Each entry of ``patent["cpc"][group]`` is identified by
    ``"<code>###<label>"``. Identifiers are de-duplicated per patent, so a
    code listed twice neither links to itself nor counts a pair twice, and
    every unordered pair of identifiers produces one link.

    Args:
        patents: Iterable of patent dicts. ``cpc`` is an optional dict
            mapping a group name to a list of dicts read through their
            ``code`` and ``label`` keys.
        group: Key of the ``cpc`` dict whose entries are linked.

    Returns:
        Dict keyed by ``"<id a>---<id b>"`` (identifiers in sorted order)
        whose values are ``{"source_id", "target_id", "strength"}``. Here
        ``source_id`` / ``target_id`` hold the entry *labels*, not the codes,
        and ``strength`` is the number of patents sharing the pair.
    """
    links = {}
    for patent in patents:
        # `or` fallbacks cover both a missing key and a key holding null.
        entries = (patent.get("cpc") or {}).get(group) or []
        keys = [f'{entry.get("code")}###{entry.get("label")}' for entry in entries]

        # De-duplicate while keeping first-seen order.
        unique_keys = list(dict.fromkeys(keys))

        for pair in combinations(unique_keys, 2):
            first, second = sorted(pair)
            link_id = "---".join((first, second))

            if link_id in links:
                links[link_id]["strength"] += 1
            else:
                # maxsplit=1 keeps a label intact even if it contains "###".
                links[link_id] = {
                    "source_id": first.split("###", 1)[1],
                    "target_id": second.split("###", 1)[1],
                    "strength": 1,
                }

    return links

In [43]:
# Preview the person links produced by the first two patents only.
persons_links(patents[:2])

{'De Monbrison David---Fraysse Nicolas': {'source_id': 'De Monbrison David',
  'target_id': 'Fraysse Nicolas',
  'strength': 1},
 'Beynet Jean Marc---Fraysse Nicolas': {'source_id': 'Beynet Jean Marc',
  'target_id': 'Fraysse Nicolas',
  'strength': 1},
 'Beynet Jean Marc---De Monbrison David': {'source_id': 'Beynet Jean Marc',
  'target_id': 'De Monbrison David',
  'strength': 1},
 'Thorel Jean Noel---Thorel Jean-Noël': {'source_id': 'Thorel Jean Noel',
  'target_id': 'Thorel Jean-Noël',
  'strength': 1}}

In [64]:
# Compute the CPC links at the "classe" level, one DataFrame row per link,
# then write them to disk as a JSON array of records.
cpc_class_links = cpc_links(patents, group="classe")
df = pd.DataFrame.from_dict(cpc_class_links, orient="index")
df.to_json("patents_cpc_links.json", orient="records")