# Finding patent data related to Switzerland

In [1]:
import requests
import json
import pandas as pd

## Getting Patent data from EPO

In [2]:
def get_patent_list(query, begin, end):
    """Search EPO OPS published data and return the matching publication references.

    Parameters
    ----------
    query : str
        Search expression, sent as the ``q`` request parameter (URL-encoded
        by ``requests``).
    begin, end : int
        First and last result positions, sent as ``Range={begin}-{end}``.

    Returns
    -------
    The ``ops:publication-reference`` entry of the JSON response.

    Raises
    ------
    requests.HTTPError
        If the service answers with a 4xx/5xx status.
    ValueError
        If the response body is not valid JSON.
    KeyError
        If the expected ``ops:...`` path is missing from the response.
    """
    url = "http://ops.epo.org/3.1/rest-services/published-data/search"
    params = {
        "q": query,
        "Range": "{begin}-{end}".format(begin=begin, end=end),
    }

    # Issue the request through the session so the context manager actually
    # owns (and closes) the connection it opened.
    with requests.Session() as s:
        response = s.get(url,
                         params=params,
                         headers={"Accept": "application/json"})

    # Fail with the HTTP status rather than an opaque JSON decoding error
    # when the service returns an error page.
    response.raise_for_status()

    return response.json()["ops:world-patent-data"]["ops:biblio-search"]["ops:search-result"]["ops:publication-reference"]

In [3]:
#get_patent_list("switzerland", 1, 10)

In [4]:
def get_abstract(country, doc_number):
    """Fetch the abstract of one publication from EPO OPS.

    Parameters
    ----------
    country : str
        Country code of the publication (e.g. ``"US"``).
    doc_number : str
        Document number; concatenated to ``country`` to form the epodoc id.

    Returns
    -------
    tuple
        ``(language, abstract_text)`` taken from the abstract's ``@lang``
        attribute and its ``p`` / ``$`` text node.

    Raises
    ------
    requests.HTTPError
        If the service answers with a 4xx/5xx status.
    ValueError
        If the response body is not valid JSON.
    KeyError, TypeError, IndexError
        If the response does not have the expected structure.
    """
    url = ("http://ops.epo.org/3.1/rest-services/published-data/publication/epodoc/"
           "{doc_number}/abstract".format(doc_number=country + doc_number))

    # Issue the request through the session so the context manager actually
    # owns (and closes) the connection it opened.
    with requests.Session() as s:
        response = s.get(url,
                         headers={"Accept": "application/json"})

    # Surface HTTP errors explicitly instead of failing later while decoding.
    response.raise_for_status()

    payload = response.json()
    abstract = payload["ops:world-patent-data"]["exchange-documents"]["exchange-document"]["abstract"]

    # NOTE(review): the service appears to return a list here when a document
    # carries abstracts in several languages - confirm against the OPS docs.
    # Prefer the English entry, otherwise fall back to the first one.
    if isinstance(abstract, list):
        english = [a for a in abstract if a.get("@lang") == "en"]
        abstract = (english or abstract)[0]

    language = abstract["@lang"]
    abstract_text = abstract["p"]["$"]

    return language, abstract_text
    

In [5]:
# Example call: fetch the (language, abstract_text) pair for publication US2016304634.
get_abstract("US", "2016304634")

('en',
 'The present invention relates to ionomers comprising a reaction product of the reaction between a halogenated isoolefin copolymer and at least one phosphorus based nucleophile comprising at least one pendant vinyl group. The present invention also relates to a method of preparing and curing these ionomers.')

In [6]:
def extract_id(patent_json):
    """Return the ``(country, doc_number, kind)`` triple of a publication reference.

    Each value is read from the ``"$"`` text node of the matching field
    under ``patent_json["document-id"]``.
    """
    doc_id = patent_json["document-id"]
    return tuple(doc_id[field]["$"] for field in ("country", "doc-number", "kind"))

In [7]:
# Fetch search results 1001-1100 for the query "switzerland".
patent_list = get_patent_list("switzerland", 1001, 1100)

In [45]:
# Collect the English abstracts of every publication in `patent_list`.
# Fetching is best-effort: a publication whose id or abstract cannot be
# retrieved is recorded in `skipped` and the loop moves on.
en_abstracts = []
skipped = []
for p in patent_list:
    try:
        country, doc_number, _ = extract_id(p)
        lan, abstract = get_abstract(country, doc_number)
    except Exception as exc:
        # `Exception` (not a bare except) so Ctrl-C / kernel interrupt still works.
        skipped.append((p, repr(exc)))
        continue
    if lan == 'en':
        en_abstracts.append(abstract)
    

In [9]:
import time

In [35]:
# Fetch search results 101-200 for the query "switzerland".
patent_list = get_patent_list("switzerland", 101, 200)

In [15]:
# Preview the page start positions (1, 101, ..., 901) produced by a step of 100.
[t for t in range(1,1000,100)]

[1, 101, 201, 301, 401, 501, 601, 701, 801, 901]

In [15]:
# Page through the first 1000 search results, 100 at a time, and collect
# every English abstract across ALL pages. The accumulator is created once,
# before the loop, so earlier pages are not discarded.
en_abstracts = []
skipped = []
for start in range(1, 1000, 100):
    end = start + 99
    print(start, end)
    # NOTE(review): this call is outside the try block, so a failed page
    # request aborts the whole loop (as in the recorded run, which stopped
    # with JSONDecodeError after printing "101 200").
    patent_list = get_patent_list("switzerland", start, end)
    time.sleep(1)  # pause between page requests
    for p in patent_list:
        try:
            country, doc_number, _ = extract_id(p)
            lan, abstract = get_abstract(country, doc_number)
        except Exception as exc:
            # Best-effort per publication; `Exception` (not a bare except)
            # so Ctrl-C / kernel interrupt still works.
            skipped.append((p, repr(exc)))
            continue
        if lan == 'en':
            en_abstracts.append(abstract)
    

1 100
101 200


JSONDecodeError: Expecting value: line 1 column 1 (char 0)

# len(en_abstracts)

In [43]:
# Preview (start, start + 100) pairs. Each end equals the next start here,
# whereas the paging loop above uses start + 99 as the end of a page.
[(t,t+100) for t in range(1,1000,100)]

[(1, 101),
 (101, 201),
 (201, 301),
 (301, 401),
 (401, 501),
 (501, 601),
 (601, 701),
 (701, 801),
 (801, 901),
 (901, 1001)]

In [11]:
# Display the current contents of patent_list.
patent_list

[{'@family-id': '48537438',
  '@system': 'ops.epo.org',
  'document-id': {'@document-id-type': 'docdb',
   'country': {'$': 'HK'},
   'doc-number': {'$': '1216631'},
   'kind': {'$': 'A1'}}},
 {'@family-id': '40342378',
  '@system': 'ops.epo.org',
  'document-id': {'@document-id-type': 'docdb',
   'country': {'$': 'US'},
   'doc-number': {'$': '2016340462'},
   'kind': {'$': 'A1'}}},
 {'@family-id': '57320232',
  '@system': 'ops.epo.org',
  'document-id': {'@document-id-type': 'docdb',
   'country': {'$': 'US'},
   'doc-number': {'$': '2016341568'},
   'kind': {'$': 'A1'}}},
 {'@family-id': '57320232',
  '@system': 'ops.epo.org',
  'document-id': {'@document-id-type': 'docdb',
   'country': {'$': 'WO'},
   'doc-number': {'$': '2016186871'},
   'kind': {'$': 'A1'}}},
 {'@family-id': '44927889',
  '@system': 'ops.epo.org',
  'document-id': {'@document-id-type': 'docdb',
   'country': {'$': 'DK'},
   'doc-number': {'$': '2535540'},
   'kind': {'$': 'T3'}}},
 {'@family-id': '57288044',
  '

## Patent extract from Deepdive

In [4]:
# Load the tab-separated patent extract and show its first rows.
# NOTE(review): in the rendered output the column header looks like a data
# row ("02349994.nlp", 1, ...), so the file may have no real header line -
# consider header=None with explicit column names; confirm against the file.
patents_df = pd.read_csv("patent.tsv", sep="\t")
patents_df.head()

Unnamed: 0,02349994.nlp,1,"{""1"",""2"",""3"",""4"",""5"",""6"",""7"",""8"",""9"",""10"",""11"",""12""}","{""WKU"",""02349994DWKU"",""2349994APT"",""ADID"",""US"",""2349994"",""ATTL"",""OCR"",""SCANNED"",""DOCUMENTDSRC"",""OCLPAR"","".""}","{""NNP"",""NNP"",""NNP"",""NNP"",""NNP"",""CD"",""NNP"",""NN"",""VBD"",""JJ"",""NN"","".""}","{""ORGANIZATION"",""O"",""O"",""O"",""LOCATION"",""NUMBER"",""O"",""O"",""O"",""O"",""O"",""O""}","{""WKU"",""02349994DWKU"",""2349994APT"",""ADID"",""US"",""2349994"",""ATTL"",""ocr"",""scan"",""documentdsrc"",""oclpar"","".""}","{""nn"",""nn"",""nn"",""nn"",""nn"",""num"",""nn"",""nsubj"",""null"",""amod"",""dobj"",""null""}","{""8"",""8"",""8"",""8"",""8"",""8"",""8"",""9"",""0"",""11"",""9"",""0""}"
0,02349994.nlp,2,"{""1"",""2"",""3"",""4"",""5""}","{""May"",""30"","","",""1944"","".""}","{""NNP"",""CD"","","",""CD"","".""}","{""DATE"",""DATE"",""DATE"",""DATE"",""O""}","{""May"",""30"","","",""1944"","".""}","{""null"",""num"",""null"",""num"",""null""}","{""0"",""1"",""0"",""1"",""0""}"
1,02349994.nlp,3,"{""1"",""2""}","{""1"","".""}","{""LS"","".""}","{""NUMBER"",""O""}","{""1"","".""}","{""null"",""null""}","{""0"",""0""}"
2,02349994.nlp,4,"{""1"",""2"",""3"",""4"",""5"",""6"",""7"",""8"",""9"",""10"",""11""...","{""J."",""SNADER"",""2,349,994"",""ABRADING"",""TOOL"",""...","{""NNP"",""NNP"",""CD"",""NNP"",""NNP"",""VBD"",""NNP"",""CD""...","{""PERSON"",""PERSON"",""NUMBER"",""O"",""O"",""O"",""DATE""...","{""J."",""SNADER"",""2,349,994"",""ABRADING"",""TOOL"",""...","{""nn"",""nn"",""num"",""nn"",""nsubj"",""null"",""tmod"",""n...","{""5"",""5"",""5"",""5"",""6"",""0"",""6"",""7"",""0"",""12"",""12""..."
3,02349994.nlp,5,"{""1"",""2"",""3""}","{""-LRB-"",""Cl"","".""}","{""-LRB-"",""NN"","".""}","{""O"",""O"",""O""}","{""-lrb-"",""cl"","".""}","{""null"",""null"",""null""}","{""0"",""0"",""0""}"
4,02349994.nlp,6,"{""1"",""2"",""3"",""4"",""5"",""6"",""7"",""8"",""9"",""10"",""11""...","{""51-184"","".3"",""-RRB-"",""The"",""invention"",""rela...","{""JJ"",""NN"",""-RRB-"",""DT"",""NN"",""VBZ"",""RB"",""TO"",""...","{""O"",""NUMBER"",""O"",""O"",""O"",""O"",""O"",""O"",""O"",""O"",...","{""51-184"","".3"",""-rrb-"",""the"",""invention"",""rela...","{""amod"",""nsubj"",""null"",""det"",""nsubj"",""null"",""a...","{""2"",""6"",""0"",""5"",""6"",""0"",""6"",""9"",""6"",""0"",""9"",""..."
