In [1]:
import epo_ops
import os
import json
from pathlib import Path
import xml.etree.ElementTree as ET

# Middleware instances handed to the OPS client when it is constructed below.
middlewares = [epo_ops.middlewares.Dogpile(), epo_ops.middlewares.Throttler()]

In [2]:
# Credentials live in api_keys.json, a JSON file expected one directory above
# the working directory (the project root). It must hold two attributes,
# "key" and "secret", for example:
# { "key": "YOUR_APPLICATION_KEY", "secret": "YOUR_APPLICATION_SECRET"}
api_keys_path = Path('..', 'api_keys.json')

In [3]:
# Read the credentials file as text and decode it into a dict.
api_keys = json.loads(api_keys_path.read_text())

In [4]:
# Build the OPS client from the loaded credentials and the middleware list.
client = epo_ops.Client(key=api_keys['key'], secret=api_keys['secret'], middlewares=middlewares)

In [5]:
# Re-execute epo_ops.models before importing from it, so that Epodoc and Docdb
# are bound to the freshly reloaded module.
# NOTE(review): presumably a leftover from editing the package during
# development — confirm whether the reload is still needed.
from importlib import reload
reload(epo_ops.models)
from epo_ops.models import Epodoc, Docdb

In [None]:
# Wrap a publication number in the Epodoc input model used by the requests below.
doc = Epodoc('EP1000000.A1')

In [None]:
# Display the value that the cells below also use as the output directory name.
doc.as_api_input()

In [None]:
# Request the 'claims' endpoint for the publication referenced by `doc`.
req = client.published_data('publication', doc, endpoint='claims')

In [None]:
# Show the repr of the response currently stored in `req`.
req

In [None]:
# Download the 'fulltext' endpoint for `doc` and save the raw response body as
# fulltext.xml in the per-document folder one level above the working directory.
req = client.published_data('publication', doc, endpoint='fulltext')
target_dir = Path('..') / doc.as_api_input()
target_dir.mkdir(parents=True, exist_ok=True)
with (target_dir / 'fulltext.xml').open('wb') as fp:
    fp.write(req.content)

In [None]:
# Download the 'biblio' endpoint for `doc` and save the raw response body as
# biblio.xml in the per-document folder one level above the working directory.
req = client.published_data('publication', doc, endpoint='biblio')
target_dir = Path('..') / doc.as_api_input()
target_dir.mkdir(parents=True, exist_ok=True)
with (target_dir / 'biblio.xml').open('wb') as fp:
    fp.write(req.content)

In [None]:
# Download the 'description' endpoint for `doc` and save the raw response body
# as <endpoint>.xml in the per-document folder.
endpoint = 'description'
req = client.published_data('publication', doc, endpoint=endpoint)
target_dir = Path('..') / doc.as_api_input()
target_dir.mkdir(parents=True, exist_ok=True)
with (target_dir / f'{endpoint}.xml').open('wb') as fp:
    fp.write(req.content)

In [None]:
# Download the 'claims' endpoint for `doc` and save the raw response body
# as <endpoint>.xml in the per-document folder.
endpoint = 'claims'
req = client.published_data('publication', doc, endpoint=endpoint)
target_dir = Path('..') / doc.as_api_input()
target_dir.mkdir(parents=True, exist_ok=True)
with (target_dir / f'{endpoint}.xml').open('wb') as fp:
    fp.write(req.content)

In [None]:
# Download the 'images' endpoint for `doc` and save the raw response body
# as <endpoint>.xml in the per-document folder.
endpoint = 'images'
req = client.published_data('publication', doc, endpoint=endpoint)
target_dir = Path('..') / doc.as_api_input()
target_dir.mkdir(parents=True, exist_ok=True)
with (target_dir / f'{endpoint}.xml').open('wb') as fp:
    fp.write(req.content)

In [None]:
# Show the raw body of the response currently stored in `req`.
req.content

In [None]:
# Parse the images listing that was saved to the per-document folder.
endpoint = 'images'
tree = ET.parse(Path('..') / doc.as_api_input() / f'{endpoint}.xml')

In [None]:
# Show the repr of the parsed ElementTree.
tree

In [None]:
# Keep a handle on the top-level element of the parsed document.
root = tree.getroot()

In [None]:
# Show the repr of the root element.
root

In [None]:
# List the direct children of `root` tagged document-instance in the
# http://ops.epo.org namespace.
root.findall('{http://ops.epo.org}document-instance')

In [None]:
# Show the repr of the root element again.
root

In [None]:
# Collect the attribute dict of every element in the tree, root included.
[node.attrib for node in root.iter()]

In [6]:
def fetch_data(doc_id, output_root='..'):
    """Download the OPS XML documents and linked images for one publication.

    The fulltext, biblio, description, claims and images endpoints are each
    requested through the module-level ``client`` and saved as
    ``<output_root>/<doc.as_api_input()>/<endpoint>.xml``. The saved images
    listing is then scanned for ``link`` attributes, and every linked image is
    downloaded into the same directory as ``<last link segment>.tiff``.

    Args:
        doc_id: Publication number in the form accepted by ``Epodoc``,
            e.g. ``"ES2853354.A1"``.
        output_root: Directory under which the per-document folder is created.
            Defaults to ``'..'``, the location that was previously hard-coded.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    doc = Epodoc(doc_id)
    # Build the target directory once instead of re-formatting it for every file.
    out_dir = Path(output_root) / doc.as_api_input()
    out_dir.mkdir(parents=True, exist_ok=True)

    # Fetch XML
    for endpoint in ["fulltext", "biblio", "description", "claims", "images"]:
        print("Get", endpoint)
        response = client.published_data('publication', doc, endpoint=endpoint)
        (out_dir / f'{endpoint}.xml').write_bytes(response.content)

    # Fetch images: the listing written by the loop above carries the image
    # locations in `link` attributes.
    listing = ET.parse(out_dir / 'images.xml')
    links = [
        element.attrib['link']
        for element in listing.getroot().iter()
        if 'link' in element.attrib
    ]
    for link in links:
        print("Get", link)
        # NOTE(review): range=1 is passed for every link, so presumably only
        # the first page of a multi-page image is fetched — confirm.
        response = client.image(link, range=1)
        # The file is named after the last path segment of the link.
        # NOTE(review): the .tiff suffix is applied unconditionally; presumably
        # it matches the client's default image format — confirm.
        name = link.split('/')[-1]
        (out_dir / f'{name}.tiff').write_bytes(response.content)

In [7]:
# Search by keyphrase
query = "laser scanning microscope"
num_docs = 5
req = client.published_data_search(query, range_begin=1, range_end=num_docs)

In [8]:
# Print result
element = ET.XML(req.content)
ET.indent(element)
print(ET.tostring(element, encoding='unicode'))

<ns0:world-patent-data xmlns:ns0="http://ops.epo.org" xmlns:ns1="http://www.epo.org/exchange">
  <ns0:biblio-search total-result-count="3095">
    <ns0:query syntax="CQL">(txt = laser and txt = scanning) and txt = microscope</ns0:query>
    <ns0:range begin="1" end="5" />
    <ns0:search-result>
      <ns0:publication-reference system="ops.epo.org" family-id="69180930">
        <ns1:document-id document-id-type="docdb">
          <ns1:country>US</ns1:country>
          <ns1:doc-number>2021311091</ns1:doc-number>
          <ns1:kind>A1</ns1:kind>
        </ns1:document-id>
      </ns0:publication-reference>
      <ns0:publication-reference system="ops.epo.org" family-id="77854503">
        <ns1:document-id document-id-type="docdb">
          <ns1:country>US</ns1:country>
          <ns1:doc-number>2021302465</ns1:doc-number>
          <ns1:kind>A1</ns1:kind>
        </ns1:document-id>
      </ns0:publication-reference>
      <ns0:publication-reference system="ops.epo.org" family-id="7766

In [9]:
# Fetch data from one of the docs
doc_id = "ES2853354.A1"
fetch_data(doc_id)

Get fulltext
Get biblio
Get description
Get claims
Get images
Get published-data/images/ES/2853354/A1/thumbnail
Get published-data/images/ES/2853354/A1/fullimage
