In [8]:
import epo_ops
import os
import json
from pathlib import Path
import xml.etree.ElementTree as ET

middlewares = [
    #epo_ops.middlewares.Dogpile(), #No dogpile support on windows
    epo_ops.middlewares.Throttler(),
]

In [9]:
# The api keys should be in a json-file in the project root, with two attributes: "key" and "secret" for example:
# { "key": "YOUR_APPLICATION_KEY", "secret": "YOUR_APPLICATION_SECRET"}
api_keys_path = Path('..') / 'api_keys.json'

In [10]:
with open(api_keys_path, 'r') as fp:
    api_keys = json.load(fp)

In [57]:
client = epo_ops.Client(
    key=api_keys['key'],
    secret=api_keys['secret'],
    middlewares=middlewares,
    accept_type='json'
)

In [58]:
from importlib import reload
reload(epo_ops.models)
from epo_ops.models import Epodoc, Docdb

In [59]:
doc = Epodoc('EP3757625', 'A1')

In [51]:
docs = [Epodoc('EP3757625', 'A1'), Epodoc('EP3757258', 'A1')]

In [31]:
doc.as_api_input()

'EP1000000.A1'

In [60]:
req = client.published_data('publication', doc, endpoint='claims')

In [33]:
req

<Response [200]>

In [None]:
data = json.loads(req.content)
data

In [50]:
req = client.published_data('publication', docs, endpoint='fulltext')

In [37]:
req.content

b'<?xml version="1.0" encoding="UTF-8"?><?xml-stylesheet type="text/xsl" href="/3.2/style/pub-inquiry.xsl"?>\n<ops:world-patent-data xmlns="http://www.epo.org/exchange" xmlns:ops="http://ops.epo.org" xmlns:xlink="http://www.w3.org/1999/xlink">\n    <ops:fulltext-inquiry>\n        <ops:publication-reference>\n            <document-id document-id-type="epodoc">\n                <doc-number>EP1000000</doc-number>\n                <kind>A1</kind>\n            </document-id>\n        </ops:publication-reference>\n        <ops:inquiry-result>\n            <publication-reference>\n                <document-id document-id-type="docdb">\n                    <country>EP</country>\n                    <doc-number>1000000</doc-number>\n                    <kind>A1</kind>\n                </document-id>\n            </publication-reference>\n            <ops:fulltext-instance system="ops.epo.org" lang="EN" desc="description">\n                <ops:fulltext-format-options>\n                    <ops:

In [38]:
req = client.published_data('publication', doc, endpoint='fulltext')
os.makedirs(f'../{doc.as_api_input()}', exist_ok=True)
with open(f'../{doc.as_api_input()}/fulltext.xml', 'wb') as fp:
    fp.write(req.content)

In [39]:
req = client.published_data('publication', doc, endpoint='biblio')
os.makedirs(f'../{doc.as_api_input()}', exist_ok=True)
with open(f'../{doc.as_api_input()}/biblio.xml', 'wb') as fp:
    fp.write(req.content)

In [66]:
endpoint = 'description'
req = client.published_data('publication', doc, endpoint=endpoint)
os.makedirs(f'../{doc.as_api_input()}', exist_ok=True)
with open(f'../{doc.as_api_input()}/{endpoint}.json', 'wb') as fp:
    fp.write(req.content)
print(json.loads(req.content))

{'ops:world-patent-data': {'@xmlns': {'ops': 'http://ops.epo.org', '$': 'http://www.epo.org/exchange', 'xlink': 'http://www.w3.org/1999/xlink'}, 'ftxt:fulltext-documents': {'ftxt:fulltext-document': {'@system': 'ops.epo.org', '@fulltext-format': 'text-only', 'bibliographic-data': {'publication-reference': {'@data-format': 'docdb', 'document-id': {'country': {'$': 'EP'}, 'doc-number': {'$': '3757625'}, 'kind': {'$': 'A1'}}}}, 'description': {'@lang': 'EN', 'p': [{'$': 'TECHNICAL FIELD'}, {'$': '[0001]    The present invention relates to an anti-reflection film including a base material layer and a low-refractive-index layer having a refractive index lower than that of the base material layer, and a layered product film having the anti-reflection film.'}, {'$': 'BACKGROUND ART'}, {'$': '[0002]    A layered film having a surface having low reflectivity which can be used as an anti-reflection film is conventionally known (see Patent Document 1). Layered films having low surface reflectivit

In [67]:
json.loads(req.content)

{'ops:world-patent-data': {'@xmlns': {'ops': 'http://ops.epo.org',
   '$': 'http://www.epo.org/exchange',
   'xlink': 'http://www.w3.org/1999/xlink'},
  'ftxt:fulltext-documents': {'ftxt:fulltext-document': {'@system': 'ops.epo.org',
    '@fulltext-format': 'text-only',
    'bibliographic-data': {'publication-reference': {'@data-format': 'docdb',
      'document-id': {'country': {'$': 'EP'},
       'doc-number': {'$': '3757625'},
       'kind': {'$': 'A1'}}}},
    'description': {'@lang': 'EN',
     'p': [{'$': 'TECHNICAL FIELD'},
      {'$': '[0001]    The present invention relates to an anti-reflection film including a base material layer and a low-refractive-index layer having a refractive index lower than that of the base material layer, and a layered product film having the anti-reflection film.'},
      {'$': 'BACKGROUND ART'},
      {'$': '[0002]    A layered film having a surface having low reflectivity which can be used as an anti-reflection film is conventionally known (see P

In [41]:
endpoint = 'claims'
req = client.published_data('publication', doc, endpoint=endpoint)
os.makedirs(f'../{doc.as_api_input()}', exist_ok=True)
with open(f'../{doc.as_api_input()}/{endpoint}.xml', 'wb') as fp:
    fp.write(req.content)

In [68]:
endpoint = 'images'
req = client.published_data('publication', doc, endpoint=endpoint)
os.makedirs(f'../{doc.as_api_input()}', exist_ok=True)
with open(f'../{doc.as_api_input()}/{endpoint}.xml', 'wb') as fp:
    fp.write(req.content)
json.loads(req.content)

{'ops:world-patent-data': {'@xmlns': {'ops': 'http://ops.epo.org',
   '$': 'http://www.epo.org/exchange',
   'xlink': 'http://www.w3.org/1999/xlink'},
  'ops:document-inquiry': {'ops:publication-reference': {'document-id': {'@document-id-type': 'epodoc',
     'doc-number': {'$': 'EP3757625'},
     'kind': {'$': 'A1'}}},
   'ops:inquiry-result': {'publication-reference': {'document-id': {'@document-id-type': 'docdb',
      'country': {'$': 'EP'},
      'doc-number': {'$': '3757625'},
      'kind': {'$': 'A1'}}},
    'ops:document-instance': [{'@system': 'ops.epo.org',
      '@number-of-pages': '35',
      '@desc': 'FullDocument',
      '@link': 'published-data/images/EP/3757625/A1/fullimage',
      'ops:document-format-options': {'ops:document-format': [{'$': 'application/pdf'},
        {'$': 'application/tiff'}]},
      'ops:document-section': [{'@name': 'ABSTRACT', '@start-page': '1'},
       {'@name': 'BIBLIOGRAPHY', '@start-page': '1'},
       {'@name': 'CLAIMS', '@start-page': '24'

In [109]:
image_inquery_result = data['ops:world-patent-data']['ops:document-inquiry']['ops:inquiry-result']['ops:document-instance']
for res in image_inquery_result:
    if res.get('@desc', None) == 'Drawing':
        n_pages = int(res['@number-of-pages'])
        request_url = res['@link']
        for i in range(1, n_pages+1):
            req = client.image(request_url, range=i, document_format='application/tiff')
            name = f'{i:02}'
            output_dir = Path('..') / f'{doc.as_api_input()}' / 'Drawing'
            output_dir.mkdir(exist_ok=True)
            with open(output_dir / f'{name}.tiff', 'wb') as fp:
                fp.write(req.content)
            

In [None]:
def retrieve_images(doc):
    endpoint = 'images'
    req = client.published_data('publication', doc, endpoint=endpoint)
    os.makedirs(f'../{doc.as_api_input()}', exist_ok=True)
    with open(f'../{doc.as_api_input()}/{endpoint}.xml', 'wb') as fp:
        fp.write(req.content)
    json.loads(req.content)

In [69]:
data = json.loads(req.content)

In [79]:
image_inquery_result = data['ops:world-patent-data']['ops:document-inquiry']['ops:inquiry-result']['ops:document-instance']
image_inquery_result

[{'@system': 'ops.epo.org',
  '@number-of-pages': '35',
  '@desc': 'FullDocument',
  '@link': 'published-data/images/EP/3757625/A1/fullimage',
  'ops:document-format-options': {'ops:document-format': [{'$': 'application/pdf'},
    {'$': 'application/tiff'}]},
  'ops:document-section': [{'@name': 'ABSTRACT', '@start-page': '1'},
   {'@name': 'BIBLIOGRAPHY', '@start-page': '1'},
   {'@name': 'CLAIMS', '@start-page': '24'},
   {'@name': 'DESCRIPTION', '@start-page': '2'},
   {'@name': 'DRAWINGS', '@start-page': '27'},
   {'@name': 'SEARCH_REPORT', '@start-page': '33'}]},
 {'@system': 'ops.epo.org',
  '@number-of-pages': '6',
  '@desc': 'Drawing',
  '@link': 'published-data/images/EP/3757625/A1/thumbnail',
  'ops:document-format-options': {'ops:document-format': [{'$': 'application/pdf'},
    {'$': 'application/tiff'}]},
  'ops:document-section': {'@name': 'DRAWINGS', '@start-page': '1'}}]

In [104]:
req = client.image('published-data/images/EP/3757625/A1/thumbnail', range=1, document_format='application/tiff')

In [95]:
name = 'foo2'
with open(f'../{doc.as_api_input()}/{name}.tiff', 'wb') as fp:
    fp.write(req.content)

In [None]:
endpoint = 'images'
tree = ET.parse(f'../{doc.as_api_input()}/{endpoint}.xml')

In [None]:
tree

In [None]:
root = tree.getroot()

In [None]:
root

In [None]:
root.findall('{http://ops.epo.org}document-instance')

In [None]:
root

In [None]:
[e.attrib for e in root.iter()]

In [48]:
def fetch_data(doc_id):
    doc = Epodoc(doc_id)
    os.makedirs(f'../{doc.as_api_input()}', exist_ok=True)
    
    # Fetch XML
    for endpoint in ["fulltext", "biblio", "description", "claims", "images"]:
        print("Get", endpoint)
        req = client.published_data('publication', doc, endpoint=endpoint)
        with open(f'../{doc.as_api_input()}/{endpoint}.xml', 'wb') as fp:
            fp.write(req.content)
    return        
    # Fetch images
    endpoint = "images"
    tree = ET.parse(f'../{doc.as_api_input()}/{endpoint}.xml')
    # Extract image paths
    paths = [e.attrib['link'] for e in tree.getroot().iter() if 'link' in e.attrib]
    # Get and write to disk
    for p in paths:
        print("Get", p)
        req = client.image(p, range=1)
        name = p.split('/')[-1]
        with open(f'../{doc.as_api_input()}/{name}.tiff', 'wb') as fp:
            fp.write(req.content)

In [None]:
# Search by keyphrase
query = "laser scanning microscope"
num_docs = 5
req = client.published_data_search(query, range_begin=1, range_end=num_docs)

In [49]:
fetch_data('EP3757258.A1')

Get fulltext
Get biblio
Get description
Get claims
Get images


In [None]:
# Print result
element = ET.XML(req.content)
ET.indent(element)
print(ET.tostring(element, encoding='unicode'))

<ns0:world-patent-data xmlns:ns0="http://ops.epo.org" xmlns:ns1="http://www.epo.org/exchange">
  <ns0:biblio-search total-result-count="3095">
    <ns0:query syntax="CQL">(txt = laser and txt = scanning) and txt = microscope</ns0:query>
    <ns0:range begin="1" end="5" />
    <ns0:search-result>
      <ns0:publication-reference system="ops.epo.org" family-id="69180930">
        <ns1:document-id document-id-type="docdb">
          <ns1:country>US</ns1:country>
          <ns1:doc-number>2021311091</ns1:doc-number>
          <ns1:kind>A1</ns1:kind>
        </ns1:document-id>
      </ns0:publication-reference>
      <ns0:publication-reference system="ops.epo.org" family-id="77854503">
        <ns1:document-id document-id-type="docdb">
          <ns1:country>US</ns1:country>
          <ns1:doc-number>2021302465</ns1:doc-number>
          <ns1:kind>A1</ns1:kind>
        </ns1:document-id>
      </ns0:publication-reference>
      <ns0:publication-reference system="ops.epo.org" family-id="7766

In [None]:
# Fetch data from one of the docs
doc_id = "ES2853354.A1"
fetch_data(doc_id)

Get fulltext
Get biblio
Get description
Get claims
Get images
Get published-data/images/ES/2853354/A1/thumbnail
Get published-data/images/ES/2853354/A1/fullimage
