In [25]:
import epo_ops
import os
import json
from pathlib import Path

# Middleware instances passed to epo_ops.Client further down, in this order.
# Per the python-epo-ops-client docs, Dogpile adds response caching and Throttler
# throttles requests (NOTE(review): confirm against the installed version).
middlewares = [epo_ops.middlewares.Dogpile(), epo_ops.middlewares.Throttler()]

In [26]:
# Credentials are read from a JSON file one directory above this notebook (the project
# root). The file must contain two attributes, "key" and "secret", for example:
# { "key": "YOUR_APPLICATION_KEY", "secret": "YOUR_APPLICATION_SECRET"}
api_keys_path = Path('..', 'api_keys.json')

In [27]:
# Parse the credentials file into a dict; the client cell reads its 'key' and 'secret' entries.
with api_keys_path.open('r') as keys_file:
    api_keys = json.load(keys_file)

In [28]:
# Build the OPS client from the loaded credentials and the middleware list defined above.
client = epo_ops.Client(
    key=api_keys['key'],
    secret=api_keys['secret'],
    middlewares=middlewares,
)

In [29]:
from epo_ops.models import Epodoc, Docdb

In [30]:
# Re-execute epo_ops.models and re-bind Epodoc/Docdb to the reloaded classes.
# NOTE(review): presumably a development leftover from editing the module; a fresh
# kernel only needs the plain import in the previous cell.
from importlib import reload
reload(epo_ops.models)
from epo_ops.models import Epodoc, Docdb

In [31]:
# Document reference (epodoc format) used by every request in the rest of the notebook.
doc = Epodoc('EP1000000.A1')

In [32]:
# Show the string form of the reference; the same value is used below as the output folder name.
doc.as_api_input()

'EP1000000.A1'

In [33]:
# Request the 'claims' endpoint of the published-data service for `doc`.
req = client.published_data('publication', doc, endpoint='claims')

In [34]:
# Display the response object to check the HTTP status of the request above.
req

<Response [200]>

In [35]:
# Request the 'fulltext' endpoint for `doc` and write the raw response body to
# ../<document id>/fulltext.xml, creating the folder when it is missing.
req = client.published_data('publication', doc, endpoint='fulltext')
doc_dir = f'../{doc.as_api_input()}'
os.makedirs(doc_dir, exist_ok=True)
with open(f'{doc_dir}/fulltext.xml', 'wb') as xml_out:
    xml_out.write(req.content)

In [36]:
# Request the 'biblio' endpoint for `doc` and write the raw response body to
# ../<document id>/biblio.xml, creating the folder when it is missing.
req = client.published_data('publication', doc, endpoint='biblio')
doc_dir = f'../{doc.as_api_input()}'
os.makedirs(doc_dir, exist_ok=True)
with open(f'{doc_dir}/biblio.xml', 'wb') as xml_out:
    xml_out.write(req.content)

In [37]:
# Request the endpoint named in `endpoint` and store the raw response body as
# ../<document id>/<endpoint>.xml.
endpoint = 'description'
req = client.published_data('publication', doc, endpoint=endpoint)
doc_dir = f'../{doc.as_api_input()}'
os.makedirs(doc_dir, exist_ok=True)
with open(f'{doc_dir}/{endpoint}.xml', 'wb') as xml_out:
    xml_out.write(req.content)

In [38]:
# Same download-and-save step as the previous cell, this time for the 'claims' endpoint.
endpoint = 'claims'
req = client.published_data('publication', doc, endpoint=endpoint)
doc_dir = f'../{doc.as_api_input()}'
os.makedirs(doc_dir, exist_ok=True)
with open(f'{doc_dir}/{endpoint}.xml', 'wb') as xml_out:
    xml_out.write(req.content)

In [39]:
# Request the 'images' endpoint and save the response body as images.xml; a later cell
# parses this file to find the image links.
endpoint = 'images'
req = client.published_data('publication', doc, endpoint=endpoint)
doc_dir = f'../{doc.as_api_input()}'
os.makedirs(doc_dir, exist_ok=True)
with open(f'{doc_dir}/{endpoint}.xml', 'wb') as xml_out:
    xml_out.write(req.content)

In [51]:
# Inspect the raw bytes of the most recent response (the images inquiry XML).
req.content

b'<?xml version="1.0" encoding="UTF-8"?><?xml-stylesheet type="text/xsl" href="/3.2/style/pub-inquiry.xsl"?>\n<ops:world-patent-data xmlns="http://www.epo.org/exchange" xmlns:ops="http://ops.epo.org" xmlns:xlink="http://www.w3.org/1999/xlink">\n    <ops:document-inquiry>\n        <ops:publication-reference>\n            <document-id document-id-type="epodoc">\n                <doc-number>EP1000000</doc-number>\n                <kind>A1</kind>\n            </document-id>\n        </ops:publication-reference>\n        <ops:inquiry-result>\n            <publication-reference>\n                <document-id document-id-type="docdb">\n                    <country>EP</country>\n                    <doc-number>1000000</doc-number>\n                    <kind>A1</kind>\n                </document-id>\n            </publication-reference>\n            <ops:document-instance system="ops.epo.org" number-of-pages="12" desc="FullDocument" link="published-data/images/EP/1000000/A1/fullimage">\n       

In [41]:
import xml.etree.ElementTree as ET

In [60]:
# Download the first-page clipping and save it as ../<document id>/firstpage.tiff.
# `img_req` is not assigned by any visible cell, so this cell raised a NameError on a
# fresh kernel; the image is now fetched here so the cell is self-contained.
img_prefix = "firstpage"
# Link taken from the 'FirstPageClipping' entry of the images inquiry for EP1000000;
# it must be updated when `doc` refers to a different publication.
img_req = client.image(f'published-data/images/EP/1000000/PA/{img_prefix}', range=1)
os.makedirs(f'../{doc.as_api_input()}', exist_ok=True)
with open(f'../{doc.as_api_input()}/{img_prefix}.tiff', 'wb') as fp:
    fp.write(img_req.content)

In [53]:
# Repeat the request for whatever `endpoint` currently holds (set by an earlier cell)
# and overwrite ../<document id>/<endpoint>.xml with the response body.
req = client.published_data('publication', doc, endpoint=endpoint)
doc_dir = f'../{doc.as_api_input()}'
os.makedirs(doc_dir, exist_ok=True)
with open(f'{doc_dir}/{endpoint}.xml', 'wb') as xml_out:
    xml_out.write(req.content)

requests.models.Response

In [42]:
# Parse the images inquiry XML that an earlier cell saved to disk.
endpoint = 'images'
images_xml_path = f'../{doc.as_api_input()}/{endpoint}.xml'
tree = ET.parse(images_xml_path)

In [43]:
# Confirm that parsing produced an ElementTree object.
tree

<xml.etree.ElementTree.ElementTree at 0x7ffd6f112f70>

In [44]:
# Root element of the parsed inquiry; the cells below search and iterate from here.
root = tree.getroot()

In [47]:
# Show the root element; its tag carries the http://ops.epo.org namespace.
root

<Element '{http://ops.epo.org}world-patent-data' at 0x7ffd6f153720>

In [50]:
# Returns an empty list: 'link' is an attribute on some elements (see the attribute
# listing further down), not an element tag, so no child of root matches.
root.findall("link")

[]

In [22]:
# Returns an empty list: a bare tag passed to findall() only matches direct children of
# root, while the document-instance elements are nested deeper in the response.
# Searching all descendants needs './/{http://ops.epo.org}document-instance'.
root.findall('{http://ops.epo.org}document-instance')

[]

In [23]:
# Re-check the root element before walking the whole tree.
root

<Element '{http://ops.epo.org}world-patent-data' at 0x7ffd6f128e00>

In [24]:
# Dump the attribute dict of every element in document order to see where the
# image links are stored.
[element.attrib for element in root.iter()]

[{},
 {},
 {},
 {'document-id-type': 'epodoc'},
 {},
 {},
 {},
 {},
 {'document-id-type': 'docdb'},
 {},
 {},
 {},
 {'system': 'ops.epo.org',
  'number-of-pages': '12',
  'desc': 'FullDocument',
  'link': 'published-data/images/EP/1000000/A1/fullimage'},
 {},
 {},
 {},
 {'name': 'ABSTRACT', 'start-page': '1'},
 {'name': 'BIBLIOGRAPHY', 'start-page': '1'},
 {'name': 'CLAIMS', 'start-page': '3'},
 {'name': 'DESCRIPTION', 'start-page': '2'},
 {'name': 'DRAWINGS', 'start-page': '5'},
 {'name': 'SEARCH_REPORT', 'start-page': '11'},
 {'system': 'ops.epo.org',
  'number-of-pages': '6',
  'desc': 'Drawing',
  'link': 'published-data/images/EP/1000000/A1/thumbnail'},
 {},
 {},
 {},
 {'name': 'DRAWINGS', 'start-page': '1'},
 {'system': 'ops.epo.org',
  'number-of-pages': '1',
  'desc': 'FirstPageClipping',
  'link': 'published-data/images/EP/1000000/PA/firstpage'},
 {},
 {},
 {},
 {},
 {},
 {'name': 'ABSTRACT', 'start-page': '1'},
 {'name': 'BIBLIOGRAPHY', 'start-page': '1'}]

In [64]:
# Extract image paths
paths = [e.attrib['link'] for e in root.iter() if 'link' in e.attrib]
# Make directory if needed
os.makedirs(f'../{doc.as_api_input()}', exist_ok=True)
# Fetch and store images
for p in paths:
    print("Get", p)
    req = client.image(p, range=1)
    name = p.split('/')[-1]
    with open(f'../{doc.as_api_input()}/{name}.tiff', 'wb') as fp:
        fp.write(req.content)

Get published-data/images/EP/1000000/A1/fullimage
Get published-data/images/EP/1000000/A1/thumbnail
Get published-data/images/EP/1000000/PA/firstpage
