# EPAB Library Primer

In [1]:
%load_ext autoreload
%autoreload 2

### Import the library and instantiate an EPAB client

In [2]:
from epo.tipdata.epab import EPABClient
epab = EPABClient()

### Show and get help on the Database schema 

In [7]:
epab.fields()

WidDatabaseFields(header='', input_data={'': [{'name': 'epab_doc_id', 'type': 'STRING', 'mode': 'REQUIRED', 'd…

## Prebuild queries

### Querying the publication values

#### Combined query of multiple publication attributes 

In [25]:
q = epab.query_publication(number="41%", kindcodes="A1,A2", language="FR, DE", date_range="20180101-20230101")

In [30]:
print(q)
q.get_results("publication", limit=5)

8 publications


Unnamed: 0,publication.country,publication.number,publication.kind,publication.date,publication.language
0,EP,4102526,A1,20221214,FR
1,EP,4100650,A1,20221214,FR
2,EP,4101359,A1,20221214,DE
3,EP,4101318,A1,20221214,DE
4,EP,4108786,A1,20221228,DE


#### Querying by specific attributes

##### Kindcode

In [6]:
q = epab.query_publication_kindcodes("A8")
print(q)
q.get_results("publication.kind, publication.number", limit=5)

4264 publications


Unnamed: 0,publication.kind,publication.number
0,A8,3739868
1,A8,3739604
2,A8,1810681
3,A8,4026688
4,A8,4043040


##### Publication language

In [7]:
q = epab.query_publication_languages("FR,DE")
print(q)
q.get_results("publication, ipc", limit=5)

1849987 publications


Unnamed: 0,publication.country,publication.number,publication.kind,publication.date,publication.language,ipc
0,EP,3167122,B8,20221109,FR,"[{'version': '8', 'symbol': 'E01C7/24', 'categ..."
1,EP,4060215,A8,20221109,DE,"[{'version': '8', 'symbol': 'F16L21/00', 'cate..."
2,EP,4007053,A3,20221109,DE,"[{'version': '8', 'symbol': 'H01M50/209', 'cat..."
3,EP,4046925,A3,20221109,DE,"[{'version': '8', 'symbol': 'B65B67/08', 'cate..."
4,EP,4046663,A3,20221109,DE,"[{'version': '8', 'symbol': 'A61L9/20', 'categ..."


##### Publication date (range or single date)

In [8]:
q = epab.query_publication_date("20180103")
print(q)
q.get_results("publication", limit=5)

5838 publications


Unnamed: 0,publication.country,publication.number,publication.kind,publication.date,publication.language
0,EP,2668266,B1,20180103,EN
1,EP,2668269,B1,20180103,EN
2,EP,2763694,B1,20180103,EN
3,EP,2736287,B1,20180103,EN
4,EP,2751193,B1,20180103,EN


##### Publication date can also be searched using truncation

In [9]:
q = epab.query_publication_date("2021%")
print(q)
q.get_results("publication", limit=5)

286062 publications


Unnamed: 0,publication.country,publication.number,publication.kind,publication.date,publication.language
0,EP,3548501,B1,20210217,EN
1,EP,3399767,B1,20210217,EN
2,EP,3399769,B1,20210217,EN
3,EP,3515827,B1,20210217,DE
4,EP,3497048,B1,20210217,DE


### Querying the application values

#### Combined query method

In [32]:
q = epab.query_application(number="167%", date_range="20160401-20160430")
print(q)
q.get_results("application", limit=5)

101 publications


Unnamed: 0,application.number,application.filing_date
0,16776881.1,20160407
1,16717192.5,20160407
2,16717592.6,20160413
3,16717232.9,20160408
4,16787193.8,20160428


#### Querying by specific attributes

##### Number

In [33]:
q = epab.query_application_number("167%")
print(q)
q.get_results("application, inventor, applicant", limit=5)

844 publications


Unnamed: 0,application.number,application.filing_date,inventor,applicant
0,16731678.5,20160405,"[{'name': 'Rus, Adrian Ioan', 'address': 'Stra...","[{'name': 'Rus, Adrian Ioan', 'address': 'Stra..."
1,16759753.3,20160831,"[{'name': 'JANCZURA, Krzysztof', 'address': 'S...","[{'name': 'Pfeifer Holding GmbH & Co. KG', 'ad..."
2,16732276.7,20160623,"[{'name': 'AHMED, Abu, Shohel', 'address': 'Ot...",[{'name': 'Telefonaktiebolaget LM Ericsson (pu...
3,16705480.8,20160216,"[{'name': 'KRECHEL, Andreas', 'address': 'Gart...","[{'name': 'Sensus Spectrum LLC', 'address': '8..."
4,16732670.1,20160629,"[{'name': 'KÖHN, Arnim', 'address': 'Ringstras...",[{'name': 'Bayer CropScience Aktiengesellschaf...


##### Filing date

In [12]:
q = epab.query_application_filing_date("20160202")
print(q)
q.get_results("publication.number, application.number", limit=5)

1009 publications


Unnamed: 0,publication.number,application.number
0,3256827,16702548.5
1,3256401,16704151.6
2,3257069,16702558.4
3,3257182,16704352.0
4,3257208,16708501.8


### Querying the inventor, applicant and representative

#### Querying by specific attributes

##### Inventor name, city, country, address

In [13]:
q = epab.query_inventor_name("%Michael%")
print(q)
q.get_results("publication, inventor", limit=5)

337800 publications


Unnamed: 0,publication.country,publication.number,publication.kind,publication.date,publication.language,inventor
0,EP,701798,A1,19960320,EN,"[{'name': 'Abidin, Michael R.', 'address': '18..."
1,EP,2868320,A1,20150506,EN,"[{'name': 'May, Michael', 'address': 'Industri..."
2,EP,2868321,A1,20150506,EN,"[{'name': 'May, Michael', 'address': 'Industri..."
3,EP,2867255,A1,20150506,EN,"[{'name': 'HEINDL, Dieter', 'address': 'Magnol..."
4,EP,415188,B1,19960320,EN,"[{'name': 'Kaufman, Richard Allen', 'address':..."


In [14]:
q = epab.query_inventor_city("%Boston%")
print(q)
q.get_results("inventor.city", output_type="list", limit=5)

15157 publications


[{'inventor': [{'city': 'Yokohama-shi'},
   {'city': 'Kawasaki-shi'},
   {'city': 'Boston, MA 02114'},
   {'city': 'Kawasaki-shi'}]},
 {'inventor': [{'city': '806, Boston,\nMassachusetts 02116'}]},
 {'inventor': [{'city': 'Boston, MA 02115'}]},
 {'inventor': [{'city': 'Melrose, MA 02176'},
   {'city': 'Andover, MA 01810'},
   {'city': 'Andover, MA 01810'},
   {'city': 'Boston, MA 02118'},
   {'city': 'Andover, MA 01810-3417'},
   {'city': 'Manchester, NH 03104'},
   {'city': 'Newton Highlands, MA 02461-1849'},
   {'city': 'Arlington, MA 02476'},
   {'city': 'Chelmsford, MA 01824'}]},
 {'inventor': [{'city': 'Wellesley, MA 02181'}, {'city': 'Boston, MA 02114'}]}]

In [15]:
q = epab.query_inventor_country("GR, IT")
print(q)
q.get_results("publication, inventor.country", limit=5)

268419 publications


Unnamed: 0,publication.country,publication.number,publication.kind,publication.date,publication.language,inventor
0,EP,4289817,A1,20231213,EN,"[{'country': 'IT'}, {'country': 'IT'}]"
1,EP,4290776,A1,20231213,EN,"[{'country': 'IT'}, {'country': 'IT'}, {'count..."
2,EP,4289769,A1,20231213,EN,[{'country': 'IT'}]
3,EP,4290442,A1,20231213,EN,"[{'country': 'IT'}, {'country': 'IT'}, {'count..."
4,EP,4290007,A1,20231213,EN,"[{'country': 'IT'}, {'country': 'IT'}, {'count..."


##### Applicant name, city, country, address

In [16]:
# methods are substantially the same as for the inventor. E.g. 

q = epab.query_applicant_country("LT")
print(q)
q.get_results("publication, applicant.country", limit=5)

768 publications


Unnamed: 0,publication.country,publication.number,publication.kind,publication.date,publication.language,applicant
0,EP,3893011,B1,20231227,EN,[{'country': 'LT'}]
1,EP,3840192,A1,20210623,EN,"[{'country': 'LT'}, {'country': 'LT'}]"
2,EP,3711122,A1,20200923,EN,[{'country': 'LT'}]
3,EP,2827461,A3,20150624,EN,[{'country': 'LT'}]
4,EP,1837346,A2,20070926,EN,[{'country': 'LT'}]


##### Representative name, city, country, address

In [17]:
# methods are substantially the same as for the inventor and applicant. E.g. 

q = epab.query_representative_name("%Haseltine%")
print(q)
q.get_results("publication.language, representative", limit=5)

16251 publications


Unnamed: 0,publication.language,representative.name,representative.address,representative.city,representative.country
0,EN,Haseltine Lake Kempner LLP,One Portwall Square \nPortwall Lane,Bristol BS1 6BH,GB
1,EN,Haseltine Lake Kempner LLP,One Portwall Square \nPortwall Lane,Bristol BS1 6BH,GB
2,EN,Haseltine Lake Kempner LLP,One Portwall Square \nPortwall Lane,Bristol BS1 6BH,GB
3,EN,Haseltine Lake Kempner LLP,Cheapside House \n138 Cheapside,London EC2V 6BJ,GB
4,EN,Haseltine Lake Kempner LLP,One Portwall Square \nPortwall Lane,Bristol BS1 6BH,GB


### Querying the IPC and CPC classifications

#### Querying by specific attributes

##### IPC

In [34]:
# Let's look for all the publications having one of two possible IPC symbols assigned 

q = epab.query_ipc(symbols="H04W36%, H04L12%")
print(q)
q.get_results("ipc.symbol", output_type="list", limit=5)

1402 publications


[{'ipc': [{'symbol': 'H04L12/00'}, {'symbol': 'H04L29/08'}]},
 {'ipc': [{'symbol': 'H04Q11/04'}, {'symbol': 'H04L12/64'}]},
 {'ipc': [{'symbol': 'H04L12/24'}]},
 {'ipc': [{'symbol': 'H04L12/24'}, {'symbol': 'H04W16/02'}]},
 {'ipc': [{'symbol': 'H04W36/22'}, {'symbol': 'H04W72/12'}]}]

In [35]:
# We can also find all publications having *all* IPC symbols present, using the match_all parameter

q = epab.query_ipc(symbols=["H04W36%", "H04L12%"], match_all=True)
print(q)
q.get_results("ipc.symbol", output_type="list", limit=5)

8 publications


[{'ipc': [{'symbol': 'H04W36/14'},
   {'symbol': 'H04L12/28'},
   {'symbol': 'H04W36/00'}]},
 {'ipc': [{'symbol': 'H04W72/12'},
   {'symbol': 'H04B17/00'},
   {'symbol': 'H04W52/34'},
   {'symbol': 'H04W72/04'},
   {'symbol': 'H04W72/08'},
   {'symbol': 'H04B17/382'},
   {'symbol': 'H04B17/345'},
   {'symbol': 'H04J11/00'},
   {'symbol': 'H04L1/20'},
   {'symbol': 'H04W24/08'},
   {'symbol': 'H04W36/00'},
   {'symbol': 'H04W36/18'},
   {'symbol': 'H04W52/40'},
   {'symbol': 'H04W28/02'},
   {'symbol': 'H04W52/12'},
   {'symbol': 'H04W52/24'},
   {'symbol': 'H04W52/38'},
   {'symbol': 'H04L12/911'},
   {'symbol': 'H04L12/801'},
   {'symbol': 'H04W92/12'},
   {'symbol': 'H04W92/20'},
   {'symbol': 'H04L5/00'}]},
 {'ipc': [{'symbol': 'H04W36/14'},
   {'symbol': 'H04W36/22'},
   {'symbol': 'H04W24/10'},
   {'symbol': 'H04L12/24'}]},
 {'ipc': [{'symbol': 'H04W36/00'}, {'symbol': 'H04L12/28'}]},
 {'ipc': [{'symbol': 'H04L29/06'},
   {'symbol': 'H04L12/14'},
   {'symbol': 'H04L12/54'},
   {'s

In [20]:
# Let's also limit a query to IPC-8 version (also called IPCR) 

q = epab.query_ipc(symbols=["H04W36/00", "H04L12/56"], ipc8_only=True)
print(q)
q.get_results("ipc.symbol, ipc.version", output_type="list", limit=5)

23960 publications


[{'ipc': [{'symbol': 'H04L12/56', 'version': '8'}]},
 {'ipc': [{'symbol': 'H04L29/06', 'version': '8'},
   {'symbol': 'H04L12/56', 'version': '8'}]},
 {'ipc': [{'symbol': 'H04L12/28', 'version': '8'},
   {'symbol': 'H04L12/56', 'version': '8'},
   {'symbol': 'H04Q7/38', 'version': '8'}]},
 {'ipc': [{'symbol': 'H04L12/56', 'version': '8'},
   {'symbol': 'H04Q7/38', 'version': '8'}]},
 {'ipc': [{'symbol': 'H04L12/56', 'version': '8'}]}]

In [21]:
# Let's now query for two symbols which should be both present and assigned as "invention" symbols

q = epab.query_ipc(symbols="H04W36/00, H04W84%", match_all=True, invention=True, ipc8_only=True)
print(q)
q.get_results("ipc.symbol, ipc.category", output_type="list", limit=5)

386 publications


[{'ipc': [{'symbol': 'H04W4/90', 'category': 'Inv'},
   {'symbol': 'H04W72/04', 'category': 'Inv'},
   {'symbol': 'H04W76/23', 'category': 'Inv'},
   {'symbol': 'H04W4/80', 'category': 'Inv'},
   {'symbol': 'H04W76/14', 'category': 'Inv'},
   {'symbol': 'H04W4/08', 'category': 'Inv'},
   {'symbol': 'H04W84/04', 'category': 'Inv'},
   {'symbol': 'H04W72/12', 'category': 'Inv'},
   {'symbol': 'H04W8/00', 'category': 'Inv'},
   {'symbol': 'H04W36/36', 'category': 'Inv'},
   {'symbol': 'H04W36/00', 'category': 'Inv'}]},
 {'ipc': [{'symbol': 'H04L29/12', 'category': 'Inv'},
   {'symbol': 'H04W36/00', 'category': 'Inv'},
   {'symbol': 'H04W48/02', 'category': 'Inv'},
   {'symbol': 'H04W48/14', 'category': 'Inv'},
   {'symbol': 'H04W84/10', 'category': 'Inv'},
   {'symbol': 'H04W84/12', 'category': 'Inv'},
   {'symbol': 'H04W88/08', 'category': 'Inv'}]},
 {'ipc': [{'symbol': 'H04W8/20', 'category': 'Inv'},
   {'symbol': 'H04W36/32', 'category': 'Inv'},
   {'symbol': 'H04W48/20', 'category': '

##### CPC

In [10]:
# CPC works very similarly to IPC. Here a simple example:

q = epab.query_cpc(symbols="H04L69%, H04L12%", match_all=True)
print(q)
q.get_results("cpc.symbol", output_type="list", limit=5)

0 publications


[]

In [11]:
q = epab.query_cpc(symbols="H04W88%, H04W92%", match_all=True, invention=False)
print(q)
q.get_results("cpc.symbol, cpc.category", output_type="list", limit=5)

3 publications


[{'cpc': [{'symbol': 'H04L1/1819', 'category': 'Inv'},
   {'symbol': 'H04L5/0032', 'category': 'Inv'},
   {'symbol': 'H04L5/0037', 'category': 'Add'},
   {'symbol': 'H04L5/0053', 'category': 'Inv'},
   {'symbol': 'H04L5/0055', 'category': 'Add'},
   {'symbol': 'H04L5/0094', 'category': 'Inv'},
   {'symbol': 'H04W92/18', 'category': 'Add'},
   {'symbol': 'H04W72/02', 'category': 'Add'},
   {'symbol': 'H04L1/1887', 'category': 'Inv'},
   {'symbol': 'H04W88/04', 'category': 'Add'}]},
 {'cpc': [{'symbol': 'Y02D30/70', 'category': 'Add'},
   {'symbol': 'H04W40/22', 'category': 'Inv'},
   {'symbol': 'H04W88/04', 'category': 'Add'},
   {'symbol': 'H04L5/0053', 'category': 'Inv'},
   {'symbol': 'H04L5/0048', 'category': 'Inv'},
   {'symbol': 'H04W40/246', 'category': 'Add'},
   {'symbol': 'H04W92/18', 'category': 'Add'},
   {'symbol': 'H04W48/16', 'category': 'Add'}]},
 {'cpc': [{'symbol': 'H04W76/14', 'category': 'Inv'},
   {'symbol': 'H04W88/04', 'category': 'Add'},
   {'symbol': 'H04W76/30'

### Querying the Full-Text

##### Titles

In [24]:
# Searching a set of possible terms (e.g. synonyms)
q = epab.query_title(search_terms="covid, corona virus, coronavirus", language="EN", ignore_case=True)
print(q)
q.get_results("title.en, applicant.name, applicant.country", output_type="list", limit=5)

973 publications


[{'title': {'en': 'Canine corona virus vaccine'},
  'applicant': [{'name': 'Akzo Nobel N.V.', 'country': 'NL'}]},
 {'title': {'en': 'ANTIGENIC PEPTIDES OF SARS CORONAVIRUS AND USES THEREOF'},
  'applicant': [{'name': 'Crucell Holland B.V.', 'country': 'NL'}]},
 {'title': {'en': 'PEPTIDE FOR PREVENTION OR TREATMENT OF COVID-19'},
  'applicant': [{'name': 'APEPTICO Forschung und Entwicklung GmbH',
    'country': 'AT'}]},
 {'title': {'en': 'Pantropic canine coronavirus'},
  'applicant': [{'name': 'Buonavoglia, Canio', 'country': 'IT'}]},
 {'title': {'en': 'A SURROGATE CELL-BASED CORONA VIRUS SPIKE PROTEIN BLOCKING ASSAY'},
  'applicant': [{'name': 'Friedrich-Alexander-Universität Erlangen-Nürnberg',
    'country': 'DE'}]}]

In [25]:
# We can also look for having multiple terms in the same title
q = epab.query_title(search_terms="impfstoff, virus", match_all=True, language="DE", ignore_case=True)
print(q)
q.get_results("title.de, applicant", output_type="list", limit=5)

1789 publications


[{'title': {'de': 'IMPFSTOFF GEGEN RESPIRATORISCHES SYNZYTIALVIRUS'},
  'applicant': [{'name': 'VIB VZW',
    'address': 'Rijvisschestraat 120',
    'city': '9052 Gent',
    'country': 'BE'},
   {'name': 'Universiteit Gent',
    'address': 'Sint-Pietersnieuwstraat 25',
    'city': '9000 Gent',
    'country': 'BE'}]},
 {'title': {'de': 'UNIVERSELLE INFLUENZA-IMPFSTOFFE MIT VIRUSARTIGEN PARTIKELN'},
  'applicant': [{'name': 'Technovax, Inc.',
    'address': '765 Old Saw Mill River Road',
    'city': 'Tarrytown, New York 10591',
    'country': 'US'}]},
 {'title': {'de': 'Kombinationsimpfstoff gegen das Streptococcus pneumoniae und Respiratorische Synzytialvirus (RSV)'},
  'applicant': [{'name': 'GlaxoSmithKline Biologicals s.a.',
    'address': "rue de l'Institut 89",
    'city': '1330 Rixensart',
    'country': 'BE'}]},
 {'title': {'de': 'REKOMBINANTES VACCINIA-VIRUS ALS IMPFSTOFF GEGEN DAS MAREK-VIRUS'},
  'applicant': [{'name': 'VIROGENETICS CORPORATION',
    'address': '465 Jordan Roa

In [26]:
print(q.get_raw_query(""))

WITH
q1 as (
  SELECT DISTINCT epab_doc_id FROM `p-epo-tip-prj-3a1f.p_epo_tip_euwe4_bqd_epab.publications` WHERE LOWER(title.de) like '%impfstoff%' AND LOWER(title.de) like '%virus%'
),

q_main as (
  SELECT epab_doc_id FROM q1
)

SELECT

FROM `p-epo-tip-prj-3a1f.p_epo_tip_euwe4_bqd_epab.publications` WHERE epab_doc_id IN (SELECT DISTINCT epab_doc_id FROM q_main);



##### Abstract, Description, Claims

In [84]:
# The methods for querying these fields are all very similar to the ones for the titles
# Let's see a simple example with the Abstract

q = epab.query_abstract(search_terms="handover, base station", match_all=True, ignore_case=True)
print(q)
q.get_results("abstract", output_type="list", limit=2)

20 publications


[{'abstract': {'language': 'EN',
   'text': '<p id="pa01" num="0001">The present disclosure provides an information sending method, including in response to determining that a second SIM card in a connected state satisfies a handover condition for handing over the second SIM card from a first base station to a second base station, sending context information of a first SIM card to the first base station through the second SIM card, wherein the first SIM card is in an inactive state. According to the technical solutions of the present disclosure, the first SIM card does not need to send the context information to the second base station separately, nor temporarily adjust its state, thus the times of communication between the terminal and the base station are reduced, the communication progress is simplified and the communication resources are saved.<img id="iaf01" file="imgaf001.tif" wi="156" he="51" img-content="drawing" img-format="tif"/></p>'}},
 {'abstract': {'language': 'EN',
   't

In [95]:
q = epab.query_claims(search_terms="handover, base station", match_all=True, ignore_case=True, is_amended=False)
print(q)
q.get_results("claims.amendment_statement", output_type="list", limit=4)

75 publications


[{'claims': [{'amendment_statement': None}]},
 {'claims': [{'amendment_statement': None},
   {'amendment_statement': None},
   {'amendment_statement': None}]},
 {'claims': [{'amendment_statement': None}]},
 {'claims': [{'amendment_statement': None}]}]

### Querying the divisional values

In [8]:
q = epab.query_divisional_application_number("92%")
print(q)
q.get_results("divisional", limit=5)

54 publications


Unnamed: 0,divisional
0,"[{'application_number': '92116023.0', 'publica..."
1,"[{'application_number': '92120830.2', 'publica..."
2,"[{'application_number': '92106857.3', 'publica..."
3,"[{'application_number': '92105276.7', 'publica..."
4,"[{'application_number': '92118285.3', 'publica..."


In [14]:
q = epab.query_divisional_date(date_range="20180101-20230101")
print(q)
q.get_results("divisional", limit=5)

301 publications


Unnamed: 0,divisional
0,"[{'application_number': '18171860.2', 'publica..."
1,"[{'application_number': '19220153.1', 'publica..."
2,"[{'application_number': '20176043.6', 'publica..."
3,"[{'application_number': '20212577.9', 'publica..."
4,"[{'application_number': '20154304.8', 'publica..."


In [16]:
q = epab.query_divisional_publication_number("04%")
print(q)
q.get_results("divisional", limit=5)

78 publications


Unnamed: 0,divisional
0,"[{'application_number': '90202263.1', 'publica..."
1,"[{'application_number': '91111637.4', 'publica..."
2,"[{'application_number': '91202323.1', 'publica..."
3,"[{'application_number': '90125685.9', 'publica..."
4,"[{'application_number': '91201905.6', 'publica..."


### Querying parent values

In [4]:
q = epab.query_parent_application_number("92%")
print(q)
q.get_results("parent", limit=5)

96 publications


Unnamed: 0,parent
0,"[{'application_number': '99201346.6', 'publica..."
1,"[{'application_number': '92905435.1', 'publica..."
2,"[{'application_number': '92903822.2', 'publica..."
3,"[{'application_number': '92906491.3', 'publica..."
4,"[{'application_number': '92302400.4', 'publica..."


In [19]:
q = epab.query_parent_date(date_range="20180101-20230101")
print(q)
q.get_results("parent", limit=5)

255 publications


Unnamed: 0,parent
0,"[{'application_number': '19160310.9', 'publica..."
1,"[{'application_number': '21203998.6', 'publica..."
2,"[{'application_number': '21175877.6', 'publica..."
3,"[{'application_number': '18792345.3', 'publica..."
4,"[{'application_number': '20214161.0', 'publica..."


In [20]:
q = epab.query_parent_publication_number("04%")
print(q)
q.get_results("parent", limit=5)

139 publications


Unnamed: 0,parent
0,"[{'application_number': '90112550.0', 'publica..."
1,"[{'application_number': '90122791.8', 'publica..."
2,"[{'application_number': '90122566.4', 'publica..."
3,"[{'application_number': '92870002.0', 'publica..."
4,"[{'application_number': '91102110.3', 'publica..."


### Querying Search Report

In [76]:
q = epab.query_search_report(ipc=["B08", "B01"], is_no_unity=True, match_all=True)
print(q)
q.get_results("search_report", limit=5)

12 publications


Unnamed: 0,search_report.date,search_report.ipc_field,search_report.is_no_unity,search_report.is_incomplete_search,search_report.is_no_search
0,20220412,[B01J],True,False,False
1,20140113,[B01D],True,False,False
2,20111116,"[A61J, B01F, B65B]",True,False,False
3,20200117,"[G01N, C12M, B01L]",True,False,False
4,20141020,"[B01L, G01N, B04B]",True,False,False


### Querying Designated States

In [83]:
q = epab.query_designated_states(contracting="ES, IT", extension="AL")
print(q)
q.get_results("designated_states", limit=5)

17826 publications


Unnamed: 0,designated_states.contracting,designated_states.extension,designated_states.validation
0,"[AT, BE, BG, CH, CY, CZ, DE, DK, EE, ES, FI, F...","[AL, BA, HR, LV, MK, YU]",[]
1,"[AT, BE, BG, CH, CY, CZ, DE, DK, EE, ES, FI, F...","[AL, LT, LV, MK, RO, SI]",[]
2,"[AT, BE, CH, CY, DE, DK, ES, FI, FR, GB, GR, I...","[AL, LT, LV, MK, RO, SI]",[]
3,"[AT, BE, CH, CY, DE, DK, ES, FI, FR, GB, GR, I...","[AL, LT, LV, MK, RO, SI]",[]
4,"[AT, BE, CH, CY, DE, DK, ES, FI, FR, GB, GR, I...","[AL, LT, LV, MK, RO, SI]",[]


### Lang Queries

In [103]:
q = epab.query_abstract_language("en")
print(q)
q.get_results("abstract", limit=5)

39395 publications


Unnamed: 0,abstract.language,abstract.text
0,EN,"<p id=""pa01"" num=""0001"">There are disclosed a ..."
1,EN,"<p id=""pa01"" num=""0001"">A spark plug having a ..."
2,EN,"<p id=""pa01"" num=""0001"">A single method and ap..."
3,EN,"<p id=""pa01"" num=""0001"">The present invention ..."
4,EN,"<p id=""pa01"" num=""0001"">This invention relates..."


In [104]:
q = epab.query_description_language("eN")
print(q)
q.get_results("abstract", limit=5)

67552 publications


Unnamed: 0,abstract.language,abstract.text,abstract
0,EN,"<p id=""pa01"" num=""0001"">A water-soluble pressu...",
1,EN,"<p id=""pa01"" num=""0001"">A drum head and an att...",
2,EN,"<p id=""pa01"" num=""0001"">A method of combating ...",
3,EN,"<p id=""pa01"" num=""0001"">The invention disclose...",
4,,,


In [107]:
q = epab.query_claims_language("en")
print(q)
q.get_results("abstract", limit=5)

78459 publications


Unnamed: 0,abstract,abstract.language,abstract.text
0,,,
1,,,
2,,,
3,,EN,"<p id=""pa01"" num=""0001"">A printed circuit boar..."
4,,,


## Combining queries together 

Each query basically identifies a set of publications. 
Therefore, queries can be combined together with boolean operators providing: 

  - Intersection (logic *AND*): Using the `&` operator
  - Union (logic *OR*): Using the `|` operator
  - Exclusion (logic *AND NOT*): Using the `-` operator
  - Negation (logic *NOT*): Using the `~` operator

Let's see a few examples of how this allows to perform more complex (and complete) queries over the data set

##### Let's find all publication having specific combination of "Invention" and "Additional" IPC symbols

In [28]:
q1 = epab.query_ipc("H04W88%, H04W92%", match_all=True, invention=False)
q2 = epab.query_ipc("H04W36%", invention=True)
q3 = q1 & q2

print(q3)
q3.get_results("ipc.symbol, ipc.category", output_type="list")

85 publications


[{'ipc': [{'symbol': 'H04W36/00', 'category': 'Inv'},
   {'symbol': 'H04W12/04', 'category': 'Inv'},
   {'symbol': 'H04W36/22', 'category': 'Add'},
   {'symbol': 'H04W84/04', 'category': 'Add'},
   {'symbol': 'H04W88/06', 'category': 'Add'},
   {'symbol': 'H04W92/20', 'category': 'Add'},
   {'symbol': 'H04W12/06', 'category': 'Add'}]},
 {'ipc': [{'symbol': 'H04W36/08', 'category': 'Inv'},
   {'symbol': 'H04W16/16', 'category': 'Add'},
   {'symbol': 'H04W76/04', 'category': 'Add'},
   {'symbol': 'H04W92/20', 'category': 'Add'},
   {'symbol': 'H04W92/04', 'category': 'Add'},
   {'symbol': 'H04W88/08', 'category': 'Add'}]},
 {'ipc': [{'symbol': 'H04W36/28', 'category': 'Inv'},
   {'symbol': 'H04W76/15', 'category': 'Inv'},
   {'symbol': 'H04W88/06', 'category': 'Add'},
   {'symbol': 'H04W88/16', 'category': 'Add'},
   {'symbol': 'H04W92/24', 'category': 'Add'}]},
 {'ipc': [{'symbol': 'H04W36/00', 'category': 'Inv'},
   {'symbol': 'H04W76/00', 'category': 'Inv'},
   {'symbol': 'H04W76/23',

##### Let's find all A2 publications filed as application after 2010

In [29]:
q1 = epab.query_publication_kindcodes("A2")
q2 = epab.query_application_filing_date("20100101-20501231")
q3 = q1 & q2

print(q3)
q3.get_results("publication, application", limit=5)

206393 publications


Unnamed: 0,publication.country,publication.number,publication.kind,publication.date,publication.language,application.number,application.filing_date
0,EP,3607617,A2,20200212,EN,17916455.3,20171222
1,EP,3606560,A2,20200212,EN,18715032.1,20180404
2,EP,3606530,A2,20200212,EN,18718398.3,20180329
3,EP,3606959,A2,20200212,EN,18720501.8,20180405
4,EP,3607151,A2,20200212,EN,18721145.3,20180404


##### Let's now exclude from the result list all Publications in English or German language

In [30]:
q4 = q3 - epab.query_publication_languages("EN,DE")

print(q4)
q4.get_results("publication, application", limit=5)

5009 publications


Unnamed: 0,publication.country,publication.number,publication.kind,publication.date,publication.language,application.number,application.filing_date
0,EP,2236897,A2,20101006,FR,10290122.0,20100310
1,EP,2236898,A2,20101006,FR,10290123.8,20100310
2,EP,2236899,A2,20101006,FR,10305239.5,20100310
3,EP,2236356,A2,20101006,FR,10305117.3,20100204
4,EP,2236216,A2,20101006,FR,10305296.5,20100324


##### Let's now find all publications having either a certain IPC code or a set of terms in the description, and published after 2020

In [31]:
q1 = epab.query_description("handover, channel", match_all=True)
q2 = epab.query_ipc("H04W36%")
q3 = epab.query_publication_date("20200101-20250101")
q4 = (q1 | q2) & q3

print(q4)
q4.get_results("title.en, cpc.symbol", limit=5)

24816 publications


Unnamed: 0,title.en,cpc
0,METHOD FOR CONSTRUCTING LOGGED MEASUREMENT ENT...,[]
1,IDENTIFYING A BEAM FOR ACCESSING A TARGET CELL...,[]
2,ACCESS FOR GROUP CALL SERVICES THROUGH A BROAD...,[]
3,METHOD AND APPARATUS FOR PERFORMING POWER HEAD...,[]
4,ACTIVATING LINKED USER EQUIPMENTS,[]


## Retrieving data from a query

##### Let's start with a generic query 

In [32]:
q = epab.query_publication_date("%1109")
q

20631 publications

#### **get_results** vs. **iterator**: Getting all results in a single call or as batches in iterative calls

In [33]:
# Single call: Simpler, but cannot scale for big number of documents (memory, execution time etc)
all_results = q.get_results("title.en")
print("Total results with a single call to get_results:", len(all_results))

# Iterative data retrieval: Allows streamed data processing. Ideal for big data sets
print("\nResult iterating in batches of 60 documents:")
for batch in q.iterator("title.en", batch_size=60):
    print(f"In this iteration I have {len(batch)} publications")

Total results with a single call to get_results: 20631

Result iterating in batches of 60 documents:
In this iteration I have 60 publications
In this iteration I have 60 publications
In this iteration I have 60 publications
In this iteration I have 60 publications
In this iteration I have 60 publications
In this iteration I have 60 publications
In this iteration I have 60 publications
In this iteration I have 60 publications
In this iteration I have 60 publications
In this iteration I have 60 publications
In this iteration I have 60 publications
In this iteration I have 60 publications
In this iteration I have 60 publications
In this iteration I have 60 publications
In this iteration I have 60 publications
In this iteration I have 60 publications
In this iteration I have 60 publications
In this iteration I have 60 publications
In this iteration I have 60 publications
In this iteration I have 60 publications
In this iteration I have 60 publications
In this iteration I have 60 publicatio

#### Limiting number of results

In [34]:
q.get_results("publication", limit=2)

Unnamed: 0,publication.country,publication.number,publication.kind,publication.date,publication.language
0,EP,2384335,A1,20111109,EN
1,EP,2384595,A1,20111109,EN


### What can I retrieve? 

All database fields listed by the `fields()` method are available for retrieval, and will be returned according to such schema.<br>
In this way, the returned results are always consistent with a predefined model.  

In [35]:
epab.fields()

WidDatabaseFields(header='', input_data={'': [{'name': 'epab_doc_id', 'type': 'STRING', 'mode': 'REQUIRED', 'd…

The content of a record can be returned with all its fields, e.g. 

In [36]:
q.get_results("publication", output_type="list", limit=2)

[{'publication': {'country': 'EP',
   'number': '2384335',
   'kind': 'A1',
   'date': '20111109',
   'language': 'EN'}},
 {'publication': {'country': 'EP',
   'number': '2384595',
   'kind': 'A1',
   'date': '20111109',
   'language': 'EN'}}]

Alternatively, it is possible to explicitly request only some sub-fields. However the structure will be preserved, e.g. 

In [37]:
q.get_results("publication.kind, publication.language", output_type="list", limit=2)

[{'publication': {'kind': 'B1', 'language': 'EN'}},
 {'publication': {'kind': 'B1', 'language': 'EN'}}]

The fields list can be specified as comma-separated string (see above), or in a classical Python list:

In [38]:
q.get_results(["publication.kind", "publication.language"], output_type="list", limit=2)

[{'publication': {'kind': 'B1', 'language': 'EN'}},
 {'publication': {'kind': 'B1', 'language': 'EN'}}]

##### Retrieving supplementary publication data (drawings etc.)

In addition to regular database fields, it is possible to retrieve also supplementary data which is attached to the publication.<br>
A special field in the database called **attachment** provides the complete list of such data, which in short are: 

- The **Drawings** (*DRW*) accompaining the publication
- The images **embedded** (*EMB*) in the description, abstract and claims (often also called "inlines", they typically represent mathematical expressions, chemistry formulas and tables) 
- The fac-simile of the original **Search Report** (*SREP*). Note: This is a set of images. The database also stores text information extracted from the Search Report in the fields *search_report** and *srep_citation*
- The **Sequence Listing** (*SEQL*) files
- The **PDF** (*PDF*) version of the original publication
- Any **additional** (*ADD*) file accompaining the publication
- The source ST.36 **XML file** produced by the EPO and used for extracting all the database fields. It can be useful for extracting some information not (yet) available in the database

For instance: 

In [39]:
result = q.get_results(["publication.language, attachment"], output_type="list", limit=1)
result

[{'publication': {'language': 'EN',
   '_field_2': [{'category': 'EMB',
     'file_name': 'imgb0005.tif',
     'file_format': 'PNG',
     'drawing_id': None,
     'drawing_number': None},
    {'category': 'EMB',
     'file_name': 'imgb0020.tif',
     'file_format': 'PNG',
     'drawing_id': None,
     'drawing_number': None},
    {'category': 'EMB',
     'file_name': 'imgb0006.tif',
     'file_format': 'PNG',
     'drawing_id': None,
     'drawing_number': None},
    {'category': 'EMB',
     'file_name': 'imgb0025.tif',
     'file_format': 'PNG',
     'drawing_id': None,
     'drawing_number': None},
    {'category': 'EMB',
     'file_name': 'imgb0024.tif',
     'file_format': 'PNG',
     'drawing_id': None,
     'drawing_number': None},
    {'category': 'EMB',
     'file_name': 'imgb0027.tif',
     'file_format': 'PNG',
     'drawing_id': None,
     'drawing_number': None},
    {'category': 'EMB',
     'file_name': 'imgb0030.tif',
     'file_format': 'PNG',
     'drawing_id': None,
  

To actually retrieve the content of one or more of the above attachments, the corresponding *category* can be specified in the **attachment** parameter of the get_results method, e.g.
<br>**IMPORTANT** : This operation is very resource-intensive!! It can lead to high execution times and high memory consumption!! 

In [40]:
result = q.get_results(["publication.language"], attachment=["DRW", "PDF"], output_type="list", limit=1)

In this case, the **attachment** field is also automatically retrieved and the byte-content of the corresponding attachment is stored in a sub-field called **content**, i.e.

In [41]:
for entry in result[0]["attachment"]: 
    print(entry["file_name"], entry["category"])
    print("Content: ", entry.get("content", "Not retrieved")[:50])
    print()

imgaf001.tif EMB
Content:  Not retrieved

imgb0001.tif EMB
Content:  Not retrieved

imgf0003.tif DRW
Content:  b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x06\x87\x00\x00\t\x1c\x08\x00\x00\x00\x00\x13\x00|\xc6\x00\x00 \x00IDATx\x01\xec\xc1\tb\xdb\xc8\x96'

imgf0001.tif DRW
Content:  b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x06o\x00\x00\x08A\x08\x00\x00\x00\x00!R!\xde\x00\x00 \x00IDATx\x01\xec\xc1\tB\xe2P\x00'

imgf0004.tif DRW
Content:  b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x089\x00\x00\x08-\x08\x00\x00\x00\x00\xb8\x183\xf6\x00\x00 \x00IDATx\x01\xec\xc1\x01B[;\x02'

imgf0005.tif DRW
Content:  b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x06-\x00\x00\x0cY\x08\x00\x00\x00\x00\xa0\xd7R\xa6\x00\x00 \x00IDATx\x01\xec\xc1\x07b\xe2@\x00'

imgf0002.tif DRW
Content:  b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x08\x93\x00\x00\x08\x87\x08\x00\x00\x00\x00\xbe\xe5\x9bf\x00\x00 \x00IDATx\x01\xec\xc1\x81B\xe2X\x00'

srep0001.tif SREP
Content:  Not retrieved

EP87302951NWA1.pdf PDF


A simple mechanism for displaying e.g. the drawing is offered by ipython native libraries, e.g.

In [42]:
from IPython.display import display, Image

image = Image(result[0]["attachment"][0]["content"], width=300)
display(image)


KeyError: 'content'

In the coming sections we will see additional simplified mechanisms offered by the library to display and store the retrieved content 

### Getting the results in different formats

The result list can be provided in a number of different formats using the `output_type`parameter: 

- **dataframe** (default) : Data are embedded in a **Pandas** DataFrame. Ideal for powerful data-processing
- **list** : Data are embedded in a simple, plain **list of dict**, for ensuring maximum Python compatibility and exchangeability.
- **polars** (*COMING SOON!*): Data are embedded in a **Polars** DataFrame (Polars is an emerging and very powerful alternative to Pandas) 
- **datagrid**: For tabular visualization in an **Excel-like** widget, which allows simple filtering, sorting etc. (currently still limited)
- **widgets**: A list of reusable **ipywidgets** component, for advanced single-record visualization 

#### Getting results in a Pandas DataFrame 

In [None]:
dataframe = q.get_results("publication, ipc") #, output_type="dataframe"
dataframe

#### Getting results in a plain list of dicts

In [None]:
results = q.get_results(["publication", "representative"], output_type="list", limit=3)
results

#### Getting results in a DataGrid structure 

In [None]:
results_as_datagrid = q.get_results(["publication", "representative"], output_type="datagrid")
print(q)
results_as_datagrid

#### Getting results as a list of Widgets

In [None]:
results_as_widgets = q.get_results(["publication", "application", "representative", "description"], output_type="widgets", limit=4)

##### Each element of the resulting list is a widget representing the retrieved elements as a rich ipyvuetify widget 

In [None]:
results_as_widgets[2]

Every sub-widget is also separately accessible and can be conveniently re-used in code/dashboards if needed, e.g. 

In [None]:
results_as_widgets[2].representative

## Retrieving statistics of a query

##### Let's start with a generic query 

In [None]:
q = epab.query_ipc("H04W%")
q

#### Retrieve statistics for a certain field

The method **get_stats** returns a dataframe with the statistics over one or more selected fields

- the **count** column reports the total number of occurrences of the corresponding field(s) value
- the **unique_publications** column reports the number of unique publications having that value
- the last two lines of the table are used to report the remainder and the total

Let's see a couple of simple examples

In [None]:
q.get_stats("inventor.country", limit=5)

In [None]:
q.get_stats("applicant.name", limit=5)

The results can also be sorted by a chosen column

In [None]:
q.get_stats("publication.kind", order_by="publication.kind ASC", limit=5)

Another example with IPC classification

In [None]:
q.get_stats("ipc.symbol", limit=15)

### Statistic over multiple fields

In [None]:
q.get_stats("applicant.country, ipc.symbol ", limit=5)

In [None]:
q.get_stats("applicant.country, inventor.name", limit=5, output_type="list")

## Browsing the query results

The query class offers a browser method for inspecting the complete content of the resulting publication. 
<br>This method should be wisely used as it computationally intensive (it retrieves all fields and images), so it's ideal for visual results inspection and it can be very conveniently embedded in a dashboard (it's a widget!)

In [None]:
# Retrieving 3 publications by their id
q1 = epab.query_epab_doc_id(['EP4040719A120220810', 'EP0634409A119950118', 'EP0035957B119830727'])
browser = q1.browse_results()
browser

In [None]:
browser = q.browse_results()
browser

Being a widget, it is event-reactive and it can be controlled using Python code! 

In [None]:
browser.next()

In [None]:
browser.previous()

In [None]:
browser.selected_pub = 3

In [None]:
browser.load_new_page()

Furthemrore, the currently displayed single widgets as well as the source data can be also accessed

In [None]:
browser.get_current_publication()

In [None]:
browser.publication.application

## Advanced usage of Queries

In [None]:
q_h04w = epab.query("cpc.symbol like 'H04W%'")

When you try to represent this new wuery object you will get the count of queried documents

In [None]:
q_h04w

You can get the count in a more pythonic way with len function:

In [None]:
len(q_h04w)

Or using .length property of the object

In [None]:
q_h04w.length

Now lets find some H04B  documents

In [None]:
q_h04l = epab.query("cpc.symbol like 'H04L%'")
q_h04l

## Sending a raw SQL query

In [None]:
statement = f"select epab_doc_id, application, applicant from `{epab.full_table_name}` where application.filing_date='20221014';"
statement

In [None]:
results = epab.sql_query(statement)
results

In [None]:
for res in epab.sql_query_iterator(statement):
    print(res)

### Some advanced examples using the widgets

In [None]:
# Display the results using the vuetify grid system
import ipyvuetify as vue

table = vue.Container(children=[
    vue.Row(children=[
        vue.Col(children=[res.publication], cols="2"),
        vue.Col(children=[res.representative], cols="10")
    ], dense=True, outlined=True, class_="elevation-1") for res in results_as_widgets
])

table


In [None]:
# ADVANCED EXAMPLE 
# Create a vuetify table with the results  

import traitlets 
import ipywidgets, ipyvuetify

class RichDataTable(ipyvuetify.VuetifyTemplate):
    headers = traitlets.List().tag(sync=True, allow_null=True)
    items = traitlets.List().tag(sync=True, **ipywidgets.widget_serialization)
    template = traitlets.Unicode('''
        <template>
            <v-data-table dense :headers="headers" :items="items">
              <template v-slot:item="row">
                  <tr>
                    <td><jupyter-widget :widget="row.item.p" /></td>
                    <td><jupyter-widget :widget="row.item.r" /></td>
                  </tr>
              </template>
            </v-data-table>
        </template>
        ''').tag(sync=True)

RichDataTable(headers=[
            {'text': 'Publication', 'value': 'p'},
            {'text': 'Representative', 'value': 'r'}],
          items = [ {'p': wid.publication, 'r': wid.representative} for wid in results_as_widgets ]
         )