Skip to content

Commit

Permalink
[mw] Pad document numbers for granted patents to 8 digits
Browse files Browse the repository at this point in the history
with leading zeros when accessing USPTO for PDF documents
  • Loading branch information
amotl committed Mar 5, 2019
1 parent d87dd46 commit f8c4d50
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 4 deletions.
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ Development
- [mw] Fix tests
- [mw] Resolve issue when European publication server returns
reference to WIPO as HTML response instead of PDF document
- [mw] Pad document numbers for granted patents to 8 digits
with leading zeros when accessing USPTO for PDF documents


2019-02-21 0.163.0
Expand Down
11 changes: 10 additions & 1 deletion patzilla/access/uspto/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,20 @@ def pdf_url(document_number):
Application: US2016101909A1
http://pdfaiw.uspto.gov/fdd/09/2016/19/010/0.pdf
>>> pdf_url('US2016101909A1')
'http://pdfaiw.uspto.gov/fdd/09/2016/19/010/0.pdf'
Grant: US10194689B2
http://pdfpiw.uspto.gov/fdd/89/946/101/0.pdf
>>> pdf_url('US10194689B2')
'http://pdfpiw.uspto.gov/fdd/89/946/101/0.pdf'
>>> pdf_url('US2548918')
'http://pdfpiw.uspto.gov/fdd/18/489/025/0.pdf'
"""

document = normalize_patent(document_number, for_ops=False, as_dict=True)
document = normalize_patent(document_number, for_ops=False, as_dict=True, provider='uspto')
if not document:
return

Expand Down
5 changes: 5 additions & 0 deletions patzilla/util/numbers/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,16 @@

log = logging.getLogger(__name__)


class DocumentIdentifierBunch(SmartBunch):
    """``SmartBunch`` subclass with a string form and a ``serialize()`` helper.

    NOTE(review): presumably holds the decoded parts of a patent document
    identifier -- confirm against the code that constructs it.
    """

    def __str__(self):
        """Return the result of ``self.dump()`` as the string form."""
        dumped = self.dump()
        return dumped

    def serialize(self):
        """Return the result of passing this bunch to ``join_patent``."""
        serialized = join_patent(self)
        return serialized


def join_patent(patent):
if not patent:
return
Expand Down
19 changes: 16 additions & 3 deletions patzilla/util/numbers/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,20 @@
# (c) 2007-2018 Andreas Motl <andreas.motl@ip-tools.org>
import re
import types
import logging
from copy import copy
from patzilla.util.numbers.denormalize import denormalize_patent_wo
from patzilla.util.numbers.helper import pad_left, trim_leading_zeros, fullyear_from_year
from patzilla.util.numbers.common import split_patent_number, join_patent
from patzilla.util.numbers.common import decode_patent_number, split_patent_number, join_patent


logger = logging.getLogger(__name__)


"""
Normalize patent- and document-numbers.
"""


def patch_patent(patent, provider=None):

if not patent:
Expand Down Expand Up @@ -388,6 +392,12 @@ def normalize_patent_wo_pct(patent):


def normalize_patent_us(patent, provider=None):
"""
# TODO:
# >>> DocumentIdentifier('US2548918').normalize(provider=DocumentProvider.USPTO).serialize()
>>> normalize_patent_us(decode_patent_number('US2548918'), provider='uspto').serialize()
'US02548918'
"""

# USPTO number formats

Expand Down Expand Up @@ -467,14 +477,17 @@ def normalize_patent_us(patent, provider=None):
padding = '0' * (11 - length)
patched['number'] = patched['number'][0:4] + padding + patched['number'][4:]


# 2018-04-23: Espacenet changed behavior, handle edge case for
# USD813591S to yield https://worldwide.espacenet.com/publicationDetails/claims?CC=US&NR=D813591S&KC=S
if provider == 'espacenet':
if 'number-type' in patched:
if patched['number-type'] == 'D' and patched['kind'] == 'S':
patched['number'] += patched['kind']

# 2019-03-05: When going to the USPTO itself, pad sequential number to 8 digits.
if provider == 'uspto':
patched['number'] = patched['number'].zfill(8)

return patched


Expand Down

0 comments on commit f8c4d50

Please sign in to comment.