Skip to content

Commit

Permalink
adapted code to pypdf2 3.0 where numPages is no longer a member of PdfReader
Browse files Browse the repository at this point in the history
  • Loading branch information
g-raffy committed Feb 20, 2024
1 parent 67e70e2 commit 01380e7
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 11 deletions.
6 changes: 2 additions & 4 deletions src/pymusco/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,8 +348,6 @@ def scan_to_stub(src_scanned_pdf_file_path: Path, dst_stub_pdf_file_path: Path,
scanned_image_file_paths = []
with open(src_scanned_pdf_file_path, 'rb') as src_pdf_file:
pdf_reader = PyPDF2.PdfReader(src_pdf_file)
# pdfReader.numPages
# 19
page_index = 0
for page in pdf_reader.pages:
print(f'page_index = {page_index}')
Expand Down Expand Up @@ -454,7 +452,7 @@ def split_double_pages(src_scanned_pdf_file_path: Path, dst_scanned_pdf_file_pat
scanned_image_file_paths = []
with open(src_scanned_pdf_file_path, 'rb') as src_pdf_file:
pdf_reader = PyPDF2.PdfReader(src_pdf_file)
for page_index in range(pdf_reader.numPages):
for page_index in range(len(pdf_reader.pages)):
print(f'page_index = {page_index}')
double_page = pdf_reader.pages[page_index]
image_name = f'page{page_index:03d}'
Expand Down Expand Up @@ -503,7 +501,7 @@ def crop_pdf(src_scanned_pdf_file_path: Path, dst_scanned_pdf_file_path: Path, x
scanned_image_file_paths = []
with open(src_scanned_pdf_file_path, 'rb') as src_pdf_file:
pdf_reader = PyPDF2.PdfReader(src_pdf_file)
for page_index in range(pdf_reader.numPages):
for page_index in range(len(pdf_reader.pages)):
print(f'page_index = {page_index}')
page = pdf_reader.pages[page_index]
image_name = f'page{page_index:03d}'
Expand Down
12 changes: 5 additions & 7 deletions src/pymusco/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def find_pdf_page_raster_image(pdf_page: PyPDF2.PageObject) -> PyPDF2.generic.En
:return PyPDF2.generic.EncodedStreamObject: a pdf node which is supposed to contain an image
"""
if '/XObject' in pdf_page['/Resources']:
x_object = pdf_page['/Resources']['/XObject'].getObject()
x_object = pdf_page['/Resources']['/XObject'].get_object()
for obj in x_object:
if x_object[obj]['/Subtype'] == '/Image':
return x_object[obj]
Expand Down Expand Up @@ -240,7 +240,7 @@ def extract_pdf_page_images(pdf_page: PyPDF2.PageObject, image_folder='/tmp'):
:param PyPDF2.pdf.PageObject pdf_page:
:param str image_folder:
"""
x_object = pdf_page['/Resources']['/XObject'].getObject()
x_object = pdf_page['/Resources']['/XObject'].get_object()

for obj in x_object:
print(type(obj))
Expand Down Expand Up @@ -351,9 +351,7 @@ def add_stamp(src_pdf_file_path: Path, dst_pdf_file_path: Path, stamp_file_path:
pdf_writer = PyPDF2.PdfWriter()
with open(src_pdf_file_path, 'rb') as src_pdf_file:
pdf_reader = PyPDF2.PdfReader(src_pdf_file)
# pdfReader.numPages
# 19
for page_index in range(pdf_reader.numPages):
for page_index in range(len(pdf_reader.pages)):
page = pdf_reader.pages[page_index]
# page.mergePage(watermark)
page.mergeScaledTranslatedPage(watermark, scale=scale, tx=tx, ty=ty)
Expand All @@ -380,11 +378,11 @@ def check_pdf(src_pdf_file_path: Path):
"""
with open(src_pdf_file_path, 'rb') as src_pdf_file:
pdf_reader = PyPDF2.PdfReader(src_pdf_file)
for page_index in range(pdf_reader.numPages):
for page_index in range(len(pdf_reader.pages)):
print(f'page_index = {page_index}')
pdf_page = pdf_reader.pages[page_index]
if '/XObject' in pdf_page['/Resources']:
x_object = pdf_page['/Resources']['/XObject'].getObject()
x_object = pdf_page['/Resources']['/XObject'].get_object()
for obj in x_object:
if x_object[obj]['/Subtype'] == '/Image':
pdf_stream = x_object[obj]
Expand Down

0 comments on commit 01380e7

Please sign in to comment.