Skip to content

Commit

Permalink
fixed some styling issues
Browse files Browse the repository at this point in the history
  • Loading branch information
g-raffy committed Nov 7, 2023
1 parent 81c6e4f commit 6fb4946
Show file tree
Hide file tree
Showing 6 changed files with 128 additions and 139 deletions.
27 changes: 13 additions & 14 deletions src/pymusco.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/usr/bin/env python3
import argparse
import sys
from pathlib import Path
from pymusco import Piece, load_piece_description
from pymusco import load_orchestra
Expand All @@ -10,15 +11,14 @@
from pymusco import stub_to_print
from pymusco import StampDesc

import sys

RED = "\033[1;31m"
BLUE = "\033[1;34m"
CYAN = "\033[1;36m"
GREEN = "\033[0;32m"
RESET = "\033[0;0m"
BOLD = "\033[;1m"
REVERSE = "\033[;7m"
RED = "\033[1;31m" # noqa:E221
BLUE = "\033[1;34m" # noqa:E221
CYAN = "\033[1;36m" # noqa:E221
GREEN = "\033[0;32m" # noqa:E221
RESET = "\033[0;0m" # noqa:E221
BOLD = "\033[;1m" # noqa:E221
REVERSE = "\033[;7m" # noqa:E221

if __name__ == '__main__':
parser = argparse.ArgumentParser(description='python musical score sheet music processor')
Expand Down Expand Up @@ -50,7 +50,7 @@
orchestra = load_orchestra(Path(namespace.orchestra_file_path))
except Exception as e:
print(RED, str(e), RESET)
#sys.exit(1)
# sys.exit(1)
raise

if namespace.command == 'build-stub':
Expand All @@ -70,17 +70,17 @@
page_info_line_y_pos=piece.page_info_line_y_pos)
except Exception as e:
print(RED, "failed to process %s (%s)" % (scan_desc_file_path, str(e)), RESET)
#sys.exit(1)
# sys.exit(1)
raise

if namespace.command == 'build-print':

try:
track_selector = None
if namespace.track_selector == 'ts-auto' :
if namespace.track_selector == 'ts-auto':
musician_count = load_musician_count(Path(namespace.headcount_file_path))
track_selector = AutoTrackSelector(musician_count, orchestra)
if namespace.track_selector == 'ts-single' :
if namespace.track_selector == 'ts-single':
track_selector = SingleTrackSelector(namespace.track_id, orchestra)
assert track_selector is not None
stub_to_print(src_stub_file_path=Path(namespace.stub_file_path),
Expand All @@ -89,6 +89,5 @@
orchestra=orchestra)
except Exception as e:
print(RED, str(e), RESET)
#sys.exit(1)
# sys.exit(1)
raise

3 changes: 1 addition & 2 deletions src/pymusco/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,10 @@
from .main import merge_pdf
from .main import remove_unneeded_pdf_password
from .main import StampDesc
#from .tesseract import extract_pdf_text
# from .tesseract import extract_pdf_text
from .tsauto import load_musician_count
from .tsauto import AutoTrackSelector
from .tssingle import SingleTrackSelector
from .tsmanual import ManualTrackSelector
from .pdf import check_pdf
from .piece import Piece, Catalog, load_piece_description

133 changes: 63 additions & 70 deletions src/pymusco/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,9 @@
from pathlib import Path
from .core import rotate_image

"""
Extract images from pdf: http://stackoverflow.com/questions/2693820/extract-images-from-pdf-without-resampling-in-python
Extract images coded with CCITTFaxDecode in .net: http://stackoverflow.com/questions/2641770/extracting-image-from-pdf-with-ccittfaxdecode-filter
TIFF format and tags: http://www.awaresystems.be/imaging/tiff/faq.html
"""
# Extract images from pdf: http://stackoverflow.com/questions/2693820/extract-images-from-pdf-without-resampling-in-python
# Extract images coded with CCITTFaxDecode in .net: http://stackoverflow.com/questions/2641770/extracting-image-from-pdf-with-ccittfaxdecode-filter
# TIFF format and tags: http://www.awaresystems.be/imaging/tiff/faq.html
# https://stackoverflow.com/questions/2693820/extract-images-from-pdf-without-resampling-in-python/34116472#34116472


Expand Down Expand Up @@ -53,17 +51,16 @@ def extract_pdf_stream_image(pdf_stream, image_dir, image_name):
# File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/PyPDF2/filters.py", line 361, in decodeStreamData
# raise NotImplementedError("unsupported filter %s" % filterType)
# NotImplementedError: unsupported filter /CCITTFaxDecode
"""
The CCITTFaxDecode filter decodes image data that has been encoded using
either Group 3 or Group 4 CCITT facsimile (fax) encoding. CCITT encoding is
designed to achieve efficient compression of monochrome (1 bit per pixel) image
data at relatively low resolutions, and so is useful only for bitmap image data, not
for color images, grayscale images, or general data.
K < 0 --- Pure two-dimensional encoding (Group 4)
K = 0 --- Pure one-dimensional encoding (Group 3, 1-D)
K > 0 --- Mixed one- and two-dimensional encoding (Group 3, 2-D)
"""

# The CCITTFaxDecode filter decodes image data that has been encoded using
# either Group 3 or Group 4 CCITT facsimile (fax) encoding. CCITT encoding is
# designed to achieve efficient compression of monochrome (1 bit per pixel) image
# data at relatively low resolutions, and so is useful only for bitmap image data, not
# for color images, grayscale images, or general data.

# K < 0 --- Pure two-dimensional encoding (Group 4)
# K = 0 --- Pure one-dimensional encoding (Group 3, 1-D)
# K > 0 --- Mixed one- and two-dimensional encoding (Group 3, 2-D)
if pdf_stream['/DecodeParms']['/K'] == -1:
CCITT_group = 4
else:
Expand All @@ -85,38 +82,36 @@ def extract_pdf_stream_image(pdf_stream, image_dir, image_name):
if pdf_stream['/BitsPerComponent'] == 1:
mode = "1"
else:
color_space, indirect_object = pdf_stream['/ColorSpace'] # @UnusedVariable
color_space, indirect_object = pdf_stream['/ColorSpace'] # pylint: disable=unused-variable
print("color_space :", color_space)
# print("indirect_object :", indirect_object)
# :param PyPDF2.generic.IndirectObject indirect_object:

# print(type(indirect_object))
# print(dir(indirect_object))

# ['/ICCBased', IndirectObject(13, 0)]

# indObj, isIndirect := obj.(*PdfIndirectObject); isIndirect {
"""
// TraceToDirectObject traces a PdfObject to a direct object. For example direct objects contained
// in indirect objects (can be double referenced even).
//
// Note: This function does not trace/resolve references. That needs to be done beforehand.
func TraceToDirectObject(obj PdfObject) PdfObject {
iobj, isIndirectObj := obj.(*PdfIndirectObject)
depth := 0
for isIndirectObj == true {
obj = iobj.PdfObject
iobj, isIndirectObj = obj.(*PdfIndirectObject)
depth++
if depth > TraceMaxDepth {
common.Log.Error("Trace depth level beyond 20 - error!")
return nil
}
}
return obj
}
"""

# // TraceToDirectObject traces a PdfObject to a direct object. For example direct objects contained
# // in indirect objects (can be double referenced even).
# //
# // Note: This function does not trace/resolve references. That needs to be done beforehand.
# func TraceToDirectObject(obj PdfObject) PdfObject {
# iobj, isIndirectObj := obj.(*PdfIndirectObject)
# depth := 0
# for isIndirectObj == true {
# obj = iobj.PdfObject
# iobj, isIndirectObj = obj.(*PdfIndirectObject)
# depth++
# if depth > TraceMaxDepth {
# common.Log.Error("Trace depth level beyond 20 - error!")
# return nil
# }
# }
# return obj
# }

if color_space == '/DeviceRGB':
mode = "RGB"
elif color_space == '/ICCBased':
Expand All @@ -130,7 +125,7 @@ def extract_pdf_stream_image(pdf_stream, image_dir, image_name):
expected_packed_image_data_size = bytes_per_line * height # packed image size supposing image is stored as 1 bit per pixel
if len(data) == expected_packed_image_data_size:
one_bit_per_pixel = True

if one_bit_per_pixel:
mode = "1" # (1-bit pixels, black and white, stored with one pixel per byte)
else:
Expand Down Expand Up @@ -158,7 +153,7 @@ def extract_pdf_stream_image(pdf_stream, image_dir, image_name):
def find_pdf_page_raster_image(pdf_page):
"""
finds the first raster image in this page
:param PyPDF2.pdf.PageObject pdf_page:
:return PyPDF2.generic.EncodedStreamObject: a pdf node which is supposed to contain an image
"""
Expand All @@ -178,7 +173,7 @@ def extract_pdf_page_main_image(pdf_page, image_dir, image_name):
:return str: the saved image file path with file extension
"""
pdf_stream = find_pdf_page_raster_image(pdf_page)

if pdf_stream is not None:
# this pdf page contains a raster image; from this we deduce that it has been scanned
try:
Expand All @@ -189,10 +184,10 @@ def extract_pdf_page_main_image(pdf_page, image_dir, image_name):
saved_image_file_path = "%s/%s.png" % (image_dir, image_name)
cv2.imwrite(saved_image_file_path, image)
print("resampled image saved to %s" % saved_image_file_path)

if '/Rotate' in pdf_page.keys() and pdf_page['/Rotate'] != 0:
# some extracted images are not in portrait mode as we would expect, so rotate them

# non rotated page contents
# {
# '/Parent': IndirectObject(3, 0),
Expand All @@ -202,7 +197,7 @@ def extract_pdf_page_main_image(pdf_page, image_dir, image_name):
# '/Rotate': 0,
# '/MediaBox': [0, 0, 595.32, 841.92]
# }

# rotated_page_contents:
# {
# '/Parent': IndirectObject(3, 0),
Expand Down Expand Up @@ -249,7 +244,7 @@ def extract_pdf_page_images(pdf_page, image_folder='/tmp'):
for obj in xObject:
print(type(obj))
print(type(xObject[obj]))

if xObject[obj]['/Subtype'] == '/Image':
saved_image_file_path = extract_pdf_stream_image(pdf_stream=xObject[obj], image_dir=image_folder, image_name=obj[1:])
print('extracted image : %s' % saved_image_file_path)
Expand All @@ -261,7 +256,7 @@ def pdf_page_to_png(pdf_page, resolution=72):
"""
dst_pdf = PyPDF2.PdfWriter()
dst_pdf.add_page(pdf_page)

tmp_dir = Path('/tmp/pymusco')
tmp_dir.mkdir(parents=True, exist_ok=True)

Expand All @@ -273,7 +268,7 @@ def pdf_page_to_png(pdf_page, resolution=72):
subprocess.check_call(['/opt/local/bin/convert', '-density', '%d' % resolution, tmp_pdf_file_path, tmp_png_file_path])
image = cv2.imread(tmp_png_file_path)
print(type(image))

return image


Expand Down Expand Up @@ -320,9 +315,9 @@ def crawl_tree(tree, parent):

def add_stamp(src_pdf_file_path, dst_pdf_file_path, stamp_file_path, scale=1.0, tx=500.0, ty=770.0):
"""
warning! this function has a side effect : it removes the bookmark!
:param str stamp_file_path: location of the pdf file containing the stamp used
"""
pdf_watermark_reader = PyPDF2.PdfReader(open(stamp_file_path, 'rb'))
Expand All @@ -346,12 +341,12 @@ def add_stamp(src_pdf_file_path, dst_pdf_file_path, stamp_file_path, scale=1.0,
# page.mergePage(watermark)
page.mergeScaledTranslatedPage(watermark, scale=scale, tx=tx, ty=ty)
# pdf_writer.addBookmark(title='toto %s' % page_index, pagenum=page_index, parent=None, color=None, bold=False, italic=False, fit='/Fit')

pdf_writer.add_page(page)
# pdf_writer.addBookmark('Hello, World Bookmark', 0, parent=None)
# pdf_writer.addBookmark(title='toto', pagenum=2, parent=None, color=None, bold=False, italic=False, fit='/Fit')
# pdf_writer.setPageMode("/UseOutlines")

with open(tmp_dst_pdf_file_path, 'wb') as dst_pdf_file:
pdf_writer.write(dst_pdf_file)
dst_pdf_file.close()
Expand All @@ -363,7 +358,6 @@ def add_stamp(src_pdf_file_path, dst_pdf_file_path, stamp_file_path, scale=1.0,
def check_pdf(src_pdf_file_path):
"""
the purpose of this function is to detect inconsistencies in the given pdf file
an exception is raised if the pdf is malformed
please note that not all malformations are detected yet
"""
Expand All @@ -384,17 +378,16 @@ def check_pdf(src_pdf_file_path):
# File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/PyPDF2/filters.py", line 361, in decodeStreamData
# raise NotImplementedError("unsupported filter %s" % filterType)
# NotImplementedError: unsupported filter /CCITTFaxDecode
"""
The CCITTFaxDecode filter decodes image data that has been encoded using
either Group 3 or Group 4 CCITT facsimile (fax) encoding. CCITT encoding is
designed to achieve efficient compression of monochrome (1 bit per pixel) image
data at relatively low resolutions, and so is useful only for bitmap image data, not
for color images, grayscale images, or general data.
K < 0 --- Pure two-dimensional encoding (Group 4)
K = 0 --- Pure one-dimensional encoding (Group 3, 1-D)
K > 0 --- Mixed one- and two-dimensional encoding (Group 3, 2-D)
"""

# The CCITTFaxDecode filter decodes image data that has been encoded using
# either Group 3 or Group 4 CCITT facsimile (fax) encoding. CCITT encoding is
# designed to achieve efficient compression of monochrome (1 bit per pixel) image
# data at relatively low resolutions, and so is useful only for bitmap image data, not
# for color images, grayscale images, or general data.

# K < 0 --- Pure two-dimensional encoding (Group 4)
# K = 0 --- Pure one-dimensional encoding (Group 3, 1-D)
# K > 0 --- Mixed one- and two-dimensional encoding (Group 3, 2-D)
if pdf_stream['/DecodeParms']['/K'] == -1:
CCITT_group = 4
else:
Expand All @@ -412,7 +405,7 @@ def check_pdf(src_pdf_file_path):
_, _, tb = sys.exc_info()
# traceback.print_tb(tb) # Fixed format
tb_info = traceback.extract_tb(tb)
filename, line, func, text = tb_info[-1] # @UnusedVariable
filename, line, func, text = tb_info[-1] # pylint: disable=unused-variable
# print('assert error on file {} line {} in statement {}'.format(filename, line, text))
if text == 'assert len(data) % rowlength == 0':
# this seems to be a zealous assert that fails even on legitimate pdf output of pdflatex, so ignore it
Expand All @@ -422,7 +415,7 @@ def check_pdf(src_pdf_file_path):
print('data length : %d' % len(data))
num_pixels = width * height
print(width, height, num_pixels)
color_space, indirect_object = pdf_stream['/ColorSpace'] # @UnusedVariable
color_space, indirect_object = pdf_stream['/ColorSpace'] # pylint: disable=unused-variable
print("color_space :", color_space)
if color_space == '/DeviceRGB':
mode = "RGB"
Expand All @@ -437,15 +430,15 @@ def check_pdf(src_pdf_file_path):
expected_packed_image_data_size = bytes_per_line * height # packed image size supposing image is stored as 1 bit per pixel
if len(data) == expected_packed_image_data_size:
one_bit_per_pixel = True

if one_bit_per_pixel:
mode = "1" # (1-bit pixels, black and white, stored with one pixel per byte)
else:
mode = "P" # (8-bit pixels, mapped to any other mode using a color palette)
else:
mode = "P" # (8-bit pixels, mapped to any other mode using a color palette)
if pdf_stream['/Filter'] == '/FlateDecode':
img = Image.frombytes(mode, (width, height), data) # @UnusedVariable
img = Image.frombytes(mode, (width, height), data) # noqa:F841 pylint: disable=unused-variable
elif pdf_stream['/Filter'] == '/DCTDecode':
pass
elif pdf_stream['/Filter'] == '/JPXDecode':
Expand Down
Loading

0 comments on commit 6fb4946

Please sign in to comment.