pymusco is now packaged with setuptools

As a result: - it's easier to install (it can now be installed with pip); - this standard install makes linting easier for projects that depend on it. Also added more type hints to improve code readability.
g-raffy · Feb 16, 2024 · c046a8d · c046a8d
1 parent dc90719
commit c046a8d
Show file tree

Hide file tree

Showing 8 changed files with 115 additions and 46 deletions.
diff --git a/.github/workflows/pythonapp.yml b/.github/workflows/pythonapp.yml
@@ -57,7 +57,7 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install flake8 pytest
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+        pip install .  # installs the package and its install_requires via setup.py
     - name: Lint with flake8
       run: |
         # stop the build if there are Python syntax errors or undefined names

diff --git a/README.md b/README.md
@@ -25,6 +25,11 @@ By making heavy use of digitized sheet music, `pymusco` provides a way to addres
 
 Digitizing original sheet music can be illegal depending on countries and editors. `pymusco` encourages in no way to trespass the law. `pymusco` doesn't have to be used with material subject to copyright.
 
+## how to install
+
+```
+python ./setup.py install
+```
 
 ## how to use
 

diff --git a/setup.py b/setup.py
@@ -0,0 +1,23 @@
+from setuptools import setup
+
+setup(
+    name='pymusco',
+    version=1.0,
+    description='python musical score tool',
+    url='https://github.com/g-raffy/pymusco',
+    author='Guillaume Raffy',
+    author_email='guillaume.raffy@univ-rennes1.fr',
+    license='MIT',
+    packages=['pymusco'],
+    package_dir={
+        '': 'src'
+    },
+    scripts = [
+        'src/apps/pymusco'
+    ],
+    install_requires=[
+        'PyPDF2>= 3.0.0',  # the syntax has changed between PyPDF2 2.x and PyPDF2 3.x
+        'pillow',
+        'opencv-python'
+    ],
+    zip_safe=False)
diff --git a/src/pymusco/__init__.py b/src/pymusco/__init__.py
@@ -1,10 +1,15 @@
 from .core import load_commented_json
+from .core import InstrumentId
+from .core import VoiceId
+from .core import TrackId
+from .core import Clef
 from .core import Instrument
 from .core import Track
 from .core import Orchestra
 from .core import load_orchestra
 from .core import TableOfContents
 from .core import InstrumentNotFound
+from .core import ITrackSelector
 from .main import scan_to_stub
 from .main import stub_to_print
 from .main import split_double_pages

diff --git a/src/pymusco/core.py b/src/pymusco/core.py
@@ -231,29 +231,35 @@ def load_orchestra(orchestra_file_path: Path) -> Orchestra:
     return dict_to_orchestra(load_commented_json(orchestra_file_path))
 
 
-
-
 # class Clef(Enum):
 #     TREBLE = 1
 #     BASS = 2
+InstrumentId = str  # the identifier of an instrument "bb trombone"
+VoiceId = int  # each instrument usually has multiple tracks (often 3), that's what we call voices
+TrackId = str  # the identifier of a track in the form "bb trombone 2 bc"
+Clef = str  # 'tc' for treble clef, 'bc' for bass clef
 
-
-class Track(object):
-    track_id: str  # the identifier of a track in the form "bb trombone 2 bc"
+class Track():
+    track_id: TrackId  # the identifier of a track in the form "bb trombone 2 bc"
     orchestra: Orchestra  # the catalog of available instruments to use (the track is expected to use one of them)
+    instrument: InstrumentId
+    voice: VoiceId
+    clef: Clef
+    is_solo: bool
+    is_disabled: bool  # for tracks that we want to ignore (eg a track that is present in a stub more than once)
 
-    def __init__(self, track_id: str, orchestra):
+    def __init__(self, track_id: TrackId, orchestra: Orchestra):
         """
         :param str track_id: the identifier of a track in the form "bb trombone 2 bc"
         :param Orchestra orchestra:
         """
-        assert isinstance(track_id, str)
+        assert isinstance(track_id, TrackId)
         self.orchestra = orchestra
         self.instrument = None
         self.voice = None
-        self.clef = None  # 'tc' for treble clef, 'bc' for bass clef
+        self.clef = None
         self.is_solo = False
-        self.is_disabled = False  # for tracks that we want to ignore (eg a track that is present in a stub more than once)
+        self.is_disabled = False
         parts = track_id.split(' ')
         instrument_first_part_index = 0
         instrument_last_part_index = len(parts) - 1
@@ -305,7 +311,7 @@ def __eq__(self, other):
         """
         return hash(self.get_id()) == hash(other.get_id())
 
-    def get_id(self):
+    def get_id(self) -> TrackId:
         """
         :return str: the identifier of this track in the form "bb trombone 2 tc"
         """
@@ -389,7 +395,7 @@ def __str__(self):
     def tracks(self) -> List[Track]:
         return self.track_to_page.keys()  # NOTE(review): if track_to_page is a dict this is a keys view, not a List as annotated - confirm
 
-    def add_toc_item(self, track_id: str, page_index: int):
+    def add_toc_item(self, track_id: TrackId, page_index: int):
         """
         :param str track_id:
         :param int page_index:

diff --git a/src/pymusco/pdf.py b/src/pymusco/pdf.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python3.8
+from typing import List, Tuple, Any
 import struct
 import subprocess
 import os
@@ -17,7 +18,7 @@
 # https://stackoverflow.com/questions/2693820/extract-images-from-pdf-without-resampling-in-python/34116472#34116472
 
 
-def tiff_header_for_ccitt(width, height, img_size, ccitt_group=4):
+def tiff_header_for_ccitt(width: int, height: int, img_size: int, ccitt_group: int = 4):
     tiff_header_struct = '<' + '2s' + 'h' + 'l' + 'h' + 'hhll' * 8 + 'h'
     return struct.pack(tiff_header_struct,
                        b'II',  # Byte order indication: Little endian
@@ -36,7 +37,7 @@ def tiff_header_for_ccitt(width, height, img_size, ccitt_group=4):
                        )
 
 
-def extract_pdf_stream_image(pdf_stream, image_dir, image_name):
+def extract_pdf_stream_image(pdf_stream: PyPDF2.generic.EncodedStreamObject, image_dir: Path, image_name: str):
     """
     :param PyPDF2.generic.EncodedStreamObject pdf_stream: a pdf node which is supposed to contain an image
     :param Path image_dir: where to save the image of the given name_object
@@ -150,7 +151,7 @@ def extract_pdf_stream_image(pdf_stream, image_dir, image_name):
     return saved_image_file_path
 
 
-def find_pdf_page_raster_image(pdf_page):
+def find_pdf_page_raster_image(pdf_page: PyPDF2.PageObject) -> PyPDF2.generic.EncodedStreamObject:
     """
     finds the first raster image in this page
 
@@ -165,12 +166,12 @@ def find_pdf_page_raster_image(pdf_page):
     return None
 
 
-def extract_pdf_page_main_image(pdf_page: PyPDF2.PageObject, image_dir: Path, image_name: str):
+def extract_pdf_page_main_image(pdf_page: PyPDF2.PageObject, image_dir: Path, image_name: str) -> Path:
     """
     :param PyPDF2.pdf.PageObject pdf_page:
-    :param str image_dir: where to save the image of the given name_object
+    :param Path image_dir: where to save the image of the given name_object
     :param str image_name: the name of the saved file image, without file extension
-    :return str: the saved image file path with file extension
+    :return Path: the saved image file path with file extension
     """
     pdf_stream = find_pdf_page_raster_image(pdf_page)
 
@@ -219,12 +220,12 @@ def extract_pdf_page_main_image(pdf_page: PyPDF2.PageObject, image_dir: Path, im
     return saved_image_file_path
 
 
-def extract_pdf_page(pdf_page: PyPDF2.PageObject, image_dir: Path, image_name: str):
+def extract_pdf_page(pdf_page: PyPDF2.PageObject, image_dir: Path, image_name: str) -> Path:
     """
     :param PyPDF2.pdf.PageObject pdf_page:
-    :param str image_dir: where to save the image of the given name_object
+    :param Path image_dir: where to save the image of the given name_object
     :param str image_name: the name of the saved file image, without file extension
-    :return str: the saved image file path with file extension
+    :return Path: the saved image file path with file extension
     """
     saved_image_file_path = (image_dir / image_name).with_suffix('.pdf')
     with open(saved_image_file_path, 'wb') as pdf_file:
@@ -234,7 +235,7 @@ def extract_pdf_page(pdf_page: PyPDF2.PageObject, image_dir: Path, image_name: s
     return saved_image_file_path
 
 
-def extract_pdf_page_images(pdf_page, image_folder='/tmp'):
+def extract_pdf_page_images(pdf_page: PyPDF2.PageObject, image_folder='/tmp'):
     """
     :param PyPDF2.pdf.PageObject pdf_page:
     :param str image_folder:
@@ -271,8 +272,24 @@ def pdf_page_to_png(pdf_page: PyPDF2.PageObject, resolution=72) -> cv2.Mat:
 
     return image
 
-
-def add_bookmarks(pdf_in_filename, bookmarks_tree, pdf_out_filename=None):
+# example from https://python.hotexamples.com/site/file?hash=0xfd1eb9884f4c714b3c3d9173d13ed1b2d7175ca4d7ed3b64dcad71bec46326b9
+# bookmarks_tree example
+# [
+#     (u'Foreword', 0, []),
+#     (u'Chapter 1: Introduction', 1,
+#         [
+#             (u'1.1 Python', 1,
+#                 [
+#                     (u'1.1.1 Basic syntax', 1, []),
+#                     (u'1.1.2 Hello world', 2, [])
+#                 ]
+#             ),
+#             (u'1.2 Exercises', 3, [])
+#         ]
+#     ),
+#     (u'Chapter 2: Conclusion', 4, [])
+# ]
+def add_bookmarks(pdf_in_filename: Path, bookmarks_tree: List[Tuple[str, int, List[Any]]], pdf_out_filename: Path = None):
     """Add bookmarks to existing PDF files
     Home:
         https://github.com/RussellLuo/pdfbookmarker
@@ -313,7 +330,7 @@ def crawl_tree(tree, parent):
         pdf_out.write(output_stream)
 
 
-def add_stamp(src_pdf_file_path, dst_pdf_file_path, stamp_file_path, scale=1.0, tx=500.0, ty=770.0):
+def add_stamp(src_pdf_file_path: Path, dst_pdf_file_path: Path, stamp_file_path: Path, scale: float = 1.0, tx: float = 500.0, ty: float = 770.0):
     """
 
     warning! this function has a side effect : it removes the bookmark!
@@ -355,7 +372,7 @@ def add_stamp(src_pdf_file_path, dst_pdf_file_path, stamp_file_path, scale=1.0,
             shutil.copyfile(tmp_dst_pdf_file_path, dst_pdf_file_path)
 
 
-def check_pdf(src_pdf_file_path):
+def check_pdf(src_pdf_file_path: Path):
     """
     the purpose of this function is to detect inconsistencies in the given pdf file
     an exception is raised if the pdf is malformed