cfe-lab · rhliang · Aug 7, 2025 · Aug 2, 2025 · Aug 2, 2025 · Aug 2, 2025
diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
diff --git a/.github/workflows/python.yml → .github/workflows/test.yml b/.github/workflows/python.yml → .github/workflows/test.yml
@@ -22,39 +22,40 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest]
-        python-version: ["3.10", "3.11", "3.12"]
+        python-version: ["3.10", "3.11", "3.12", "3.x"]
 
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
 
       - name: Install dependencies
-        run:
-          - apt update && apt install yamllint
-          - pip install uv
+        run: |
+          apt update && apt install yamllint
+          pip install uv
 
       - name: Check code
-        run:
-          - yamllint .
-          - uv run mypy --check .
-          - uv run ruff check .
+        continue-on-error: true
+        run: |
+          yamllint .
+          uv run mypy --check .
+          uv run ruff check .
 
       - name: Run tests
         run: uv run pytest --junitxml=pytest.xml
 
-      # TODO: Look into github actions, these are out of date
-      # - name: Upload coverage data
-      #   uses: actions/upload-artifact@v3
-      #   with:
-      #     name: coverage-data
-      #     path: coverage.xml
-
-      # - name: Publish Test Report
-      #   uses: mikepenz/action-junit-report@v3
-      #   if: success() || failure()
-      #   with:
-      #     report_paths: unit_test.xml
+# TODO: Revisit the commented-out steps below; the GitHub Actions they use are out of date
+# - name: Upload coverage data
+#   uses: actions/upload-artifact@v3
+#   with:
+#     name: coverage-data
+#     path: coverage.xml
+
+# - name: Publish Test Report
+#   uses: mikepenz/action-junit-report@v3
+#   if: success() || failure()
+#   with:
+#     report_paths: unit_test.xml
diff --git a/.yamllint.yml b/.yamllint.yml
@@ -1,6 +1,7 @@
 ignore:
   - .git/*
   - .venv/*
+  - src/easyhla/default_data/hla_standards.yaml
 
 extends: default
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,5 +1,5 @@
 [build-system]
-requires = ["hatchling"]
+requires = ["hatchling", "uv-dynamic-versioning"]
 build-backend = "hatchling.build"
 
 [project]
@@ -33,6 +33,9 @@ dependencies = [
   "pyyaml>=6.0.2",
   "requests>=2.32.3",
   "typer>=0.15.2",
+  "types-pyyaml>=6.0.12.20250516",
+  "types-requests>=2.32.4.20250611",
+  "uv-dynamic-versioning>=0.8.2",
 ]
 
 [dependency-groups]
@@ -54,9 +57,9 @@ dev = [
 ]
 
 [project.urls]
-Documentation = "https://github.com/unknown/easyhla#readme"
-Issues = "https://github.com/unknown/easyhla/issues"
-Source = "https://github.com/unknown/easyhla"
+Documentation = "https://github.com/cfe-lab/pyeasyhla/blob/main/README.md"
+Issues = "https://github.com/cfe-lab/pyeasyhla/issues"
+Source = "https://github.com/cfe-lab/pyeasyhla"
 
 [project.scripts]
 clinical_hla = "easyhla.clinical_hla:main"
@@ -72,28 +75,48 @@ database = [
 ]
 
 [tool.hatch.version]
-path = "src/easyhla/__about__.py"
+source = "uv-dynamic-versioning"
 
 [tool.hatch.build]
 include = [
-  "src/easyhla/*.py",
-  "src/easyhla/default_data/*.csv",
-  "src/easyhla/default_data/hla_nuc.fasta.mtime",
+  "src/easyhla/__about__.py",
+  "src/easyhla/__init__.py",
+  "src/easyhla/__main__.py",
+  "src/easyhla/easyhla.py",
+  "src/easyhla/interpret_from_json_lib.py",
+  "src/easyhla/interpret_from_json.py",
+  "src/easyhla/models.py",
+  "src/easyhla/py.typed",
+  "src/easyhla/update_alleles.py",
+  "src/easyhla/update_frequency_file_lib.py",
+  "src/easyhla/update_frequency_file.py",
+  "src/easyhla/utils.py",
+  "src/easyhla/default_data/hla_standards.yaml",
+  "src/easyhla/default_data/hla_frequencies.csv",
 ]
 exclude = [
-  "tools",
-  "tests/output",
-  "tests/input",
+  "tests",
 ]
 skip-excluded-dirs = true
 directory = "output"
 
 [tool.hatch.build.targets.wheel]
 packages = ["src/easyhla"]
 
+[tool.hatch.build.hooks.version]
+path = "src/easyhla/_version.py"
+template = '''
+__version__ = "{version}"
+'''
+
 [tool.uv]
 package = true
 
+[tool.uv-dynamic-versioning]
+vcs = "git"
+style = "semver"
+fallback-version = "0.0.0"
+
 [tool.pytest.ini_options]
 pythonpath = "src"
 minversion = "6.0"
@@ -147,3 +170,4 @@ match = "src/**/*.py"
 [tool.mypy]
 plugins = ["numpy.typing.mypy_plugin"]
 ignore_missing_imports = true
+exclude = ["scripts/"]
diff --git a/src/easyhla/__about__.py b/src/easyhla/__about__.py
diff --git a/src/easyhla/bblab.py b/src/easyhla/bblab.py
@@ -5,17 +5,19 @@
 from pathlib import Path
 from typing import Any, Optional
 
-import Bio
 import typer
+from Bio.Seq import MutableSeq, Seq
+from Bio.SeqIO import parse
 
 from .bblab_lib import (
     EXON_AND_OTHER_EXON,
     HLAInterpretationRow,
     HLAMismatchRow,
     pair_exons,
 )
-from .easyhla import DATE_FORMAT, EXON_NAME, EasyHLA
+from .easyhla import DATE_FORMAT, EasyHLA
 from .models import HLAInterpretation, HLASequence
+from .utils import EXON_NAME
 
 logger = logging.Logger(__name__, logging.ERROR)
 
@@ -49,21 +51,21 @@ def log_and_print(
 
 
 def report_unmatched_sequences(
-    unmatched: dict[EXON_NAME, dict[str, Bio.SeqIO.SeqRecord]],
+    unmatched: dict[EXON_NAME, dict[str, Seq | MutableSeq | None]],
     to_stdout: bool = False,
 ) -> None:
     """
     Report exon sequences that did not have a matching exon.
 
     :param unmatched: unmatched exon sequences, grouped by which exon they represent
-    :type unmatched: dict[EXON_NAME, dict[str, Bio.SeqIO.SeqRecord]]
+    :type unmatched: dict[EXON_NAME, dict[str, Seq | MutableSeq | None]]
     :param to_stdout: ..., defaults to None
     :type to_stdout: Optional[bool], optional
     """
     for exon, other_exon in EXON_AND_OTHER_EXON:
-        for entry in unmatched[exon]:
+        for sequence_id in unmatched[exon].keys():
             log_and_print(
-                f"No matching {other_exon} for {entry.description}",
+                f"No matching {other_exon} for {sequence_id}",
                 to_stdout=to_stdout,
             )
 
@@ -79,6 +81,8 @@ def process_from_file_to_files(
 ):
     if threshold and threshold < 0:
         raise RuntimeError("Threshold must be >=0 or None!")
+    elif threshold is None:
+        threshold = 0
 
     rows: list[HLAInterpretationRow] = []
     mismatch_rows: list[HLAMismatchRow] = []
@@ -93,13 +97,13 @@ def process_from_file_to_files(
     )
 
     matched_sequences: list[HLASequence]
-    unmatched: dict[EXON_NAME, dict[str, Bio.SeqIO.SeqRecord]]
+    unmatched: dict[EXON_NAME, dict[str, Seq | MutableSeq | None]]
 
     with open(filename, "r", encoding="utf-8") as f:
         matched_sequences, unmatched = pair_exons(
-            Bio.SeqIO.parse(f, "fasta"),
+            parse(f, "fasta"),
             locus.value,
-            list(hla_alg.standards.values())[0],
+            list(hla_alg.hla_standards[locus.value].values())[0],
         )
 
     for hla_sequence in matched_sequences:
@@ -133,10 +137,10 @@ def process_from_file_to_files(
         row: HLAInterpretationRow = HLAInterpretationRow.summary_row(result)
         rows.append(row)
 
-        mismatch_rows.extend(result.mismatch_rows())
+        mismatch_rows.extend(HLAMismatchRow.mismatch_rows(result))
 
         npats += 1
-        nseqs += hla_sequence.num_seqs
+        nseqs += hla_sequence.num_sequences_used
 
     report_unmatched_sequences(unmatched, to_stdout=to_stdout)
 
@@ -171,11 +175,11 @@ def process_from_file_to_files(
             ),
         )
         mismatch_csv.writeheader()
-        mismatch_csv.writerows([dict[row] for row in mismatch_rows])
+        mismatch_csv.writerows([dict(row) for row in mismatch_rows])
 
     log_and_print(
         f"{npats} patients, {nseqs} sequences processed.",
-        log_level=logger.INFO,
+        log_level=logging.INFO,
         to_stdout=to_stdout,
     )
 

diff --git a/src/easyhla/bblab_lib.py b/src/easyhla/bblab_lib.py
@@ -3,7 +3,7 @@
 from typing import TypedDict
 
 import numpy as np
-from Bio.Seq import Seq
+from Bio.Seq import MutableSeq, Seq
 from Bio.SeqIO import SeqRecord
 from pydantic import BaseModel
 
@@ -36,7 +36,7 @@
 
 def pair_exons_helper(
     sequence_record: SeqRecord,
-    unmatched: dict[EXON_NAME, dict[str, Seq]],
+    unmatched: dict[EXON_NAME, dict[str, Seq | MutableSeq | None]],
 ) -> tuple[str, bool, bool, str, str]:
     """
     Helper that attempts to match the given sequence with a "partner" exon.
@@ -55,7 +55,7 @@ def pair_exons_helper(
     - exon3 sequence
     """
     # The `id`` field is expected to hold the sample name.
-    samp: str = sequence_record.id
+    samp: str = sequence_record.id or ""
     is_exon: bool = False
     matched: bool = False
     exon2: str = ""
@@ -98,7 +98,7 @@ def pair_exons(
     sequence_records: Iterable[SeqRecord],
     locus: HLA_LOCUS,
     example_standard: HLAStandard,
-) -> tuple[list[HLASequence], dict[EXON_NAME, dict[str, Seq]]]:
+) -> tuple[list[HLASequence], dict[EXON_NAME, dict[str, Seq | MutableSeq | None]]]:
     """
     Pair exons in the given input sequences.
 
@@ -109,7 +109,7 @@ def pair_exons(
     sequences and attempt to match them up.
     """
     matched_sequences: list[HLASequence] = []
-    unmatched: dict[EXON_NAME, dict[str, Seq]] = {
+    unmatched: dict[EXON_NAME, dict[str, Seq | MutableSeq | None]] = {
         "exon2": {},
         "exon3": {},
     }
@@ -118,7 +118,7 @@ def pair_exons(
         # Skip over any sequences that aren't the right length or contain
         # bad bases.
         try:
-            check_length(locus, str(sr.seq), sr.id)
+            check_length(locus, str(sr.seq), sr.id or "")
         except BadLengthException:
             continue
 
@@ -147,21 +147,21 @@ def pair_exons(
             exon3_bin = pad_short(example_standard.sequence, nuc2bin(exon3), "exon3")
             matched_sequences.append(
                 HLASequence(
-                    two=(int(x) for x in exon2_bin),
+                    two=tuple(int(x) for x in exon2_bin),
                     intron=(),
-                    three=(int(x) for x in exon3_bin),
+                    three=tuple(int(x) for x in exon3_bin),
                     name=identifier,
                     locus=locus,
                     num_sequences_used=2,
                 )
             )
         else:
-            seq_numpy: np.array = pad_short(
+            seq_numpy: np.ndarray = pad_short(
                 example_standard.sequence,
                 nuc2bin(sr.seq),  # type: ignore
                 None,
             )
-            seq: tuple[int] = tuple(int(x) for x in seq_numpy)
+            seq: tuple[int, ...] = tuple(int(x) for x in seq_numpy)
             matched_sequences.append(
                 HLASequence(
                     two=seq[:EXON2_LENGTH],