Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 0 additions & 23 deletions .devcontainer/Dockerfile

This file was deleted.

43 changes: 22 additions & 21 deletions .github/workflows/python.yml → .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,39 +22,40 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest]
python-version: ["3.10", "3.11", "3.12"]
python-version: ["3.10", "3.11", "3.12", "3.x"]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}

- name: Install dependencies
run:
- apt update && apt install yamllint
- pip install uv
run: |
apt update && apt install yamllint
pip install uv

- name: Check code
run:
- yamllint .
- uv run mypy --check .
- uv run ruff check .
continue-on-error: true
run: |
yamllint .
uv run mypy --check .
uv run ruff check .

- name: Run tests
run: uv run pytest --junitxml=pytest.xml

# TODO: Look into github actions, these are out of date
# - name: Upload coverage data
# uses: actions/upload-artifact@v3
# with:
# name: coverage-data
# path: coverage.xml

# - name: Publish Test Report
# uses: mikepenz/action-junit-report@v3
# if: success() || failure()
# with:
# report_paths: unit_test.xml
# TODO: Look into github actions, these are out of date
# - name: Upload coverage data
# uses: actions/upload-artifact@v3
# with:
# name: coverage-data
# path: coverage.xml

# - name: Publish Test Report
# uses: mikepenz/action-junit-report@v3
# if: success() || failure()
# with:
# report_paths: unit_test.xml
1 change: 1 addition & 0 deletions .yamllint.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
ignore:
- .git/*
- .venv/*
- src/easyhla/default_data/hla_standards.yaml

extends: default

Expand Down
46 changes: 35 additions & 11 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[build-system]
requires = ["hatchling"]
requires = ["hatchling", "uv-dynamic-versioning"]
build-backend = "hatchling.build"

[project]
Expand Down Expand Up @@ -33,6 +33,9 @@ dependencies = [
"pyyaml>=6.0.2",
"requests>=2.32.3",
"typer>=0.15.2",
"types-pyyaml>=6.0.12.20250516",
"types-requests>=2.32.4.20250611",
"uv-dynamic-versioning>=0.8.2",
]

[dependency-groups]
Expand All @@ -54,9 +57,9 @@ dev = [
]

[project.urls]
Documentation = "https://github.com/unknown/easyhla#readme"
Issues = "https://github.com/unknown/easyhla/issues"
Source = "https://github.com/unknown/easyhla"
Documentation = "https://github.com/cfe-lab/pyeasyhla/blob/main/README.md"
Issues = "https://github.com/cfe-lab/pyeasyhla/issues"
Source = "https://github.com/cfe-lab/pyeasyhla"

[project.scripts]
clinical_hla = "easyhla.clinical_hla:main"
Expand All @@ -72,28 +75,48 @@ database = [
]

[tool.hatch.version]
path = "src/easyhla/__about__.py"
source = "uv-dynamic-versioning"

[tool.hatch.build]
include = [
"src/easyhla/*.py",
"src/easyhla/default_data/*.csv",
"src/easyhla/default_data/hla_nuc.fasta.mtime",
"src/easyhla/__about__.py",
"src/easyhla/__init__.py",
"src/easyhla/__main__.py",
"src/easyhla/easyhla.py",
"src/easyhla/interpret_from_json_lib.py",
"src/easyhla/interpret_from_json.py",
"src/easyhla/models.py",
"src/easyhla/py.typed",
"src/easyhla/update_alleles.py",
"src/easyhla/update_frequency_file_lib.py",
"src/easyhla/update_frequency_file.py",
"src/easyhla/utils.py",
"src/easyhla/default_data/hla_standards.yaml",
"src/easyhla/default_data/hla_frequencies.csv",
]
exclude = [
"tools",
"tests/output",
"tests/input",
"tests",
]
skip-excluded-dirs = true
directory = "output"

[tool.hatch.build.targets.wheel]
packages = ["src/easyhla"]

[tool.hatch.build.hooks.version]
path = "src/easyhla/_version.py"
template = '''
__version__ = "{version}"
'''

[tool.uv]
package = true

[tool.uv-dynamic-versioning]
vcs = "git"
style = "semver"
fallback-version = "0.0.0"

[tool.pytest.ini_options]
pythonpath = "src"
minversion = "6.0"
Expand Down Expand Up @@ -147,3 +170,4 @@ match = "src/**/*.py"
[tool.mypy]
plugins = ["numpy.typing.mypy_plugin"]
ignore_missing_imports = true
exclude = ["scripts/"]
1 change: 0 additions & 1 deletion src/easyhla/__about__.py

This file was deleted.

30 changes: 17 additions & 13 deletions src/easyhla/bblab.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,19 @@
from pathlib import Path
from typing import Any, Optional

import Bio
import typer
from Bio.Seq import MutableSeq, Seq
from Bio.SeqIO import parse

from .bblab_lib import (
EXON_AND_OTHER_EXON,
HLAInterpretationRow,
HLAMismatchRow,
pair_exons,
)
from .easyhla import DATE_FORMAT, EXON_NAME, EasyHLA
from .easyhla import DATE_FORMAT, EasyHLA
from .models import HLAInterpretation, HLASequence
from .utils import EXON_NAME

logger = logging.Logger(__name__, logging.ERROR)

Expand Down Expand Up @@ -49,21 +51,21 @@ def log_and_print(


def report_unmatched_sequences(
unmatched: dict[EXON_NAME, dict[str, Bio.SeqIO.SeqRecord]],
unmatched: dict[EXON_NAME, dict[str, Seq | MutableSeq | None]],
to_stdout: bool = False,
) -> None:
"""
Report exon sequences that did not have a matching exon.

:param unmatched: unmatched exon sequences, grouped by which exon they represent
:type unmatched: dict[EXON_NAME, dict[str, Bio.SeqIO.SeqRecord]]
:type unmatched: dict[EXON_NAME, dict[str, Seq]]
:param to_stdout: forwarded to log_and_print for each message, defaults to False
:type to_stdout: bool, optional
"""
for exon, other_exon in EXON_AND_OTHER_EXON:
for entry in unmatched[exon]:
for sequence_id in unmatched[exon].keys():
log_and_print(
f"No matching {other_exon} for {entry.description}",
f"No matching {other_exon} for {sequence_id}",
to_stdout=to_stdout,
)

Expand All @@ -79,6 +81,8 @@ def process_from_file_to_files(
):
if threshold and threshold < 0:
raise RuntimeError("Threshold must be >=0 or None!")
elif threshold is None:
threshold = 0

rows: list[HLAInterpretationRow] = []
mismatch_rows: list[HLAMismatchRow] = []
Expand All @@ -93,13 +97,13 @@ def process_from_file_to_files(
)

matched_sequences: list[HLASequence]
unmatched: dict[EXON_NAME, dict[str, Bio.SeqIO.SeqRecord]]
unmatched: dict[EXON_NAME, dict[str, Seq | MutableSeq | None]]

with open(filename, "r", encoding="utf-8") as f:
matched_sequences, unmatched = pair_exons(
Bio.SeqIO.parse(f, "fasta"),
parse(f, "fasta"),
locus.value,
list(hla_alg.standards.values())[0],
list(hla_alg.hla_standards[locus.value].values())[0],
)

for hla_sequence in matched_sequences:
Expand Down Expand Up @@ -133,10 +137,10 @@ def process_from_file_to_files(
row: HLAInterpretationRow = HLAInterpretationRow.summary_row(result)
rows.append(row)

mismatch_rows.extend(result.mismatch_rows())
mismatch_rows.extend(HLAMismatchRow.mismatch_rows(result))

npats += 1
nseqs += hla_sequence.num_seqs
nseqs += hla_sequence.num_sequences_used

report_unmatched_sequences(unmatched, to_stdout=to_stdout)

Expand Down Expand Up @@ -171,11 +175,11 @@ def process_from_file_to_files(
),
)
mismatch_csv.writeheader()
mismatch_csv.writerows([dict[row] for row in mismatch_rows])
mismatch_csv.writerows([dict(row) for row in mismatch_rows])

log_and_print(
f"{npats} patients, {nseqs} sequences processed.",
log_level=logger.INFO,
log_level=logging.INFO,
to_stdout=to_stdout,
)

Expand Down
20 changes: 10 additions & 10 deletions src/easyhla/bblab_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import TypedDict

import numpy as np
from Bio.Seq import Seq
from Bio.Seq import MutableSeq, Seq
from Bio.SeqIO import SeqRecord
from pydantic import BaseModel

Expand Down Expand Up @@ -36,7 +36,7 @@

def pair_exons_helper(
sequence_record: SeqRecord,
unmatched: dict[EXON_NAME, dict[str, Seq]],
unmatched: dict[EXON_NAME, dict[str, Seq | MutableSeq | None]],
) -> tuple[str, bool, bool, str, str]:
"""
Helper that attempts to match the given sequence with a "partner" exon.
Expand All @@ -55,7 +55,7 @@ def pair_exons_helper(
- exon3 sequence
"""
# The `id` field is expected to hold the sample name.
samp: str = sequence_record.id
samp: str = sequence_record.id or ""
is_exon: bool = False
matched: bool = False
exon2: str = ""
Expand Down Expand Up @@ -98,7 +98,7 @@ def pair_exons(
sequence_records: Iterable[SeqRecord],
locus: HLA_LOCUS,
example_standard: HLAStandard,
) -> tuple[list[HLASequence], dict[EXON_NAME, dict[str, Seq]]]:
) -> tuple[list[HLASequence], dict[EXON_NAME, dict[str, Seq | MutableSeq | None]]]:
"""
Pair exons in the given input sequences.

Expand All @@ -109,7 +109,7 @@ def pair_exons(
sequences and attempt to match them up.
"""
matched_sequences: list[HLASequence] = []
unmatched: dict[EXON_NAME, dict[str, Seq]] = {
unmatched: dict[EXON_NAME, dict[str, Seq | MutableSeq | None]] = {
"exon2": {},
"exon3": {},
}
Expand All @@ -118,7 +118,7 @@ def pair_exons(
# Skip over any sequences that aren't the right length or contain
# bad bases.
try:
check_length(locus, str(sr.seq), sr.id)
check_length(locus, str(sr.seq), sr.id or "")
except BadLengthException:
continue

Expand Down Expand Up @@ -147,21 +147,21 @@ def pair_exons(
exon3_bin = pad_short(example_standard.sequence, nuc2bin(exon3), "exon3")
matched_sequences.append(
HLASequence(
two=(int(x) for x in exon2_bin),
two=tuple(int(x) for x in exon2_bin),
intron=(),
three=(int(x) for x in exon3_bin),
three=tuple(int(x) for x in exon3_bin),
name=identifier,
locus=locus,
num_sequences_used=2,
)
)
else:
seq_numpy: np.array = pad_short(
seq_numpy: np.ndarray = pad_short(
example_standard.sequence,
nuc2bin(sr.seq), # type: ignore
None,
)
seq: tuple[int] = tuple(int(x) for x in seq_numpy)
seq: tuple[int, ...] = tuple(int(x) for x in seq_numpy)
matched_sequences.append(
HLASequence(
two=seq[:EXON2_LENGTH],
Expand Down
Loading