diff --git a/.github/workflows/build_and_publish_gem.yml b/.github/workflows/build_and_publish_gem.yml new file mode 100644 index 0000000..77a3f0e --- /dev/null +++ b/.github/workflows/build_and_publish_gem.yml @@ -0,0 +1,41 @@ +name: Build and publish the Ruby package + +on: + release: + types: [published] + +jobs: + build_gem: + runs-on: ubuntu-latest + + env: + HLA_ALGORITHM_VERSION: ${{ github.ref_name }} + BUILD_PATH: ${{ github.workspace }}/ruby + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + steps: + - name: Install Ruby + run: | + sudo apt update + sudo apt install -y ruby + + - name: Checkout code from repo + uses: actions/checkout@v4 + + - name: Build the Ruby package + run: | + cd $BUILD_PATH + gem build ${BUILD_PATH}/hla_algorithm.gemspec + + - name: Publish gem to GitHub Packages + run: | + mkdir -p $HOME/.gem + touch $HOME/.gem/credentials + chmod 0600 $HOME/.gem/credentials + printf -- "---\n:github: Bearer ${GH_TOKEN}\n" > $HOME/.gem/credentials + gem push --KEY github --host https://rubygems.pkg.github.com/${OWNER} ${BUILD_PATH}/*.gem + env: + OWNER: ${{ github.repository_owner }} + + - name: Add gem as a release asset + run: gh release upload $HLA_ALGORITHM_VERSION ${BUILD_PATH}/*.gem diff --git a/pyproject.toml b/pyproject.toml index e88e1c0..28ee27e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,7 +102,7 @@ package = true [tool.uv-dynamic-versioning] vcs = "git" -style = "semver" +style = "pep440" fallback-version = "0.0.0" [tool.pytest.ini_options] diff --git a/ruby/lib/hla_algorithm.rb b/ruby/lib/hla_algorithm.rb index 02cd5de..9ccf95c 100644 --- a/ruby/lib/hla_algorithm.rb +++ b/ruby/lib/hla_algorithm.rb @@ -5,7 +5,7 @@ HLA_INTERPRET_FROM_JSON = ENV['HLA_INTERPRET_FROM_JSON'] if HLA_INTERPRET_FROM_JSON.nil? - raise "HLA_INTERPRET_FROM_JSON must be set" + raise 'HLA_INTERPRET_FROM_JSON must be set' end @@ -26,18 +26,21 @@ class HLAResult ) def initialize(raw_result) - @seqs = raw_result["seqs"] - @alleles_all = raw_result["alleles_all"] - @alleles_clean = raw_result["alleles_clean"] - @alleles_for_mismatches = raw_result["alleles_for_mismatches"] - @mismatches = raw_result["mismatches"] - @ambiguous = raw_result["ambiguous"] - @homozygous = raw_result["homozygous"] - @locus = raw_result["locus"] - @alg_version = raw_result["alg_version"] - @b5701 = raw_result["b5701"] - @dist_b5701 = raw_result["dist_b5701"] - @errors = raw_result["errors"] + @seqs = raw_result['seqs'] + @alleles_all = raw_result['alleles_all'] + @alleles_clean = raw_result['alleles_clean'] + @alleles_for_mismatches = raw_result['alleles_for_mismatches'] + @mismatches = raw_result['mismatches'] + @ambiguous = raw_result['ambiguous'] + @homozygous = raw_result['homozygous'] + @locus = raw_result['locus'] + @alg_version = raw_result['alg_version'] + @alleles_version = raw_result['alleles_version'] + @alleles_last_updated = raw_result['alleles_last_updated'] + @b5701 = raw_result['b5701'] + @dist_b5701 = raw_result['dist_b5701'] + @errors = raw_result['errors'] + @all_mismatches = raw_result['all_mismatches'] end end @@ -51,20 +54,21 @@ def initialize( @hla_freq_path = hla_freq_path end - def analyze(seqs, locus='B') + def analyze(seqs, locus='B', threshold=nil) hla_input = { - "seq1" => seqs[0], - "seq2" => seqs[1], - "locus" => locus, - "hla_std_path" => nil, - "hla_freq_path" => nil + 'seq1' => seqs[0], + 'seq2' => seqs[1], + 'locus' => locus, + 'threshold' => threshold, + 'hla_std_path' => nil, + 'hla_freq_path' => nil } if (!@hla_std_path.nil?) - hla_input["hla_std_path"] = File.expand_path(@hla_std_path) + hla_input['hla_std_path'] = File.expand_path(@hla_std_path) end if (!@hla_freq_path.nil?) - hla_input["hla_freq_path"] = File.expand_path(@hla_freq_path) + hla_input['hla_freq_path'] = File.expand_path(@hla_freq_path) end python_stdout, python_stderr, wait_thread = Open3.capture3( @@ -73,7 +77,7 @@ def analyze(seqs, locus='B') ) if !wait_thread.success? - error_msg = "HLA algorithm failed with exit code "\ + error_msg = 'HLA algorithm failed with exit code '\ "#{wait_thread.value}. Error output:\n"\ "#{python_stderr}" raise error_msg diff --git a/src/hla_algorithm/hla_algorithm.py b/src/hla_algorithm/hla_algorithm.py index 433673c..fc35cc6 100644 --- a/src/hla_algorithm/hla_algorithm.py +++ b/src/hla_algorithm/hla_algorithm.py @@ -134,7 +134,7 @@ def load_default_hla_standards() -> LoadedStandards: :return: List of known HLA standards :rtype: list[HLAStandard] """ - standards_filename: str = os.path.join( + standards_filename: str = HLAAlgorithm._path_join_shim( os.path.dirname(__file__), "default_data", "hla_standards.yaml", @@ -192,6 +192,13 @@ def read_hla_frequencies( hla_freqs[locus][protein_pair] += 1 return hla_freqs + @staticmethod + def _path_join_shim(*args) -> str: + """ + A shim for os.path.join which allows us to mock out the method easily in testing. + """ + return os.path.join(*args) + @staticmethod def load_default_hla_frequencies() -> dict[HLA_LOCUS, dict[HLAProteinPair, int]]: """ @@ -201,7 +208,7 @@ def load_default_hla_frequencies() -> dict[HLA_LOCUS, dict[HLAProteinPair, int]] :rtype: dict[HLA_LOCUS, dict[HLAProteinPair, int]] """ hla_freqs: dict[HLA_LOCUS, dict[HLAProteinPair, int]] - default_frequencies_filename: str = os.path.join( + default_frequencies_filename: str = HLAAlgorithm._path_join_shim( os.path.dirname(__file__), "default_data", "hla_frequencies.csv", @@ -282,9 +289,8 @@ def combine_standards_stepper( mismatches = combos[combined_std_bin] else: - seq_mask = np.full_like(std_bin, fill_value=15) # Note that seq is implicitly cast to a NumPy array: - mismatches = np.count_nonzero((std_bin ^ seq) & seq_mask != 0) + mismatches = np.count_nonzero(std_bin ^ seq != 0) combos[combined_std_bin] = mismatches # cache this value if mismatches > current_rejection_threshold: @@ -335,7 +341,9 @@ def combine_standards( combined_std_bin, mismatches, allele_pair, - ) in HLAAlgorithm.combine_standards_stepper(matching_stds, seq, mismatch_threshold): + ) in HLAAlgorithm.combine_standards_stepper( + matching_stds, seq, mismatch_threshold + ): if combined_std_bin not in combos: combos[combined_std_bin] = (mismatches, []) combos[combined_std_bin][1].append(allele_pair) @@ -404,8 +412,8 @@ def get_mismatches( mislist.append( HLAMismatch( index=dex, - observed_base=BIN2NUC[sequence_bin[index]], - expected_base=BIN2NUC[correct_base_bin], + sequence_base=BIN2NUC[sequence_bin[index]], + standard_base=BIN2NUC[correct_base_bin], ) ) @@ -459,14 +467,13 @@ def interpret( hla_sequence=hla_sequence, matches={ combined_std: HLAMatchDetails( - mismatch_count=mismatch_count, mismatches=self.get_mismatches( combined_std.standard_bin, seq, locus, ), ) - for combined_std, mismatch_count in all_combos.items() + for combined_std in all_combos }, allele_frequencies=self.hla_frequencies[locus], b5701_standards=b5701_standards, diff --git a/src/hla_algorithm/interpret_from_json.py b/src/hla_algorithm/interpret_from_json.py index d76f805..554c68f 100644 --- a/src/hla_algorithm/interpret_from_json.py +++ b/src/hla_algorithm/interpret_from_json.py @@ -38,8 +38,14 @@ def main(): hla_input.hla_std_path, hla_input.hla_freq_path, ) - interp: HLAInterpretation = hla_alg.interpret(hla_input.hla_sequence()) - print(HLAResult.build_from_interpretation(interp).model_dump_json()) + interp: HLAInterpretation = hla_alg.interpret( + hla_input.hla_sequence(), hla_input.threshold + ) + print( + HLAResult.build_from_interpretation( + interp, hla_alg.tag, hla_alg.last_updated + ).model_dump_json() + ) if __name__ == "__main__": diff --git a/src/hla_algorithm/interpret_from_json_lib.py b/src/hla_algorithm/interpret_from_json_lib.py index 2d1b69e..e8a5f27 100644 --- a/src/hla_algorithm/interpret_from_json_lib.py +++ b/src/hla_algorithm/interpret_from_json_lib.py @@ -1,3 +1,4 @@ +from datetime import datetime from typing import Optional from pydantic import BaseModel, Field @@ -24,6 +25,7 @@ class HLAInput(BaseModel): seq1: str seq2: Optional[str] locus: HLA_LOCUS + threshold: Optional[int] = None hla_std_path: Optional[str] = None hla_freq_path: Optional[str] = None @@ -81,6 +83,22 @@ def hla_sequence(self) -> HLASequence: ) +class HLAMatchAdaptor(BaseModel): + """ + An "adaptor" for HLAMatchDetails for inclusion in an HLAResult. + """ + + mismatch_count: int + mismatches: list[str] + + @classmethod + def from_match_details(cls, match: HLAMatchDetails) -> "HLAMatchAdaptor": + return cls( + mismatch_count=match.mismatch_count, + mismatches=[str(x) for x in match.mismatches], + ) + + class HLAResult(BaseModel): seqs: list[str] = Field(default_factory=list) alleles_all: list[str] = Field(default_factory=list) @@ -91,12 +109,20 @@ class HLAResult(BaseModel): homozygous: bool = False locus: HLA_LOCUS = "B" alg_version: str = __version__ + alleles_version: str = "" + alleles_last_updated: datetime = Field(default_factory=datetime.now) b5701: bool = False dist_b5701: Optional[int] = None errors: list[str] = Field(default_factory=list) + all_mismatches: dict[str, HLAMatchAdaptor] = Field(default_factory=dict) @classmethod - def build_from_interpretation(cls, interp: HLAInterpretation) -> "HLAResult": + def build_from_interpretation( + cls, + interp: HLAInterpretation, + alleles_version: str, + alleles_last_updated: datetime, + ) -> "HLAResult": aps: AllelePairs = interp.best_matching_allele_pairs() # Pick one of the combined standards represented by what goes into @@ -124,6 +150,12 @@ def build_from_interpretation(cls, interp: HLAInterpretation) -> "HLAResult": ambiguous=aps.is_ambiguous(), homozygous=aps.is_homozygous(), locus=interp.locus, + alleles_version=alleles_version, + alleles_last_updated=alleles_last_updated, b5701=interp.is_b5701(), dist_b5701=interp.distance_from_b7501(), + all_mismatches={ + cs.get_allele_pair_str(): HLAMatchAdaptor.from_match_details(match) + for cs, match in interp.matches.items() + }, ) diff --git a/src/hla_algorithm/models.py b/src/hla_algorithm/models.py index 3769bdb..591e007 100644 --- a/src/hla_algorithm/models.py +++ b/src/hla_algorithm/models.py @@ -98,17 +98,20 @@ def get_allele_pair_str(self): class HLAMismatch(BaseModel): index: int - observed_base: str - expected_base: str + sequence_base: str + standard_base: str def __str__(self): - return f"{self.index}:{self.observed_base}->{self.expected_base}" + return f"{self.index}:{self.sequence_base}->{self.standard_base}" class HLAMatchDetails(BaseModel): - mismatch_count: int mismatches: list[HLAMismatch] + @property + def mismatch_count(self) -> int: + return len(self.mismatches) + class HLAProteinPair(BaseModel): # Allows this to be hashable: diff --git a/tests/hla_algorithm_test.py b/tests/hla_algorithm_test.py index 71916f8..0ff5e70 100644 --- a/tests/hla_algorithm_test.py +++ b/tests/hla_algorithm_test.py @@ -4,7 +4,7 @@ from io import StringIO from pathlib import Path from typing import Optional, cast -from unittest.mock import MagicMock, _Call +from unittest.mock import MagicMock, _Call, patch import numpy as np import pytest @@ -1024,84 +1024,84 @@ def test_combine_standards( [1, 2, 4, 8], [4, 2, 4, 8], ["A", "B", "C"], - [HLAMismatch(index=1, observed_base="G", expected_base="A")], + [HLAMismatch(index=1, sequence_base="G", standard_base="A")], id="mismatch_at_beginning", ), pytest.param( [1, 2, 4, 8], [1, 2, 4, 1], ["A", "B", "C"], - [HLAMismatch(index=4, observed_base="A", expected_base="T")], + [HLAMismatch(index=4, sequence_base="A", standard_base="T")], id="mismatch_at_end", ), pytest.param( [1, 2, 4, 8], [1, 4, 4, 8], ["A", "B", "C"], - [HLAMismatch(index=2, observed_base="G", expected_base="C")], + [HLAMismatch(index=2, sequence_base="G", standard_base="C")], id="mismatch_in_middle", ), pytest.param( [1, 2, 4, 8], [5, 2, 4, 8], ["A", "B", "C"], - [HLAMismatch(index=1, observed_base="R", expected_base="A")], + [HLAMismatch(index=1, sequence_base="R", standard_base="A")], id="mixture_seq_to_unambiguous_std_mismatch", ), pytest.param( [1, 2, 11, 8], [1, 2, 4, 8], ["A", "B", "C"], - [HLAMismatch(index=3, observed_base="G", expected_base="H")], + [HLAMismatch(index=3, sequence_base="G", standard_base="H")], id="unambiguous_seq_to_mixture_std_mismatch", ), pytest.param( [1, 2, 4, 3], [1, 2, 4, 5], ["A", "B", "C"], - [HLAMismatch(index=4, observed_base="R", expected_base="M")], + [HLAMismatch(index=4, sequence_base="R", standard_base="M")], id="mixture_seq_to_mixture_std_mismatch", ), pytest.param( [1] * 270 + [4] * 276, [1] * 200 + [4] + [1] * 69 + [4] * 276, ["A", "B", "C"], - [HLAMismatch(index=201, observed_base="G", expected_base="A")], + [HLAMismatch(index=201, sequence_base="G", standard_base="A")], id="indexing_not_modified_before_position_270", ), pytest.param( [1] * 269 + [3] + [4] * 276, [1] * 270 + [4] * 276, ["A", "B", "C"], - [HLAMismatch(index=270, observed_base="A", expected_base="M")], + [HLAMismatch(index=270, sequence_base="A", standard_base="M")], id="indexing_not_modified_at_position_270", ), pytest.param( [1] * 270 + [4] * 276, [1] * 270 + [14] + [4] * 275, ["A"], - [HLAMismatch(index=512, observed_base="B", expected_base="G")], + [HLAMismatch(index=512, sequence_base="B", standard_base="G")], id="locus_a_indexing_modified_at_position_271", ), pytest.param( [1] * 270 + [14] + [4] * 275, [1] * 270 + [4] * 276, ["B", "C"], - [HLAMismatch(index=271, observed_base="G", expected_base="B")], + [HLAMismatch(index=271, sequence_base="G", standard_base="B")], id="locus_b_c_indexing_not_modified_at_position_271", ), pytest.param( [1] * 270 + [4] * 276, [1] * 270 + [4] * 100 + [11] + [4] * 175, ["A"], - [HLAMismatch(index=612, observed_base="H", expected_base="G")], + [HLAMismatch(index=612, sequence_base="H", standard_base="G")], id="locus_a_indexing_modified_after_position_270", ), pytest.param( [1] * 270 + [4] * 100 + [11] + [4] * 175, [1] * 270 + [4] * 276, ["B", "C"], - [HLAMismatch(index=371, observed_base="G", expected_base="H")], + [HLAMismatch(index=371, sequence_base="G", standard_base="H")], id="locus_b_c_indexing_not_modified_after_position_270", ), pytest.param( @@ -1109,9 +1109,9 @@ def test_combine_standards( [1] * 270 + [4] * 100 + [4] * 50 + [11] + [4] * 125, ["A"], [ - HLAMismatch(index=171, observed_base="A", expected_base="M"), - HLAMismatch(index=512, observed_base="G", expected_base="H"), - HLAMismatch(index=662, observed_base="H", expected_base="A"), + HLAMismatch(index=171, sequence_base="A", standard_base="M"), + HLAMismatch(index=512, sequence_base="G", standard_base="H"), + HLAMismatch(index=662, sequence_base="H", standard_base="A"), ], id="locus_b_c_several_mismatches", ), @@ -1120,9 +1120,9 @@ def test_combine_standards( [1] * 270 + [4] * 100 + [4] * 50 + [11] + [4] * 125, ["B", "C"], [ - HLAMismatch(index=171, observed_base="A", expected_base="M"), - HLAMismatch(index=271, observed_base="G", expected_base="H"), - HLAMismatch(index=421, observed_base="H", expected_base="A"), + HLAMismatch(index=171, sequence_base="A", standard_base="M"), + HLAMismatch(index=271, sequence_base="G", standard_base="H"), + HLAMismatch(index=421, sequence_base="H", standard_base="A"), ], id="locus_b_c_several_mismatches", ), @@ -1232,59 +1232,54 @@ def test_get_mismatches_errors( HLACombinedStandard( standard_bin=(1, 2, 4, 8), possible_allele_pairs=(("std_allmatch", "std_allmatch"),), - ): HLAMatchDetails(mismatch_count=0, mismatches=[]), + ): HLAMatchDetails(mismatches=[]), HLACombinedStandard( standard_bin=(1, 2, 4, 12), possible_allele_pairs=(("std_1mismatch", "std_allmatch"),), ): HLAMatchDetails( - mismatch_count=1, mismatches=[ - HLAMismatch(index=4, expected_base="K", observed_base="T"), + HLAMismatch(index=4, standard_base="K", sequence_base="T"), ], ), HLACombinedStandard( standard_bin=(1, 2, 4, 4), possible_allele_pairs=(("std_1mismatch", "std_1mismatch"),), ): HLAMatchDetails( - mismatch_count=1, mismatches=[ - HLAMismatch(index=4, expected_base="G", observed_base="T"), + HLAMismatch(index=4, standard_base="G", sequence_base="T"), ], ), HLACombinedStandard( standard_bin=(9, 6, 6, 9), possible_allele_pairs=(("std_allmatch", "std_allmismatch"),), ): HLAMatchDetails( - mismatch_count=4, mismatches=[ - HLAMismatch(index=1, expected_base="W", observed_base="A"), - HLAMismatch(index=2, expected_base="S", observed_base="C"), - HLAMismatch(index=3, expected_base="S", observed_base="G"), - HLAMismatch(index=4, expected_base="W", observed_base="T"), + HLAMismatch(index=1, standard_base="W", sequence_base="A"), + HLAMismatch(index=2, standard_base="S", sequence_base="C"), + HLAMismatch(index=3, standard_base="S", sequence_base="G"), + HLAMismatch(index=4, standard_base="W", sequence_base="T"), ], ), HLACombinedStandard( standard_bin=(9, 6, 6, 5), possible_allele_pairs=(("std_1mismatch", "std_allmismatch"),), ): HLAMatchDetails( - mismatch_count=4, mismatches=[ - HLAMismatch(index=1, expected_base="W", observed_base="A"), - HLAMismatch(index=2, expected_base="S", observed_base="C"), - HLAMismatch(index=3, expected_base="S", observed_base="G"), - HLAMismatch(index=4, expected_base="R", observed_base="T"), + HLAMismatch(index=1, standard_base="W", sequence_base="A"), + HLAMismatch(index=2, standard_base="S", sequence_base="C"), + HLAMismatch(index=3, standard_base="S", sequence_base="G"), + HLAMismatch(index=4, standard_base="R", sequence_base="T"), ], ), HLACombinedStandard( standard_bin=(8, 4, 2, 1), possible_allele_pairs=(("std_allmismatch", "std_allmismatch"),), ): HLAMatchDetails( - mismatch_count=4, mismatches=[ - HLAMismatch(index=1, expected_base="T", observed_base="A"), - HLAMismatch(index=2, expected_base="G", observed_base="C"), - HLAMismatch(index=3, expected_base="C", observed_base="G"), - HLAMismatch(index=4, expected_base="A", observed_base="T"), + HLAMismatch(index=1, standard_base="T", sequence_base="A"), + HLAMismatch(index=2, standard_base="G", sequence_base="C"), + HLAMismatch(index=3, standard_base="C", sequence_base="G"), + HLAMismatch(index=4, standard_base="A", sequence_base="T"), ], ), }, @@ -1336,59 +1331,54 @@ def test_get_mismatches_errors( HLACombinedStandard( standard_bin=(1, 2, 4, 8), possible_allele_pairs=(("B*57:01:01G", "B*57:01:01G"),), - ): HLAMatchDetails(mismatch_count=0, mismatches=[]), + ): HLAMatchDetails(mismatches=[]), HLACombinedStandard( standard_bin=(1, 2, 4, 12), possible_allele_pairs=(("B*57:01:01G", "B*57:01:02"),), ): HLAMatchDetails( - mismatch_count=1, mismatches=[ - HLAMismatch(index=4, expected_base="K", observed_base="T"), + HLAMismatch(index=4, standard_base="K", sequence_base="T"), ], ), HLACombinedStandard( standard_bin=(1, 2, 4, 4), possible_allele_pairs=(("B*57:01:02", "B*57:01:02"),), ): HLAMatchDetails( - mismatch_count=1, mismatches=[ - HLAMismatch(index=4, expected_base="G", observed_base="T"), + HLAMismatch(index=4, standard_base="G", sequence_base="T"), ], ), HLACombinedStandard( standard_bin=(9, 6, 6, 9), possible_allele_pairs=(("B*57:01:01G", "B*57:01:03"),), ): HLAMatchDetails( - mismatch_count=4, mismatches=[ - HLAMismatch(index=1, expected_base="W", observed_base="A"), - HLAMismatch(index=2, expected_base="S", observed_base="C"), - HLAMismatch(index=3, expected_base="S", observed_base="G"), - HLAMismatch(index=4, expected_base="W", observed_base="T"), + HLAMismatch(index=1, standard_base="W", sequence_base="A"), + HLAMismatch(index=2, standard_base="S", sequence_base="C"), + HLAMismatch(index=3, standard_base="S", sequence_base="G"), + HLAMismatch(index=4, standard_base="W", sequence_base="T"), ], ), HLACombinedStandard( standard_bin=(9, 6, 6, 5), possible_allele_pairs=(("B*57:01:02", "B*57:01:03"),), ): HLAMatchDetails( - mismatch_count=4, mismatches=[ - HLAMismatch(index=1, expected_base="W", observed_base="A"), - HLAMismatch(index=2, expected_base="S", observed_base="C"), - HLAMismatch(index=3, expected_base="S", observed_base="G"), - HLAMismatch(index=4, expected_base="R", observed_base="T"), + HLAMismatch(index=1, standard_base="W", sequence_base="A"), + HLAMismatch(index=2, standard_base="S", sequence_base="C"), + HLAMismatch(index=3, standard_base="S", sequence_base="G"), + HLAMismatch(index=4, standard_base="R", sequence_base="T"), ], ), HLACombinedStandard( standard_bin=(8, 4, 2, 1), possible_allele_pairs=(("B*57:01:03", "B*57:01:03"),), ): HLAMatchDetails( - mismatch_count=4, mismatches=[ - HLAMismatch(index=1, expected_base="T", observed_base="A"), - HLAMismatch(index=2, expected_base="G", observed_base="C"), - HLAMismatch(index=3, expected_base="C", observed_base="G"), - HLAMismatch(index=4, expected_base="A", observed_base="T"), + HLAMismatch(index=1, standard_base="T", sequence_base="A"), + HLAMismatch(index=2, standard_base="G", sequence_base="C"), + HLAMismatch(index=3, standard_base="C", sequence_base="G"), + HLAMismatch(index=4, standard_base="A", sequence_base="T"), ], ), }, @@ -1520,7 +1510,9 @@ def test_interpret_error_cases( ): # Replace the standards with the ones in the test. for locus in ("A", "B", "C"): - hla_algorithm.hla_standards[locus] = {std.allele: std for std in raw_standards[locus]} + hla_algorithm.hla_standards[locus] = { + std.allele: std for std in raw_standards[locus] + } # Spy on the internals to make sure they're called correctly. get_matching_standards_spy: MagicMock = mocker.spy( @@ -1638,6 +1630,12 @@ def test_interpret_error_cases( } +def test_path_join_shim(): + expected_result: str = "/foo/bar/baz" + result: str = HLAAlgorithm._path_join_shim("/foo/bar", "baz") + assert expected_result == result + + @pytest.mark.parametrize( "raw_standards, raw_expected_result", [ @@ -1751,9 +1749,7 @@ def test_read_hla_standards( # Also try reading it from a file. p = tmp_path / "hla_standards.yaml" p.write_text(standards_file_str) - dirname_return_mock: MagicMock = mocker.MagicMock() - mocker.patch.object(os.path, "dirname", return_value=dirname_return_mock) - mocker.patch.object(os.path, "join", return_value=str(p)) + mocker.patch.object(HLAAlgorithm, "_path_join_shim", return_value=str(p)) load_result: LoadedStandards = HLAAlgorithm.load_default_hla_standards() assert load_result == expected_result @@ -2078,17 +2074,15 @@ def test_read_hla_frequencies( "B": expected_locus_b, "C": expected_locus_c, } - result: dict[HLA_LOCUS, dict[HLAProteinPair, int]] = HLAAlgorithm.read_hla_frequencies( - StringIO(frequencies_str) + result: dict[HLA_LOCUS, dict[HLAProteinPair, int]] = ( + HLAAlgorithm.read_hla_frequencies(StringIO(frequencies_str)) ) assert result == expected_results # Now try loading these from a file. p = tmp_path / "hla_frequencies.csv" p.write_text(frequencies_str) - dirname_return_mock: MagicMock = mocker.MagicMock() - mocker.patch.object(os.path, "dirname", return_value=dirname_return_mock) - mocker.patch.object(os.path, "join", return_value=str(p)) + mocker.patch.object(HLAAlgorithm, "_path_join_shim", return_value=str(p)) load_result: dict[HLA_LOCUS, dict[HLAProteinPair, int]] = ( HLAAlgorithm.load_default_hla_frequencies() ) @@ -2184,10 +2178,13 @@ def test_use_config_all_defaults( freq_path.write_text(fake_frequencies_str) mocker.patch.object( - os.path, "join", side_effect=[str(standards_path), str(freq_path)] + HLAAlgorithm, + "_path_join_shim", + side_effect=[os.fspath(standards_path), os.fspath(freq_path)], ) hla_algorithm: HLAAlgorithm = HLAAlgorithm.use_config() + assert hla_algorithm.tag == fake_stored_standards.tag assert hla_algorithm.last_updated == fake_stored_standards.last_updated assert hla_algorithm.hla_standards == READ_HLA_STANDARDS_TYPICAL_CASE_OUTPUT diff --git a/tests/interpret_from_json_lib_test.py b/tests/interpret_from_json_lib_test.py index 4ec2c45..ef2cc88 100644 --- a/tests/interpret_from_json_lib_test.py +++ b/tests/interpret_from_json_lib_test.py @@ -1,8 +1,9 @@ +from datetime import datetime from typing import Final, Optional import pytest -from hla_algorithm.interpret_from_json_lib import HLAInput, HLAResult +from hla_algorithm.interpret_from_json_lib import HLAInput, HLAMatchAdaptor, HLAResult from hla_algorithm.models import ( HLACombinedStandard, HLAInterpretation, @@ -31,15 +32,31 @@ def dummy_matches(locus: HLA_LOCUS) -> dict[HLACombinedStandard, HLAMatchDetails HLACombinedStandard( standard_bin=(1, 4, 9, 4), possible_allele_pairs=((f"{locus}*01:01:01", f"{locus}*02:02:02"),), - ): HLAMatchDetails(mismatch_count=1, mismatches=[]), + ): HLAMatchDetails( + mismatches=[ + HLAMismatch(index=55, sequence_base="A", standard_base="G"), + HLAMismatch(index=62, sequence_base="A", standard_base="R"), + ] + ), HLACombinedStandard( standard_bin=(1, 4, 9, 2), possible_allele_pairs=((f"{locus}*10:01:15", f"{locus}*20:02:03"),), - ): HLAMatchDetails(mismatch_count=1, mismatches=[]), + ): HLAMatchDetails( + mismatches=[ + HLAMismatch(index=45, sequence_base="T", standard_base="C"), + HLAMismatch(index=48, sequence_base="R", standard_base="C"), + ] + ), HLACombinedStandard( standard_bin=(2, 4, 9, 2), possible_allele_pairs=((f"{locus}*10:01:10", f"{locus}*20:22:20"),), - ): HLAMatchDetails(mismatch_count=3, mismatches=[]), + ): HLAMatchDetails( + mismatches=[ + HLAMismatch(index=45, sequence_base="T", standard_base="C"), + HLAMismatch(index=57, sequence_base="R", standard_base="Y"), + HLAMismatch(index=122, sequence_base="R", standard_base="G"), + ] + ), HLACombinedStandard( standard_bin=(2, 4, 10, 2), possible_allele_pairs=( @@ -47,26 +64,14 @@ def dummy_matches(locus: HLA_LOCUS) -> dict[HLACombinedStandard, HLAMatchDetails (f"{locus}*10:01:10", f"{locus}*111:22:22"), ), ): HLAMatchDetails( - mismatch_count=1, mismatches=[ - HLAMismatch(index=100, observed_base="A", expected_base="T"), - HLAMismatch(index=150, observed_base="T", expected_base="G"), - ], + HLAMismatch(index=100, sequence_base="A", standard_base="T"), + HLAMismatch(index=150, sequence_base="T", standard_base="G"), + ] ), } -def dummy_matches_no_mismatches( - locus: HLA_LOCUS, -) -> dict[HLACombinedStandard, HLAMatchDetails]: - return { - HLACombinedStandard( - standard_bin=(2, 2, 1, 2, 1, 4, 4, 2, 8), - possible_allele_pairs=((f"{locus}*01:01:01", f"{locus}*02:02:02"),), - ): HLAMatchDetails(mismatch_count=0, mismatches=[]), - } - - DUMMY_FREQUENCIES: Final[dict[HLAProteinPair, int]] = { HLAProteinPair( first_field_1="01", @@ -87,17 +92,24 @@ def dummy_matches_no_mismatches( standard_bin=(1, 4, 9, 4), possible_allele_pairs=(("B*57:01:01", "B*57:01:01"),), ): HLAMatchDetails( - mismatch_count=1, - mismatches=[HLAMismatch(index=3, observed_base="A", expected_base="W")], + mismatches=[HLAMismatch(index=3, sequence_base="A", standard_base="W")] ), HLACombinedStandard( standard_bin=(1, 4, 9, 2), possible_allele_pairs=(("B*57:01:15", "B*57:01:03"),), - ): HLAMatchDetails(mismatch_count=1, mismatches=[]), + ): HLAMatchDetails( + mismatches=[HLAMismatch(index=7, sequence_base="R", standard_base="W")] + ), HLACombinedStandard( standard_bin=(2, 4, 9, 2), possible_allele_pairs=(("B*57:02:33", "B*56:04:22"),), - ): HLAMatchDetails(mismatch_count=3, mismatches=[]), + ): HLAMatchDetails( + mismatches=[ + HLAMismatch(index=33, sequence_base="A", standard_base="C"), + HLAMismatch(index=36, sequence_base="A", standard_base="G"), + HLAMismatch(index=122, sequence_base="C", standard_base="R"), + ] + ), HLACombinedStandard( standard_bin=(2, 4, 10, 2), possible_allele_pairs=( @@ -105,11 +117,7 @@ def dummy_matches_no_mismatches( ("B*57:04:10", "B*57:01:22"), ), ): HLAMatchDetails( - mismatch_count=1, - mismatches=[ - HLAMismatch(index=100, observed_base="A", expected_base="T"), - HLAMismatch(index=150, observed_base="T", expected_base="G"), - ], + mismatches=[HLAMismatch(index=100, sequence_base="A", standard_base="T")], ), } @@ -346,13 +354,52 @@ def test_hla_input_hla_sequence_locus_bc(): @pytest.mark.parametrize( - "hla_sequence, matches, frequencies, b5701_standards, expected_result", + "raw_mismatches,raw_expected_result", + [ + pytest.param([], [], id="no_mismatches"), + pytest.param( + [HLAMismatch(index=100, sequence_base="A", standard_base="T")], + ["100:A->T"], + id="single_mismatch", + ), + pytest.param( + [ + HLAMismatch(index=100, sequence_base="A", standard_base="T"), + HLAMismatch(index=150, sequence_base="T", standard_base="G"), + HLAMismatch(index=157, sequence_base="C", standard_base="R"), + ], + ["100:A->T", "150:T->G", "157:C->R"], + id="multiple_mismatches", + ), + ], +) +def test_hla_match_adaptor_from_match_details( + raw_mismatches: list[HLAMismatch], + raw_expected_result: list[str], +): + match_details: HLAMatchDetails = HLAMatchDetails( + mismatch_count=len(raw_mismatches), mismatches=raw_mismatches + ) + expected_result: HLAMatchAdaptor = HLAMatchAdaptor( + mismatch_count=len(raw_expected_result), mismatches=raw_expected_result + ) + result: HLAMatchAdaptor = HLAMatchAdaptor.from_match_details(match_details) + assert result == expected_result + + +@pytest.mark.parametrize( + ( + "hla_sequence, matches, frequencies, b5701_standards, alleles_version, " + "alleles_last_updated, expected_result" + ), [ pytest.param( dummy_hla_sequence("A"), dummy_matches("A"), DUMMY_FREQUENCIES, None, + "v0.0.0-testing", + datetime(2025, 8, 12, 17, 0, 0), HLAResult( seqs=["CCACAGGCT"], alleles_all=[ @@ -367,8 +414,28 @@ def test_hla_input_hla_sequence_locus_bc(): ambiguous=True, homozygous=False, locus="A", + alleles_version="v0.0.0-testing", + alleles_last_updated=datetime(2025, 8, 12, 17, 0, 0), b5701=False, dist_b5701=None, + all_mismatches={ + "A*01:01:01 - A*02:02:02": HLAMatchAdaptor( + mismatch_count=2, + mismatches=["55:A->G", "62:A->R"], + ), + "A*10:01:15 - A*20:02:03": HLAMatchAdaptor( + mismatch_count=2, + mismatches=["45:T->C", "48:R->C"], + ), + "A*10:01:10 - A*20:22:20": HLAMatchAdaptor( + mismatch_count=3, + mismatches=["45:T->C", "57:R->Y", "122:R->G"], + ), + "A*10:01:10 - A*20:01|A*10:01:10 - A*111:22:22": HLAMatchAdaptor( + mismatch_count=2, + mismatches=["100:A->T", "150:T->G"], + ), + }, ), id="a_typical_case", ), @@ -377,6 +444,8 @@ def test_hla_input_hla_sequence_locus_bc(): MATCHES_FOR_B5701_CASES, FREQUENCIES_FOR_B5701_CASES, B5701_CASE_STANDARDS, + "v0.0.0-testing", + datetime(2025, 8, 12, 17, 0, 0), HLAResult( seqs=["CCAC", "AGGCT"], alleles_all=[ @@ -391,8 +460,28 @@ def test_hla_input_hla_sequence_locus_bc(): ambiguous=False, homozygous=True, locus="B", + alleles_version="v0.0.0-testing", + alleles_last_updated=datetime(2025, 8, 12, 17, 0, 0), b5701=True, dist_b5701=1, + all_mismatches={ + "B*57:01:01 - B*57:01:01": HLAMatchAdaptor( + mismatch_count=1, + mismatches=["3:A->W"], + ), + "B*57:01:15 - B*57:01:03": HLAMatchAdaptor( + mismatch_count=1, + mismatches=["7:R->W"], + ), + "B*57:02:33 - B*56:04:22": HLAMatchAdaptor( + mismatch_count=3, + mismatches=["33:A->C", "36:A->G", "122:C->R"], + ), + "B*57:02:10 - B*57:01:01:03N|B*57:04:10 - B*57:01:22": HLAMatchAdaptor( + mismatch_count=1, + mismatches=["100:A->T"], + ), + }, ), id="b_typical_case", ), @@ -401,6 +490,8 @@ def test_hla_input_hla_sequence_locus_bc(): dummy_matches("C"), DUMMY_FREQUENCIES, None, + "v0.0.0-testing", + datetime(2025, 8, 12, 17, 0, 0), HLAResult( seqs=["CCAC", "AGGCT"], alleles_all=[ @@ -415,8 +506,28 @@ def test_hla_input_hla_sequence_locus_bc(): ambiguous=True, homozygous=False, locus="C", + alleles_version="v0.0.0-testing", + alleles_last_updated=datetime(2025, 8, 12, 17, 0, 0), b5701=False, dist_b5701=None, + all_mismatches={ + "C*01:01:01 - C*02:02:02": HLAMatchAdaptor( + mismatch_count=2, + mismatches=["55:A->G", "62:A->R"], + ), + "C*10:01:15 - C*20:02:03": HLAMatchAdaptor( + mismatch_count=2, + mismatches=["45:T->C", "48:R->C"], + ), + "C*10:01:10 - C*20:22:20": HLAMatchAdaptor( + mismatch_count=3, + mismatches=["45:T->C", "57:R->Y", "122:R->G"], + ), + "C*10:01:10 - C*20:01|C*10:01:10 - C*111:22:22": HLAMatchAdaptor( + mismatch_count=2, + mismatches=["100:A->T", "150:T->G"], + ), + }, ), id="c_typical_case", ), @@ -427,6 +538,8 @@ def test_hla_result_build_from_interpretation( matches: dict[HLACombinedStandard, HLAMatchDetails], frequencies: dict[HLAProteinPair, int], b5701_standards: Optional[list[HLAStandard]], + alleles_version: str, + alleles_last_updated: datetime, expected_result: HLAResult, ): interp: HLAInterpretation = HLAInterpretation( @@ -435,5 +548,7 @@ def test_hla_result_build_from_interpretation( allele_frequencies=frequencies, b5701_standards=b5701_standards, ) - result: HLAResult = HLAResult.build_from_interpretation(interp) + result: HLAResult = HLAResult.build_from_interpretation( + interp, alleles_version, alleles_last_updated + ) assert result == expected_result diff --git a/tests/models_test.py b/tests/models_test.py index 3187ac5..8edb221 100644 --- a/tests/models_test.py +++ b/tests/models_test.py @@ -148,7 +148,7 @@ def test_get_allele_pair_str( class TestHLAMismatch: @pytest.mark.parametrize( - "index, observed_base, expected_base, expected_str", + "index, sequence_base, standard_base, expected_str", [ (55, "A", "C", "55:A->C"), (199, "C", "R", "199:C->R"), @@ -158,18 +158,44 @@ class TestHLAMismatch: def test_string( self, index: int, - observed_base: str, - expected_base: str, + sequence_base: str, + standard_base: str, expected_str: str, ): mismatch: HLAMismatch = HLAMismatch( index=index, - observed_base=observed_base, - expected_base=expected_base, + sequence_base=sequence_base, + standard_base=standard_base, ) assert str(mismatch) == expected_str +@pytest.mark.parametrize( + "raw_mismatches, expected_result", + [ + pytest.param([], 0, id="no_mismatches"), + pytest.param( + [HLAMismatch(index=100, sequence_base="A", standard_base="T")], + 1, + id="one_mismatches", + ), + pytest.param( + [ + HLAMismatch(index=100, sequence_base="A", standard_base="T"), + HLAMismatch(index=150, sequence_base="R", standard_base="W"), + HLAMismatch(index=150, sequence_base="C", standard_base="Y"), + ], + 3, + id="several_mismatches", + ), + ], +) +def test_hla_match_details_mismatch_count( + raw_mismatches: list[HLAMismatch], expected_result: int +): + assert HLAMatchDetails(mismatches=raw_mismatches).mismatch_count == expected_result + + class TestHLAProteinPair: @pytest.mark.parametrize( "raw_lesser, raw_greater", @@ -1482,7 +1508,19 @@ class TestHLAInterpretation: HLACombinedStandard( standard_bin=(1, 4, 9, 4), possible_allele_pairs=(("A*01:01:01", "A*02:02:02"),), - ): HLAMatchDetails(mismatch_count=5, mismatches=[]), + ): HLAMatchDetails( + mismatches=[ + HLAMismatch(index=15, sequence_base="A", standard_base="R"), + HLAMismatch(index=17, sequence_base="A", standard_base="R"), + HLAMismatch(index=88, sequence_base="G", standard_base="C"), + HLAMismatch( + index=111, sequence_base="G", standard_base="T" + ), + HLAMismatch( + index=205, sequence_base="R", standard_base="Y" + ), + ] + ), }, { HLAProteinPair( @@ -1519,11 +1557,30 @@ class TestHLAInterpretation: HLACombinedStandard( standard_bin=(1, 4, 9, 4), possible_allele_pairs=(("A*01:01:01", "A*02:02:02"),), - ): HLAMatchDetails(mismatch_count=5, mismatches=[]), + ): HLAMatchDetails( + mismatches=[ + HLAMismatch(index=15, sequence_base="A", standard_base="R"), + HLAMismatch(index=17, sequence_base="A", standard_base="R"), + HLAMismatch(index=88, sequence_base="G", standard_base="C"), + HLAMismatch( + index=111, sequence_base="G", standard_base="T" + ), + HLAMismatch( + index=205, sequence_base="R", standard_base="Y" + ), + ], + ), HLACombinedStandard( standard_bin=(1, 4, 9, 2), possible_allele_pairs=(("A*10:01:01", "A*20:02:02"),), - ): HLAMatchDetails(mismatch_count=2, mismatches=[]), + ): HLAMatchDetails( + mismatches=[ + HLAMismatch(index=22, sequence_base="R", standard_base="C"), + HLAMismatch( + index=222, sequence_base="A", standard_base="R" + ), + ], + ), }, { HLAProteinPair( @@ -1560,19 +1617,39 @@ class TestHLAInterpretation: HLACombinedStandard( standard_bin=(1, 4, 9, 4), possible_allele_pairs=(("A*01:01:01", "A*02:02:02"),), - ): HLAMatchDetails(mismatch_count=1, mismatches=[]), + ): HLAMatchDetails( + mismatches=[ + HLAMismatch(index=55, sequence_base="A", standard_base="G") + ], + ), HLACombinedStandard( standard_bin=(1, 4, 9, 2), possible_allele_pairs=(("A*10:01:01", "A*20:02:03"),), - ): HLAMatchDetails(mismatch_count=1, mismatches=[]), + ): HLAMatchDetails( + mismatches=[ + HLAMismatch(index=48, sequence_base="R", standard_base="C") + ], + ), HLACombinedStandard( standard_bin=(2, 4, 9, 2), possible_allele_pairs=(("A*10:01:10", "A*20:22:20"),), - ): HLAMatchDetails(mismatch_count=3, mismatches=[]), + ): HLAMatchDetails( + mismatches=[ + HLAMismatch(index=45, sequence_base="T", standard_base="C"), + HLAMismatch(index=57, sequence_base="R", standard_base="Y"), + HLAMismatch( + index=122, sequence_base="R", standard_base="G" + ), + ], + ), HLACombinedStandard( standard_bin=(2, 4, 10, 2), possible_allele_pairs=(("A*10:01:10", "A*22:22:22"),), - ): HLAMatchDetails(mismatch_count=1, mismatches=[]), + ): HLAMatchDetails( + mismatches=[ + HLAMismatch(index=100, sequence_base="A", standard_base="T") + ] + ), }, { HLAProteinPair( @@ -1621,22 +1698,42 @@ class TestHLAInterpretation: HLACombinedStandard( standard_bin=(1, 4, 9, 4), possible_allele_pairs=(("A*01:01:01", "A*02:02:02"),), - ): HLAMatchDetails(mismatch_count=1, mismatches=[]), + ): HLAMatchDetails( + mismatches=[ + HLAMismatch(index=55, sequence_base="A", standard_base="G") + ] + ), HLACombinedStandard( standard_bin=(1, 4, 9, 2), possible_allele_pairs=(("A*10:01:15", "A*20:02:03"),), - ): HLAMatchDetails(mismatch_count=1, mismatches=[]), + ): HLAMatchDetails( + mismatches=[ + HLAMismatch(index=48, sequence_base="R", standard_base="C") + ] + ), HLACombinedStandard( standard_bin=(2, 4, 9, 2), possible_allele_pairs=(("A*10:01:10", "A*20:22:20"),), - ): HLAMatchDetails(mismatch_count=3, mismatches=[]), + ): HLAMatchDetails( + mismatches=[ + HLAMismatch(index=45, sequence_base="T", standard_base="C"), + HLAMismatch(index=57, sequence_base="R", standard_base="Y"), + HLAMismatch( + index=122, sequence_base="R", standard_base="G" + ), + ], + ), HLACombinedStandard( standard_bin=(2, 4, 10, 2), possible_allele_pairs=( ("A*10:01:10", "A*20:01"), ("A*10:01:10", "A*22:22:22"), ), - ): HLAMatchDetails(mismatch_count=1, mismatches=[]), + ): HLAMatchDetails( + mismatches=[ + HLAMismatch(index=100, sequence_base="A", standard_base="T") + ] + ), }, { HLAProteinPair( @@ -1819,21 +1916,21 @@ def test_distance_from_b5701( HLACombinedStandard( standard_bin=(1, 4, 9, 4), possible_allele_pairs=(("B*01:01:01", "B*02:02:02"),), - ): HLAMatchDetails(mismatch_count=5, mismatches=[]), + ): HLAMatchDetails(mismatches=[]), HLACombinedStandard( standard_bin=(1, 2, 9, 4), possible_allele_pairs=( ("B*01:03:22", "B*02:07:05"), ("B*01:03:25", "B*02:07:05"), ), - ): HLAMatchDetails(mismatch_count=5, mismatches=[]), + ): HLAMatchDetails(mismatches=[]), HLACombinedStandard( standard_bin=(1, 2, 9, 4), possible_allele_pairs=( ("B*21:55:07:33N", "B*21:55:07:33N"), ("B*21:55:07:33N", "B*21:55:42"), ), - ): HLAMatchDetails(mismatch_count=5, mismatches=[]), + ): HLAMatchDetails(mismatches=[]), }, False, id="typical_case_not_b5701", @@ -1846,21 +1943,21 @@ def test_distance_from_b5701( ("B*22:33:44", "B*56:02:51"), ("B*57:01:04", "B*57:01:03"), ), - ): HLAMatchDetails(mismatch_count=5, mismatches=[]), + ): HLAMatchDetails(mismatches=[]), HLACombinedStandard( standard_bin=(1, 2, 9, 4), possible_allele_pairs=( ("B*02:03:25", "B*02:03:27"), ("B*13:31:13", "B*13:31:13"), ), - ): HLAMatchDetails(mismatch_count=5, mismatches=[]), + ): HLAMatchDetails(mismatches=[]), HLACombinedStandard( standard_bin=(1, 2, 9, 4), possible_allele_pairs=( ("B*22:55:07:33N", "B*21:55:33"), ("B*22:55:07:33N", "B*21:55:42"), ), - ): HLAMatchDetails(mismatch_count=5, mismatches=[]), + ): HLAMatchDetails(mismatches=[]), }, True, id="typical_case_is_b5701", diff --git a/uv.lock b/uv.lock index 4b36444..e2e9ef1 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.10" resolution-markers = [ "python_full_version < '3.11'",