From e8deb94befc2a67ad8d2e7703233ca1392fb2ec7 Mon Sep 17 00:00:00 2001 From: Richard Liang Date: Thu, 28 Aug 2025 16:07:10 -0700 Subject: [PATCH 1/2] Fixed a mis-identified precondition that doesn't always hold. Fortunately this didn't require much of a code change. Added tests to cover the previously-missed cases. --- src/hla_algorithm/models.py | 6 +----- tests/models_test.py | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/src/hla_algorithm/models.py b/src/hla_algorithm/models.py index 10f4035..69b78a7 100644 --- a/src/hla_algorithm/models.py +++ b/src/hla_algorithm/models.py @@ -429,15 +429,11 @@ def _identify_longest_prefix(allele_prefixes: list[GeneCoord]) -> GeneCoord: Precondition: that the input must all share at least the same first coordinate. The algorithm may not return cogent values if not. - - Precondition: the specified allele prefixes do not all perfectly match, - so we lose nothing by trimming one coordinate off the end of all of - them. """ longest_prefix: GeneCoord = () if len(allele_prefixes) > 0: max_length: int = max([len(allele) for allele in allele_prefixes]) - for i in range(max_length - 1, 0, -1): + for i in range(max_length, 0, -1): curr_prefixes: set[GeneCoord] = {allele[0:i] for allele in allele_prefixes} if len(curr_prefixes) == 1: longest_prefix = curr_prefixes.pop() diff --git a/tests/models_test.py b/tests/models_test.py index d3e020e..cb21e59 100644 --- a/tests/models_test.py +++ b/tests/models_test.py @@ -1206,8 +1206,31 @@ def test_identify_clean_prefix_in_pairs( (), id="trivial_case", ), - # Note: we have no single allele tests because that contradicts one - # of our preconditions. + pytest.param( + [("C*01", "02", "03", "04G")], + ("C*01", "02", "03", "04G"), + id="single_input_length_4", + ), + pytest.param( + [("C*01", "02", "03")], + ("C*01", "02", "03"), + id="single_input_length_3", + ), + pytest.param( + [("C*01", "02")], + ("C*01", "02"), + id="single_input_length_2", + ), + pytest.param( + [("C*01",)], + ("C*01",), + id="single_input_length_1", + ), + pytest.param( + [("C*01", "02", "03", "04G"), ("C*01", "02", "03", "04G")], + ("C*01", "02", "03", "04G"), + id="best_match_length_4", + ), pytest.param( [("C*01", "02", "03", "04G"), ("C*01", "02", "03", "110N")], ("C*01", "02", "03"), @@ -1248,6 +1271,11 @@ def test_identify_clean_prefix_in_pairs( ("C*01",), id="best_match_length_1_different_lengths_one_with_no_excess", ), + pytest.param( + [("C*01", "07", "88"), ("C*01", "07", "01"), ("C*01", "07", "01", "110N")], + ("C*01", "07"), + id="typical_case", + ), ], ) def test_identify_longest_prefix( From fa4e4b6ea0f10917fd66e27eecfc420fad966d73 Mon Sep 17 00:00:00 2001 From: Richard Liang Date: Thu, 28 Aug 2025 16:09:20 -0700 Subject: [PATCH 2/2] Fixed up a docstring's formatting. --- src/hla_algorithm/models.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/hla_algorithm/models.py b/src/hla_algorithm/models.py index 69b78a7..86ea8b6 100644 --- a/src/hla_algorithm/models.py +++ b/src/hla_algorithm/models.py @@ -427,8 +427,9 @@ def _identify_longest_prefix(allele_prefixes: list[GeneCoord]) -> GeneCoord: """ Identify the longest gene coordinate "prefix" in the given allele prefixes. - Precondition: that the input must all share at least the same first - coordinate. The algorithm may not return cogent values if not. + Precondition: all allele prefixes in the input must all share at least + the same first coordinate. The algorithm may not return cogent values + if not. """ longest_prefix: GeneCoord = () if len(allele_prefixes) > 0: