cfe-lab · rhliang · Aug 13, 2025 · Aug 12, 2025 · Aug 12, 2025 · Aug 12, 2025
diff --git a/.github/workflows/build_and_publish_gem.yml b/.github/workflows/build_and_publish_gem.yml
@@ -0,0 +1,53 @@
+# Builds the hla_algorithm gem whenever a GitHub release is published, pushes
+# it to GitHub Packages, and attaches it to the release as an asset.
+name: Build and publish the Ruby package
+
+on:
+  release:
+    types: [published]
+
+jobs:
+  build_gem:
+    runs-on: ubuntu-latest
+
+    # Grant GITHUB_TOKEN exactly what this job needs, so it also works when the
+    # repository or organization default token permissions are read-only:
+    #   contents: write - checkout, and `gh release upload`
+    #   packages: write - `gem push` to GitHub Packages
+    permissions:
+      contents: write
+      packages: write
+
+    env:
+      # Release tag; passed to `gh release upload` in the last step.
+      HLA_ALGORITHM_VERSION: ${{ github.ref_name }}
+      BUILD_PATH: ${{ github.workspace }}/ruby
+      # Written into the gem credentials file below and read by the `gh` CLI.
+      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+    steps:
+    - name: Install Ruby
+      run: |
+        sudo apt update
+        sudo apt install -y ruby
+
+    - name: Checkout code from repo
+      uses: actions/checkout@v4
+
+    - name: Build the Ruby package
+      run: |
+        cd $BUILD_PATH
+        gem build ${BUILD_PATH}/hla_algorithm.gemspec
+
+    - name: Publish gem to GitHub Packages
+      run: |
+        mkdir -p $HOME/.gem
+        touch $HOME/.gem/credentials
+        chmod 0600 $HOME/.gem/credentials
+        printf -- "---\n:github: Bearer ${GH_TOKEN}\n" > $HOME/.gem/credentials
+        gem push --KEY github --host https://rubygems.pkg.github.com/${OWNER} ${BUILD_PATH}/*.gem
+      env:
+        OWNER: ${{ github.repository_owner }}
+
+    - name: Add gem as a release asset
+      run: gh release upload $HLA_ALGORITHM_VERSION ${BUILD_PATH}/*.gem
diff --git a/pyproject.toml b/pyproject.toml
@@ -102,7 +102,7 @@ package = true
 
 [tool.uv-dynamic-versioning]
 vcs = "git"
-style = "semver"
+style = "pep440"
 fallback-version = "0.0.0"
 
 [tool.pytest.ini_options]

diff --git a/ruby/lib/hla_algorithm.rb b/ruby/lib/hla_algorithm.rb
@@ -5,7 +5,7 @@
 
 HLA_INTERPRET_FROM_JSON = ENV['HLA_INTERPRET_FROM_JSON']
 if HLA_INTERPRET_FROM_JSON.nil?
-  raise "HLA_INTERPRET_FROM_JSON must be set"
+  raise 'HLA_INTERPRET_FROM_JSON must be set'
 end
 
 
@@ -26,18 +26,21 @@ class HLAResult
   )
 
   def initialize(raw_result)
-    @seqs = raw_result["seqs"]
-    @alleles_all = raw_result["alleles_all"]
-    @alleles_clean = raw_result["alleles_clean"]
-    @alleles_for_mismatches = raw_result["alleles_for_mismatches"]
-    @mismatches = raw_result["mismatches"]
-    @ambiguous = raw_result["ambiguous"]
-    @homozygous = raw_result["homozygous"]
-    @locus = raw_result["locus"]
-    @alg_version = raw_result["alg_version"]
-    @b5701 = raw_result["b5701"]
-    @dist_b5701 = raw_result["dist_b5701"]
-    @errors = raw_result["errors"]
+    @seqs = raw_result['seqs']
+    @alleles_all = raw_result['alleles_all']
+    @alleles_clean = raw_result['alleles_clean']
+    @alleles_for_mismatches = raw_result['alleles_for_mismatches']
+    @mismatches = raw_result['mismatches']
+    @ambiguous = raw_result['ambiguous']
+    @homozygous = raw_result['homozygous']
+    @locus = raw_result['locus']
+    @alg_version = raw_result['alg_version']
+    @alleles_version = raw_result['alleles_version']
+    @alleles_last_updated = raw_result['alleles_last_updated']
+    @b5701 = raw_result['b5701']
+    @dist_b5701 = raw_result['dist_b5701']
+    @errors = raw_result['errors']
+    @all_mismatches = raw_result['all_mismatches']
   end
 end
 
@@ -51,20 +54,21 @@ def initialize(
     @hla_freq_path = hla_freq_path
   end
 
-  def analyze(seqs, locus='B')
+  def analyze(seqs, locus='B', threshold=nil)
     hla_input = {
-      "seq1" => seqs[0],
-      "seq2" => seqs[1],
-      "locus" => locus,
-      "hla_std_path" => nil,
-      "hla_freq_path" => nil
+      'seq1' => seqs[0],
+      'seq2' => seqs[1],
+      'locus' => locus,
+      'threshold' => threshold,
+      'hla_std_path' => nil,
+      'hla_freq_path' => nil
     }
 
     if (!@hla_std_path.nil?)
-      hla_input["hla_std_path"] = File.expand_path(@hla_std_path)
+      hla_input['hla_std_path'] = File.expand_path(@hla_std_path)
     end
     if (!@hla_freq_path.nil?)
-      hla_input["hla_freq_path"] = File.expand_path(@hla_freq_path)
+      hla_input['hla_freq_path'] = File.expand_path(@hla_freq_path)
     end
 
     python_stdout, python_stderr, wait_thread = Open3.capture3(
@@ -73,7 +77,9 @@ def analyze(seqs, locus='B')
     )
 
     if !wait_thread.success?
-      error_msg = "HLA algorithm failed with exit code "\
-        "#{wait_thread.value}.  Error output:\n"\
+      # Open3.capture3 returns a Process::Status here (not a wait thread), so
+      # the exit code comes from #exitstatus; Process::Status has no #value.
+      error_msg = 'HLA algorithm failed with exit code '\
+        "#{wait_thread.exitstatus}.  Error output:\n"\
         "#{python_stderr}"
       raise error_msg

diff --git a/src/hla_algorithm/hla_algorithm.py b/src/hla_algorithm/hla_algorithm.py
@@ -134,7 +134,7 @@ def load_default_hla_standards() -> LoadedStandards:
         :return: List of known HLA standards
         :rtype: list[HLAStandard]
         """
-        standards_filename: str = os.path.join(
+        standards_filename: str = HLAAlgorithm._path_join_shim(
             os.path.dirname(__file__),
             "default_data",
             "hla_standards.yaml",
@@ -192,6 +192,13 @@ def read_hla_frequencies(
                     hla_freqs[locus][protein_pair] += 1
         return hla_freqs
 
+    @staticmethod
+    def _path_join_shim(*args) -> str:
+        """
+        A shim for os.path.join which allows us to mock out the method easily in testing.
+        """
+        return os.path.join(*args)
+
     @staticmethod
     def load_default_hla_frequencies() -> dict[HLA_LOCUS, dict[HLAProteinPair, int]]:
         """
@@ -201,7 +208,7 @@ def load_default_hla_frequencies() -> dict[HLA_LOCUS, dict[HLAProteinPair, int]]
         :rtype: dict[HLA_LOCUS, dict[HLAProteinPair, int]]
         """
         hla_freqs: dict[HLA_LOCUS, dict[HLAProteinPair, int]]
-        default_frequencies_filename: str = os.path.join(
+        default_frequencies_filename: str = HLAAlgorithm._path_join_shim(
             os.path.dirname(__file__),
             "default_data",
             "hla_frequencies.csv",
@@ -282,9 +289,8 @@ def combine_standards_stepper(
                     mismatches = combos[combined_std_bin]
 
                 else:
-                    seq_mask = np.full_like(std_bin, fill_value=15)
                     # Note that seq is implicitly cast to a NumPy array:
-                    mismatches = np.count_nonzero((std_bin ^ seq) & seq_mask != 0)
+                    mismatches = np.count_nonzero(std_bin ^ seq != 0)
                     combos[combined_std_bin] = mismatches  # cache this value
 
                 if mismatches > current_rejection_threshold:
@@ -335,7 +341,9 @@ def combine_standards(
             combined_std_bin,
             mismatches,
             allele_pair,
-        ) in HLAAlgorithm.combine_standards_stepper(matching_stds, seq, mismatch_threshold):
+        ) in HLAAlgorithm.combine_standards_stepper(
+            matching_stds, seq, mismatch_threshold
+        ):
             if combined_std_bin not in combos:
                 combos[combined_std_bin] = (mismatches, [])
             combos[combined_std_bin][1].append(allele_pair)
@@ -404,8 +412,8 @@ def get_mismatches(
             mislist.append(
                 HLAMismatch(
                     index=dex,
-                    observed_base=BIN2NUC[sequence_bin[index]],
-                    expected_base=BIN2NUC[correct_base_bin],
+                    sequence_base=BIN2NUC[sequence_bin[index]],
+                    standard_base=BIN2NUC[correct_base_bin],
                 )
             )
 
@@ -459,14 +467,13 @@ def interpret(
             hla_sequence=hla_sequence,
             matches={
                 combined_std: HLAMatchDetails(
-                    mismatch_count=mismatch_count,
                     mismatches=self.get_mismatches(
                         combined_std.standard_bin,
                         seq,
                         locus,
                     ),
                 )
-                for combined_std, mismatch_count in all_combos.items()
+                for combined_std in all_combos
             },
             allele_frequencies=self.hla_frequencies[locus],
             b5701_standards=b5701_standards,

diff --git a/src/hla_algorithm/interpret_from_json.py b/src/hla_algorithm/interpret_from_json.py
@@ -38,8 +38,14 @@ def main():
             hla_input.hla_std_path,
             hla_input.hla_freq_path,
         )
-        interp: HLAInterpretation = hla_alg.interpret(hla_input.hla_sequence())
-        print(HLAResult.build_from_interpretation(interp).model_dump_json())
+        interp: HLAInterpretation = hla_alg.interpret(
+            hla_input.hla_sequence(), hla_input.threshold
+        )
+        print(
+            HLAResult.build_from_interpretation(
+                interp, hla_alg.tag, hla_alg.last_updated
+            ).model_dump_json()
+        )
 
 
 if __name__ == "__main__":

diff --git a/src/hla_algorithm/interpret_from_json_lib.py b/src/hla_algorithm/interpret_from_json_lib.py
@@ -1,3 +1,4 @@
+from datetime import datetime
 from typing import Optional
 
 from pydantic import BaseModel, Field
@@ -24,6 +25,7 @@ class HLAInput(BaseModel):
     seq1: str
     seq2: Optional[str]
     locus: HLA_LOCUS
+    threshold: Optional[int] = None
     hla_std_path: Optional[str] = None
     hla_freq_path: Optional[str] = None
 
@@ -81,6 +83,22 @@ def hla_sequence(self) -> HLASequence:
         )
 
 
+class HLAMatchAdaptor(BaseModel):
+    """
+    An "adaptor" for HLAMatchDetails for inclusion in an HLAResult.
+    """
+
+    mismatch_count: int
+    mismatches: list[str]
+
+    @classmethod
+    def from_match_details(cls, match: HLAMatchDetails) -> "HLAMatchAdaptor":
+        return cls(
+            mismatch_count=match.mismatch_count,
+            mismatches=[str(x) for x in match.mismatches],
+        )
+
+
 class HLAResult(BaseModel):
     seqs: list[str] = Field(default_factory=list)
     alleles_all: list[str] = Field(default_factory=list)
@@ -91,12 +109,20 @@ class HLAResult(BaseModel):
     homozygous: bool = False
     locus: HLA_LOCUS = "B"
     alg_version: str = __version__
+    alleles_version: str = ""
+    alleles_last_updated: datetime = Field(default_factory=datetime.now)
     b5701: bool = False
     dist_b5701: Optional[int] = None
     errors: list[str] = Field(default_factory=list)
+    all_mismatches: dict[str, HLAMatchAdaptor] = Field(default_factory=dict)
 
     @classmethod
-    def build_from_interpretation(cls, interp: HLAInterpretation) -> "HLAResult":
+    def build_from_interpretation(
+        cls,
+        interp: HLAInterpretation,
+        alleles_version: str,
+        alleles_last_updated: datetime,
+    ) -> "HLAResult":
         aps: AllelePairs = interp.best_matching_allele_pairs()
 
         # Pick one of the combined standards represented by what goes into
@@ -124,6 +150,12 @@ def build_from_interpretation(cls, interp: HLAInterpretation) -> "HLAResult":
             ambiguous=aps.is_ambiguous(),
             homozygous=aps.is_homozygous(),
             locus=interp.locus,
+            alleles_version=alleles_version,
+            alleles_last_updated=alleles_last_updated,
             b5701=interp.is_b5701(),
             dist_b5701=interp.distance_from_b7501(),
+            all_mismatches={
+                cs.get_allele_pair_str(): HLAMatchAdaptor.from_match_details(match)
+                for cs, match in interp.matches.items()
+            },
         )
diff --git a/src/hla_algorithm/models.py b/src/hla_algorithm/models.py
@@ -98,17 +98,20 @@ def get_allele_pair_str(self):
 
 class HLAMismatch(BaseModel):
     index: int
-    observed_base: str
-    expected_base: str
+    sequence_base: str
+    standard_base: str
 
     def __str__(self):
-        return f"{self.index}:{self.observed_base}->{self.expected_base}"
+        return f"{self.index}:{self.sequence_base}->{self.standard_base}"
 
 
 class HLAMatchDetails(BaseModel):
-    mismatch_count: int
     mismatches: list[HLAMismatch]
 
+    @property
+    def mismatch_count(self) -> int:
+        return len(self.mismatches)
+
 
 class HLAProteinPair(BaseModel):
     # Allows this to be hashable: