Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions .github/workflows/build_and_publish_gem.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Builds the Ruby gem from the `ruby/` directory whenever a GitHub release is
# published, pushes it to the GitHub Packages RubyGems registry, and attaches
# the built .gem file to the release as an asset.
name: Build and publish the Ruby package

on:
  release:
    types: [published]

jobs:
  build_gem:
    runs-on: ubuntu-latest

    # Request the token scopes this job needs explicitly; depending on the
    # repository/organization settings the default GITHUB_TOKEN can be
    # read-only, which makes both the gem push and the asset upload fail.
    permissions:
      contents: write  # checkout + `gh release upload`
      packages: write  # `gem push` to GitHub Packages

    env:
      # Name of the ref (the release tag) that triggered this run.
      HLA_ALGORITHM_VERSION: ${{ github.ref_name }}
      BUILD_PATH: ${{ github.workspace }}/ruby
      # Used for the gem credentials below and read by the `gh` CLI.
      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

    steps:
      - name: Install Ruby
        run: |
          sudo apt update
          sudo apt install -y ruby

      - name: Checkout code from repo
        uses: actions/checkout@v4

      - name: Build the Ruby package
        run: |
          cd "$BUILD_PATH"
          gem build "${BUILD_PATH}/hla_algorithm.gemspec"

      - name: Publish gem to GitHub Packages
        run: |
          mkdir -p "$HOME/.gem"
          # Create the file with restrictive permissions before writing the token.
          touch "$HOME/.gem/credentials"
          chmod 0600 "$HOME/.gem/credentials"
          # Pass the token as a printf argument, not as part of the format
          # string, so that it is always written verbatim.
          printf -- "---\n:github: Bearer %s\n" "${GH_TOKEN}" > "$HOME/.gem/credentials"
          gem push --KEY github --host "https://rubygems.pkg.github.com/${OWNER}" "${BUILD_PATH}"/*.gem
        env:
          OWNER: ${{ github.repository_owner }}

      - name: Add gem as a release asset
        run: gh release upload "$HLA_ALGORITHM_VERSION" "${BUILD_PATH}"/*.gem
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ package = true

[tool.uv-dynamic-versioning]
vcs = "git"
style = "semver"
style = "pep440"
fallback-version = "0.0.0"

[tool.pytest.ini_options]
Expand Down
48 changes: 26 additions & 22 deletions ruby/lib/hla_algorithm.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

HLA_INTERPRET_FROM_JSON = ENV['HLA_INTERPRET_FROM_JSON']
if HLA_INTERPRET_FROM_JSON.nil?
raise "HLA_INTERPRET_FROM_JSON must be set"
raise 'HLA_INTERPRET_FROM_JSON must be set'
end


Expand All @@ -26,18 +26,21 @@ class HLAResult
)

def initialize(raw_result)
@seqs = raw_result["seqs"]
@alleles_all = raw_result["alleles_all"]
@alleles_clean = raw_result["alleles_clean"]
@alleles_for_mismatches = raw_result["alleles_for_mismatches"]
@mismatches = raw_result["mismatches"]
@ambiguous = raw_result["ambiguous"]
@homozygous = raw_result["homozygous"]
@locus = raw_result["locus"]
@alg_version = raw_result["alg_version"]
@b5701 = raw_result["b5701"]
@dist_b5701 = raw_result["dist_b5701"]
@errors = raw_result["errors"]
@seqs = raw_result['seqs']
@alleles_all = raw_result['alleles_all']
@alleles_clean = raw_result['alleles_clean']
@alleles_for_mismatches = raw_result['alleles_for_mismatches']
@mismatches = raw_result['mismatches']
@ambiguous = raw_result['ambiguous']
@homozygous = raw_result['homozygous']
@locus = raw_result['locus']
@alg_version = raw_result['alg_version']
@alleles_version = raw_result['alleles_version']
@alleles_last_updated = raw_result['alleles_last_updated']
@b5701 = raw_result['b5701']
@dist_b5701 = raw_result['dist_b5701']
@errors = raw_result['errors']
@all_mismatches = raw_result['all_mismatches']
end
end

Expand All @@ -51,20 +54,21 @@ def initialize(
@hla_freq_path = hla_freq_path
end

def analyze(seqs, locus='B')
def analyze(seqs, locus='B', threshold=nil)
hla_input = {
"seq1" => seqs[0],
"seq2" => seqs[1],
"locus" => locus,
"hla_std_path" => nil,
"hla_freq_path" => nil
'seq1' => seqs[0],
'seq2' => seqs[1],
'locus' => locus,
'threshold' => threshold,
'hla_std_path' => nil,
'hla_freq_path' => nil
}

if (!@hla_std_path.nil?)
hla_input["hla_std_path"] = File.expand_path(@hla_std_path)
hla_input['hla_std_path'] = File.expand_path(@hla_std_path)
end
if (!@hla_freq_path.nil?)
hla_input["hla_freq_path"] = File.expand_path(@hla_freq_path)
hla_input['hla_freq_path'] = File.expand_path(@hla_freq_path)
end

python_stdout, python_stderr, wait_thread = Open3.capture3(
Expand All @@ -73,7 +77,7 @@ def analyze(seqs, locus='B')
)

if !wait_thread.success?
error_msg = "HLA algorithm failed with exit code "\
error_msg = 'HLA algorithm failed with exit code '\
"#{wait_thread.value}. Error output:\n"\
"#{python_stderr}"
raise error_msg
Expand Down
25 changes: 16 additions & 9 deletions src/hla_algorithm/hla_algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def load_default_hla_standards() -> LoadedStandards:
:return: List of known HLA standards
:rtype: list[HLAStandard]
"""
standards_filename: str = os.path.join(
standards_filename: str = HLAAlgorithm._path_join_shim(
os.path.dirname(__file__),
"default_data",
"hla_standards.yaml",
Expand Down Expand Up @@ -192,6 +192,13 @@ def read_hla_frequencies(
hla_freqs[locus][protein_pair] += 1
return hla_freqs

@staticmethod
def _path_join_shim(*args) -> str:
"""
A shim for os.path.join which allows us to mock out the method easily in testing.
"""
return os.path.join(*args)

@staticmethod
def load_default_hla_frequencies() -> dict[HLA_LOCUS, dict[HLAProteinPair, int]]:
"""
Expand All @@ -201,7 +208,7 @@ def load_default_hla_frequencies() -> dict[HLA_LOCUS, dict[HLAProteinPair, int]]
:rtype: dict[HLA_LOCUS, dict[HLAProteinPair, int]]
"""
hla_freqs: dict[HLA_LOCUS, dict[HLAProteinPair, int]]
default_frequencies_filename: str = os.path.join(
default_frequencies_filename: str = HLAAlgorithm._path_join_shim(
os.path.dirname(__file__),
"default_data",
"hla_frequencies.csv",
Expand Down Expand Up @@ -282,9 +289,8 @@ def combine_standards_stepper(
mismatches = combos[combined_std_bin]

else:
seq_mask = np.full_like(std_bin, fill_value=15)
# Note that seq is implicitly cast to a NumPy array:
mismatches = np.count_nonzero((std_bin ^ seq) & seq_mask != 0)
mismatches = np.count_nonzero(std_bin ^ seq != 0)
combos[combined_std_bin] = mismatches # cache this value

if mismatches > current_rejection_threshold:
Expand Down Expand Up @@ -335,7 +341,9 @@ def combine_standards(
combined_std_bin,
mismatches,
allele_pair,
) in HLAAlgorithm.combine_standards_stepper(matching_stds, seq, mismatch_threshold):
) in HLAAlgorithm.combine_standards_stepper(
matching_stds, seq, mismatch_threshold
):
if combined_std_bin not in combos:
combos[combined_std_bin] = (mismatches, [])
combos[combined_std_bin][1].append(allele_pair)
Expand Down Expand Up @@ -404,8 +412,8 @@ def get_mismatches(
mislist.append(
HLAMismatch(
index=dex,
observed_base=BIN2NUC[sequence_bin[index]],
expected_base=BIN2NUC[correct_base_bin],
sequence_base=BIN2NUC[sequence_bin[index]],
standard_base=BIN2NUC[correct_base_bin],
)
)

Expand Down Expand Up @@ -459,14 +467,13 @@ def interpret(
hla_sequence=hla_sequence,
matches={
combined_std: HLAMatchDetails(
mismatch_count=mismatch_count,
mismatches=self.get_mismatches(
combined_std.standard_bin,
seq,
locus,
),
)
for combined_std, mismatch_count in all_combos.items()
for combined_std in all_combos
},
allele_frequencies=self.hla_frequencies[locus],
b5701_standards=b5701_standards,
Expand Down
10 changes: 8 additions & 2 deletions src/hla_algorithm/interpret_from_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,14 @@ def main():
hla_input.hla_std_path,
hla_input.hla_freq_path,
)
interp: HLAInterpretation = hla_alg.interpret(hla_input.hla_sequence())
print(HLAResult.build_from_interpretation(interp).model_dump_json())
interp: HLAInterpretation = hla_alg.interpret(
hla_input.hla_sequence(), hla_input.threshold
)
print(
HLAResult.build_from_interpretation(
interp, hla_alg.tag, hla_alg.last_updated
).model_dump_json()
)


if __name__ == "__main__":
Expand Down
34 changes: 33 additions & 1 deletion src/hla_algorithm/interpret_from_json_lib.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from datetime import datetime
from typing import Optional

from pydantic import BaseModel, Field
Expand All @@ -24,6 +25,7 @@ class HLAInput(BaseModel):
seq1: str
seq2: Optional[str]
locus: HLA_LOCUS
threshold: Optional[int] = None
hla_std_path: Optional[str] = None
hla_freq_path: Optional[str] = None

Expand Down Expand Up @@ -81,6 +83,22 @@ def hla_sequence(self) -> HLASequence:
)


class HLAMatchAdaptor(BaseModel):
    """
    An "adaptor" that presents an HLAMatchDetails in a form suitable for
    inclusion in an HLAResult: the mismatch count plus each mismatch rendered
    as a string.
    """

    # Value of the source HLAMatchDetails' mismatch_count.
    mismatch_count: int
    # str() of each entry in the source HLAMatchDetails' mismatches.
    mismatches: list[str]

    @classmethod
    def from_match_details(cls, match: HLAMatchDetails) -> "HLAMatchAdaptor":
        """
        Build an adaptor from an HLAMatchDetails.

        :param match: The match details to convert
        :return: An adaptor carrying the same count and stringified mismatches
        :rtype: HLAMatchAdaptor
        """
        rendered_mismatches: list[str] = list(map(str, match.mismatches))
        return cls(
            mismatch_count=match.mismatch_count,
            mismatches=rendered_mismatches,
        )


class HLAResult(BaseModel):
seqs: list[str] = Field(default_factory=list)
alleles_all: list[str] = Field(default_factory=list)
Expand All @@ -91,12 +109,20 @@ class HLAResult(BaseModel):
homozygous: bool = False
locus: HLA_LOCUS = "B"
alg_version: str = __version__
alleles_version: str = ""
alleles_last_updated: datetime = Field(default_factory=datetime.now)
b5701: bool = False
dist_b5701: Optional[int] = None
errors: list[str] = Field(default_factory=list)
all_mismatches: dict[str, HLAMatchAdaptor] = Field(default_factory=dict)

@classmethod
def build_from_interpretation(cls, interp: HLAInterpretation) -> "HLAResult":
def build_from_interpretation(
cls,
interp: HLAInterpretation,
alleles_version: str,
alleles_last_updated: datetime,
) -> "HLAResult":
aps: AllelePairs = interp.best_matching_allele_pairs()

# Pick one of the combined standards represented by what goes into
Expand Down Expand Up @@ -124,6 +150,12 @@ def build_from_interpretation(cls, interp: HLAInterpretation) -> "HLAResult":
ambiguous=aps.is_ambiguous(),
homozygous=aps.is_homozygous(),
locus=interp.locus,
alleles_version=alleles_version,
alleles_last_updated=alleles_last_updated,
b5701=interp.is_b5701(),
dist_b5701=interp.distance_from_b7501(),
all_mismatches={
cs.get_allele_pair_str(): HLAMatchAdaptor.from_match_details(match)
for cs, match in interp.matches.items()
},
)
11 changes: 7 additions & 4 deletions src/hla_algorithm/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,17 +98,20 @@ def get_allele_pair_str(self):

class HLAMismatch(BaseModel):
index: int
observed_base: str
expected_base: str
sequence_base: str
standard_base: str

def __str__(self):
return f"{self.index}:{self.observed_base}->{self.expected_base}"
return f"{self.index}:{self.sequence_base}->{self.standard_base}"


class HLAMatchDetails(BaseModel):
mismatch_count: int
mismatches: list[HLAMismatch]

@property
def mismatch_count(self) -> int:
return len(self.mismatches)


class HLAProteinPair(BaseModel):
# Allows this to be hashable:
Expand Down
Loading
Loading