Skip to content

Commit

Permalink
feat: move clinvar database code (import etc.) to annonars (#86)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe committed Jun 8, 2023
1 parent bb27686 commit 457be46
Show file tree
Hide file tree
Showing 7 changed files with 23 additions and 535 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ noodles-csi = "0.19"
noodles-fasta = "0.23"
noodles-bgzf = "0.22"
noodles-tabix = "0.22"
annonars = "0.7"
annonars = "0.8"

[build-dependencies]
prost-build = "0.11.9"
Expand Down
21 changes: 13 additions & 8 deletions src/annotate/seqvars/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ pub mod binning;
pub mod csq;
pub mod provider;

use annonars::clinvar_minimal;
use annonars::common::cli::is_canonical;
use annonars::common::keys;
use annonars::freqs::cli::import::reading::guess_assembly;
Expand Down Expand Up @@ -48,8 +49,6 @@ use thousands::Separable;
use crate::annotate::seqvars::csq::{ConsequencePredictor, VcfVariant};
use crate::annotate::seqvars::provider::MehariProvider;
use crate::common::GenomeRelease;
use crate::db::create::seqvar_clinvar::serialize::Record as ClinvarRecord;
use crate::db::create::seqvar_clinvar::Pathogenicity;

use crate::db::create::txs::data::TxSeqDatabase;
use crate::ped::{PedigreeByName, Sex};
Expand Down Expand Up @@ -389,14 +388,19 @@ fn annotate_record_clinvar<T>(
where
T: ThreadMode,
{
if let Some(clinvar_anno) = db.get_cf(cf, key)? {
let clinvar_record: ClinvarRecord = bincode::deserialize(&clinvar_anno)?;
if let Some(raw_value) = db.get_cf(cf, key)? {
let clinvar_record =
clinvar_minimal::pbs::Record::decode(&mut std::io::Cursor::new(&raw_value))?;

let ClinvarRecord {
let clinvar_minimal::pbs::Record {
summary_clinvar_pathogenicity,
vcv,
..
} = clinvar_record;
let summary_clinvar_pathogenicity: Vec<_> = summary_clinvar_pathogenicity
.into_iter()
.map(|i: i32| -> clinvar_minimal::cli::reading::Pathogenicity { i.into() })
.collect();

vcf_record.info_mut().insert(
field::Key::from_str("clinvar_patho").unwrap(),
Expand Down Expand Up @@ -1079,8 +1083,9 @@ impl VarFishSeqvarTsvWriter {
.unwrap_or_default()
.map(|v| match v {
field::Value::String(value) => {
Pathogenicity::from_str(value).unwrap_or(Pathogenicity::UncertainSignificance)
>= Pathogenicity::LikelyPathogenic
clinvar_minimal::cli::reading::Pathogenicity::from_str(value).unwrap_or(
clinvar_minimal::cli::reading::Pathogenicity::UncertainSignificance,
) >= clinvar_minimal::cli::reading::Pathogenicity::LikelyPathogenic
}
_ => panic!("Unexpected value type for INFO/clinvar_patho"),
})
Expand Down Expand Up @@ -1485,7 +1490,7 @@ fn run_with_writer(writer: &mut dyn AnnotatedVcfWriter, args: &Args) -> Result<(
// Annotate with variant effect.
if let Some(ann_fields) = predictor.predict(&VcfVariant {
chromosome: chrom,
position: pos as i32,
position: pos,
reference,
alternative,
})? {
Expand Down
25 changes: 9 additions & 16 deletions src/annotate/strucvars/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ pub mod vcf_header {
};

for sequence in &assembly_info.sequences {
if is_canonical(&sequence.name.as_ref()) {
if is_canonical(sequence.name.as_ref()) {
let mut contig = Map::<Contig>::try_from(vec![
(String::from("length"), format!("{}", sequence.length)),
(String::from("assembly"), assembly_name.clone()),
Expand Down Expand Up @@ -2402,7 +2402,7 @@ mod conv {

/// Helper function to handle processing of "GT" field.
fn process_gt(
entries: &mut Vec<GenotypeInfo>,
entries: &mut [GenotypeInfo],
sample_no: usize,
gt: &String,
pedigree: &PedigreeByName,
Expand All @@ -2413,23 +2413,20 @@ mod conv {
let sex = pedigree
.individuals
.get(&entries[sample_no].name)
.expect(&format!(
"sample must be in pedigree: {:?}",
&entries[sample_no].name
))
.unwrap_or_else(|| panic!("sample must be in pedigree: {:?}", &entries[sample_no].name))
.sex;
let is_chr_x = tsv_record.chromosome.contains('X');
let is_chr_y = tsv_record.chromosome.contains('Y');
let has_ref = entries[sample_no]
.gt
.as_ref()
.expect("just set")
.contains("0");
.contains('0');
let has_alt = entries[sample_no]
.gt
.as_ref()
.expect("just set")
.contains("1");
.contains('1');

match (is_chr_x, is_chr_y, sex, has_ref, has_alt) {
// autosomal chromosomes; treat "./." as missing, "./1" (and similar) as hom,
Expand Down Expand Up @@ -2458,9 +2455,7 @@ mod conv {
// count as hemi alt. on male chrY
tsv_record.num_hemi_alt += 1;
}
(false, true, Sex::Female, _, _) => {
(/* do not count; variant in female */)
}
(false, true, Sex::Female, _, _) => {}
(false, true, _, _, _) => (/* do not count; sex missing */),
// conflicting chromosome
(true, true, _, _, _) => panic!("cannot be both chrX and chrY"),
Expand All @@ -2475,14 +2470,14 @@ mod conv {
let is_chr_x = tsv_record.chromosome.contains('X');
let is_chr_y = tsv_record.chromosome.contains('Y');
for entry in &tsv_record.genotype.entries {
let has_ref = entry.gt.as_ref().map(|s| s.contains("0")).unwrap_or(false);
let has_alt = entry.gt.as_ref().map(|s| s.contains("1")).unwrap_or(false);
let has_ref = entry.gt.as_ref().map(|s| s.contains('0')).unwrap_or(false);
let has_alt = entry.gt.as_ref().map(|s| s.contains('1')).unwrap_or(false);
if !has_ref && !has_alt {
// This entry was previously removed, we can correct it now (if FORMAT/CN was set).
let sex = pedigree
.individuals
.get(&entry.name)
.expect(&format!("sample must be in pedigree: {:?}", &entry.name))
.unwrap_or_else(|| panic!("sample must be in pedigree: {:?}", &entry.name))
.sex;
let expected_cn = match (sex, is_chr_x, is_chr_y) {
(_, false, false) => Some(2),
Expand Down Expand Up @@ -3136,8 +3131,6 @@ mod test {
},
sex: if sample == "mother" {
Sex::Female
} else if sample == "father" {
Sex::Male
} else {
Sex::Male
},
Expand Down
1 change: 0 additions & 1 deletion src/db/create/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
//! Creation of mehari internal databases.

pub mod seqvar_clinvar;
pub mod txs;
Loading

0 comments on commit 457be46

Please sign in to comment.