Skip to content

Commit

Permalink
✨ scrape descriptions on encode (#35)
Browse files Browse the repository at this point in the history
* 🚧 add description scraper

📝 fix borrows

🐛 fix quote injection

* 🔧 use hdr description when default

* 🔧 user input trumps all;
🐛 public func for default checks
  • Loading branch information
dmiller15 committed Feb 2, 2024
1 parent 692e845 commit 6b659aa
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 3 deletions.
24 changes: 23 additions & 1 deletion src/commands/encoder_cmd.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use bincode::Options;
use echtvar_lib::{echtvar::bstrip_chr, fields, kmer16, var32, zigzag};
use rust_htslib::bcf::header::{TagLength, TagType};
use rust_htslib::bcf::header::{TagLength, TagType, HeaderRecord};
use rust_htslib::bcf::record::{Buffer, Record};
use rust_htslib::bcf::{Read as BCFRead, Reader};
use stream_vbyte::{encode::encode, x86::Sse41};
Expand Down Expand Up @@ -160,6 +160,25 @@ fn is_sorted<T: std::cmp::PartialOrd>(data: &Vec<T>) -> bool {
true
}

/// Returns the `Description` of the INFO header record whose `ID` equals `id`.
///
/// * `hrecs` - header records to search; only `HeaderRecord::Info` entries are considered.
/// * `id` - the INFO field identifier to look for.
/// * `default` - value used when the matching record carries no `Description` key.
///
/// Leading and trailing `"` characters are stripped from the returned text
/// (this applies to `default` as well when it is the value that ends up being used).
///
/// # Panics
///
/// Panics when zero, or more than one, INFO record has an `ID` equal to `id`.
fn hdr_info_id2description(
    hrecs: Vec<HeaderRecord>,
    id: &String,
    default: &std::string::String,
) -> std::string::String {
    // Keep only the key/value maps of INFO records whose ID matches. `get` is
    // used instead of indexing so that an INFO record without an "ID" key is
    // simply skipped rather than aborting with an unrelated index panic.
    let mut matching = hrecs.iter().filter_map(|rec| match rec {
        HeaderRecord::Info { values, .. } if values.get("ID") == Some(id) => Some(values),
        _ => None,
    });

    // Exactly one match is required: pulling two items from the iterator is
    // enough to tell "none", "one" and "several" apart.
    let values = match (matching.next(), matching.next()) {
        (Some(values), None) => values,
        _ => panic!("Field {} is either not present in the header or present multiple times!", id),
    };

    // Single lookup for the description, falling back to the caller's default.
    values
        .get("Description")
        .unwrap_or(default)
        .trim_matches('"')
        .to_string()
}

pub fn encoder_main(vpaths: Vec<&str>, opath: &str, jpath: &str) {
let zpath = std::path::Path::new(opath);
let jpath = std::path::Path::new(jpath);
Expand Down Expand Up @@ -222,6 +241,9 @@ pub fn encoder_main(vpaths: Vec<&str>, opath: &str, jpath: &str) {
TagLength::Genotypes => f.number = "G".to_string(),
TagLength::Variable => f.number = ".".to_string(),
};
if f.description == fields::default_description_string() {
f.description = hdr_info_id2description(header.header_records(), &f.field, &f.description);
};
}

let zfile = std::fs::File::create(zpath).unwrap();
Expand Down
2 changes: 1 addition & 1 deletion src/lib/echtvar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ impl EchtVars {
} else {
"Float"
},
if &e.description.to_string() == "added by echtvar" {
if e.description.to_string() == fields::default_description_string() {
format!("added by echtvar from {}", path)
} else {
format!("added by echtvar {}", e.description.to_string())
Expand Down
2 changes: 1 addition & 1 deletion src/lib/fields.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ fn default_missing_value() -> i32 {
/// Builds an owned copy of the literal `"MISSING"`.
fn default_missing_string() -> std::string::String {
    std::string::String::from("MISSING")
}
fn default_description_string() -> std::string::String {
/// Builds an owned copy of the literal `"added by echtvar"`.
pub fn default_description_string() -> std::string::String {
    std::string::String::from("added by echtvar")
}
fn default_multiplier() -> u32 {
Expand Down

0 comments on commit 6b659aa

Please sign in to comment.