Skip to content

Commit

Permalink
rectifying vcf filtering by maf
Browse files Browse the repository at this point in the history
  • Loading branch information
jeffersonfparil committed Nov 11, 2023
1 parent 213d4f4 commit 72ac003
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 14 deletions.
3 changes: 2 additions & 1 deletion src/base/vcf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,8 @@ impl Filter for VcfLine {
while j < m {
q = 0.0;
for i in 0..n {
q += allele_frequencies.matrix[(i, j)] * filter_stats.pool_sizes[i];
q += allele_frequencies.matrix[(i, j)] * filter_stats.pool_sizes[i]
/ filter_stats.pool_sizes.iter().sum::<f64>();
// We've made sure the pool_sizes sum up to one in phen.rs
}
if (q < filter_stats.min_allele_frequency)
Expand Down
2 changes: 1 addition & 1 deletion src/imputation/adaptive_ld_knn_imputation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ impl GenotypesAndPhenotypes {
} // Impute across pools with missing data
} // Impute if we have missing data
} // Iterate across alleles across loci within the window
// println!("window_freqs:\n{:?}", &window_freqs);
// println!("window_freqs:\n{:?}", &window_freqs);
}, // Parallel processing across windows
);
// }
Expand Down
4 changes: 2 additions & 2 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ mod base;
mod gp;
mod gwas;
mod imputation;
mod simulation;
mod popgen;
mod simulation;
mod tables;
use base::{ChunkyReadAnalyseWrite, CrossValidation, LoadAll, Parse, SaveCsv};
use gp::{
Expand All @@ -18,8 +18,8 @@ use gp::{
};
use gwas::*;
use imputation::*;

Check warning on line 20 in src/main.rs

View workflow job for this annotation

GitHub Actions / Compile

unused import: `imputation::*`

Check warning on line 20 in src/main.rs

View workflow job for this annotation

GitHub Actions / Test (ubuntu-latest, stable)

unused import: `imputation::*`

Check warning on line 20 in src/main.rs

View workflow job for this annotation

GitHub Actions / Test (ubuntu-latest, stable)

unused import: `imputation::*`

Check warning on line 20 in src/main.rs

View workflow job for this annotation

GitHub Actions / Test (windows-latest, stable)

unused import: `imputation::*`

Check warning on line 20 in src/main.rs

View workflow job for this annotation

GitHub Actions / Test (windows-latest, stable)

unused import: `imputation::*`
use simulation::*;
use popgen::*;
use simulation::*;

Check warning on line 22 in src/main.rs

View workflow job for this annotation

GitHub Actions / Compile

unused import: `simulation::*`

Check warning on line 22 in src/main.rs

View workflow job for this annotation

GitHub Actions / Test (ubuntu-latest, stable)

unused import: `simulation::*`

Check warning on line 22 in src/main.rs

View workflow job for this annotation

GitHub Actions / Test (ubuntu-latest, stable)

unused import: `simulation::*`

Check warning on line 22 in src/main.rs

View workflow job for this annotation

GitHub Actions / Test (windows-latest, stable)

unused import: `simulation::*`

Check warning on line 22 in src/main.rs

View workflow job for this annotation

GitHub Actions / Test (windows-latest, stable)

unused import: `simulation::*`

#[derive(Parser, Debug)]
#[clap(
Expand Down
28 changes: 18 additions & 10 deletions src/simulation/simulate_genotypes.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,29 @@
use crate::base::*;

Check warning on line 1 in src/simulation/simulate_genotypes.rs

View workflow job for this annotation

GitHub Actions / Compile

unused import: `crate::base`

Check warning on line 1 in src/simulation/simulate_genotypes.rs

View workflow job for this annotation

GitHub Actions / Test (ubuntu-latest, stable)

unused import: `crate::base`

Check warning on line 1 in src/simulation/simulate_genotypes.rs

View workflow job for this annotation

GitHub Actions / Test (ubuntu-latest, stable)

unused import: `crate::base`

Check warning on line 1 in src/simulation/simulate_genotypes.rs

View workflow job for this annotation

GitHub Actions / Test (windows-latest, stable)

unused import: `crate::base`

Check warning on line 1 in src/simulation/simulate_genotypes.rs

View workflow job for this annotation

GitHub Actions / Test (windows-latest, stable)

unused import: `crate::base`
use ndarray::prelude::*;
use std::io;
use statrs::distribution::{Uniform, MultivariateNormal, Continuous};
use rand::distributions::Distribution;
use rand::{seq::IteratorRandom, thread_rng};

Check warning on line 4 in src/simulation/simulate_genotypes.rs

View workflow job for this annotation

GitHub Actions / Compile

unused import: `thread_rng`

Check warning on line 4 in src/simulation/simulate_genotypes.rs

View workflow job for this annotation

GitHub Actions / Test (ubuntu-latest, stable)

unused import: `thread_rng`

Check warning on line 4 in src/simulation/simulate_genotypes.rs

View workflow job for this annotation

GitHub Actions / Test (ubuntu-latest, stable)

unused import: `thread_rng`

Check warning on line 4 in src/simulation/simulate_genotypes.rs

View workflow job for this annotation

GitHub Actions / Test (windows-latest, stable)

unused import: `thread_rng`

Check warning on line 4 in src/simulation/simulate_genotypes.rs

View workflow job for this annotation

GitHub Actions / Test (windows-latest, stable)

unused import: `thread_rng`
use statrs::distribution::{Continuous, MultivariateNormal, Uniform};

Check warning on line 5 in src/simulation/simulate_genotypes.rs

View workflow job for this annotation

GitHub Actions / Compile

unused import: `Continuous`

Check warning on line 5 in src/simulation/simulate_genotypes.rs

View workflow job for this annotation

GitHub Actions / Test (ubuntu-latest, stable)

unused import: `Continuous`

Check warning on line 5 in src/simulation/simulate_genotypes.rs

View workflow job for this annotation

GitHub Actions / Test (ubuntu-latest, stable)

unused import: `Continuous`

Check warning on line 5 in src/simulation/simulate_genotypes.rs

View workflow job for this annotation

GitHub Actions / Test (windows-latest, stable)

unused import: `Continuous`

Check warning on line 5 in src/simulation/simulate_genotypes.rs

View workflow job for this annotation

GitHub Actions / Test (windows-latest, stable)

unused import: `Continuous`
use std::io;

// Simulate genotypes with some linkage disequilibrium

pub fn simulate_genotypes(n: usize, p: usize, n_chr: usize, max_bp: usize, r2_50_perc_bp: usize) -> io::Result<Array2<f64>> {
pub fn simulate_genotypes(
n: usize,
p: usize,
n_chr: usize,
max_bp: usize,
r2_50_perc_bp: usize,
) -> io::Result<Array2<f64>> {
// Define approximately equally sized chromosomes and coordinates of the characterised loci or markers or SNPs
let mut chromosome_sizes = vec![p/n_chr; n_chr];
let mut chromosome_sizes = vec![p / n_chr; n_chr];
let p_ = chromosome_sizes.iter().fold(0, |sum, &x| sum + x);
// If the we have less or more than the required number of loci we add or subtract loci from the last chromosome
if p_ < p {
chromosome_sizes[n_chr-1] += p - p_;
chromosome_sizes[n_chr - 1] += p - p_;
} else if p_ > p {
chromosome_sizes[n_chr-1] -= p_ - p;
chromosome_sizes[n_chr - 1] -= p_ - p;
}
let max_bp_per_chr = max_bp/n_chr;
let max_bp_per_chr = max_bp / n_chr;
println!("Chromosome sizes: {:?}", chromosome_sizes);
// Sample uniformly distributed loci per chromosome
let dist_unif = Uniform::new(0.0, max_bp_per_chr as f64).unwrap();
Expand All @@ -27,7 +33,10 @@ pub fn simulate_genotypes(n: usize, p: usize, n_chr: usize, max_bp: usize, r2_50
let mut chromosomes: Array1<String> = Array1::from_elem(p, "".to_owned());
let mut positions: Array1<usize> = Array1::from_elem(p, 0);
let mut alleles: Array1<String> = Array1::from_elem(p, "".to_owned());
let atcgd = vec!["A", "T", "C", "G", "D"].iter().map(|&x| x.to_owned()).collect::<Vec<String>>();
let atcgd = vec!["A", "T", "C", "G", "D"]
.iter()
.map(|&x| x.to_owned())
.collect::<Vec<String>>();
for i in 0..n_chr {
// Total number of sites we want to sample in the chromosome
let s = chromosome_sizes[i];
Expand All @@ -50,8 +59,7 @@ pub fn simulate_genotypes(n: usize, p: usize, n_chr: usize, max_bp: usize, r2_50

let mvn = MultivariateNormal::new(vec![0., 0.], vec![1., 0., 0., 1.]).unwrap();

Check warning on line 60 in src/simulation/simulate_genotypes.rs

View workflow job for this annotation

GitHub Actions / Compile

unused variable: `mvn`

Check warning on line 60 in src/simulation/simulate_genotypes.rs

View workflow job for this annotation

GitHub Actions / Test (ubuntu-latest, stable)

unused variable: `mvn`

Check warning on line 60 in src/simulation/simulate_genotypes.rs

View workflow job for this annotation

GitHub Actions / Test (ubuntu-latest, stable)

unused variable: `mvn`

Check warning on line 60 in src/simulation/simulate_genotypes.rs

View workflow job for this annotation

GitHub Actions / Test (windows-latest, stable)

unused variable: `mvn`

Check warning on line 60 in src/simulation/simulate_genotypes.rs

View workflow job for this annotation

GitHub Actions / Test (windows-latest, stable)

unused variable: `mvn`


return Ok(Array2::from_elem((1, 1), f64::NAN))
return Ok(Array2::from_elem((1, 1), f64::NAN));
}

////////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down

0 comments on commit 72ac003

Please sign in to comment.