# Variant calling & filtering with AvePla samples

In [None]:
# Load the bcftools environment module for the bcftools commands used in this notebook.
# No version is pinned here, so the module system's default bcftools is used.
module load bcftools

## Calling all invariant+variant sites 
*bcftools* mpileup ... | *bcftools* call ...

In [None]:
# Paths for the call set: working directory, list of input BAMs, and output VCF.
baseDIR=~/snap_hap_repHZ/SnpCalls
bamlist=${baseDIR}/AvePla.bams.list
outVCF=${baseDIR}/AvePla.vcf.gz

# Submit the calling script to SLURM; it receives the base directory, the BAM
# list and the output VCF as positional arguments, in that order.
sbatch --job-name=SnpCallingJob ./SnpCalling.sbatch.sh ${baseDIR} ${bamlist} ${outVCF}
# Recorded timing (presumably elapsed run time of the job): 11:16:30

In [None]:
# Write one tab-separated row per site of AvePla.vcf.gz with CHROM, POS, QUAL and
# the INFO tags DP, MQ, AN and AC, for inspecting their distributions before filtering.
siteFields='%CHROM\t%POS\t%QUAL\t%INFO/DP\t%INFO/MQ\t%INFO/AN\t%INFO/AC\n'
bcftools query --format "$siteFields" AvePla.vcf.gz > AvePla.vcf.info

## Filtering variants

In [None]:
baseDIR=~/snap_hap_repHZ/SnpCalls

# NOTE(review): all three steps submit the same ./SnpFiltering.sbatch.sh with only
# (input VCF, output VCF) as arguments, so the filter that is actually applied is
# whatever the script contains at submission time -- confirm the script matches the
# step before submitting. Each step reads the previous step's output, so wait for
# the previous job to finish before submitting the next one.

# Step 1: Remove SNPs within 5bp of INDELs and keep only bi-allelic variant sites
# The paths are set BEFORE anything uses them. Previously the pipeline below ran
# ahead of these assignments and picked up stale values from the calling cell
# (empty $inVCF, $outVCF pointing at the raw AvePla.vcf.gz).
inVCF=$baseDIR/AvePla.vcf.gz
outVCF=$baseDIR/AvePla.biSNPs.vcf.gz
# Command for this step, kept for reference only so that it does not run in the
# notebook in addition to the submitted job and write the same output twice.
# NOTE(review): presumably this is what SnpFiltering.sbatch.sh runs for Step 1 -- confirm.
#   time bcftools filter --threads 4 $inVCF --SnpGap 5 | \
#        bcftools view --threads 4 - -Oz -o $outVCF -m2 -M2 -v snps -e "AC==0 || AC==AN" --write-index
sbatch -J SnpFilteringJob1 ./SnpFiltering.sbatch.sh "$inVCF" "$outVCF"
#263m40.339s

# Step 2: Remove sites based on depth, mapping quality, and QUAL
inVCF=$baseDIR/AvePla.biSNPs.vcf.gz
outVCF=$baseDIR/AvePla.biSNPs.filtered.vcf.gz
sbatch -J Filter ./SnpFiltering.sbatch.sh "$inVCF" "$outVCF"
# 12 mins

# Step 3: Remove sites that have a missing fraction > 0.8
inVCF=$baseDIR/AvePla.biSNPs.filtered.vcf.gz
outVCF=$baseDIR/AvePla.biSNPs.filtered.missLT80.vcf.gz
sbatch -J Filter3 ./SnpFiltering.sbatch.sh "$inVCF" "$outVCF"
# 20 mins

In [None]:
# Write one tab-separated row per site of the filtered call set with CHROM, POS,
# QUAL and the INFO tags DP, MQ, AN and AC.
# The input is the Step 2 output, AvePla.biSNPs.filtered.vcf.gz. The name used
# before, AvePla.biSNPs.filtered.bcf, is not written by any step visible in this
# notebook. NOTE(review): confirm no separate BCF conversion was intended.
bcftools query -f '%CHROM\t%POS\t%QUAL\t%INFO/DP\t%INFO/MQ\t%INFO/AN\t%INFO/AC\n' ./AvePla.biSNPs.filtered.vcf.gz > AvePla.biSNPs.filtered.vcf.info