In [None]:
#!/bin/bash 
#SBATCH -N 1 # number of nodes 
#SBATCH -n 16 # number of cores 
#SBATCH --job-name="intersectRohVar" 
#SBATCH --mail-type=END 
#SBATCH --partition=computeq 
#SBATCH --mail-user=hrclndnn@memphis.edu 
#SBATCH -a 0-30

# Job setup: the array range above (0-30) has one index per entry of the
# `files` list below, so every array task works on exactly one sample.

# Load the command-line tools, in the same order as before.
for tool in bcftools bedtools htslib; do
  module load "$tool"
done

# Working directory: submit/execute from within one of
#   maf 0.05: /home/hrclndnn/snpEff/Uarct/
#             (final version: /home/hrclndnn/snpEff/Uarct/maf05)
#   maf 0.06: originally /home/hrclndnn/snpEff/Uarct/het_hom
#             (final version: /home/hrclndnn/snpEff/Uarct/maf06)
#
# Inputs:
#   ROH bed files:        /home/hrclndnn/snpEff/Uarct/Uarct_ROHs/${file}.bed
#   SnpEff annotated VCF: /home/hrclndnn/snpEff/Uarct/Uarctos.ann.vcf.gz

# Sample IDs. The position in this list is the array task index, so the order
# must not change unless the "#SBATCH -a" range is updated with it.
files=(
  AK17500 AK17512 AK17578
  AKAdmiralty1 AKAdmiralty2 AKAdmiralty3
  AKBaranof1 AKBaranof2
  AKChichagof1 AKChichagof2 AKChichagof3 AKChichagof4 AKChichagof5
  AKDenali1 AKKenai
  APN2 GRE2
  GYE906 GYE922 GYE953
  JPHc1 JPHc2 JPHe1 JPHe2 JPHs1 JPHs2
  MTgnp OFS01 RF01 SJS01 SWEDalarna
)

# Sample handled by this array task.
# NOTE(review): nothing here checks that SLURM_ARRAY_TASK_ID is set or lies
# within 0-30; confirm what happens when the script is run outside an array
# job and consider adding a guard.
file="${files[${SLURM_ARRAY_TASK_ID}]}"



In [None]:
# Per-sample pipeline (steps 00-05): subset the annotated VCF to one sample,
# pull the variants carrying each annotation term of interest into pseudo-bed
# files, merge and de-duplicate them, then intersect them with the sample's
# ROH intervals.
#
# Requires: $file (sample ID), bcftools, bedtools, awk, sort.
# Writes:   ./maf55/${file}/... (intermediate files, relative to the cwd) and
#           /home/hrclndnn/snpEff/Uarct/maf55/${file}.maf55_intersect_rohVar.txt

# Stop at the first failing step instead of carrying on with empty or partial
# files; pipefail makes a failing bcftools inside a pipeline count as failure.
set -euo pipefail

# An empty sample name would make every path below collapse to ./maf55//...
: "${file:?sample name is empty - check the array index against the sample list}"

uarct_dir=/home/hrclndnn/snpEff/Uarct
ann_vcf="${uarct_dir}/Uarctos.ann.vcf.gz"
roh_bed="${uarct_dir}/Uarct_ROHs/${file}.bed"
out_dir="./maf55/${file}"
prefix="${out_dir}/${file}.maf55"
sample_vcf="${prefix}.vcf.gz"
all_variants="${prefix}_all_variants.out"
dedup_variants="${prefix}_allVar_dupRemoved.txt"

# Annotation terms to extract. stop_retained_variant used to be extracted and
# concatenated twice; the second copy was removed again by the sort -u in
# step 04, so listing it once leaves the de-duplicated result unchanged.
effects=(
  frameshift_variant
  missense_variant
  initiator_codon_variant
  stop_retained_variant
  rare_amino_acid_variant
  splice_acceptor_variant
  splice_donor_variant
  stop_lost
  5_prime_UTR_premature
  start_lost
  stop_gained
  synonymous_variant
  start_retained
)

#00 set up directories
# -p: also creates ./maf55 when missing and does not fail on a rerun.
mkdir -p "$out_dir"

#01 Create vcfs for each individual
#maf55
# NOTE(review): bcftools view -q accepts an allele-type suffix and, per its
# documentation, defaults to the non-reference allele frequency; confirm
# whether "0.05:minor" was intended here.
bcftools view -Oz -o "$sample_vcf" -s "$file" -q 0.05 "$ann_vcf"

#02 Create filtered bedfiles for each individual from annotations
#pull out scaffold name, start position, create end position, and include annotation field as unique ID for variants of interest
#maf55
# index() is a plain substring test on the whole record, which is what the
# former `grep TERM` did for these metacharacter-free terms; unlike grep it
# does not return a failure status when a term has no matches.
# NOTE(review): columns 2-3 are POS and POS+1 taken straight from the VCF
# ("pseudo-bed"). VCF positions are 1-based whereas BED intervals are 0-based
# half-open; confirm the ROH .bed files follow the same convention, otherwise
# variants sitting on an ROH boundary can be assigned to the wrong side.
effect_files=()
for effect in "${effects[@]}"; do
  effect_out="${prefix}_${effect}.out"
  bcftools view -H "$sample_vcf" \
    | awk -v effect="$effect" 'BEGIN {OFS="\t"} index($0, effect) {print $1,$2,$2+1,$8}' \
    > "$effect_out"
  effect_files+=("$effect_out")
done

#03 Concatenate individuals' variants into pseudo-bed file format
#maf55
cat "${effect_files[@]}" > "$all_variants"

#04 Remove duplicate lines & sort
#maf55
# A variant matching several terms appears once per term in step 03.
sort -u "$all_variants" > "$dedup_variants"

#05 find intersection of bed files
#output both where intersection occurs and variant annotation info
#maf55
# -b is the file written in step 04 (previously re-spelled as an absolute
# path, which only matched when the cwd was ${uarct_dir}).
bedtools intersect -wa -wb -a "$roh_bed" -b "$dedup_variants" \
  > "${uarct_dir}/maf55/${file}.maf55_intersect_rohVar.txt"


In [None]:
#!/bin/bash

# Per-sample annotation counts in the de-duplicated all-variant files
# (<sample>/<sample>.maf55_allVar_dupRemoved.txt, relative to the cwd).
#
# Output (stdout), for every sample: the sample ID on one line, followed by
# one count per line in the order of the `effects` list below.

files=(
  AK17500 AK17512 AK17578
  AKAdmiralty1 AKAdmiralty2 AKAdmiralty3
  AKBaranof1 AKBaranof2
  AKChichagof1 AKChichagof2 AKChichagof3 AKChichagof4 AKChichagof5
  AKDenali1 AKKenai
  APN2 GRE2
  GYE906 GYE922 GYE953
  JPHc1 JPHc2 JPHe1 JPHe2 JPHs1 JPHs2
  MTgnp OFS01 RF01 SJS01 SWEDalarna
)

# Annotation terms, in output row order.
# NOTE(review): stop_retained_variant is listed twice (rows 4 and 14) only to
# keep the original 14-row layout; it looks like a copy-paste duplicate.
# Drop the last entry once nothing downstream depends on row positions.
effects=(
  frameshift_variant
  missense_variant
  initiator_codon_variant
  stop_retained_variant
  rare_amino_acid_variant
  splice_acceptor_variant
  splice_donor_variant
  stop_lost
  5_prime_UTR_premature
  start_lost
  stop_gained
  synonymous_variant
  start_retained
  stop_retained_variant
)

#######################################
# Print one count per entry of `effects` for a single file.
# Globals:   effects (read)
# Arguments: $1 - path of the file to scan
# Outputs:   one line per term: number of occurrences of the term
#            (grep -o emits one line per match, so a record naming the
#            term several times contributes several counts); "NA" per
#            term plus a warning on stderr when the file is unreadable
# Returns:   0 on success, 1 when the file cannot be read
#######################################
count_effects() {
  local path=$1
  local effect

  if [[ ! -r "$path" ]]; then
    # Report NA rather than 0 so missing data is not mistaken for
    # "no variants"; the row layout is kept intact.
    printf 'warning: cannot read %s\n' "$path" >&2
    for effect in "${effects[@]}"; do
      printf 'NA\n'
    done
    return 1
  fi

  for effect in "${effects[@]}"; do
    grep -o -- "$effect" "$path" | wc -l
  done
}

for file in "${files[@]}"; do
  printf '%s\n' "$file"
  # Keep going when a sample is missing so the remaining samples are still
  # reported; the warning has already been written to stderr.
  count_effects "${file}/${file}.maf55_allVar_dupRemoved.txt" || true
done

In [None]:
#!/bin/bash

# Per-sample annotation counts in the ROH x variant intersection files
# (<sample>.maf55_intersect_rohVar.txt, relative to the cwd).
#
# Output (stdout), for every sample: the sample ID on one line, followed by
# one count per line in the order of the `effects` list below.

files=(
  AK17500 AK17512 AK17578
  AKAdmiralty1 AKAdmiralty2 AKAdmiralty3
  AKBaranof1 AKBaranof2
  AKChichagof1 AKChichagof2 AKChichagof3 AKChichagof4 AKChichagof5
  AKDenali1 AKKenai
  APN2 GRE2
  GYE906 GYE922 GYE953
  JPHc1 JPHc2 JPHe1 JPHe2 JPHs1 JPHs2
  MTgnp OFS01 RF01 SJS01 SWEDalarna
)

# Annotation terms, in output row order.
# NOTE(review): stop_retained_variant is listed twice (rows 4 and 14) only to
# keep the original 14-row layout; it looks like a copy-paste duplicate.
# Drop the last entry once nothing downstream depends on row positions.
effects=(
  frameshift_variant
  missense_variant
  initiator_codon_variant
  stop_retained_variant
  rare_amino_acid_variant
  splice_acceptor_variant
  splice_donor_variant
  stop_lost
  5_prime_UTR_premature
  start_lost
  stop_gained
  synonymous_variant
  start_retained
  stop_retained_variant
)

#######################################
# Print one count per entry of `effects` for a single file.
# Globals:   effects (read)
# Arguments: $1 - path of the file to scan
# Outputs:   one line per term: number of occurrences of the term
#            (grep -o emits one line per match, so a record naming the
#            term several times contributes several counts); "NA" per
#            term plus a warning on stderr when the file is unreadable
# Returns:   0 on success, 1 when the file cannot be read
#######################################
count_effects() {
  local path=$1
  local effect

  if [[ ! -r "$path" ]]; then
    # Report NA rather than 0 so missing data is not mistaken for
    # "no variants"; the row layout is kept intact.
    printf 'warning: cannot read %s\n' "$path" >&2
    for effect in "${effects[@]}"; do
      printf 'NA\n'
    done
    return 1
  fi

  for effect in "${effects[@]}"; do
    grep -o -- "$effect" "$path" | wc -l
  done
}

#count variants by categories in intersections
for file in "${files[@]}"; do
  printf '%s\n' "$file"
  # Keep going when a sample is missing so the remaining samples are still
  # reported; the warning has already been written to stderr.
  count_effects "${file}.maf55_intersect_rohVar.txt" || true
done