# Run genomic scans of Fst, pi and Dxy
Samples: Avellanet (Ave) and Planoles (Pla).

In [None]:
# Session setup: load the cluster modules, extend PATH with the two
# genomics_general directories (later cells call parseVCF.py and
# popgenWindows.py by bare name), and move into the scan directory.
module load bcftools vcftools
export PATH="$PATH:$HOME/genomics_general:$HOME/genomics_general/VCF_processing"
baseDIR=~/snap_hap_repHZ/genome_scans
# Report a missing working directory instead of silently staying where we are.
# No `exit` here: in a notebook kernel that would end the whole session.
cd "$baseDIR" || echo "ERROR: cannot cd to $baseDIR" >&2

## Window estimates with genomics_general

In [None]:
## Convert all Stitch-Vcfs to geno format
# For each chromosome Chr1..Chr8, run parseVCF.py on the Stitch SNP-only VCF
# and write the result to $baseDIR/geno/<chrom>.AvePla.stitch.SnpOnly.final.geno.
baseDIR=~/snap_hap_repHZ/genome_scans
# The -o target lives in $baseDIR/geno; create it up front so the first run
# on a fresh checkout does not fail on a missing directory.
mkdir -p "$baseDIR/geno"
for chrom in Chr{1..8}
do
    vcf=~/snap_hap_repHZ/Stitch/$chrom/$chrom.AvePla.stitch.SnpOnly.final.vcf.gz
    geno=$baseDIR/geno/$chrom.AvePla.stitch.SnpOnly.final.geno

    # Progress banner: chromosome, input basename, output basename.
    printf '%s\n%s\n%s\n----\n\n' "$chrom" "${vcf##*/}" "${geno##*/}"
    # Only claim success when the conversion actually returned 0.
    if time parseVCF.py -i "$vcf" -o "$geno" --ploidy 2 --addRefTrack
    then
        printf 'Done\n\n'
    else
        printf 'ERROR: parseVCF.py failed for %s\n\n' "$chrom" >&2
    fi
done

In [None]:
## Scans for AvePla 10kb wins 10kb steps
# Runs popgenWindows.py once per chromosome (Chr1..Chr8) on the .geno files,
# comparing populations Ave and Pla in 10 kb coordinate windows, and writes
# one gzipped CSV per chromosome into $baseDIR/AvePla.
baseDIR=~/snap_hap_repHZ/genome_scans
# All paths below are absolute, so a failed cd is reported but not fatal.
cd "$baseDIR" || echo "ERROR: cannot cd to $baseDIR" >&2

## Window settings (the same for every chromosome, so set once)
windType=coordinate
windSize=10000
stepSize=10000

## Population Data
pops=AvePla
popsFile=$baseDIR/samples/AvePla_pools.txt
pop1=Ave
pop2=Pla

## Run settings
threads=10
genoFormat=phased

## Output directory: -p creates missing parents and is a no-op if it exists
mkdir -p "$baseDIR/$pops"

for chrom in Chr{1..8}
do
    ## chromRegion Data
    printf '\n%s\n\n\n' "$chrom"
    # chrom=Chr1

    ## Input/Output
    geno=$baseDIR/geno/$chrom.AvePla.stitch.SnpOnly.final.geno
    output=$baseDIR/$pops/${pop1}-${pop2}_${chrom}_byPos_w${windSize}_s${stepSize}.csv.gz

    ## Run genomic scan
    time popgenWindows.py   -g "$geno" -o "$output" -f "$genoFormat" \
                            --windType "$windType" -w "$windSize" -s "$stepSize" \
                            -p "$pop1" -p "$pop2" --popsFile "$popsFile" --ploidy 2 \
                            --writeFailedWindow --addWindowID -T "$threads"
done

In [None]:
## Scans for AvePla-FRYe 10kb wins 10kb steps
# Runs popgenWindows.py once per chromosome (Chr1..Chr8) on the .geno files,
# comparing the four populations AveFR, AveY, PlaFR and PlaY in 10 kb
# coordinate windows; one gzipped CSV per chromosome goes to
# $baseDIR/AvePla-FRYe.
baseDIR=~/snap_hap_repHZ/genome_scans
# All paths below are absolute, so a failed cd is reported but not fatal.
cd "$baseDIR" || echo "ERROR: cannot cd to $baseDIR" >&2

## Window settings (the same for every chromosome, so set once)
windType=coordinate
windSize=10000
stepSize=10000

## Population Data
pops=AvePla-FRYe
# NOTE(review): the 3 kb scans in this notebook read
# AvePla-FRYeHybrid_pools.txt instead -- confirm which pools file is intended.
popsFile=$baseDIR/samples/AvePla-FRYe_pools.txt
pop1=AveFR
pop2=AveY
pop3=PlaFR
pop4=PlaY

## Run settings
threads=10
genoFormat=phased

## Output directory: -p creates missing parents and is a no-op if it exists
mkdir -p "$baseDIR/$pops"

for chrom in Chr{1..8}
do
    ## chromRegion Data
    printf '\n%s\n\n\n' "$chrom"
    # chrom=Chr1

    ## Input/Output
    geno=$baseDIR/geno/$chrom.AvePla.stitch.SnpOnly.final.geno
    output=$baseDIR/$pops/${pop1}-${pop2}-${pop3}-${pop4}_${chrom}_byPos_w${windSize}_s${stepSize}.csv.gz

    ## Run genomic scan
    time popgenWindows.py   -g "$geno" -o "$output" -f "$genoFormat" \
                            --windType "$windType" -w "$windSize" -s "$stepSize" \
                            -p "$pop1" -p "$pop2" -p "$pop3" -p "$pop4" --popsFile "$popsFile" --ploidy 2 \
                            --writeFailedWindow --addWindowID -T "$threads"
done

In [None]:
## Scans for AvePla-FRYe 3kb wins 300b steps
# Runs popgenWindows.py once per chromosome (Chr1..Chr8) on the .geno files,
# comparing AveFR, AveY, PlaFR and PlaY in 3 kb coordinate windows that
# advance by 300 bp; one gzipped CSV per chromosome goes to
# $baseDIR/AvePla-FRYe.
baseDIR=~/snap_hap_repHZ/genome_scans
# All paths below are absolute, so a failed cd is reported but not fatal.
cd "$baseDIR" || echo "ERROR: cannot cd to $baseDIR" >&2

## Window settings (the same for every chromosome, so set once)
windType=coordinate
windSize=3000
stepSize=300

## Population Data
pops=AvePla-FRYe
popsFile=$baseDIR/samples/AvePla-FRYeHybrid_pools.txt
pop1=AveFR
pop2=AveY
pop3=PlaFR
pop4=PlaY

## Run settings
threads=8
genoFormat=phased

## Output directory: -p creates missing parents and is a no-op if it exists
mkdir -p "$baseDIR/$pops"

for chrom in Chr{1..8}
do
    ## chromRegion Data
    printf '\n%s\n\n\n' "$chrom"
    # chrom=Chr1

    ## Input/Output
    geno=$baseDIR/geno/$chrom.AvePla.stitch.SnpOnly.final.geno
    output=$baseDIR/$pops/${pop1}-${pop2}-${pop3}-${pop4}_${chrom}_byPos_w${windSize}_s${stepSize}.csv.gz

    ## Run genomic scan
    time popgenWindows.py   -g "$geno" -o "$output" -f "$genoFormat" \
                            --windType "$windType" -w "$windSize" -s "$stepSize" \
                            -p "$pop1" -p "$pop2" -p "$pop3" -p "$pop4" --popsFile "$popsFile" --ploidy 2 \
                            --writeFailedWindow --addWindowID -T "$threads"
done

In [None]:
## Scans for AvePla-FRYe 3kb wins 3kb steps
# Runs popgenWindows.py once per chromosome (Chr1..Chr8) on the .geno files,
# comparing AveFR, AveY, PlaFR and PlaY in 3 kb coordinate windows that
# advance by 3 kb; one gzipped CSV per chromosome goes to
# $baseDIR/AvePla-FRYe.
baseDIR=~/snap_hap_repHZ/genome_scans
# All paths below are absolute, so a failed cd is reported but not fatal.
cd "$baseDIR" || echo "ERROR: cannot cd to $baseDIR" >&2

## Window settings (the same for every chromosome, so set once)
windType=coordinate
windSize=3000
stepSize=3000

## Population Data
pops=AvePla-FRYe
popsFile=$baseDIR/samples/AvePla-FRYeHybrid_pools.txt
pop1=AveFR
pop2=AveY
pop3=PlaFR
pop4=PlaY

## Run settings
threads=8
genoFormat=phased

## Output directory: -p creates missing parents and is a no-op if it exists
mkdir -p "$baseDIR/$pops"

for chrom in Chr{1..8}
do
    ## chromRegion Data
    printf '\n%s\n\n\n' "$chrom"
    # chrom=Chr1

    ## Input/Output
    geno=$baseDIR/geno/$chrom.AvePla.stitch.SnpOnly.final.geno
    output=$baseDIR/$pops/${pop1}-${pop2}-${pop3}-${pop4}_${chrom}_byPos_w${windSize}_s${stepSize}.csv.gz

    ## Run genomic scan
    time popgenWindows.py   -g "$geno" -o "$output" -f "$genoFormat" \
                            --windType "$windType" -w "$windSize" -s "$stepSize" \
                            -p "$pop1" -p "$pop2" -p "$pop3" -p "$pop4" --popsFile "$popsFile" --ploidy 2 \
                            --writeFailedWindow --addWindowID -T "$threads"
done

## Fst per site (vcftools)

In [None]:
# Fst
# Per-site Weir & Cockerham Fst with vcftools for seven population pairs,
# all computed from the same merged Stitch VCF. Each run writes to the
# --out prefix $baseDIR/<dir>/<popA>-<popB>.site.
baseDIR=~/snap_hap_repHZ/genome_scans

## Populations
pop_Ave=~/snap_hap_repHZ/genome_scans/samples/samples_Ave.txt
pop_AveY=~/snap_hap_repHZ/genome_scans/samples/samples_AveY.txt
pop_AveFR=~/snap_hap_repHZ/genome_scans/samples/samples_AveFR.txt
pop_Pla=~/snap_hap_repHZ/genome_scans/samples/samples_Pla.txt
pop_PlaY=~/snap_hap_repHZ/genome_scans/samples/samples_PlaY.txt
pop_PlaFR=~/snap_hap_repHZ/genome_scans/samples/samples_PlaFR.txt

## Vcf
inVcf=~/snap_hap_repHZ/Stitch/AvePla.all.stitch.SnpOnly.final.sorted.vcf.gz

#######################################
# Run one pairwise per-site Fst comparison.
# Globals:   baseDIR, inVcf, pop_<name> (read); output (written)
# Arguments: $1 - output sub-directory under $baseDIR
#            $2 - first population name (its list is $pop_<name>)
#            $3 - second population name
# Returns:   status of mkdir or vcftools
#######################################
site_fst() {
    local outDir=$1 popA=$2 popB=$3
    # Indirect references to the pop_<name> variables defined above.
    local listA=pop_$popA listB=pop_$popB

    output=$baseDIR/$outDir/${popA}-${popB}.site
    # The target directory may not exist if the window scans were never run.
    mkdir -p "$baseDIR/$outDir" || return
    vcftools    --gzvcf "$inVcf" \
                --weir-fst-pop "${!listA}" \
                --weir-fst-pop "${!listB}" \
                --out "$output"
}

## Run VCFTOOLS

# Ave vs Pla
site_fst AvePla Ave Pla

# Colour-morph pairs within and between sites
site_fst AvePla-FRYe AveFR AveY
site_fst AvePla-FRYe AveFR PlaFR
site_fst AvePla-FRYe AveFR PlaY
site_fst AvePla-FRYe AveY PlaFR
site_fst AvePla-FRYe AveY PlaY
site_fst AvePla-FRYe PlaFR PlaY

## Allele Frequency (vcftools)

In [None]:
#### Avellanet
# Per-site allele frequencies (vcftools --freq2) for populations Ave, AveFR
# and AveY, one run per chromosome on the statistically phased VCFs.
# Each run yields <chrom>.<pop>.frq, which is then gzipped.
mkdir -p ~/snap_hap_repHZ/genome_scans/alleleFreqs
# Outputs use absolute paths, so a failed cd is reported but not fatal.
cd ~/snap_hap_repHZ/genome_scans/alleleFreqs || echo "ERROR: cannot cd to alleleFreqs" >&2

for pop in Ave AveFR AveY
do
    sampleList=~/snap_hap_repHZ/genome_scans/samples/samples_$pop.txt
    # Fail once per population instead of eight times per missing list.
    if [[ ! -r $sampleList ]]; then
        printf 'ERROR: sample list not readable: %s\n' "$sampleList" >&2
        continue
    fi

    for chrom in Chr{1..8}
    do
        printf '%s\t%s\n\n' "$pop" "$chrom"
        inVCF=~/snap_hap_repHZ/statphase/AvePla_FrYe/$chrom.AvePla.FrYe.sorted.statphased.vcf.gz
        output=~/snap_hap_repHZ/genome_scans/alleleFreqs/$chrom.$pop
        if time vcftools --gzvcf "$inVCF" --keep "$sampleList" --freq2 --out "$output"
        then
            # -f: without it gzip prompts or refuses when $output.frq.gz is
            # left over from an earlier run, keeping the stale archive.
            gzip -f "$output.frq"
        else
            printf 'ERROR: vcftools failed for %s %s\n' "$pop" "$chrom" >&2
        fi
    done
done

In [None]:
#### Planoles
# Per-site allele frequencies (vcftools --freq2) for populations Pla, PlaFR
# and PlaY, one run per chromosome on the statistically phased VCFs.
# Each run yields <chrom>.<pop>.frq, which is then gzipped.
mkdir -p ~/snap_hap_repHZ/genome_scans/alleleFreqs
# Outputs use absolute paths, so a failed cd is reported but not fatal.
cd ~/snap_hap_repHZ/genome_scans/alleleFreqs || echo "ERROR: cannot cd to alleleFreqs" >&2

for pop in Pla PlaFR PlaY
do
    sampleList=~/snap_hap_repHZ/genome_scans/samples/samples_$pop.txt
    # Fail once per population instead of eight times per missing list.
    if [[ ! -r $sampleList ]]; then
        printf 'ERROR: sample list not readable: %s\n' "$sampleList" >&2
        continue
    fi

    for chrom in Chr{1..8}
    do
        printf '%s\t%s\n\n' "$pop" "$chrom"
        inVCF=~/snap_hap_repHZ/statphase/AvePla_FrYe/$chrom.AvePla.FrYe.sorted.statphased.vcf.gz
        output=~/snap_hap_repHZ/genome_scans/alleleFreqs/$chrom.$pop
        if time vcftools --gzvcf "$inVCF" --keep "$sampleList" --freq2 --out "$output"
        then
            # -f: without it gzip prompts or refuses when $output.frq.gz is
            # left over from an earlier run, keeping the stale archive.
            gzip -f "$output.frq"
        else
            printf 'ERROR: vcftools failed for %s %s\n' "$pop" "$chrom" >&2
        fi
    done
done

## HWE or heterozygosity per site _(vcftools)_

In [None]:
#### Avellanet
# Per-site Hardy-Weinberg statistics (vcftools --hardy) for populations Ave,
# AveM and AveY, one run per chromosome on the statistically phased VCFs.
# Each run yields <chrom>.<pop>.hwe, which is then gzipped.
# NOTE(review): the Fst and allele-frequency cells use AveFR, not AveM --
# confirm that samples_AveM.txt exists and is the intended list.
mkdir -p ~/snap_hap_repHZ/genome_scans/hardy
# Outputs use absolute paths, so a failed cd is reported but not fatal.
cd ~/snap_hap_repHZ/genome_scans/hardy || echo "ERROR: cannot cd to hardy" >&2

for pop in Ave AveM AveY
do
    sampleList=~/snap_hap_repHZ/genome_scans/samples/samples_$pop.txt
    # Fail once per population instead of eight times per missing list.
    if [[ ! -r $sampleList ]]; then
        printf 'ERROR: sample list not readable: %s\n' "$sampleList" >&2
        continue
    fi

    for chrom in Chr{1..8}
    do
        printf '\n-----\n%s\t%s\n-----\n\n' "$pop" "$chrom"
        inVCF=~/snap_hap_repHZ/statphase/AvePla_FrYe/$chrom.AvePla.FrYe.sorted.statphased.vcf.gz
        output=~/snap_hap_repHZ/genome_scans/hardy/$chrom.$pop
        if time vcftools --gzvcf "$inVCF" --keep "$sampleList" --hardy --out "$output"
        then
            # -f: without it gzip prompts or refuses when $output.hwe.gz is
            # left over from an earlier run, keeping the stale archive.
            gzip -f "$output.hwe"
        else
            printf 'ERROR: vcftools failed for %s %s\n' "$pop" "$chrom" >&2
        fi
    done
done

In [None]:
#### Planoles
# Per-site Hardy-Weinberg statistics (vcftools --hardy) for populations Pla,
# PlaM and PlaY, one run per chromosome on the statistically phased VCFs.
# Each run yields <chrom>.<pop>.hwe, which is then gzipped.
# NOTE(review): the Fst and allele-frequency cells use PlaFR, not PlaM --
# confirm that samples_PlaM.txt exists and is the intended list.
mkdir -p ~/snap_hap_repHZ/genome_scans/hardy
# Outputs use absolute paths, so a failed cd is reported but not fatal.
cd ~/snap_hap_repHZ/genome_scans/hardy || echo "ERROR: cannot cd to hardy" >&2

for pop in Pla PlaM PlaY
do
    sampleList=~/snap_hap_repHZ/genome_scans/samples/samples_$pop.txt
    # Fail once per population instead of eight times per missing list.
    if [[ ! -r $sampleList ]]; then
        printf 'ERROR: sample list not readable: %s\n' "$sampleList" >&2
        continue
    fi

    for chrom in Chr{1..8}
    do
        printf '\n-----\n%s\t%s\n-----\n\n' "$pop" "$chrom"
        inVCF=~/snap_hap_repHZ/statphase/AvePla_FrYe/$chrom.AvePla.FrYe.sorted.statphased.vcf.gz
        output=~/snap_hap_repHZ/genome_scans/hardy/$chrom.$pop
        if time vcftools --gzvcf "$inVCF" --keep "$sampleList" --hardy --out "$output"
        then
            # -f: without it gzip prompts or refuses when $output.hwe.gz is
            # left over from an earlier run, keeping the stale archive.
            gzip -f "$output.hwe"
        else
            printf 'ERROR: vcftools failed for %s %s\n' "$pop" "$chrom" >&2
        fi
    done
done

## Site Pi _(vcftools)_

In [None]:
#### Avellanet
# Per-site nucleotide diversity (vcftools --site-pi) for populations Ave,
# AveM and AveY, one run per chromosome on the statistically phased VCFs.
# Each run yields <chrom>.<pop>.sites.pi, which is then gzipped.
# NOTE(review): the Fst and allele-frequency cells use AveFR, not AveM --
# confirm that samples_AveM.txt exists and is the intended list.
mkdir -p ~/snap_hap_repHZ/genome_scans/sitePi
# Outputs use absolute paths, so a failed cd is reported but not fatal.
cd ~/snap_hap_repHZ/genome_scans/sitePi || echo "ERROR: cannot cd to sitePi" >&2

for pop in Ave AveM AveY
do
    sampleList=~/snap_hap_repHZ/genome_scans/samples/samples_$pop.txt
    # Fail once per population instead of eight times per missing list.
    if [[ ! -r $sampleList ]]; then
        printf 'ERROR: sample list not readable: %s\n' "$sampleList" >&2
        continue
    fi

    for chrom in Chr{1..8}
    do
        printf '\n-----\n%s\t%s\n-----\n\n' "$pop" "$chrom"
        inVCF=~/snap_hap_repHZ/statphase/AvePla_FrYe/$chrom.AvePla.FrYe.sorted.statphased.vcf.gz
        output=~/snap_hap_repHZ/genome_scans/sitePi/$chrom.$pop
        if time vcftools --gzvcf "$inVCF" --keep "$sampleList" --site-pi --out "$output"
        then
            # -f: without it gzip prompts or refuses when $output.sites.pi.gz
            # is left over from an earlier run, keeping the stale archive.
            gzip -f "$output.sites.pi"
        else
            printf 'ERROR: vcftools failed for %s %s\n' "$pop" "$chrom" >&2
        fi
    done
done

In [None]:
#### Planoles
# Per-site nucleotide diversity (vcftools --site-pi) for populations Pla,
# PlaM and PlaY, one run per chromosome on the statistically phased VCFs.
# Each run yields <chrom>.<pop>.sites.pi, which is then gzipped.
# NOTE(review): the Fst and allele-frequency cells use PlaFR, not PlaM --
# confirm that samples_PlaM.txt exists and is the intended list.
mkdir -p ~/snap_hap_repHZ/genome_scans/sitePi
# Outputs use absolute paths, so a failed cd is reported but not fatal.
cd ~/snap_hap_repHZ/genome_scans/sitePi || echo "ERROR: cannot cd to sitePi" >&2

for pop in Pla PlaM PlaY
do
    sampleList=~/snap_hap_repHZ/genome_scans/samples/samples_$pop.txt
    # Fail once per population instead of eight times per missing list.
    if [[ ! -r $sampleList ]]; then
        printf 'ERROR: sample list not readable: %s\n' "$sampleList" >&2
        continue
    fi

    for chrom in Chr{1..8}
    do
        printf '\n-----\n%s\t%s\n-----\n\n' "$pop" "$chrom"
        inVCF=~/snap_hap_repHZ/statphase/AvePla_FrYe/$chrom.AvePla.FrYe.sorted.statphased.vcf.gz
        output=~/snap_hap_repHZ/genome_scans/sitePi/$chrom.$pop
        if time vcftools --gzvcf "$inVCF" --keep "$sampleList" --site-pi --out "$output"
        then
            # -f: without it gzip prompts or refuses when $output.sites.pi.gz
            # is left over from an earlier run, keeping the stale archive.
            gzip -f "$output.sites.pi"
        else
            printf 'ERROR: vcftools failed for %s %s\n' "$pop" "$chrom" >&2
        fi
    done
done