# Genomic Scans in *A. majus*

In [None]:
# Load bcftools 1.18 through the cluster's `module` command.
module load bcftools/1.18
# Append the genomics_general checkout in $HOME (and its VCF_processing
# sub-folder) to PATH; parseVCF.py, popgenWindows.py and freq.py are called by
# bare name further down (presumably from this checkout).
export PATH="${PATH}:${HOME}/genomics_general:${HOME}/genomics_general/VCF_processing"

In [6]:
# Root folder of the genome-scan analysis; the commands that follow build
# their paths from $baseDIR.
baseDIR=/nfs/scistore18/bartogrp/apal/snap_hap/genome_scans
cd "${baseDIR}"

## Sample Pool Information

In [None]:
# Keep fields 4 and 21 of the sample sheet (cut splits on TAB by default) and
# write them to the pools file under $baseDIR/samples.
# NOTE(review): presumably these are the sample-name and pool columns — confirm
# against the sheet's header.
cut -f4,21 ~/snap_hap/sample_info/samples_Amajus_SnapHap_LastUpdate-2023-10.txt \
    > "$baseDIR/samples/samplePools.txt"

## Convert .vcf to .geno file

In [None]:
## On the terminal
# Run parseVCF.py on the final STITCH VCF of every chromosome (Chr1..Chr8),
# one after the other, in the current shell.
stitchRun=stitchRun1
for chrom in Chr{1..8}; do
    vcf=~/snap_hap/variants/stitch/${chrom}/Am_all_${stitchRun}_${chrom}.final.vcf.gz
    # Output path = input path with its first ".vcf" replaced by ".geno",
    # i.e. ...final.geno.gz next to the VCF.
    parseVCF.py -i "$vcf" -o "${vcf/.vcf/.geno}"
done

In [None]:
## Cluster implementation
# Same conversion, but submitted through sbatch: one job per chromosome in the
# list (currently only Chr2). The VCF path is the job script's only argument.
stitchRun=stitchRun1
for chrom in Chr2; do
    vcf=~/snap_hap/variants/stitch/${chrom}/Am_all_${stitchRun}_${chrom}.final.vcf.gz
    sbatch -J "${chrom}_vcf2geno" \
        ~/snap_hap/genome_scans/_scripts/job-vcf2geno.sbatch.sh "$vcf"
done

```
#SBATCH --output=%x-%A.out
#SBATCH --error=%x-%A.out
#SBATCH --open-mode=append
#SBATCH --partition=defaultp
### #SBATCH --constraint=bookworm
### #SBATCH --constraint=bullseye
#SBATCH --time=240:00:00
#SBATCH --ntasks=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=1
#SBATCH --mem-per-cpu=20G
```

Execution Time: 2-4 hrs.

## Fst, dxy, pi

In [None]:
## Chr6
# Variables for an interactive popgenWindows.py run on Chr6.

baseDIR=~/snap_hap/genome_scans
chrom=Chr6
geno=~/snap_hap/variants/stitch/${chrom}/Am_all_stitchRun1_${chrom}.final.geno.gz
window=10000

# Prefix for the output file name. It was previously never assigned before
# being used below, which produced a file called "_Chr6_w10000.csv.gz".
# "MY" is the prefix the other MF-vs-YF commands in this notebook use.
outPrefix=MY

output=$baseDIR/popgenStats/${outPrefix}_${chrom}_w${window}.csv.gz
popFile=$baseDIR/samples/samplePools.txt

In [None]:
# Windowed statistics for populations MF and YF on the phased .geno file.
# Reads $geno, $output, $window and $popFile, which must already be set in the
# session before this command is run.
popgenWindows.py -f phased -w "$window" \
    -g "$geno" -o "$output" \
    -p MF -p YF --popsFile "$popFile" \
    --writeFailedWindow

### MF vs YF

In [None]:
## Cluster implementation of Fst in windows w GenomicsGeneral
# Submits one sbatch job per chromosome in the loop list (currently Chr6 only).
# All settings are handed to job-popgenWindows.sbatch.sh as positional
# arguments; how the script uses them is not visible from this notebook.
baseDIR=~/snap_hap/genome_scans
stitchRun=stitchRun1
outPrefix=MY
popFile=$baseDIR/samples/N960_Pools-MF-YF.txt
# window=1
window=10000
# NOTE(review): $step only ends up in the output file name; it is not among the
# arguments passed to the job script — confirm the script applies the same step.
step=1000

cd "$baseDIR/jobs"
# for chrom in Chr{1..8}; do
for chrom in Chr6; do
    geno=~/snap_hap/variants/stitch/${chrom}/Am_all_${stitchRun}_${chrom}.final.geno.gz

    # window
    # NOTE(review): the RosEl command in this notebook uses "coordinate"
    # (singular) — confirm which spelling the job script expects.
    windType=coordinates
    output=$baseDIR/popgenStats/${outPrefix}_${chrom}_${windType}_w${window}_s${step}.csv.gz

    # #per-site
    # windType=sites
    # output=$baseDIR/popgenStats/${outPrefix}_${chrom}_${windType}_w${window}.csv.gz

    sbatch -J "${chrom}_${outPrefix}_pgWin" \
        "$baseDIR/_scripts/job-popgenWindows.sbatch.sh" \
        "$baseDIR" "$chrom" "$geno" "$output" "$window" "$popFile" MF YF "$windType"
done

NB: `--windType` controls the window type in Simon's `popgenWindows.py` script.

In [None]:
## Fst for RosEl in windows+steps w GenomicsGeneral
## RosEL Chr6:52500000-53500000

#######################################
# Subset a .geno text stream to one coordinate interval.
# Arguments: $1 - region start, $2 - region end. Both bounds are EXCLUSIVE,
#            as in the filter this replaces ($2 > start && $2 < end).
# Inputs:    .geno text on stdin; line 1 is taken to be the header and is
#            always passed through.
# Outputs:   header line followed by every row whose 2nd column (position)
#            lies strictly between start and end, on stdout.
#######################################
extract_geno_region() {
    # "+ 0" forces numeric comparison regardless of how awk types the operands.
    awk -v start="$1" -v end="$2" \
        'NR == 1 || ($2 + 0 > start + 0 && $2 + 0 < end + 0)'
}

## Initiate variables
baseDIR=~/snap_hap/genome_scans
stitchRun=stitchRun1

chrom=Chr6
regionName=RosEl
regionStart=52500000
regionEnd=53500000
geno_full=~/snap_hap/variants/stitch/${chrom}/Am_all_${stitchRun}_${chrom}.final.geno.gz

windType=coordinate
window=10000
step=1000

p1=MF
p2=YF
popFile=$baseDIR/samples/N960_Pools-MF-YF.txt

outPrefix=MY
output=$baseDIR/popgenStats/${outPrefix}_${regionName}_${windType}_w${window}_s${step}.csv.gz

cd "$baseDIR"

## Create the region-specific geno file
# The interval now comes from $regionStart/$regionEnd; it used to be
# hard-coded in the awk program, so editing the variables above had no effect
# on which rows were extracted. Header and rows are written in a single pass,
# so no intermediate tmp.header/tmp.geno files are needed.
geno=$baseDIR/geno_regions/${regionName}.geno.gz
zcat "$geno_full" \
    | extract_geno_region "$regionStart" "$regionEnd" \
    | bgzip > "$geno"

popgenWindows.py -g "$geno" -f phased -o "$output" \
    --windType "$windType" -w "$window" -s "$step" \
    -p "$p1" -p "$p2" --popsFile "$popFile" \
    --threads 10 --writeFailedWindow

NB: Change the variables for other regions of interest...
<br>

In [None]:
## Cluster implementation of Fst per-site w vcftools
# Submits one sbatch job per chromosome (Chr1..Chr8). Each job receives the
# chromosome's VCF, the two per-population sample-name lists and an output
# prefix as positional arguments.

baseDIR=~/snap_hap/genome_scans
stitchRun=stitchRun1
outPrefix=MY
pop1=$baseDIR/samples/sampleNames_MF.txt
pop2=$baseDIR/samples/sampleNames_YF.txt

cd "$baseDIR/jobs"
for chrom in Chr{1..8}; do
    inVCF=~/snap_hap/variants/stitch/${chrom}/Am_all_${stitchRun}_${chrom}.final.vcf.gz
    output=$baseDIR/popgenStats/${outPrefix}_${chrom}_Fst

    sbatch -J "${chrom}_${outPrefix}_Fst" \
        "$baseDIR/_scripts/job-Fst_vcftools.sbatch.sh" \
        "$baseDIR" "$chrom" "$inVCF" "$pop1" "$pop2" "$output"
done

In [None]:
## Cluster implementation of pi-within per-site w vcftools
# For every chromosome (Chr1..Chr8) submit two sbatch jobs, one per population
# (MF first, then YF). Each job gets the VCF, that population's sample-name
# list and a population-specific output prefix.
baseDIR=~/snap_hap/genome_scans
stitchRun=stitchRun1

cd "$baseDIR/jobs"
for chrom in Chr{1..8}; do
    inVCF=~/snap_hap/variants/stitch/${chrom}/Am_all_${stitchRun}_${chrom}.final.vcf.gz

    ## Pop1 = MF, Pop2 = YF
    for popPrefix in MF YF; do
        pop=$baseDIR/samples/sampleNames_${popPrefix}.txt
        output=$baseDIR/popgenStats/${popPrefix}_${chrom}_pi
        sbatch -J "${chrom}_${popPrefix}_pi" \
            "$baseDIR/_scripts/job-pi_vcftools.sbatch.sh" \
            "$baseDIR" "$chrom" "$inVCF" "$pop" "$output"
    done
done

### Ave vs. Pla


In [None]:
# Windowed popgen statistics for the Ave vs Pla pools, submitted through sbatch
# (one job per chromosome in the loop list; currently Chr2 only).
baseDIR=~/snap_hap/genome_scans
stitchRun=stitchRun1
outPrefix=AvePla
popFile=$baseDIR/samples/N96_Pools-Ave-Pla.txt
window=10000

cd "$baseDIR/jobs"
# for chrom in Chr{1..8}; do
for chrom in Chr2; do
    geno=~/snap_hap/variants/stitch/${chrom}/Am_all_${stitchRun}_${chrom}.final.geno.gz
    output=$baseDIR/popgenStats/${outPrefix}_${chrom}_w${window}.csv.gz

    # NOTE(review): the MF/YF submission in this notebook passes a ninth
    # argument (windType) to the same job script; none is passed here —
    # confirm the script copes with it being absent.
    sbatch -J "${chrom}_${outPrefix}_pgWin" \
        "$baseDIR/_scripts/job-popgenWindows.sbatch.sh" \
        "$baseDIR" "$chrom" "$geno" "$output" "$window" "$popFile" Ave Pla
done

## Allele frequencies

In [None]:
# Per-chromosome allele frequencies for pools MF and YF with freq.py, run
# sequentially in the current shell. Only the "derived" target is active; the
# "minor" (MAF) call is kept commented out, though its output path is still set.
baseDIR=~/snap_hap/genome_scans
popFile=$baseDIR/samples/N960_Pools-MF-YF.txt
outPrefix=MY

for chrom in Chr{1..8}; do
    echo "$chrom"   # progress marker
    geno=~/snap_hap/variants/stitch/${chrom}/Am_all_stitchRun1_${chrom}.final.geno.gz
    outputMAF=$baseDIR/AF/${outPrefix}_${chrom}.MAF.csv.gz
    outputDerived=$baseDIR/AF/${outPrefix}_${chrom}.derivedFreq.csv.gz

    # freq.py -g $geno -p MF -p YF --popsFile $popFile --target minor --threads 10 -o $outputMAF
    freq.py -g "$geno" -p MF -p YF --popsFile "$popFile" \
        --target derived --threads 10 -o "$outputDerived"
done

In [None]:
## VCFtools (cluster) implementation of estimating allele frequency
# For every chromosome (Chr1..Chr8) submit two sbatch jobs — one for the MF
# samples, one for the YF samples. Each job receives the VCF, the sample-name
# list and an output prefix as positional arguments.
module load vcftools

baseDIR=~/snap_hap/genome_scans
stitchRun=stitchRun1

cd "$baseDIR/jobs/"
for chrom in Chr{1..8}; do
    echo "$chrom"   # progress marker
    inVCF=~/snap_hap/variants/stitch/${chrom}/Am_all_${stitchRun}_${chrom}.final.vcf.gz

    ## MF
    flank=MF
    sampleFile=$baseDIR/samples/sampleNames_MF.txt
    outputMF=$baseDIR/AF/MF_${chrom}
    sbatch -J "${chrom}-frq2-${flank}" \
        ~/snap_hap/genome_scans/_scripts/job-freq2.sbatch.sh \
        "$baseDIR" "$chrom" "$inVCF" "$sampleFile" "$outputMF"

    ## YF
    flank=YF
    sampleFile=$baseDIR/samples/sampleNames_YF.txt
    outputYF=$baseDIR/AF/YF_${chrom}
    sbatch -J "${chrom}-frq2-${flank}" \
        ~/snap_hap/genome_scans/_scripts/job-freq2.sbatch.sh \
        "$baseDIR" "$chrom" "$inVCF" "$sampleFile" "$outputYF"
done