# Relate
Date: 2024-08-21  
Updated: 2024-12-16

Infer genealogies using all magenta and yellow samples from *Ave* and *Pla*.

In [None]:
## Session setup: Relate install location and the paths shared by later cells
PATH_TO_RELATE=~/_softwares/relate_v1.2.2
baseDIR=~/snap_hap_repHZ/relate
popLabels=$baseDIR/AvePla.MY.n74.poplabels

## Append the Relate binaries and helper scripts to the search path
export PATH=$PATH:$PATH_TO_RELATE/bin/:$PATH_TO_RELATE/scripts/

## Load the environment modules used in this notebook
for tool in plink bcftools R
do
    module load $tool
done

cd $baseDIR

# Steps for running Relate

In [None]:
## Per-chromosome settings
## -----
## The first positional argument is the chromosome name; it is substituted
## into every path below.
chrom=$1
baseDIR=~/snap_hap_repHZ/relate
chromDIR=$baseDIR/$chrom

## Inputs for this chromosome: phased VCF and ancestral-allele table
vcf=~/snap_hap_repHZ/statphase/AvePla_FrYe/$chrom.AvePla.FrYe.sorted.statphased.vcf.gz
# vcf=/nfs/scistore18/bartogrp/apal/snap_hap_repHZ/relate_test/test2.vcf.gz
ancestral=~/snap_hap_repHZ/ancestral_alleles/allelePolarised_chrom/ancestral_$chrom.txt

## Prefix of the polarised VCF inside the chromosome directory, and the population labels
polarisedVcf=$chromDIR/$chrom.polarised
popLabels=$baseDIR/AvePla.MY.n74.poplabels

## Mutation rate and Ne values reported below
mu="5.7e-9"
Ne=813388

## Work inside the chromosome directory, creating it if it does not exist yet
[ -d $chromDIR ] || mkdir -p $chromDIR
cd $chromDIR

## Print the settings, padded with blank lines above and below
echo -e "\n\n"
echo VCF: $vcf
echo ancestral: $ancestral
echo Polarised VCF: $polarisedVcf
echo Population File: $popLabels
echo mutation rate: $mu
echo Ne: $Ne
echo -e "\n\n"

## Polarise VCFs

In [None]:
## Create text file with ancestral allele information
## - tail -n +2 drops the first line of the CSV (presumably the header row)
## - cut keeps fields 1, 2 and 35 (cut always emits fields in file order)
## - awk writes "<field2>:<field1><TAB><field35>", one variant per line
tail -n +2 ~/snap_hap_repHZ/ancestral_alleles/allelePolarised_chrom/allelePolarised_Chr6.csv | \
    cut -d, -f1,2,35 | tr ',' '\t' | \
    awk '{print $2":"$1"\t"$3}' > ~/snap_hap_repHZ/ancestral_alleles/allelePolarised_chrom/ancestral_Chr6.txt

## Polarise bi-allelic phased VCF such that the ancestral allele is always denoted by 0
vcf=~/snap_hap_repHZ/statphase/AvePla_FrYe/Chr6.AvePla.FrYe.sorted.statphased.vcf.gz
ancestral=~/snap_hap_repHZ/ancestral_alleles/allelePolarised_chrom/ancestral_Chr6.txt
out=~/snap_hap_repHZ/relate/polarised_VCFs/polarised_Chr6
## NOTE(review): the trailing "2 1" is taken to mean allele in column 2 and variant ID
## in column 1 of $ancestral, matching the table written above; confirm against the plink2 docs.
plink2 --vcf $vcf --set-all-var-ids Chr@:\# --ref-allele 'force' $ancestral 2 1 --export vcf --out $out

## Preprocessing

In [None]:
## Convert the polarised VCF to haps/sample format
RelateFileFormats \
    --mode ConvertFromVcf \
    --haps ./Chr6.haps \
    --sample ./Chr6.sample \
    -i ./polarised_VCFs/polarised_Chr6

## Generate the SNP annotation from the haps/sample pair and the population labels
RelateFileFormats \
    --mode GenerateSNPAnnotations \
    --haps ./Chr6.haps \
    --sample ./Chr6.sample \
    --poplabels ./AvePla.MY.n74.poplabels \
    -o Chr6

## Run Relate

In [None]:
## Run Relate in mode All on Chr6 with a fixed seed; results use the prefix "out"
Relate \
    --mode All \
    -m 5.7e-9 \
    -N 813388 \
    --haps ./Chr6.haps \
    --sample ./Chr6.sample \
    --map ./Chr6.map \
    --annot ./Chr6.annot \
    --seed 420 \
    -o out

## Estimate population size

In [None]:
## Estimate population sizes for each chromosome

chrom=Chr6
cd $baseDIR/$chrom

## ${chrom/Chr} removes the "Chr" prefix, so input and output prefix are rel_chr6 here
time $PATH_TO_RELATE/bin/RelateCoalescentRate \
                --mode EstimatePopulationSize \
                --poplabels $popLabels \
                -i rel_chr${chrom/Chr} \
                -o rel_chr${chrom/Chr}

## Finalise with the same population-label file as the estimation step.
## It must be passed as $popLabels (the variable's value), not the bare word popLabels.
time $PATH_TO_RELATE/bin/RelateCoalescentRate \
                --mode FinalizePopulationSize \
                --poplabels $popLabels \
                -i rel_chr${chrom/Chr} \
                -o rel_chr${chrom/Chr}

In [None]:
## Population-size estimate across chromosomes 1-8

popLabels=~/snap_hap_repHZ/relate/AvePla.MY.n74.poplabels
mu="5.7e-9"
cd $baseDIR/estimatePopSize

## Link each chromosome's rel_chrN.anc.gz and rel_chrN.mut.gz into this directory
for suffix in anc mut
do
    ln -s ../*/rel_chr?.$suffix.gz .
done

coalescentRate=$PATH_TO_RELATE/bin/RelateCoalescentRate

## Step 1: EstimatePopulationSize over the linked files (prefix "rel", chromosomes 1 to 8)
time $coalescentRate \
    --mode EstimatePopulationSize \
    -m $mu \
    --poplabels $popLabels \
    -i rel \
    -o rel \
    --years_per_gen 3 \
    --first_chr 1 \
    --last_chr 8 \
    --num_samples 5 \
    --seed 420

## Step 2: FinalizePopulationSize on the same prefix
time $coalescentRate \
    --mode FinalizePopulationSize \
    --poplabels $popLabels \
    -i rel \
    -o rel

In [None]:
## Submit the joint population-size job for chromosome 3 from its own directory
chrom=Chr3
cd $baseDIR/$chrom
sbatch \
    -J rel_chr3_joint \
    ~/snap_hap_repHZ/relate/_scripts/job-relate_EstimatePopSize-joint-chrom.sbatch.sh \
    chr3

## Joint fitting of population size and branch lengths

In [None]:
## Joint fit of population size and branch lengths over chromosomes 1-8
popLabels=~/snap_hap_repHZ/relate/AvePla.MY.n74.poplabels
mu="5.7e-9"
cd $baseDIR/estimatePopSize_joint

## Symlinks to the per-chromosome inputs (commented out; kept for reference)
# ln -s /nfs/scistore18/bartogrp/apal/snap_hap_repHZ/relate/Chr?/rel_chr?.anc.gz .
# ln -s /nfs/scistore18/bartogrp/apal/snap_hap_repHZ/relate/Chr?/rel_chr?.mut.gz .

estimateScript=~/_softwares/relate_v1.2.2/scripts/EstimatePopulationSize/EstimatePopulationSize.sh

## Input prefix "rel", output prefix "rel_joint"; 10 iterations on 16 threads
time $estimateScript \
    -i rel \
    -o rel_joint \
    -m $mu \
    --poplabels $popLabels \
    --pops_of_interest AveM,AveY,PlaM,PlaY \
    --noanc 0 \
    --threshold 0 \
    --years_per_gen 3 \
    --first_chr 1 \
    --last_chr 8 \
    --num_iter 10 \
    --seed 420 \
    --threads 16

## Compress every .dist file in this directory
bgzip *.dist

In [None]:
## Calculate average mutation rate
## Runs RelateMutationRate in mode Avg over chromosomes 1-8 with prefix "rel".
## The last option carries no trailing backslash, so the command ends here and
## cannot absorb whatever line follows it.
$PATH_TO_RELATE/bin/RelateMutationRate \
                 --mode Avg \
                 -i rel \
                 -o rel \
                 --first_chr 1 \
                 --last_chr 8 \
                 --years_per_gen 3

In [None]:
## Reestimate branch lengths
chrom=Chr4
## Per-chromosome Relate files in this notebook are named rel_chrN (lower-case "chr"),
## so the input prefix is built with ${chrom/Chr} rather than $chrom itself
## (rel_Chr4 would not match those files on a case-sensitive filesystem).
## The rate and coal inputs come from the joint population-size fit.
$PATH_TO_RELATE/bin/RelateCoalescentRate \
                --mode ReEstimateBranchLengths \
                -i rel_chr${chrom/Chr} \
                -o rel_updated \
                --mrate ../estimatePopSize_joint/rel_joint_avg.rate \
                --coal ../estimatePopSize_joint/rel_joint.coal \
                -m 5.7e-9 \
                --seed 420

## Cluster version: submit the re-estimation job for chr1 from the shared output directory
cd $baseDIR/reEstimateBranchLengths
sbatch -J chr1 ~/snap_hap_repHZ/relate/_scripts/job-relate_reEstimateBranchLengths.sbatch.sh chr1

## Selection

In [None]:
## Run DetectSelection.sh on the re-estimated genealogy of one chromosome
chrom=chr8
cd ~/snap_hap_repHZ/relate/estimateSelection
$PATH_TO_RELATE/scripts/DetectSelection/DetectSelection.sh \
    -i ../reEstimateBranchLengths/rel_${chrom}_updated \
    -o rel_${chrom}_selection \
    -m 5.7e-9 \
    --years_per_gen 3

In [None]:
## Run SampleBranchLengths.sh (10 samples) for the 50,000,000-55,000,000 bp window of chr6
chrom=chr6
$PATH_TO_RELATE/scripts/SampleBranchLengths/SampleBranchLengths.sh \
    -i rel_${chrom}.popsize \
    -o rel_RosEl \
    -m 5.7e-9 \
    --coal rel_${chrom}.popsize.coal \
    --format n \
    --num_samples 10 \
    --first_bp 50000000 \
    --last_bp 55000000 \
    --seed 420

## Extract newick trees

In [None]:
## Extract trees in NEWICK format
## First pass: chr2 genealogy read from rel_chr2.anc / rel_chr2.mut in the current directory.
## The output prefix is written out as rel_chr2 so that it always matches these hard-coded
## inputs instead of depending on whatever value $chrom holds from an earlier cell.
$PATH_TO_RELATE/bin/RelateExtract \
                 --mode AncToNewick \
                 --anc rel_chr2.anc \
                 --mut rel_chr2.mut \
                 --first_bp 1 \
                 --last_bp 77117553 \
                 -o rel_chr2


## Second pass: same extraction on the re-estimated genealogy, driven by variables.
baseDIR=~/snap_hap_repHZ/relate
chrom=chr2
start=1
## Keep exactly one end= line active, and make it the one labelled with $chrom:
## a second active assignment further down silently overrides the first.
# end=71919034 #chr1
end=77118269 #chr2
# end=65231163 #chr3
# end=54887108 #chr4
# end=71106538 #chr5
# end=55699338 #chr6
# end=55564713 #chr7
# end=57431585 #chr8

ancFile=$baseDIR/reEstimateBranchLengths/rel_${chrom}_updated.anc.gz
mutFile=$baseDIR/reEstimateBranchLengths/rel_${chrom}_updated.mut.gz
## The output name records chromosome and bp range, e.g. rel_chr2-1-77118269
outPrefix=$baseDIR/newickTrees/rel_$chrom-$start-$end


time $PATH_TO_RELATE/bin/RelateExtract \
                 --mode AncToNewick \
                 --anc $ancFile \
                 --mut $mutFile \
                 --first_bp $start \
                 --last_bp $end \
                 -o $outPrefix

In [None]:
## Submit one AncToNewick job per chromosome region
cd ~/snap_hap_repHZ/relate/newickTrees/jobs
baseDIR=~/snap_hap_repHZ/relate

## One "chrom start end" entry per job, submitted in this order.
## Commented entries are regions that are currently switched off.
regions=(
    "chr1 1 71919034"
    # "chr2 1 77118269"
    "chr3 1 65231163"
    "chr4 1 54887108"
    "chr5 1 71106538"
    # "chr6 1 55699338"
    # "chr6 52880000 52920000"
    "chr6 1 100000"
    "chr7 1 55564713"
    "chr8 1 57431585"
)

for region in "${regions[@]}"
do
    read -r chrom start end <<< "$region"

    ## anc and mut are always taken from the same reEstimateBranchLengths output,
    ## so the two files of a pair cannot come from different runs.
    ancFile=$baseDIR/reEstimateBranchLengths/rel_${chrom}_updated.anc.gz
    mutFile=$baseDIR/reEstimateBranchLengths/rel_${chrom}_updated.mut.gz
    outPrefix=$baseDIR/newickTrees/rel_$chrom-$start-$end

    ## The job name is "nk" plus the chromosome number, e.g. nk1
    sbatch -J nk${chrom/chr} $baseDIR/_scripts/job-relate_AncToNewick.sbatch.sh $chrom $start $end $ancFile $mutFile $outPrefix
done

## Draw trees

In [None]:
## Run TreeView.sh at each position of interest for Flavia (chr2)

geneName=flavia
positions=(53652042 53652180 53710256 53710327 53712795 53712984 53714175)

for pos in "${positions[@]}"
do
    echo $pos
    ## One output per position, named chr2_<pos>_<gene> inside the gene's folder
    $PATH_TO_RELATE/scripts/TreeView/TreeView.sh \
        --haps ./Chr2/Chr2.haps.gz \
        --sample ./Chr2/Chr2.sample \
        --anc ./Chr2/rel_chr2_joint.anc.gz \
        --mut ./Chr2/rel_chr2_joint.mut.gz \
        --poplabels ./AvePla.MY.n74.poplabels \
        --bp_of_interest $pos \
        --years_per_gen 3 \
        -o ./treeViews/chr2_${geneName}/chr2_${pos}_${geneName}
done

In [None]:
## Run TreeView.sh at each position of interest for SULF (chr4)

geneName=sulf
positions=(38355441 38355484 38355558 38355614 38355633 38355662 38355702)

for pos in "${positions[@]}"
do
    echo $pos
    ## One output per position, named chr4_<pos>_<gene> inside the gene's folder
    $PATH_TO_RELATE/scripts/TreeView/TreeView.sh \
        --haps ./Chr4/Chr4.haps.gz \
        --sample ./Chr4/Chr4.sample \
        --anc ./Chr4/rel_chr4_joint.anc.gz \
        --mut ./Chr4/rel_chr4_joint.mut.gz \
        --poplabels ./AvePla.MY.n74.poplabels \
        --bp_of_interest $pos \
        --years_per_gen 3 \
        -o ./treeViews/chr4_${geneName}/chr4_${pos}_${geneName}
done

In [None]:
## Run TreeView.sh at each position of interest for Rubia (chr5)

geneName=rubia
positions=(6307344 6307410 6307489 6307562 6307668 6307778 6307854)

for pos in "${positions[@]}"
do
    echo $pos
    ## One output per position, named chr5_<pos>_<gene> inside the gene's folder
    $PATH_TO_RELATE/scripts/TreeView/TreeView.sh \
        --haps ./Chr5/Chr5.haps.gz \
        --sample ./Chr5/Chr5.sample \
        --anc ./Chr5/rel_chr5_joint.anc.gz \
        --mut ./Chr5/rel_chr5_joint.mut.gz \
        --poplabels ./AvePla.MY.n74.poplabels \
        --bp_of_interest $pos \
        --years_per_gen 3 \
        -o ./treeViews/chr5_${geneName}/chr5_${pos}_${geneName}
done

In [None]:
## Run TreeView.sh at each position of interest for Rosel (chr6)

# module load R
geneName=rosel
## Active position set; the commented lines are alternative sets kept for reference
positions=(52884457 52884489 52884528 52884553 52884570 52884624 52884770) #ROS1
# positions=(53058075 53058990 53062699 53062934 53063060 53063137)
# positions=(52917425 52917590 52917838 52999532 53057894 53057984 53057984 53058075 53058990 53062699 53062934 53063060 53063137)

for pos in "${positions[@]}"
do
    echo $pos
    ## One output per position, named chr6_<pos>_<gene> inside the gene's folder
    $PATH_TO_RELATE/scripts/TreeView/TreeView.sh \
        --haps ./Chr6/Chr6.haps.gz \
        --sample ./Chr6/Chr6.sample \
        --anc ./Chr6/rel_chr6_joint.anc.gz \
        --mut ./Chr6/rel_chr6_joint.mut.gz \
        --poplabels ./AvePla.MY.n74.poplabels \
        --bp_of_interest $pos \
        --years_per_gen 3 \
        -o ./treeViews/chr6_${geneName}/chr6_${pos}_${geneName}
done

## Convert to tskit format

In [None]:
## Convert the re-estimated chr8 genealogy with mode ConvertToTreeSequence, then gzip the .trees file
baseDIR=~/snap_hap_repHZ/relate
chrom=chr8
treePrefix=rel_${chrom}_updated
$PATH_TO_RELATE/bin/RelateFileFormats \
    --mode ConvertToTreeSequence \
    -i $baseDIR/reEstimateBranchLengths/$treePrefix \
    -o $baseDIR/treeSequence/$treePrefix
gzip $baseDIR/treeSequence/$treePrefix.trees

In [None]:
## Test conversion: ConvertToTreeSequence for the 10,000,000-15,000,000 bp window of chr6
chrom=chr6
baseDIR=~/snap_hap_repHZ/relate
$PATH_TO_RELATE/bin/RelateFileFormats \
    --mode ConvertToTreeSequence \
    --first_bp 10000000 \
    --last_bp 15000000 \
    -i $baseDIR/reEstimateBranchLengths/rel_${chrom}_updated \
    -o $baseDIR/treeSequence/rel_${chrom}_test_updated
## Compression step is switched off for this test
# gzip $baseDIR/treeSequence/rel_${chrom}_updated.trees

In [None]:
## relate_lib Convert with --compress on the re-estimated chr2 anc/mut pair
chrom=chr2
baseDIR=~/snap_hap_repHZ/relate
updatedPrefix=$baseDIR/reEstimateBranchLengths/rel_${chrom}_updated
~/_softwares/relate_lib/bin/Convert \
    --mode ConvertToTreeSequence \
    --compress \
    --anc $updatedPrefix.anc.gz \
    --mut $updatedPrefix.mut.gz \
    -o $baseDIR/treeSequence/rel_${chrom}_updated.compressed

In [None]:
## Cut the 52,600,000-53,500,000 bp window out of the re-estimated chr6 anc/mut files
## ($baseDIR is expected to be set by an earlier cell)
chrom=chr6
updatedPrefix=$baseDIR/reEstimateBranchLengths/rel_${chrom}_updated
$PATH_TO_RELATE/bin/RelateExtract \
    --mode AncMutForSubregion \
    --first_bp 52600000 \
    --last_bp 53500000 \
    --anc $updatedPrefix.anc.gz \
    --mut $updatedPrefix.mut.gz \
    -o $baseDIR/subRegions/rosel

In [None]:
## relate_lib Convert with --compress on the rosel sub-region anc/mut pair
chrom=chr6
baseDIR=~/snap_hap_repHZ/relate
subRegion=$baseDIR/subRegions/rosel
~/_softwares/relate_lib/bin/Convert \
    --mode ConvertToTreeSequence \
    --compress \
    --anc $subRegion.anc \
    --mut $subRegion.mut \
    -o $baseDIR/treeSequence/rosel