# Imputation with STITCH
  
**Date:** 2023-Sep-26  
**last update:** 2023-Sep-26

In [None]:
# Load STITCH and bcftools, then print the modules active in this session
module load stitch/1.6.7
module load bcftools/1.16
module list

<br>

## Split chromosome coordinates into smaller 1 Mb chunks

In [2]:
# Working directory of the analysis on the cluster file system
WORKDIR="/nfs/scistore18/bartogrp/apal/snap_hap"
# cat $WORKDIR/ref_genome/chromSize.txt

In [3]:
## Make chrom_segments files for each chromosome

# Print tab-separated, 1-based inclusive "start<TAB>end" windows tiling 1..length.
#   $1 - chromosome length (bp)
#   $2 - window size (bp); defaults to 1000000
# The last window is truncated at the chromosome end. Unlike pasting two `seq`
# columns together, this never emits a row with a missing start/end when the
# length is an exact multiple of the window size or shorter than one window.
chrom_segments() {
    awk -v len="$1" -v size="${2:-1000000}" 'BEGIN {
        for (start = 1; start <= len; start += size) {
            end = start + size - 1
            if (end > len) end = len
            printf "%d\t%d\n", start, end
        }
    }'
}

chromSizes=~/snap_hap/ref_genome/chromSize.txt
segmentDIR=~/snap_hap/ref_genome/chromSegments
for chr in Chr{1..8}
do
    # Compare the whole first column so that Chr1 cannot also pick up e.g. Chr10
    chromLength=$(awk -v chr="$chr" '$1 == chr { print $2; exit }' "$chromSizes")
    if [[ -z "$chromLength" ]]; then
        echo "WARNING: no length found for $chr in $chromSizes; skipping" >&2
        continue
    fi
    chrom_segments "$chromLength" > "$segmentDIR/${chr}_segments.txt"
    nSeg=$(wc -l < "$segmentDIR/${chr}_segments.txt")
    printf '%s %s: %d segments\n' "$chr" "$chromLength" "$nSeg"
done

Chr1 71919034: 72 segments
Chr2 77118269: 78 segments
Chr3 65231163: 66 segments
Chr4 54887108: 55 segments
Chr5 71106538: 72 segments
Chr6 55699338: 56 segments
Chr7 55564713: 56 segments
Chr8 57431585: 58 segments


In [4]:
# Preview the first 10 segments written for Chr4
head -n 10 ~/snap_hap/ref_genome/chromSegments/Chr4_segments.txt

1	1000000
1000001	2000000
2000001	3000000
3000001	4000000
4000001	5000000
5000001	6000000
6000001	7000000
7000001	8000000
8000001	9000000
9000001	10000000


## Run STITCH on each chromosome segment (step-by-step)

### Initiate variables

In [5]:
## Initiate variables

# General
vcfDIR=~/snap_hap/variants/vcf_bcftools_Am_all
stitchDIR=~/snap_hap/variants/stitch
chrom=Chr1
chromSegments=~/snap_hap/ref_genome/chromSegments/${chrom}_segments.txt
# Hard-coded to the first segment for this step-by-step walk-through
SLURM_ARRAY_TASK_ID=1

# posfile variables
# Row number SLURM_ARRAY_TASK_ID of the segments file holds this segment's
# start (column 1) and end (column 2); read the row once and split it.
segment=$(sed -n "${SLURM_ARRAY_TASK_ID}p" "$chromSegments")
chromStart=$(cut -f1 <<< "$segment")
chromEnd=$(cut -f2 <<< "$segment")
buffer=100000
inVCF=$vcfDIR/$chrom/Am_all_bcftools_${chrom}_SnpGap5_biSNPs_filtered-DP_500-7732_QUAL20_MQ30.vcf.gz
posfile=$stitchDIR/${chrom}/posfile_chromSegments/${chrom}_${chromStart}-${chromEnd}_buffer$buffer.pos


# STITCH variables
K=75
downsampleToCov=20
bx_tag=TRUE
ngen=100
niter=40
expRate=0.5
plot=FALSE
bamlist=~/snap_hap/sample_info/bam_info/bams_Am_all.txt
# The output directory name encodes every setting above. The bx and plot parts
# are taken from their variables (not typed in by hand) so the label cannot
# drift from the values actually used, and only the trailing ".pos" is stripped.
outputDIR=$stitchDIR/$chrom/stitch_chromSegments/$(basename "$posfile" .pos)_K${K}_cov${downsampleToCov}_bx${bx_tag}_niter${niter}_ngen${ngen}_r${expRate}_plot${plot}

### Create _posfile_

In [6]:
# Report the segment and the input/output files; printf with quoted arguments
# prints the values verbatim (no word splitting, globbing or escape processing)
printf '\n%s %s %s\n' "$chrom" "$chromStart" "$chromEnd"
printf 'inVCF: %s\n' "$inVCF"
printf 'posfile: %s\n' "$posfile"

## Create posfile
# time bash ~/snap_hap/_scripts/bash/stitch/extract_posfile.sh $chrom $chromStart $chromEnd $inVCF $posfile

# echo -e '\n'


Chr1 1 1000000
inVCF: /nfs/scistore18/bartogrp/apal/snap_hap/variants/vcf_bcftools_Am_all/Chr1/Am_all_bcftools_Chr1_SnpGap5_biSNPs_filtered-DP_500-7732_QUAL20_MQ30.vcf.gz
posfile: /nfs/scistore18/bartogrp/apal/snap_hap/variants/stitch/Chr1/posfile_chromSegments/Chr1_1-1000000_buffer50000.pos


### Run STITCH

## Cluster implementation: STITCH on each chromosome segment

### Run1

In [None]:
# Work from the Run1 job directory
cd /nfs/scistore18/bartogrp/apal/snap_hap/variants/jobs/stitch_chromSegments

## Define Variables
# Passed as positional arguments to the sbatch script in the submission cell below
buffer=100000
K=75; downsampleToCov=20; use_bx_tag=TRUE
ngen=100; niter=40; expRate=0.5
plot=FALSE

In [None]:
## Chr1
#chrom=Chr1
#segments=$(cat ~/snap_hap/ref_genome/chromSegments/${chrom}_segments.txt | wc -l)
#echo $chrom $segments segments

No. of segments for each chromosome:
```
Chr1 71919034: 72 segments
Chr2 77118269: 78 segments
Chr3 65231163: 66 segments
Chr4 54887108: 55 segments
Chr5 71106538: 72 segments
Chr6 55699338: 56 segments
Chr7 55564713: 56 segments
Chr8 57431585: 58 segments
```

In [None]:
# Submit one SLURM array job per chromosome (Run1).
# The array size is the number of segments of that chromosome, kept in a single
# table. Every argument is quoted so that an empty variable is passed as an
# empty argument instead of silently shifting all later positional arguments.
sbatchScript=~/snap_hap/_scripts/sbatch/impute/job-impute_stitch.sbatch
nSegments=(72 78 66 55 72 56 56 58)   # Chr1 .. Chr8
for i in "${!nSegments[@]}"
do
    chr=Chr$((i + 1))
    sbatch --array="1-${nSegments[i]}" -J "$chr" "$sbatchScript" \
        "$chr" "$buffer" "$K" "$downsampleToCov" "${use_bx_tag}" "$ngen" "$niter" "$expRate" "$plot"
done

```
#SBATCH --output=%x_%a-stitch-%A.out
#SBATCH --error=%x_%a-stitch-%A.out
#SBATCH --open-mode=append

#SBATCH --partition=defaultp
#SBATCH --exclude=zeta[243-262]
#SBATCH --time=120:00:00
#SBATCH --ntasks=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=10
#SBATCH --mem-per-cpu=20G

#SBATCH --mail-user=arka.pal@ist.ac.at
#SBATCH --mail-type=FAIL,END

#SBATCH --no-requeue
#SBATCH --export=NONE
unset SLURM_EXPORT_ENV

## Set the number of threads to the SLURM internal variable
export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
```

NB: The job does not work with `-c 5 -m 8G`. For some runs, I used `--mem-per-cpu=16G` instead of the `20G` shown above.

Generally it takes somewhere between 1 and 2 days to run each 1MB chunk. It heavily depends on the number of SNPs. 

### Run2

In [None]:
# Work from the Run2 job directory
cd /nfs/scistore18/bartogrp/apal/snap_hap/variants/jobs/stitch-run2_chromSegments

## Define Variables
# Passed as positional arguments to the sbatch script in the submission cell below
buffer=100000
K=75; downsampleToCov=20; use_bx_tag=TRUE
ngen=100; niter=40; expRate=0.5
plot=FALSE

In [None]:
# Submit one SLURM array job per chromosome (Run2).
# The array size is the number of segments of that chromosome, kept in a single
# table. Every argument is quoted so that an empty variable is passed as an
# empty argument instead of silently shifting all later positional arguments.
sbatchScript=~/snap_hap/_scripts/sbatch/impute/job-impute_stitch_run2.sbatch.sh
nSegments=(72 78 66 55 72 56 56 58)   # Chr1 .. Chr8
for i in "${!nSegments[@]}"
do
    chr=Chr$((i + 1))
    sbatch --array="1-${nSegments[i]}" -J "$chr" "$sbatchScript" \
        "$chr" "$buffer" "$K" "$downsampleToCov" "${use_bx_tag}" "$ngen" "$niter" "$expRate" "$plot"
done

### Run3

In [None]:
# Work from the Run3 job directory
cd /nfs/scistore18/bartogrp/apal/snap_hap/variants/jobs/stitch-run3_chromSegments

## Define Variables
# Passed as positional arguments to the sbatch script in the submission cell below
buffer=100000
K=75; downsampleToCov=20; use_bx_tag=TRUE
ngen=100; niter=40; expRate=0.5
plot=FALSE

In [None]:
# Submit one SLURM array job per chromosome (Run3).
# The array size is the number of segments of that chromosome, kept in a single
# table. Every argument is quoted so that an empty variable is passed as an
# empty argument instead of silently shifting all later positional arguments.
sbatchScript=~/snap_hap/_scripts/sbatch/impute/job-impute_stitch_run3.sbatch.sh
nSegments=(72 78 66 55 72 56 56 58)   # Chr1 .. Chr8
for i in "${!nSegments[@]}"
do
    chr=Chr$((i + 1))
    sbatch --array="1-${nSegments[i]}" -J "$chr" "$sbatchScript" \
        "$chr" "$buffer" "$K" "$downsampleToCov" "${use_bx_tag}" "$ngen" "$niter" "$expRate" "$plot"
done

### Last update

_STITCH_run2_ & _STITCH_run3_ have been run on chromosome segments of 1MB. 