# NgsRelate

In [None]:
#!/bin/bash -e
#SBATCH --account=$user
#SBATCH --job-name=ngsrelate
#SBATCH --time=1-00:00:00
#SBATCH --mem=150GB
#SBATCH --cpus-per-task=8
#SBATCH --output=%x_%A.out
#SBATCH --error=%x_%A.err

# Estimate pairwise relatedness with NgsRelate:
#   1. angsd computes genotype likelihoods (binary .glf.gz) and allele
#      frequencies (.mafs.gz) for every BAM listed in BAM_LIST;
#   2. the allele-frequency column is extracted into a headerless text file;
#   3. ngsRelate is run on the likelihoods + frequencies.
#
# NOTE: sbatch does not expand shell variables inside #SBATCH lines, so
# "$user" above has to be replaced with the real account name.

# -e alone does not cover pipelines: without pipefail a failing zcat would
# still leave a truncated frequency file that ngsRelate happily reads.
set -eo pipefail

module load Miniconda3/23.10.0-1
module load angsd/0.935-GCC-9.2.0
module load bzip2/1.0.8-GCCcore-12.3.0

# Capture the project root ONCE, before `workdir` is reassigned below.
# Otherwise every later "/$workdir/..." would resolve inside
# results/06_grouped instead of the project root.
project_root="/$workdir"

export PATH="$PATH:${project_root}/ngsRelate"
# may also need to update the library path for a newer GLIBCXX (libstdc++)
# TODO confirm which variable is needed on this cluster

workdir="${project_root}/results/06_grouped"
outdir="${project_root}/results/06_angsd/ngsrelate"
OUTPUT="${outdir}/angsd_GL_for_ngsrelate"   # angsd output prefix
BAM_LIST="${workdir}/med_cov_rmrelated.bam.filelist"
# REF is defined for reference only; it is not passed to angsd in this script.
REF="${project_root}/reference_sequences/GCF_011064425.1_Rrattus_CSIRO_v1/GCF_011064425.1_Rrattus_CSIRO_v1_genomic_autosomesonly.fna.gz"
FREQ="${OUTPUT}.freq"
NGSRELATE_OUTPUT="${outdir}/ngsrelate_output"
GL="${OUTPUT}.glf.gz"                       # written by angsd (-doGlf 3)
N_IND=41                                    # number of samples; keep in sync with BAM_LIST
THREADS="${SLURM_CPUS_PER_TASK:-8}"         # follow the SLURM allocation, 8 outside SLURM

mkdir -p "$outdir"

# generate GLs
angsd -b "$BAM_LIST" -gl 2 -domajorminor 1 -snp_pval 1e-6 -domaf 1 -minmaf 0.05 -doGlf 3 -nThreads "$THREADS" -out "$OUTPUT"

# extract the frequency column from the allele frequency file and remove the header (required by NgsRelate)
# NOTE(review): column 5 is presumably the frequency column only because no
# -ref/-anc is given to angsd above; re-check the index if that changes.
zcat "${OUTPUT}.mafs.gz" | cut -f5 | sed 1d > "$FREQ"

# run ngsrelate
ngsRelate -g "$GL" -n "$N_IND" -f "$FREQ" -O "$NGSRELATE_OUTPUT"


# PCAngsd
The scripts below cover the run without a minor allele frequency (MAF) filter (`nomaf` outputs). Also run with 1% and 5% MAF filters.

In [None]:
#!/bin/bash -l
#SBATCH --job-name=geno_angsd
#SBATCH --output=%x_%j.out
#SBATCH --error=%x_%j.err
#SBATCH --time=1-00:00:00
#SBATCH --mem=60G
#SBATCH --cpus-per-task=12

# Compute genotype likelihoods (Beagle format) and called genotypes with
# angsd for the BAMs listed in BAM_LIST, without a MAF filter. Outputs are
# written with the prefix "${outdir}/nomaf".

# Stop at the first failing command (module load, mkdir, angsd) instead of
# carrying on with a half-initialised environment.
set -eo pipefail

module load angsd/0.935-GCC-9.2.0

# Capture the project root ONCE, before `workdir` is reassigned below.
# Otherwise outdir and REF would resolve inside results/06_grouped.
project_root="/$workdir"

workdir="${project_root}/results/06_grouped"
outdir="${project_root}/results/06_angsd/pcangsd"
BAM_LIST="${workdir}/med_cov_rmrelated.bam.filelist"
REF="${project_root}/reference_sequences/GCF_011064425.1_Rrattus_CSIRO_v1/GCF_011064425.1_Rrattus_CSIRO_v1_genomic_autosomesonly.fna.gz"
THREADS="${SLURM_CPUS_PER_TASK:-12}"   # follow the SLURM allocation, 12 outside SLURM

mkdir -p "$outdir"

# Option summary (see the angsd documentation for details):
#   -GL 1                    genotype-likelihood model 1
#   -doGlf 2                 write likelihoods as a Beagle file (<prefix>.beagle.gz)
#   -doMaf 2 -doMajorMinor 1 allele-frequency estimation / major-minor inference
#   -minMapQ 30 -minQ 20     mapping-quality and base-quality thresholds
#   -SNP_pval 1e-6           keep only sites passing the SNP p-value threshold
#   -doGeno 2 -doPost 1 -postCutoff 0.95
#                            call genotypes from posteriors, cutoff 0.95
#   -minInd 24               minimum number of individuals with data at a site
#   -doCounts 1 -setMinDepthInd 4 -setMaxDepthInd 100
#                            per-individual depth limits (these need -doCounts)
angsd \
    -bam "${BAM_LIST}" \
    -ref "${REF}" \
    -out "${outdir}/nomaf" \
    -nThreads "${THREADS}" \
    -GL 1 \
    -doGlf 2 \
    -doMaf 2 -doMajorMinor 1 \
    -minMapQ 30 \
    -minQ 20 \
    -SNP_pval 1e-6 \
    -doGeno 2 \
    -doPost 1 -postCutoff 0.95 \
    -minInd 24 \
    -doCounts 1 \
    -setMinDepthInd 4 \
    -setMaxDepthInd 100

In [None]:
#!/bin/bash -e
#SBATCH --account=$user
#SBATCH --job-name=pcangsd_nomaf
#SBATCH --time=03:00:00
#SBATCH --mem=72GB
#SBATCH --cpus-per-task=10
#SBATCH --output=%x_%A.out
#SBATCH --error=%x_%A.err

# Run PCAngsd on the unfiltered ("nomaf") Beagle genotype-likelihood file,
# with the selection scan and admixture estimation enabled.
#
# NOTE: sbatch does not expand shell variables inside #SBATCH lines, so
# "$user" above has to be replaced with the real account name.

set -eo pipefail

export PATH="$PATH:/$workdir/Software/pcangsd/"

INPUT="/$workdir/results/06_angsd/pcangsd/nomaf.beagle.gz"
OUTPUT="/$workdir/results/06_angsd/pcangsd/nomaf_pca"
# Use exactly the CPUs SLURM allocated. The previous hard-coded 20 threads
# oversubscribed the 10 CPUs requested above. Falls back to 10 outside SLURM.
THREADS="${SLURM_CPUS_PER_TASK:-10}"

pcangsd --beagle "$INPUT" --eig 2 --threads "$THREADS" --out "$OUTPUT" --selection --admix


# NGSadmix
The script below covers the run without a minor allele frequency (MAF) filter (`nomaf`). Also run with 1% and 5% MAF filters.

In [None]:
#!/bin/bash -e

#SBATCH --cpus-per-task=6
#SBATCH --job-name=ngsadmix_nomaf
#SBATCH --mem=70G
#SBATCH --time=72:00:00
#SBATCH --account=$user
#SBATCH --output=%x_%j.out
#SBATCH --error=%x_%j.err
#SBATCH --hint=nomultithread

# Run NGSadmix on the unfiltered ("nomaf") Beagle file for K = 1..6, once per
# seed in SEEDS. Each run writes to "${OUT_PREFIX}_K<K>_seed<SEED>".
#
# NOTE: sbatch does not expand shell variables inside #SBATCH lines, so
# "$user" above has to be replaced with the real account name.

set -eo pipefail

module purge
module load angsd/0.935-GCC-9.2.0

# Define variables
BEAGLE_FILE="/$workdir/results/06_angsd/pcangsd/nomaf.beagle.gz"
OUT_PREFIX="/$workdir/results/06_angsd/pcangsd/ngsadmix_results/nomaf"
# Fall back to the 6 CPUs requested above when run outside SLURM; an empty
# value would leave "-P" without an argument.
THREADS="${SLURM_CPUS_PER_TASK:-6}"
MIN_MAF=0.00                 # enforced here to override default 5%
MIS_TOL=0.8                  # tolerance for high-quality genotypes
SEEDS=(14041 82680 21349)
MAX_ITER=2000
NGSadmix="/$workdir/Software/NGSadmix"

# The results directory is not created anywhere else in this script, so
# create it here before NGSadmix tries to write into it.
mkdir -p "${OUT_PREFIX%/*}"

for K in {1..6}; do
    echo "Running ngsAdmix for K=${K}"

    # Loop through seeds
    for SEED in "${SEEDS[@]}"; do
        echo "Using seed=${SEED}"

        # Run ngsAdmix
        "$NGSadmix" -likes "$BEAGLE_FILE" \
                 -K "$K" \
                 -outfiles "${OUT_PREFIX}_K${K}_seed${SEED}" \
                 -seed "$SEED" \
                 -minMaf "$MIN_MAF" \
                 -misTol "$MIS_TOL" \
                 -P "$THREADS" \
                 -maxiter "$MAX_ITER"
    done
done