Skip to content

04_MAPPING

eolesin edited this page May 19, 2021 · 7 revisions

Step 0. Prepare the data for mapping, which will be followed by binning

Yet another loop. Round and round we go... Run the following from within the 03_ASSEMBLIES directory:

# Reformat names

# For every set name listed in set.txt: reformat that set's contigs FASTA
# (-l 2500 is the minimum contig length to keep; --simplify-names rewrites the
# deflines) and build an anvi'o contigs database from the reformatted file.
mkdir -p 04_CONTIGS
for SET in $(cat set.txt)
do
    # The reformatted FASTA is written into 04_CONTIGS/ because that is the
    # path anvi-gen-contigs-database reads on the next line.
    anvi-script-reformat-fasta "$SET/$SET-contigs.fa" -l 2500 --simplify-names -o "04_CONTIGS/$SET.fa"
    anvi-gen-contigs-database -f "04_CONTIGS/$SET.fa" -o "04_CONTIGS/$SET-CONTIGS.db"
done

# HMM profiling: run anvi-run-hmms on the contigs database of each set named in set.txt
for SET in $(cat set.txt)
do
    anvi-run-hmms -c 04_CONTIGS/$SET-CONTIGS.db --num-threads 20
done

# Run against NCBI COG database: annotate every .db file in the current directory.
# NOTE(review): the contigs databases above are created under 04_CONTIGS/ --
# confirm this loop is meant to be run from inside that directory.
for file in ./*.db
do
    # When nothing matches, the glob stays as the literal './*.db'; skip it
    # rather than passing a non-existent path to anvi-run-ncbi-cogs.
    [ -e "$file" ] || continue
    anvi-run-ncbi-cogs --num-threads 16 -c "$file"
done

Step 1. Index

# Build bowtie2 DB for each co-assembly
# Input is the reformatted FASTA in 04_CONTIGS/ and the index prefix goes to
# 05_MAPPING/<SET>, which is the prefix the mapping step passes to `bowtie2 -x`.
mkdir -p 05_MAPPING
for SET in $(cat set.txt)
do
    # bowtie2-build documents its thread option as --threads; options are
    # given before the two positional arguments (reference FASTA, index prefix).
    bowtie2-build --threads 20 "04_CONTIGS/$SET.fa" "05_MAPPING/$SET"
done

Step 2. Actually do the mapping

# Map each sample's paired reads against the bowtie2 index of its set, then
# convert the alignments to a sorted, indexed BAM.
#
# Each line of samples_in_sets.txt is read as "<SET> <sample1,sample2,...>":
# the first whitespace-separated field is the index name under 05_MAPPING/,
# the second is a comma-separated list of sample names. Further fields are
# ignored.
#
# The list is read on file descriptor 3 so that commands inside the loop
# cannot consume it through their stdin. The `|| [ -n "$SET" ]` keeps a final
# line that lacks a trailing newline.
while read -r SET samples _ <&3 || [ -n "$SET" ]
do
    # Blank line: nothing to map.
    [ -n "$SET" ] || continue

    # Split the comma-separated sample list into an array.
    IFS=',' read -r -a Smparray <<< "$samples"

    for samp in "${Smparray[@]}"
    do
        # Ignore empty entries (e.g. produced by a doubled or trailing comma).
        [ -n "$samp" ] || continue

        # The steps are chained so the intermediate SAM / unsorted BAM are
        # removed only after the sorted BAM and its index were produced;
        # on any failure they are kept for inspection.
        if bowtie2 --threads 40 \
                -x "05_MAPPING/$SET" \
                -1 "02_HUMAN_Decontam/$samp-cleanR1.fq" \
                -2 "02_HUMAN_Decontam/$samp-cleanR2.fq" \
                --no-unal \
                -S "05_MAPPING/$samp.sam" \
            && samtools view -F 4 -bS "05_MAPPING/$samp.sam" > "05_MAPPING/$samp-RAW.bam" \
            && samtools sort "05_MAPPING/$samp-RAW.bam" -o "05_MAPPING/$samp.bam" \
            && samtools index "05_MAPPING/$samp.bam"
        then
            rm "05_MAPPING/$samp.sam" "05_MAPPING/$samp-RAW.bam"
        else
            printf 'mapping failed for sample %s (set %s); intermediate files kept\n' "$samp" "$SET" >&2
        fi
    done
done 3< samples_in_sets.txt

Clone this wiki locally