Skip to content

11_MAG_MAPPING

eolesin edited this page Aug 16, 2021 · 17 revisions

Mapping MAGs back to the read data. Most commands are adapted from the anvi'o documentation for the TARA Oceans dataset analysis: https://merenlab.org/data/tara-oceans-mags/

  1. Import to anvio
# WITHIN THE 11_MAP_MAGS folder on kjempefuru
# Concatenate all the contigs from all the "winning" MAGs from dRep:
cat dereplicated_genomes/*fa > NON-REDUNDANT-MAGS.fa

# Runar's bins did not have clean deflines, which anvi'o complains about.
# Replace dashes and parentheses with underscores, on header lines only
# (lines starting with ">") so sequence lines are never modified.
# The file name must be the one written by `cat` above (dashes, not underscores).
sed -i '/^>/ s/[-()]/_/g' NON-REDUNDANT-MAGS.fa


# Then import the FASTA into anvi'o to make a contigs database.
anvi-gen-contigs-database -f NON-REDUNDANT-MAGS.fa -o NON-REDUNDANT-MAGs-CONTIGS.db

Achim did some magic to make the contigs.db file

* First need to remove all of the renamed bins that are not in the dereplicated set:
# List the bins that exist only in Runar_renamed_bins/ (i.e. were not kept in
# the dereplicated set), one file name per line. The output name must match
# the removed_bins_Runar.txt file that the later `xargs rm` step reads.
diff -q dereplicated_genomes/ Runar_renamed_bins/ \
  | grep '^Only in Runar_renamed_bins' \
  | awk -F': ' '{print $2}' > removed_bins_Runar.txt

 

# Run from inside the copied bin directory (the list file sits one level up).
# -d '\n' treats each line as exactly one file name (no splitting on spaces or
# quotes); -r skips rm entirely when the list is empty; -- ends option parsing.
xargs -r -d '\n' rm -- < ../removed_bins_Runar.txt

 

    achimm@kjempefuru /export/dahlefs/work/Metagenomes_chimneys_2020_workfolder/08_DEREP_GENOMES/ALL_withRunar_drep_COMP70_ANI98 $ cp Runar_renamed_bins/* Runar_renamed_bins_AM/

 

    achimm@kjempefuru /export/dahlefs/work/Metagenomes_chimneys_2020_workfolder/08_DEREP_GENOMES/ALL_withRunar_drep_COMP70_ANI98/Runar_renamed_bins_AM $ xargs rm < ../removed_bins_Runar.txt

 

    achimm@kjempefuru /export/dahlefs/work/Metagenomes_chimneys_2020_workfolder/08_DEREP_GENOMES/ALL_withRunar_drep_COMP70_ANI98 $ cp dereplicated_genomes/s_* dereplicated_genomes_AM/
    
    achimm@kjempefuru /export/dahlefs/work/Metagenomes_chimneys_2020_workfolder/08_DEREP_GENOMES/ALL_withRunar_drep_COMP70_ANI98 $ cp Runar_renamed_bins_AM/* dereplicated_genomes_AM/

 

* concatenate:
    /export/dahlefs/work/Metagenomes_chimneys_2020_workfolder/08_DEREP_GENOMES/ALL_withRunar_drep_COMP70_ANI98/dereplicated_genomes_AM $ cat *fa > ../NON-REDUNDANT-MAGS.fa
* Make contigs.db from the fasta 
    # Switch to the anvio-dev conda environment, then build the contigs
    # database from the concatenated FASTA using 30 threads.
    conda activate anvio-dev
    anvi-gen-contigs-database \
        --contigs-fasta NON-REDUNDANT-MAGS.fa \
        --output-db-path NON-REDUNDANT-MAGs-CONTIGS.db \
        --num-threads 30
  1. Map reads back
# Directories holding the quality-controlled reads of each project
# (each value ends with "/" so sample names can be appended directly).
PATH_2019="/export/dahlefs/work/Shotgun/Metagenomes_chimneys_2019/01_QC/"
PATH_2020="/export/dahlefs/work/Metagenomes_chimneys_2020_workfolder/02_HUMAN_Decontam/"
PATH_Runar="/export/dahlefs/work/Runar_AMOR_metagenomes/"

# All SAM files are written under Bowtie/; make sure it exists first.
mkdir -p Bowtie

# NOTE(review): the index prefix below is NON_REDUNDANT_MAGS (underscores);
# no bowtie2-build call for it is shown in this section -- confirm the index
# was built under that name.

# Each list file holds one sample name per line. --no-unal keeps reads that
# failed to align out of the SAM output.
for sample in $(cat AMOR_only); do
  bowtie2 --threads 20 -x NON_REDUNDANT_MAGS \
    -1 "${PATH_2020}${sample}-cleanR1.fq" \
    -2 "${PATH_2020}${sample}-cleanR2.fq" \
    --no-unal -S "Bowtie/${sample}-in-NRMAGS.sam"
done

for sample in $(cat Iron_mats_Good); do
  bowtie2 --threads 20 -x NON_REDUNDANT_MAGS \
    -1 "${PATH_2020}${sample}-cleanR1.fq" \
    -2 "${PATH_2020}${sample}-cleanR2.fq" \
    --no-unal -S "Bowtie/${sample}-in-NRMAGS.sam"
done

for sample in $(cat AMOR_2019); do
  bowtie2 --threads 20 -x NON_REDUNDANT_MAGS \
    -1 "${PATH_2019}${sample}-QUALITY_PASSED_R1.fastq" \
    -2 "${PATH_2019}${sample}-QUALITY_PASSED_R2.fastq" \
    --no-unal -S "Bowtie/${sample}-in-NRMAGS.sam"
done

for sample in $(cat sample_list_noNP); do
  bowtie2 --threads 20 -x NON_REDUNDANT_MAGS \
    -1 "${PATH_Runar}${sample}_S1_L001_R1_001.fastq" \
    -2 "${PATH_Runar}${sample}_S1_L001_R2_001.fastq" \
    --no-unal -S "Bowtie/${sample}-in-NRMAGS.sam"
done
  1. Convert .sam to .bam. Index with samtools and remove temp files.

  2. Emily and Achim put MAG names in Runar's bin FASTA contig headers. On SAGA.

#!/usr/bin/bash
# SLURM job: rewrite the contig headers of every *.fa bin in the current
# working directory so that each header carries a prefix derived from the
# bin's file name, writing the results to Renamed_MAGs_Runar/.

# every job must be accounted for
#SBATCH --account=nn9836k

#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
# every job requires some specification of the memory (RAM) it needs
#SBATCH --mem-per-cpu=5G
# every job requires a runtime limit
#SBATCH --time=48:00:00

module load anvio/6.1-intel-2019b-Python-3.7.4

# The output directory must exist before the first FASTA is written into it.
mkdir -p Renamed_MAGs_Runar

for sample in *.fa; do
  # With no matching files the glob stays literal ("*.fa"); skip that case.
  [[ -e "$sample" ]] || continue

  # Build the header prefix from the file name: drop ".fa", then turn
  # every "." and "-" into "_".
  pref=$(basename "$sample" .fa)
  inew=${pref//./_}
  inew2=${inew//-/_}

  anvi-script-reformat-fasta "$sample" --simplify-names --prefix "s_${inew2}" \
    -o "Renamed_MAGs_Runar/${sample}"
done
  1. Concatenated the FASTA files of the renamed, winning dereplicated genomes, imported the result into anvi'o, and created a contigs.db file from it.

  2. Rerun the mapping using the new contig.db

# in /export/dahlefs/work/Metagenomes_chimneys_2020_workfolder/11_MAP_MAGS/ALL_withRunar_drep_COMP70_ANI98

# Interactive step: work inside a screen session.
screen 

# Directories holding the quality-controlled reads of each project
# (each value ends with "/" so sample names can be appended directly).
PATH_2019="/export/dahlefs/work/Shotgun/Metagenomes_chimneys_2019/01_QC/"
PATH_2020="/export/dahlefs/work/Metagenomes_chimneys_2020_workfolder/02_HUMAN_Decontam/"
PATH_Runar="/export/dahlefs/work/Runar_AMOR_metagenomes/"

conda activate bowtie2 

# Build the bowtie2 index from the concatenated, renamed MAG FASTA.
bowtie2-build --threads 40 NON-REDUNDANT-MAGS.fa NON-REDUNDANT-MAGS

# All SAM files are written under Bowtie/; make sure it exists first.
mkdir -p Bowtie

#######################################
# Map every sample named in a list file against the NON-REDUNDANT-MAGS index.
# This time a log file is written for each sample (bowtie2's stderr goes to
# <sample>.log in the current directory) so we can see how much of each
# sample maps to the MAGs without crawling through the BAMs.
# Arguments:
#   $1 - file with one sample name per line
#   $2 - directory containing the reads (with trailing "/")
#   $3 - suffix appended to the sample name for the R1 file
#   $4 - suffix appended to the sample name for the R2 file
# Outputs: Bowtie/<sample>-in-NRMAGS.sam and <sample>.log per sample
#######################################
map_sample_list() {
  local list_file=$1 read_dir=$2 r1_suffix=$3 r2_suffix=$4
  local sample
  for sample in $(cat "$list_file"); do
    bowtie2 --threads 40 -x NON-REDUNDANT-MAGS \
      -1 "${read_dir}${sample}${r1_suffix}" \
      -2 "${read_dir}${sample}${r2_suffix}" \
      --no-unal -S "Bowtie/${sample}-in-NRMAGS.sam" 2> "${sample}.log"
  done
}

map_sample_list AMOR_only        "$PATH_2020"  -cleanR1.fq -cleanR2.fq
map_sample_list Iron_mats_Good   "$PATH_2020"  -cleanR1.fq -cleanR2.fq
map_sample_list AMOR_2019        "$PATH_2019"  -QUALITY_PASSED_R1.fastq -QUALITY_PASSED_R2.fastq
map_sample_list sample_list_noNP "$PATH_Runar" _S1_L001_R1_001.fastq _S1_L001_R2_001.fastq

Convert sam to bam and do sorting and indexing

# For every sample: SAM -> BAM, coordinate-sort, index.
# Every intermediate and final file lives under Bowtie/, so each step finds
# the previous step's output and the later anvi-profile call finds
# Bowtie/<sample>-in-NRMAGS.bam.
for i in $(cat all_samples_all_projects); do
  # -F 4 drops unmapped reads; -bS reads SAM and writes BAM.
  samtools view -F 4 -bS "Bowtie/${i}-in-NRMAGS.sam" > "Bowtie/${i}-in-NRMAGS-RAW.bam"
  samtools sort "Bowtie/${i}-in-NRMAGS-RAW.bam" -o "Bowtie/${i}-in-NRMAGS.bam"
  samtools index "Bowtie/${i}-in-NRMAGS.bam"
done


Perform the profiling in Anvio. Merge the profiles

# Profile each sample's sorted, indexed BAM against the contigs database.
# The database name is the one produced by anvi-gen-contigs-database earlier
# on this page (NON-REDUNDANT-MAGs-CONTIGS.db).
# NOTE(review): if the database was renamed after creation, adjust -c here
# and in the merge step below.
for i in $(cat all_samples_all_projects); do
  anvi-profile -c NON-REDUNDANT-MAGs-CONTIGS.db \
    -i "Bowtie/${i}-in-NRMAGS.bam" --skip-SNV-profiling --num-threads 20 \
    -o "${i}-in-NRMAGS"
done

# merge resulting profiles into a single anvi'o merged profile
anvi-merge *-in-NRMAGS/PROFILE.db \
           -c NON-REDUNDANT-MAGs-CONTIGS.db \
           -o NON-REDUNDANT-MAGS-MERGED

Clone this wiki locally