-
Notifications
You must be signed in to change notification settings - Fork 0
11_MAG_MAPPING
eolesin edited this page Aug 13, 2021
·
17 revisions
Mapping MAGs back to the read data. Most commands are adapted from the anvi'o documentation for the TARA Oceans dataset analysis: https://merenlab.org/data/tara-oceans-mags/
- Import to anvio
# Run inside the 11_MAP_MAGS folder on kjempefuru.
# Concatenate the contigs of every "winning" MAG from dRep into a single FASTA.
cat dereplicated_genomes/*fa > NON-REDUNDANT-MAGS.fa
# Runar's deflines contain characters that anvi'o complains about, so replace
# dashes and parentheses with underscores. The edit targets the file created
# above (dash-separated name) and only touches header lines (starting with ">"),
# leaving sequence lines untouched.
sed -i '/^>/ s/[-()]/_/g' NON-REDUNDANT-MAGS.fa
# Import the cleaned FASTA into anvi'o to create the contigs database.
anvi-gen-contigs-database -f NON-REDUNDANT-MAGS.fa -o NON-REDUNDANT-MAGs-CONTIGS.db
- Map reads back
# Locations of the read sets for each sequencing project.
PATH_2019="/export/dahlefs/work/Shotgun/Metagenomes_chimneys_2019/01_QC/"
PATH_2020="/export/dahlefs/work/Metagenomes_chimneys_2020_workfolder/02_HUMAN_Decontam/"
PATH_Runar="/export/dahlefs/work/Runar_AMOR_metagenomes/"

# Make sure the directory that receives the SAM files exists.
mkdir -p Bowtie

# NOTE(review): the index basename below uses underscores (NON_REDUNDANT_MAGS)
# while the FASTA is named NON-REDUNDANT-MAGS.fa, and the bowtie2-build step is
# not shown here -- confirm the index basename before running.

# Each sample list holds one sample name per line; blank lines are skipped.
# bowtie2 gets stdin from /dev/null so it cannot consume the list being read.

# 2020 samples (both lists share the same read-file naming scheme).
for list in AMOR_only Iron_mats_Good; do
  while read -r i || [[ -n "$i" ]]; do
    [[ -n "$i" ]] || continue
    bowtie2 --threads 20 -x NON_REDUNDANT_MAGS \
      -1 "${PATH_2020}${i}-cleanR1.fq" -2 "${PATH_2020}${i}-cleanR2.fq" \
      --no-unal -S "Bowtie/${i}-in-NRMAGS.sam" < /dev/null
  done < "$list"
done

# 2019 samples.
while read -r i || [[ -n "$i" ]]; do
  [[ -n "$i" ]] || continue
  bowtie2 --threads 20 -x NON_REDUNDANT_MAGS \
    -1 "${PATH_2019}${i}-QUALITY_PASSED_R1.fastq" \
    -2 "${PATH_2019}${i}-QUALITY_PASSED_R2.fastq" \
    --no-unal -S "Bowtie/${i}-in-NRMAGS.sam" < /dev/null
done < AMOR_2019

# Runar's samples.
while read -r i || [[ -n "$i" ]]; do
  [[ -n "$i" ]] || continue
  bowtie2 --threads 20 -x NON_REDUNDANT_MAGS \
    -1 "${PATH_Runar}${i}_S1_L001_R1_001.fastq" \
    -2 "${PATH_Runar}${i}_S1_L001_R2_001.fastq" \
    --no-unal -S "Bowtie/${i}-in-NRMAGS.sam" < /dev/null
done < sample_list_noNP
-
Convert .sam to .bam. Index with samtools and remove temp files.
-
Emily and Achim put MAG names in Runar's bin FASTA contig headers. On SAGA.
#!/usr/bin/bash
# SLURM batch job: rewrite the contig names of every *.fa file in the current
# directory with anvi-script-reformat-fasta, using a prefix derived from the
# file name, and write the results to Renamed_MAGs_Runar/.
#
# Every job must be charged to an account.
#SBATCH --account=nn9836k
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
# Every job requires some specification of the memory (RAM) it needs.
#SBATCH --mem-per-cpu=5G
# Every job requires a runtime limit.
#SBATCH --time=48:00:00
module load anvio/6.1-intel-2019b-Python-3.7.4

# Make sure the output directory exists before anything is written into it.
mkdir -p Renamed_MAGs_Runar

for sample in *.fa; do
  # An unmatched glob stays as the literal string "*.fa"; skip it.
  [[ -e "$sample" ]] || continue
  pref=$(basename "$sample" .fa)
  # Replace dots and dashes in the file stem with underscores to build the
  # prefix (presumably because anvi'o only accepts simple prefixes -- confirm).
  inew=${pref//./_}
  inew2=${inew//-/_}
  anvi-script-reformat-fasta "$sample" --simplify-names --prefix "s_${inew2}" \
    -o "Renamed_MAGs_Runar/${sample}"
done
-
Concatenate the renamed winning dereplicated genomes into a single FASTA, import it into anvi'o, and create a contigs database (contigs.db) from it.
-
Rerun the mapping using the new contig.db
# in /export/dahlefs/work/Metagenomes_chimneys_2020_workfolder/11_MAP_MAGS/ALL_withRunar_drep_COMP70_ANI98
# Start a screen session (interactive) before launching the mapping.
screen
# Locations of the read sets for each sequencing project.
PATH_2019="/export/dahlefs/work/Shotgun/Metagenomes_chimneys_2019/01_QC/"
PATH_2020="/export/dahlefs/work/Metagenomes_chimneys_2020_workfolder/02_HUMAN_Decontam/"
PATH_Runar="/export/dahlefs/work/Runar_AMOR_metagenomes/"
conda activate bowtie2
# Build the bowtie2 index that the mapping below aligns against.
bowtie2-build --threads 40 NON-REDUNDANT-MAGS.fa NON-REDUNDANT-MAGS
# Make sure the directory that receives the SAM files exists.
mkdir -p Bowtie

#######################################
# Map every sample named in a list file against the NON-REDUNDANT-MAGS index.
# This time a log file is written for each sample (bowtie2's stderr goes to
# <sample>.log) so we can see how much of each sample maps to the MAGs without
# crawling through the BAMs.
# Arguments:
#   $1 - file with one sample name per line (blank lines are skipped)
#   $2 - path prefix placed in front of the sample name
#   $3 - suffix of the forward-read file
#   $4 - suffix of the reverse-read file
# Outputs: Bowtie/<sample>-in-NRMAGS.sam and <sample>.log for each sample
#######################################
map_sample_list() {
  local list=$1 prefix=$2 r1_suffix=$3 r2_suffix=$4
  local sample
  while read -r sample || [[ -n "$sample" ]]; do
    [[ -n "$sample" ]] || continue
    # stdin comes from /dev/null so bowtie2 cannot consume the list being read.
    bowtie2 --threads 40 -x NON-REDUNDANT-MAGS \
      -1 "${prefix}${sample}${r1_suffix}" -2 "${prefix}${sample}${r2_suffix}" \
      --no-unal -S "Bowtie/${sample}-in-NRMAGS.sam" \
      < /dev/null 2> "${sample}.log"
  done < "$list"
}

map_sample_list AMOR_only "$PATH_2020" -cleanR1.fq -cleanR2.fq
map_sample_list Iron_mats_Good "$PATH_2020" -cleanR1.fq -cleanR2.fq
map_sample_list AMOR_2019 "$PATH_2019" -QUALITY_PASSED_R1.fastq -QUALITY_PASSED_R2.fastq
map_sample_list sample_list_noNP "$PATH_Runar" _S1_L001_R1_001.fastq _S1_L001_R2_001.fastq
Convert sam to bam and do sorting and indexing
# Convert each sample's SAM to BAM, then sort and index it. Every file is kept
# in Bowtie/ next to the SAM, so each step reads exactly what the previous step
# wrote. The steps are chained with && so a failed conversion is not sorted or
# indexed.
# The list is read on file descriptor 3 to leave stdin alone; blank lines are
# skipped.
while read -r i <&3 || [[ -n "$i" ]]; do
  [[ -n "$i" ]] || continue
  # -F 4 drops unmapped reads; -bS asks for BAM output from SAM input.
  samtools view -F 4 -bS "Bowtie/${i}-in-NRMAGS.sam" > "Bowtie/${i}-in-NRMAGS-RAW.bam" &&
    samtools sort "Bowtie/${i}-in-NRMAGS-RAW.bam" -o "Bowtie/${i}-in-NRMAGS.bam" &&
    samtools index "Bowtie/${i}-in-NRMAGS.bam"
done 3< all_samples_all_projects