-
Notifications
You must be signed in to change notification settings - Fork 0
09_GENOME_DEREPLICATION
eolesin edited this page Jun 8, 2021
·
21 revisions
Run the following steps to export the genomes (MAGs) from anvi'o into FASTA format.
# Base folders of the merged anvi'o profiles and of the per-sample contigs
# databases. They are assigned before the loop below because the loop reads
# them when building the -p / -c paths.
MERGED_PATH="/export/dahlefs/work/Metagenomes_chimneys_2020_workfolder/07_MERGED_PROFILES/"
CONTIG_PATH="/export/dahlefs/work/Metagenomes_chimneys_2020_workfolder/03_INDIV_ASSEMBLY/"

# Rename bins that pass community MAG standards as MAGs
# AMOR_2020_Good is read from the current directory; every whitespace-separated
# word in it is used as one sample name (the word splitting of the command
# substitution is intentional).
for i in $(<AMOR_2020_Good); do
  # Anvi'o doesn't like dashes so we remove them from names
  inew=${i//-/_}
  # Reads the "dastool" collection and writes the renamed bins to a new
  # collection called MAGS_<sample>; a per-sample report goes to <sample>_report.txt.
  anvi-rename-bins -p "${MERGED_PATH}${i}-MERGED-PROFILE/PROFILE.db" \
    -c "${CONTIG_PATH}${i}/${i}.prefixed.contigs.db" --prefix "s_${inew}" \
    --collection-to-read dastool --collection-to-write "MAGS_${inew}" \
    --report-file "${i}_report.txt" --min-completion-for-MAG 80 \
    --max-redundancy-for-MAG 10 --call-MAGs
done
# Summarize the MAGS_<sample> collection of every sample listed in
# AMOR_2020_Good (read from the current directory); the output of each sample
# is written to a folder named after the sample.
# MERGED_PATH and CONTIG_PATH must already be set; fail with a clear message
# instead of silently building paths that start at "/".
: "${MERGED_PATH:?MERGED_PATH must be set to the merged-profiles folder}"
: "${CONTIG_PATH:?CONTIG_PATH must be set to the individual-assembly folder}"
# Word splitting of the command substitution is intentional: every
# whitespace-separated word in the file is one sample name.
for i in $(<AMOR_2020_Good); do
  # Anvi'o doesn't like dashes so we remove them from sample names
  inew=${i//-/_}
  anvi-summarize -p "${MERGED_PATH}${i}-MERGED-PROFILE/PROFILE.db" \
    -c "${CONTIG_PATH}${i}/${i}.prefixed.contigs.db" --reformat-contig-names \
    -C "MAGS_${inew}" -o "$i"
done
# Within the MAGS output folder:
# List every *-contigs.fa file below the current directory. Starting find at
# "$PWD" makes it print absolute paths. The list is appended to genome_paths,
# so delete an old genome_paths before re-running to avoid duplicate entries.
find "$PWD" -type f -name "*-contigs.fa" >> genome_paths
# Keep only the paths containing "MAG_" (also appended, same caveat as above).
grep "MAG_" genome_paths >> MAGS_for_derep
# Dereplicate the genomes listed in $MAG_path/genome_paths, using the current
# directory (the trailing ".") as the dRep work directory.
# MAG_path is not set anywhere in this page, so it must be defined beforehand;
# the :? guard stops with a message instead of silently reading /genome_paths.
# NOTE(review): the filtered list MAGS_for_derep is built in the previous step
# but is not what is passed to -g here — confirm which list is intended.
dRep dereplicate -p 60 \
  -g "${MAG_path:?set MAG_path to the folder containing genome_paths}/genome_paths" \
  -l 10000 -comp 80 -con 10 -sa 0.98 \
  --multiround_primary_clustering --run_tertiary_clustering .