In [None]:
# The goal of this script is to construct Metagenome-Assembled Genomes (MAGs)
# We will be building these MAGs from our total metagenome data
    # Specifically, the assemblies we built from total metagenome reads using MetaSPAdes
# To construct our MAGs we will....
    # Use MetaBat to bin assemblies into "crude" MAGs
    # We will then use CheckM to check the quality of our MAGs
    # dRep is then used to remove redundant MAGs from our high-quality MAGs

In [None]:
# First step, use MetaBat to bin assemblies
# However, MetaBat requires a .bam file describing read depth and alignment location
# It also wants this .bam file to be sorted
# So, to use MetaBat, we will construct these .bam files by...
    # 1) mapping reads to assemblies with bowtie2
    # 2) converting the output .sam files into .bam
    # 3) sorting the .bam files with samtools
    # 4) run MetaBat!!

# Bowtie step (making .sam file)
# Start by building bowtie2 indexes
# Make one empty index directory per assembly, named after the assembly file
# with everything from the first ".fa" onwards stripped
cd /path/to/MAG_construction/assemblies/scaffolds
for f in *.fa
do
    # -p also creates the parent binning/ dir and makes a re-run harmless
    mkdir -p "/path/to/MAG_construction/binning/${f%%.fa*}"
done

# Build one bowtie2 index per assembly in the current directory.
# The index prefix is binning/<sample>/<sample>_db, where <sample> is the
# assembly file name with everything from the first ".fa" onwards removed
conda activate bowtie2
scaffold_dir=/path/to/MAG_construction/assemblies/scaffolds
binning_dir=/path/to/MAG_construction/binning
for f in *.fa; do
    sample=${f%%.fa*}
    bowtie2-build $scaffold_dir/$f $binning_dir/$sample/${sample}_db
done

# Now we will run the actual mapping
cd /path/to/MAG_construction/binning
# Slurm does not create the directory named in --output; make it up front so
# the job logs are not lost
mkdir -p log

# Everything from the shebang down to the bowtie2 call is the batch script
# that is submitted further down as make_SAM.sh.
# $1 is the name of a forward (R1) read file. The sample name is the part of
# that name before "_host"; the R2 file name is built by replacing everything
# from the first "R1" onwards with "R2.fastq.gz".
#!/bin/bash
#SBATCH --job-name=make_SAM
#SBATCH --nodes=1
#SBATCH -t 6:00:00
#SBATCH --ntasks=12
#SBATCH --output=/path/to/MAG_construction/binning/log/%j.out
#SBATCH --partition=med

conda activate bowtie2

# -p makes bowtie2 use the CPUs requested with --ntasks; without it bowtie2
# runs on a single thread and the other allocated cores sit idle
bowtie2 -p "${SLURM_NTASKS:-1}" \
-x "/path/to/MAG_construction/binning/${1%%_host*}/${1%%_host*}_db" \
-1 "/path/to/trimmed/and/cleaned/reads/$1" \
-2 "/path/to/trimmed/and/cleaned/reads/${1%%R1*}R2.fastq.gz" \
-S "/path/to/MAG_construction/binning/${1%%_host*}.sam" \
--very-sensitive-local

# Submit one mapping job per forward-read file found in the reads dir;
# the file name is handed to make_SAM.sh as its only argument
conda activate bowtie2
cd /path/to/trimmed/and/cleaned/reads
for r1 in *R1*.gz; do sbatch ../scripts/make_SAM.sh $r1; done

# Then move the outputs to a new dir
# (wait until all make_SAM jobs have finished before running this)
# bowtie2 wrote the .sam files into the binning dir (see -S in make_SAM.sh),
# so work from there rather than from the reads dir
cd /path/to/MAG_construction/binning
# sorted_bam/ is where the MetaBat step looks for the sorted alignments
mkdir -p sam_files/sorted_bam
mv *.sam sam_files/
# The .sam files now live in sam_files/, so the loops below must run there
cd sam_files
# Next, we have to convert these .sam files into .bam files
# We will do this with samtools
conda activate samtools
for f in *.sam
do
    samtools view -S -b "$f" > "${f%%.sam}.bam"
done
# Next, we have to sort
# Only the unsorted .bam files sit in this dir; sorted output goes to sorted_bam/
for f in *.bam
do
    samtools sort -o "sorted_bam/${f%%.bam}_sorted.bam" "$f"
done

# Now we are ready to run MetaBat
    # Note, this was done on an HPC in an srun environment
conda activate metabat
# runMetaBat.sh writes its depth file and bins folder into the current
# directory, so run it from the bins dir that the checkM step reads from
mkdir -p /path/to/MAG_construction/binning/bins
cd /path/to/MAG_construction/binning/bins
# Loop over the assemblies by full path: there are no .fa files in this dir
for assembly in /path/to/MAG_construction/assemblies/scaffolds/*.fa
do
    f=${assembly##*/}   # assembly file name without its directory
    runMetaBat.sh \
    "$assembly" \
    "/path/to/MAG_construction/binning/sam_files/sorted_bam/${f%%.fa}_sorted.bam"
done

In [None]:
# Now we want to check the quality of our bins/MAGs
# We will do this through checkM

# Copy and rename bins to new dir named bin_seqs
# Each copy is named <sample>_<bin file name> so bins from different samples
# cannot overwrite each other
cd /path/to/MAG_construction/binning/bins
# cp does not create the destination dir, so make it first
mkdir -p bin_seqs
for f in */*
do
    # Skip bin_seqs itself so a re-run does not copy the copies
    case $f in bin_seqs/*) continue ;; esac
    sam=${f%%.fa*}   # sample name: the path up to the first ".fa"
    bin=${f##*/}     # bin file name without its directory
    cp "$f" "./bin_seqs/${sam}_${bin}"
done

# Load checkm
    # Note, this was done on an HPC in an srun env with a lot of ram
# Run the lineage workflow with 8 threads on every *.fa file in bin_seqs.
# The output dir is given as an absolute path inside bin_seqs, because that is
# where the high-quality-bin selection step looks for checkm_out
checkm lineage_wf -t 8 -x fa \
/path/to/MAG_construction/binning/bins/bin_seqs \
/path/to/MAG_construction/binning/bins/bin_seqs/checkm_out

# After some processing in R
# (selecting for Bins with <= 2% contam and >= 90% completion)
# copy high quality bins using list
cd /path/to/MAG_construction/binning/bins/bin_seqs/checkm_out
# Collect the high-quality bins in the directory the dRep step reads from
hq_dir=/path/to/MAG_construction/binning/bins/bin_seqs/checkM
# Without an existing dir, cp would write every bin to one file named checkM
mkdir -p "$hq_dir"

# high_quality_bins.csv is read as one bin name per line, without the ".fa"
# -r keeps backslashes literal; the || test keeps a last line with no newline
while IFS= read -r f || [ -n "$f" ]; do
    [ -n "$f" ] || continue   # ignore blank lines
    cp "../${f}.fa" "$hq_dir"
done < high_quality_bins.csv

# Move to that dir
cd "$hq_dir"

In [None]:
# Finally, Dereplicate with dRep
cd /path/to/MAG_construction
mkdir -p drep
# Uses checkM, so will have to be done with a lot of RAM too
# The first argument is dRep's work directory; point it at drep/ so the
# outputs land in the dir made above instead of in MAG_construction itself
dRep dereplicate ./drep -g \
/path/to/MAG_construction/binning/bins/bin_seqs/checkM/*.fa
# After dereplicating we are left with 23 MAGs
# 12 from honey bees
# 11 from bumble bees