In [None]:
# So we have cleaned our reads fairly well up until this point
    # Removed polyG seqs
    # Removed adapter seqs
    # Quality filtered
# But now we want to remove any potential contamination
# This includes...
    # Host bee seqs
    # Potential extraction contamination
# To do this, we will 
    # 1) Construct scaffolds from our negative reads
        # These are our potential extraction contamination sequences
    # 2) Next, we will add host bee seqs 
        # B. imp and A. mel genomes
    # 3) We will then align all of our reads against this database of potential contamination 
    # 4) Any reads which map to this contamination database (negative scaffolds or host bee genomes) will be removed

In [None]:
# Build scaffolds from negative reads

#!/bin/bash
#SBATCH --job-name=Assemblies
#SBATCH --nodes=1
#SBATCH -t 6:00:00
#SBATCH --ntasks=8
#SBATCH --output=/path/to/neg/assemblies/log_files/%j.out
#SBATCH --partition=med

# assemble.sh -- assemble one negative-control library into scaffolds with SPAdes (--meta).
# Usage: sbatch assemble.sh <R1 trimmed reads file>
#   $1  forward-read (R1) file name; the R2 file name and the sample name are derived from it.
# Output: one SPAdes result directory per sample under /path/to/neg/assemblies/.

# Fail early with a usage message instead of launching SPAdes with empty arguments.
if [ -z "$1" ]; then
    echo "usage: sbatch assemble.sh <R1_trimmed.fq.gz>" >&2
    exit 1
fi

module load spades

r1=$1
r2=${r1%%R1*}R2_trimmed.fq.gz   # everything before the first "R1" + the R2 suffix
sample=${r1%%_S*}               # everything before the first "_S"

# -t matches the 8 tasks requested above; -m is the SPAdes memory limit.
# NOTE(review): the job does not request memory explicitly (no #SBATCH --mem) --
# confirm the partition default is large enough for the -m 120 limit.
spades.py \
  -o "/path/to/neg/assemblies/${sample}" \
  -1 "${r1}" \
  -2 "${r2}" \
  -k 21,33,55,77,99,127 \
  --meta \
  -m 120 \
  -t 8

# Run this shell script as a loop: one assembly job per negative-control R1 file
cd path/to/neg/trimmed_reads
for f in *R1*.gz
do
    # If nothing matches, the glob stays literal; skip it rather than submit a bogus job
    [ -e "$f" ] || continue
    sbatch ../scripts/assemble.sh "$f"
done

# Collect the scaffolds of every negative-control assembly in one new file
cd path/to/neg/assemblies
mkdir -p scaffolds
# Overwrite (>) instead of append (>>) so that re-running this step
# does not duplicate every scaffold in negative_seqs.fasta
cat *NEG/scaffolds.fasta > scaffolds/negative_seqs.fasta

# Let's copy this negative_seqs file to a new dir
cd path/to/neg
mkdir -p negative_host_contam
# Trailing slash: fail loudly if the destination is not a directory
cp path/to/neg/assemblies/scaffolds/negative_seqs.fasta ./negative_host_contam/

# Now we throw in the Bombus impatiens and Apis mellifera genomes
# (presumably run from the machine the genomes were downloaded to -- the target is the cluster)
scp path/to/downloaded/host/bee/genomes/*.fna \
dsbard@farm.cse.ucdavis.edu:/path/to/neg/negative_host_contam
# Combine bee genomes and negative contam into one big file
cd path/to/neg/negative_host_contam
# negative_seqs.fasta holds the negative-control scaffolds copied into this directory;
# write with > so a re-run rebuilds the file instead of appending duplicates
cat negative_seqs.fasta \
GCA_000002195.1_Amel_4.5_genomic.fna \
GCF_000188095.3_BIMP_2.2_genomic.fna \
> contaminating_seqs.fasta

# Make a local database from this contam file using bowtie
# Interactive session first. Slurm reads a bare --mem number as megabytes,
# so the unit is given explicitly.
srun -p bigmemm --time=1:00:00 --nodes=1 \
--cpus-per-task 1 --mem 5G --pty /bin/bash
conda activate bowtie2
cd path/to/neg/negative_host_contam
# Writes the index files negative_db*.bt2 into the current directory
bowtie2-build contaminating_seqs.fasta negative_db
# Keep the index in its own directory; -p makes the step re-runnable
mkdir -p db
mv *.bt2 db

In [None]:
# Now we actually use Bowtie2 to remove contamination from our read libraries

##### Virome #####

#!/bin/bash
#SBATCH --job-name=decontam
#SBATCH --nodes=1
#SBATCH -t 6:00:00
#SBATCH --ntasks=12
#SBATCH --output=/home/dsbard/bee_phage/dls/virome/decontam/log/decontam%j.out
#SBATCH --partition=med

# decontam.sh (virome) -- align one read pair against the contamination database
# and keep the pairs that do not align concordantly.
# Usage: sbatch decontam.sh <R1 trimmed reads file>
#   $1  forward-read (R1) file name; the R2 file name and the sample name are derived from it.
# Output (in path/to/virome/decontam/):
#   <sample>_host_removed.1 / .2                        gzipped pairs that failed to align concordantly
#   <sample>_host_removed_mapped_and_unmapped.sam       full alignment output

# Fail early with a usage message instead of running bowtie2 on empty file names.
if [ -z "$1" ]; then
    echo "usage: sbatch decontam.sh <R1_trimmed.fq.gz>" >&2
    exit 1
fi

# NOTE(review): `conda activate` inside a batch job normally needs conda's shell
# hook to be initialised first -- confirm this works on the cluster.
conda activate bowtie2

r1=$1
r2=${r1%%R1*}R2_trimmed.fq.gz   # everything before the first "R1" + the R2 suffix
sample=${r1%%_S*}               # everything before the first "_S"

# -p: use all CPUs requested above (SLURM_NTASKS); falls back to 1 outside Slurm
bowtie2 -p "${SLURM_NTASKS:-1}" -x path/to/neg/negative_host_contam/db/negative_db \
  -1 "path/to/virome/trimmed_reads/${r1}" \
  -2 "path/to/virome/trimmed_reads/${r2}" \
  --un-conc-gz \
  "path/to/virome/decontam/${sample}_host_removed" \
  > "path/to/virome/decontam/${sample}_host_removed_mapped_and_unmapped.sam"

# Run the shell script from the dir with the trimmed reads: one job per R1 file
for f in *R1*
do
    # If nothing matches, the glob stays literal; skip it rather than submit a bogus job
    [ -e "$f" ] || continue
    sbatch ../scripts/decontam.sh "$f"
done

# Then rename outputs: <sample>_host_removed.1 -> <sample>_host_removed_R1.fastq.gz (same for .2 / R2)
# The globs are relative, so run this in the directory holding the *_host_removed.1/.2 files
for mate in 1 2
do
    for f in *removed.${mate}
    do
        # Skip the literal pattern when no output files are present
        [ -e "$f" ] || continue
        mv "$f" "${f%.${mate}}_R${mate}.fastq.gz"
    done
done

##### Total metagenome #####

#!/bin/bash
#SBATCH --job-name=decontam
#SBATCH --nodes=1
#SBATCH -t 6:00:00
#SBATCH --ntasks=12
#SBATCH --output=/home/dsbard/bee_phage/dls/total_metagenomes/decontam/log/decontam%j.out
#SBATCH --partition=bigmemm

# decontam.sh (total metagenome) -- align one read pair against the contamination
# database and keep the pairs that do not align concordantly.
# Usage: sbatch decontam.sh <R1 trimmed reads file>
#   $1  forward-read (R1) file name; the R2 file name and the sample name are derived from it.
# Output (in path/to/total_metagenomes/decontam/):
#   <sample>_host_removed.1 / .2                        gzipped pairs that failed to align concordantly
#   <sample>_host_removed_mapped_and_unmapped.sam       full alignment output

# Fail early with a usage message instead of running bowtie2 on empty file names.
if [ -z "$1" ]; then
    echo "usage: sbatch decontam.sh <R1_trimmed.fq.gz>" >&2
    exit 1
fi

# NOTE(review): `conda activate` inside a batch job normally needs conda's shell
# hook to be initialised first -- confirm this works on the cluster.
conda activate bowtie2

r1=$1
r2=${r1%%R1*}R2_trimmed.fq.gz   # everything before the first "R1" + the R2 suffix
sample=${r1%%_S*}               # everything before the first "_S"

# -p: use all CPUs requested above (SLURM_NTASKS); falls back to 1 outside Slurm
bowtie2 -p "${SLURM_NTASKS:-1}" -x path/to/neg/negative_host_contam/db/negative_db \
  -1 "path/to/total_metagenomes/trimmed_reads/${r1}" \
  -2 "path/to/total_metagenomes/trimmed_reads/${r2}" \
  --un-conc-gz \
  "path/to/total_metagenomes/decontam/${sample}_host_removed" \
  > "path/to/total_metagenomes/decontam/${sample}_host_removed_mapped_and_unmapped.sam"

# Run the shell script from the dir with the trimmed reads: one job per R1 file
for f in *R1*
do
    # If nothing matches, the glob stays literal; skip it rather than submit a bogus job
    [ -e "$f" ] || continue
    sbatch ../scripts/decontam.sh "$f"
done

# Then rename outputs: <sample>_host_removed.1 -> <sample>_host_removed_R1.fastq.gz (same for .2 / R2)
# The globs are relative, so run this in the directory holding the *_host_removed.1/.2 files
for mate in 1 2
do
    for f in *removed.${mate}
    do
        # Skip the literal pattern when no output files are present
        [ -e "$f" ] || continue
        mv "$f" "${f%.${mate}}_R${mate}.fastq.gz"
    done
done