In [None]:
# visualize initial read quality
# We will do this with fastqc

##### Virome Samples #####
# Run FastQC on every raw virome read file to inspect initial read quality.
# move to the raw virome read dir
cd path/to/raw/virome/read/files
# activate the conda env that provides fastqc
conda activate fastqc
# FastQC does not create its output directory, so make it first
# (-p keeps this cell re-runnable when the directory already exists)
mkdir -p fastqc
# run fastqc on all gzipped files in one call; the .html and .zip reports
# are written straight into ./fastqc, so no unrelated files get moved
fastqc --outdir fastqc *.gz

##### Negative Samples #####
# Let's also look at our negative (control) samples
cd path/to/raw/negative/read/files
conda activate fastqc
# FastQC does not create its output directory, so make it first
# (-p keeps this cell re-runnable when the directory already exists)
mkdir -p fastqc
# run fastqc on all gzipped files in one call; the .html and .zip reports
# are written straight into ./fastqc, so no unrelated files get moved
fastqc --outdir fastqc *.gz

##### Total metagenomes #####
# Run FastQC on every raw total-metagenome read file.
cd path/to/raw/total_metagenome/read/files
conda activate fastqc
# FastQC does not create its output directory, so make it first
# (-p keeps this cell re-runnable when the directory already exists)
mkdir -p fastqc
# Run it on all gzipped files in one call; the .html and .zip reports
# are written straight into ./fastqc, so no unrelated files get moved
fastqc --outdir fastqc *.gz

In [None]:
# Now let's clean up our seqs
# We will do this with:
#   - trim_galore, to remove polyG reads (byproduct of NovaSeq)
#   - trimmomatic, to remove adapters and improve read quality

##### NEG #####
# Clean the negative-control libraries in three stages:
#   1. trim_galore  -> strip poly-G tails into ./no_Gs
#   2. trimmomatic  -> adapter + quality trimming into ./trimmed
#   3. fastqc       -> re-check quality of the trimmed, paired reads
# Work in the negative-sample directory (same one used for the raw FastQC run)
cd path/to/raw/negative/read/files
# Remove Poly-G seqs from read libraries
# Make new dir for data (-p so a re-run does not fail)
mkdir -p no_Gs
# Activate conda env
conda activate trim_galore
# Loop over forward reads; the reverse mate shares everything before "R1_001".
# Stripping the exact suffix (instead of cutting at the first "R1") keeps
# sample names that themselves contain "R1" intact.
# -q 0 passes a quality cutoff of 0; G{100} is quoted so the shell hands it to
# trim_galore verbatim (presumably its repeat syntax for a run of 100 Gs —
# confirm against the Trim Galore guide).
for r1 in *R1_001.fastq.gz
do
    prefix="${r1%R1_001.fastq.gz}"
    trim_galore --paired \
    "$r1" "${prefix}R2_001.fastq.gz" \
    --phred33 \
    -q 0 \
    -a "G{100}" \
    -A "G{100}" \
    -o ./no_Gs
done
# Remove adapters and quality trim
cd no_Gs
# Make sure the trimmomatic output dir exists before writing into it
mkdir -p ../trimmed
# Activate trimmomatic
conda activate trimmo
# Loop over the poly-G-trimmed forward reads (*_val_1) and pair each with its
# *_val_2 mate. Output order after the two inputs is:
# R1 paired, R1 unpaired, R2 paired, R2 unpaired.
# Output names are built from the same prefix for both mates so R1 and R2
# files are named consistently.
for r1 in *R1_001_val_1.fq.gz
do
    prefix="${r1%R1_001_val_1.fq.gz}"
    trimmomatic \
    PE -threads 4 -phred33 \
    "$r1" "${prefix}R2_001_val_2.fq.gz" \
    "../trimmed/${prefix}R1_trimmed.fq.gz" \
    "../trimmed/${prefix}R1_unpaired.fq.gz" \
    "../trimmed/${prefix}R2_trimmed.fq.gz" \
    "../trimmed/${prefix}R2_unpaired.fq.gz" \
    ILLUMINACLIP:../adapters/TruSeq3-PE-2.fa:2:30:10:2 \
    AVGQUAL:25 MINLEN:40
done
# move unpaired seqs to their own dir
cd ../trimmed
mkdir -p unpaired
mv *unpaired.fq.gz unpaired
# The paired reads are left in ./trimmed itself, matching the other libraries;
# presumably this is the directory the later transfer step reads from.
# Run fastqc on trimmed seqs
conda activate fastqc
# Stash fastqc results in their own dir (fastqc will not create it)
mkdir -p fastqc
fastqc --outdir fastqc *.gz

##### Virome #####
# Clean the virome libraries in three stages:
#   1. trim_galore  -> strip poly-G tails into ./no_Gs
#   2. trimmomatic  -> adapter + quality trimming into ./trimmed
#   3. fastqc       -> re-check quality of the trimmed, paired reads
cd path/to/raw/virome/read/files
# Remove Poly-G seqs from read libraries
# Make new dir for data (-p so a re-run does not fail)
mkdir -p no_Gs
# Activate conda env
conda activate trim_galore
# Loop over forward reads; the reverse mate shares everything before "R1_001".
# Stripping the exact suffix (instead of cutting at the first "R1") keeps
# sample names that themselves contain "R1" intact.
# -q 0 passes a quality cutoff of 0; G{100} is quoted so the shell hands it to
# trim_galore verbatim (presumably its repeat syntax for a run of 100 Gs —
# confirm against the Trim Galore guide).
for r1 in *R1_001.fastq.gz
do
    prefix="${r1%R1_001.fastq.gz}"
    trim_galore --paired \
    "$r1" "${prefix}R2_001.fastq.gz" \
    --phred33 \
    -q 0 \
    -a "G{100}" \
    -A "G{100}" \
    -o ./no_Gs
done
# Trimmomatic
# Remove adapters and quality trim
cd no_Gs
# Make sure the trimmomatic output dir exists before writing into it
mkdir -p ../trimmed
# Activate trimmomatic
conda activate trimmo
# Loop over the poly-G-trimmed forward reads (*_val_1) and pair each with its
# *_val_2 mate. Output order after the two inputs is:
# R1 paired, R1 unpaired, R2 paired, R2 unpaired.
# Output names are built from the same prefix for both mates so R1 and R2
# files are named consistently.
for r1 in *R1_001_val_1.fq.gz
do
    prefix="${r1%R1_001_val_1.fq.gz}"
    trimmomatic \
    PE -threads 4 -phred33 \
    "$r1" "${prefix}R2_001_val_2.fq.gz" \
    "../trimmed/${prefix}R1_trimmed.fq.gz" \
    "../trimmed/${prefix}R1_unpaired.fq.gz" \
    "../trimmed/${prefix}R2_trimmed.fq.gz" \
    "../trimmed/${prefix}R2_unpaired.fq.gz" \
    ILLUMINACLIP:../adapters/TruSeq3-PE-2.fa:2:30:10:2 \
    AVGQUAL:25 MINLEN:40
done
# move unpaired seqs to their own dir
# (the trimmomatic outputs live in ../trimmed, not in no_Gs)
cd ../trimmed
mkdir -p unpaired
mv *unpaired.fq.gz unpaired
# Run fastqc on trimmed seqs
conda activate fastqc
# Stash fastqc results in their own dir (fastqc will not create it)
mkdir -p fastqc
fastqc --outdir fastqc *.gz

##### Total metagenomes #####
# Clean the total-metagenome libraries in three stages:
#   1. trim_galore  -> strip poly-G tails into ./no_Gs
#   2. trimmomatic  -> adapter + quality trimming into ./trimmed
#   3. fastqc       -> re-check quality of the trimmed, paired reads
cd path/to/raw/total_metagenome/read/files
# Remove Poly-G seqs from read libraries
# Make new dir for data (-p so a re-run does not fail)
mkdir -p no_Gs
# Activate conda env
conda activate trim_galore
# Loop over forward reads; the reverse mate shares everything before "R1_001".
# Stripping the exact suffix (instead of cutting at the first "R1") keeps
# sample names that themselves contain "R1" intact.
# -q 0 passes a quality cutoff of 0; G{100} is quoted so the shell hands it to
# trim_galore verbatim (presumably its repeat syntax for a run of 100 Gs —
# confirm against the Trim Galore guide).
for r1 in *R1_001.fastq.gz
do
    prefix="${r1%R1_001.fastq.gz}"
    trim_galore --paired \
    "$r1" "${prefix}R2_001.fastq.gz" \
    --phred33 \
    -q 0 \
    -a "G{100}" \
    -A "G{100}" \
    -o ./no_Gs
done
# Remove adapters and quality trim
cd no_Gs
# Move .txt files to new dir
mkdir -p reports
mv *.txt reports
# Make sure the trimmomatic output dir exists before writing into it
mkdir -p ../trimmed
# Activate trimmomatic
conda activate trimmo
# Loop over the poly-G-trimmed forward reads (*_val_1) and pair each with its
# *_val_2 mate. Output order after the two inputs is:
# R1 paired, R1 unpaired, R2 paired, R2 unpaired.
# Output names are built from the same prefix for both mates so R1 and R2
# files are named consistently.
for r1 in *R1_001_val_1.fq.gz
do
    prefix="${r1%R1_001_val_1.fq.gz}"
    trimmomatic \
    PE -threads 4 -phred33 \
    "$r1" "${prefix}R2_001_val_2.fq.gz" \
    "../trimmed/${prefix}R1_trimmed.fq.gz" \
    "../trimmed/${prefix}R1_unpaired.fq.gz" \
    "../trimmed/${prefix}R2_trimmed.fq.gz" \
    "../trimmed/${prefix}R2_unpaired.fq.gz" \
    ILLUMINACLIP:../adapters/TruSeq3-PE-2.fa:2:30:10:2 \
    AVGQUAL:25 MINLEN:40
done
# move unpaired seqs to their own dir
# (trimmed/ is a sibling of no_Gs, so step up one level to reach it)
cd ../trimmed
mkdir -p unpaired
mv *unpaired.fq.gz unpaired
# Run fastqc on trimmed seqs
conda activate fastqc
# Stash fastqc results in their own dir (fastqc will not create it)
mkdir -p fastqc
fastqc --outdir fastqc *.gz

In [None]:
# Ship the semi-cleaned (trimmed) reads to the HPC for the next steps.
# The local and remote layouts use the same per-library directory name, so a
# single loop covers all three libraries:
#   neg               -> negative controls
#   virome            -> virome samples
#   total_metagenomes -> total metagenomes
for library in neg virome total_metagenomes
do
    # copy every gzipped read file in this library's trimmed dir, verbosely
    rsync -v path/to/"${library}"/trimmed/*.gz \
    "dsbard@farm.cse.ucdavis.edu:/home/dsbard/bee_phage/dls/${library}/trimmed_reads"
done