# Importing and Denoising LP3

> downloaded the demultiplexed files from FGCZ 


## 1. File Preprocessing for Import

To import the files into QIIME2 using the Casava 1.8 format, we first have to add the lane number to each file name (it is not specified because the run used a full flow cell).

In [None]:
%%bash

## add lane number
# Inserts "_L001" in front of the "_R1_001.fastq.gz" / "_R2_001.fastq.gz"
# suffix of every read file in the run folder.
cd /home/lfloerl/cloud/lfloerl/Microterroir/o36452_NextSeq_241102_NS2k-400 || exit 1

# Without nullglob an unmatched pattern would be handed to the loop literally.
shopt -s nullglob

# Only files that carry the expected read suffix are touched; anything else
# would previously have been "renamed" onto itself, which makes mv complain.
for file in *_R[12]_001.fastq.gz; do
  # Already has a lane field: skip it so re-running the cell is a no-op
  # instead of producing "..._L001_L001_R1_001.fastq.gz".
  if [[ "$file" == *_L[0-9][0-9][0-9]_R[12]_001.fastq.gz ]]; then
    continue
  fi

  # Split the name into everything before the read suffix and the suffix itself.
  stem=${file%_R[12]_001.fastq.gz}
  read_suffix=${file#"$stem"}

  # Rename the file
  mv -- "$file" "${stem}_L001${read_suffix}"
done

## 2. Move the 16S and ITS reads into their respective folders

FGCZ assigned new IDs, so we have to move the files into separate folders to import the fungal and bacterial data separately.

In [None]:
%%bash

## split by 16S & ITS reads
# Every %%bash cell runs in its own shell, so the working directory has to be
# set here again; otherwise the folders are created (and the glob evaluated)
# wherever the notebook lives instead of in the run folder that the import
# step reads from.
cd /home/lfloerl/cloud/lfloerl/Microterroir/o36452_NextSeq_241102_NS2k-400 || exit 1

mkdir -p ITS 16S

# Without nullglob an unmatched pattern would be handed to the loop literally.
shopt -s nullglob

# Sort every fastq.gz file in the run folder by the marker named in its file
# name. 'ITS' is checked first, as before; files naming neither marker stay put.
for file in *.fastq.gz; do
  case "$file" in
    *ITS*) mv -- "$file" ITS/ ;;
    *16S*) mv -- "$file" 16S/ ;;
  esac
done

## 3. Import files to QIIME2 

In [None]:
%%bash

# Imports the 16S and ITS read folders as paired-end artifacts and writes a
# demux summary visualization next to each artifact.
data_dir=/home/lfloerl/cloud/lfloerl/Microterroir/o36452_NextSeq_241102_NS2k-400
artifacts_dir=/home/lfloerl/cloud/lfloerl/Microterroir/artifacts

# -p keeps the cell re-runnable when the folders already exist.
mkdir -p "$artifacts_dir"
cd "$artifacts_dir" || exit 1

mkdir -p ITS
mkdir -p 16S

for marker in 16S ITS; do
  demux_qza="${marker}/${marker}-demux-paired-end.qza"

  # Summarizing makes no sense without a fresh artifact, so a failed import
  # skips straight to the next marker.
  time qiime tools import \
    --type 'SampleData[PairedEndSequencesWithQuality]' \
    --input-path "${data_dir}/${marker}" \
    --input-format CasavaOneEightSingleLanePerSampleDirFmt \
    --output-path "$demux_qza" || {
      printf 'import of %s reads failed\n' "$marker" >&2
      continue
    }

  qiime demux summarize \
    --i-data "$demux_qza" \
    --o-visualization "${demux_qza%.qza}.qzv"
done

## 4. Denoise 

### 4.1. Denoise 16S paired end 

In [None]:
%%bash

# Runs DADA2 paired-end denoising on the imported 16S reads; all three
# outputs go to 16S/bac-dada2 inside the artifacts folder.
cd /home/lfloerl/cloud/lfloerl/Microterroir/artifacts || exit 1

out_dir=16S/bac-dada2

# -p keeps the cell re-runnable when the folder already exists.
mkdir -p "$out_dir"

time qiime dada2 denoise-paired \
    --i-demultiplexed-seqs 16S/16S-demux-paired-end.qza \
    --p-trunc-len-f 190 \
    --p-trunc-len-r 165 \
    --p-n-threads 5 \
    --o-representative-sequences "${out_dir}/dada-rep-seqs.qza" \
    --o-table "${out_dir}/dada-table.qza" \
    --o-denoising-stats "${out_dir}/dada-stats.qza"

### 4.2 Denoise ITS single-end, no cutadapt

In [None]:
%%bash

# Runs DADA2 single-end denoising on the ITS reads once per truncation
# length (180-220 in steps of 10); each run writes its own set of outputs,
# tagged with the length, into ITS/dada2-nocutadapt.
cd /Users/svens/microterroir/artifacts || exit 1

# NOTE(review): the import step writes this artifact to
# ITS/ITS-demux-paired-end.qza; confirm that on this machine it really sits
# directly in the artifacts folder.
demux_qza=ITS-demux-paired-end.qza
out_dir=ITS/dada2-nocutadapt

# Make sure the output folder exists before qiime writes into it.
mkdir -p "$out_dir"

for trunc_len in 180 190 200 210 220; do
  # The plugin is called "dada2"; "qiime dada" is not a valid command.
  time qiime dada2 denoise-single \
      --i-demultiplexed-seqs "$demux_qza" \
      --p-trunc-len "$trunc_len" \
      --p-n-threads 5 \
      --o-representative-sequences "${out_dir}/dada-rep-seqs-${trunc_len}.qza" \
      --o-table "${out_dir}/dada-table-${trunc_len}.qza" \
      --o-denoising-stats "${out_dir}/dada-stats-${trunc_len}.qza"
done

### 4.3 ITS Cutadapt without Primer

In [None]:
%%bash

# Runs cutadapt's single-end trimming on the ITS reads and stores the result
# in ITS/cutadapt-noprimer.
cd /Users/svens/microterroir/artifacts || exit 1

out_dir=ITS/cutadapt-noprimer

# Make sure the output folder exists before qiime writes into it.
mkdir -p "$out_dir"

# NOTE(review): no adapter/primer option is passed here, and
# ITS-demux-single-end.qza is not produced by any step shown in this
# notebook - confirm both are intended.
time qiime cutadapt trim-single \
    --i-demultiplexed-sequences ITS-demux-single-end.qza \
    --o-trimmed-sequences "${out_dir}/ITS-trimmed-noprimer.qza"