# Importing and Denoising LP3

> downloaded the demultiplexed files from FGCZ 


## 1. File Preprocessing for Import

To import the files into QIIME 2 using the Casava 1.8 format, we first have to add the lane number to the file names (it is not included because the run used a full flow cell).

In [None]:
%%bash 

## add lane number
# Insert "_L001" in front of the read suffix of every FASTQ file, i.e.
# "<prefix>_R1_001.fastq.gz" becomes "<prefix>_L001_R1_001.fastq.gz", so that
# the names carry a lane token for the Casava 1.8 import further down.

# Stop if the data directory is missing; otherwise the loop below would rename
# files in whatever directory the cell happened to start in.
cd /home/lfloerl/cloud/lfloerl/Microterroir/o36452_NextSeq_241102_NS2k-400 || exit 1

# Without nullglob an unmatched pattern stays literal and `mv` would be called
# on a file that does not exist.
shopt -s nullglob

# Loop through all read files in the directory
for file in *_R[12]_001.fastq.gz; do
  # Already has a lane token (e.g. the cell was run before): leave it alone,
  # otherwise a second run would produce "_L001_L001".
  if [[ "$file" =~ _L[0-9]+_R[12]_001\.fastq\.gz$ ]]; then
    continue
  fi

  # Split the name into the part before the read suffix and the suffix itself
  if [[ "$file" =~ ^(.*)(_R[12]_001\.fastq\.gz)$ ]]; then
    new_name="${BASH_REMATCH[1]}_L001${BASH_REMATCH[2]}"

    # Rename the file
    mv -- "$file" "$new_name"
  fi
done

## 2. Move the 16S and ITS reads into their respective folders

FGCZ assigned new IDs, so we have to move the files into separate folders in order to import the fungal (ITS) and bacterial (16S) data separately.

In [None]:
%%bash

## split by 16S & ITS reads
# Sort the FASTQ files into an ITS/ and a 16S/ sub-folder based on the marker
# name contained in each file name.

# Every %%bash cell runs in its own shell, so a `cd` done in an earlier cell
# does not carry over. Change into the read directory explicitly (this is the
# directory the import step reads the 16S/ and ITS/ folders from) and stop if
# it is missing.
cd /home/lfloerl/cloud/lfloerl/Microterroir/o36452_NextSeq_241102_NS2k-400 || exit 1

# Do nothing (instead of trying to move a literal "*.fastq.gz") when the
# directory contains no FASTQ files, e.g. because they were already sorted.
shopt -s nullglob

mkdir -p ITS 16S

# Loop through all fastq.gz files in the current directory
for file in *.fastq.gz; do
  # 'ITS' is tested first, as before; files matching neither marker stay put
  case "$file" in
    *ITS*) mv -- "$file" ITS/ ;;
    *16S*) mv -- "$file" 16S/ ;;
  esac
done

## 3. Import files to QIIME2 

In [None]:
%%bash 

# Import the paired-end 16S and ITS reads into QIIME 2 artifacts and create a
# demux summary visualization for each of them.

# `-p` keeps the cell re-runnable: a plain `mkdir` fails once the directory exists.
mkdir -p /home/lfloerl/cloud/lfloerl/Microterroir/artifacts
# Stop if the directory is not reachable so that no artifact is written
# relative to an unexpected working directory.
cd /home/lfloerl/cloud/lfloerl/Microterroir/artifacts || exit 1

mkdir -p ITS
mkdir -p 16S

# Directory holding the per-marker read folders (16S/ and ITS/)
reads_dir=/home/lfloerl/cloud/lfloerl/Microterroir/o36452_NextSeq_241102_NS2k-400

# Same import + summary for both markers, 16S first
for marker in 16S ITS; do
  if time qiime tools import \
    --type 'SampleData[PairedEndSequencesWithQuality]' \
    --input-path "${reads_dir}/${marker}" \
    --input-format CasavaOneEightSingleLanePerSampleDirFmt \
    --output-path "${marker}/${marker}-demux-paired-end.qza"
  then
    qiime demux summarize \
      --i-data "${marker}/${marker}-demux-paired-end.qza" \
      --o-visualization "${marker}/${marker}-demux-paired-end.qzv"
  else
    # Summarizing a missing or stale artifact would only hide the real error
    echo "import of ${marker} reads failed; skipping demux summary" >&2
  fi
done

## 4. Denoise 

### 4.1. Denoise 16S paired end 

In [None]:
%%bash 

# Denoise the paired-end 16S reads with DADA2 (forward reads truncated at 190,
# reverse reads at 165).

# Stop if the artifact directory is missing so that nothing is created
# relative to an unexpected working directory.
cd /home/lfloerl/cloud/lfloerl/Microterroir/artifacts || exit 1

# `-p` also creates 16S/ if needed and does not fail when the cell is re-run
mkdir -p 16S/bac-dada2

time qiime dada2 denoise-paired \
    --i-demultiplexed-seqs 16S/16S-demux-paired-end.qza \
    --p-trunc-len-f 190 \
    --p-trunc-len-r 165 \
    --p-n-threads 5 \
    --o-representative-sequences 16S/bac-dada2/dada-rep-seqs.qza \
    --o-table 16S/bac-dada2/dada-table.qza \
    --o-denoising-stats 16S/bac-dada2/dada-stats.qza

In [3]:
%%bash

# DADA2 paired-end denoising of the 16S reads for one truncation setting,
# followed by visualizations of the three outputs.

# Stop if the artefact directory is missing so that nothing is written
# relative to an unexpected working directory.
cd /Users/svens/Documents/MicroTerroir/artefacts || exit 1

# Truncation lengths (forward / reverse) used in this run; they are also part
# of every output file name.
trunc_f=220
trunc_r=190
out_prefix="16S/denoised/dada-${trunc_f}_${trunc_r}"

# Make sure the output directory exists before the long-running step starts
mkdir -p 16S/denoised

# NOTE(review): no --p-n-threads is given here, unlike the other denoising
# cells in this notebook -- confirm whether that is intended.
time qiime dada2 denoise-paired \
    --i-demultiplexed-seqs 16S/16S-demux-paired-end.qza \
    --p-trunc-len-f "$trunc_f" \
    --p-trunc-len-r "$trunc_r" \
    --o-representative-sequences "${out_prefix}-rep-seqs.qza" \
    --o-table "${out_prefix}-table.qza" \
    --o-denoising-stats "${out_prefix}-stats.qza" \
    --verbose

# Representative sequences
time qiime feature-table tabulate-seqs \
    --i-data "${out_prefix}-rep-seqs.qza" \
    --o-visualization "${out_prefix}-rep-seqs.qzv"

# Feature table
time qiime feature-table summarize \
    --i-table "${out_prefix}-table.qza" \
    --o-visualization "${out_prefix}-table.qzv"

# Denoising statistics
time qiime metadata tabulate \
    --m-input-file "${out_prefix}-stats.qza" \
    --o-visualization "${out_prefix}-stats.qzv"

Running external command line application(s). This may print messages to stdout and/or stderr.
The command(s) being run are below. These commands cannot be manually re-run as they will depend on temporary files that no longer exist.

Command: run_dada.R --input_directory /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpofxg5r3v/forward --input_directory_reverse /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpofxg5r3v/reverse --output_path /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpofxg5r3v/output.tsv.biom --output_track /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpofxg5r3v/track.tsv --filtered_directory /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpofxg5r3v/filt_f --filtered_directory_reverse /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpofxg5r3v/filt_r --truncation_length 220 --truncation_length_reverse 190 --trim_left 0 --trim_left_reverse 0 --max_expected_errors 2.0 --max_expected_errors_reverse 2.0 --truncation_quality_score 2 --min_overlap 12 --p

Lade nötiges Paket: Rcpp


DADA2: 1.30.0 / Rcpp: 1.0.13.1 / RcppParallel: 5.1.9 


The filter removed all reads: /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpofxg5r3v/filt_f/364527_045-LP3-16S-0045_L001_R1_001.fastq.gz and /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpofxg5r3v/filt_r/364527_045-LP3-16S-0045_L001_R2_001.fastq.gz not written.
The filter removed all reads: /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpofxg5r3v/filt_f/364527_162-LP3-16S-0162_L001_R1_001.fastq.gz and /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpofxg5r3v/filt_r/364527_162-LP3-16S-0162_L001_R2_001.fastq.gz not written.
The filter removed all reads: /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpofxg5r3v/filt_f/364527_165-LP3-16S-0165_L001_R1_001.fastq.gz and /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpofxg5r3v/filt_r/364527_165-LP3-16S-0165_L001_R2_001.fastq.gz not written.
The filter removed all reads: /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpofxg5r3v/filt_f/364527_258-LP3-16S-0258_L001_R1_001.fastq.gz and /var/folders/qv/m0nf9nf10_3_dt4rl4

2) Filtering ...........................................x....................................................................................................................x..x............................................................................................x..............................................................................................................................................................x...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................


real	287m2.052s
user	285m17.362s
sys	1m29.416s


Saved Visualization to: 16S/denoised/dada-220_190-rep-seqs.qzv



real	0m16.006s
user	0m9.335s
sys	0m1.110s


Saved Visualization to: 16S/denoised/dada-220_190-table.qzv



real	0m12.773s
user	0m6.386s
sys	0m1.167s


Saved Visualization to: 16S/denoised/dada-220_190-stats.qzv



real	0m10.883s
user	0m4.744s
sys	0m0.973s


In [4]:
%%bash

# DADA2 paired-end denoising of the 16S reads for one truncation setting,
# followed by visualizations of the three outputs.

# Stop if the artefact directory is missing so that nothing is written
# relative to an unexpected working directory.
cd /Users/svens/Documents/MicroTerroir/artefacts || exit 1

# Truncation lengths (forward / reverse) used in this run; they are also part
# of every output file name.
trunc_f=210
trunc_r=180
out_prefix="16S/denoised/dada-${trunc_f}_${trunc_r}"

# Make sure the output directory exists before the long-running step starts
mkdir -p 16S/denoised

# NOTE(review): no --p-n-threads is given here, unlike the other denoising
# cells in this notebook -- confirm whether that is intended.
time qiime dada2 denoise-paired \
    --i-demultiplexed-seqs 16S/16S-demux-paired-end.qza \
    --p-trunc-len-f "$trunc_f" \
    --p-trunc-len-r "$trunc_r" \
    --o-representative-sequences "${out_prefix}-rep-seqs.qza" \
    --o-table "${out_prefix}-table.qza" \
    --o-denoising-stats "${out_prefix}-stats.qza" \
    --verbose

# Representative sequences
time qiime feature-table tabulate-seqs \
    --i-data "${out_prefix}-rep-seqs.qza" \
    --o-visualization "${out_prefix}-rep-seqs.qzv"

# Feature table
time qiime feature-table summarize \
    --i-table "${out_prefix}-table.qza" \
    --o-visualization "${out_prefix}-table.qzv"

# Denoising statistics
time qiime metadata tabulate \
    --m-input-file "${out_prefix}-stats.qza" \
    --o-visualization "${out_prefix}-stats.qzv"

Running external command line application(s). This may print messages to stdout and/or stderr.
The command(s) being run are below. These commands cannot be manually re-run as they will depend on temporary files that no longer exist.

Command: run_dada.R --input_directory /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpb5q4e66r/forward --input_directory_reverse /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpb5q4e66r/reverse --output_path /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpb5q4e66r/output.tsv.biom --output_track /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpb5q4e66r/track.tsv --filtered_directory /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpb5q4e66r/filt_f --filtered_directory_reverse /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpb5q4e66r/filt_r --truncation_length 210 --truncation_length_reverse 180 --trim_left 0 --trim_left_reverse 0 --max_expected_errors 2.0 --max_expected_errors_reverse 2.0 --truncation_quality_score 2 --min_overlap 12 --p

Lade nötiges Paket: Rcpp


DADA2: 1.30.0 / Rcpp: 1.0.13.1 / RcppParallel: 5.1.9 


The filter removed all reads: /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpb5q4e66r/filt_f/364527_045-LP3-16S-0045_L001_R1_001.fastq.gz and /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpb5q4e66r/filt_r/364527_045-LP3-16S-0045_L001_R2_001.fastq.gz not written.
The filter removed all reads: /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpb5q4e66r/filt_f/364527_162-LP3-16S-0162_L001_R1_001.fastq.gz and /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpb5q4e66r/filt_r/364527_162-LP3-16S-0162_L001_R2_001.fastq.gz not written.
The filter removed all reads: /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpb5q4e66r/filt_f/364527_165-LP3-16S-0165_L001_R1_001.fastq.gz and /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpb5q4e66r/filt_r/364527_165-LP3-16S-0165_L001_R2_001.fastq.gz not written.
The filter removed all reads: /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpb5q4e66r/filt_f/364527_258-LP3-16S-0258_L001_R1_001.fastq.gz and /var/folders/qv/m0nf9nf10_3_dt4rl4

2) Filtering ...........................................x....................................................................................................................x..x............................................................................................x..............................................................................................................................................................x...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................


real	373m15.450s
user	256m52.574s
sys	1m36.576s


Saved Visualization to: 16S/denoised/dada-210_180-rep-seqs.qzv



real	0m11.701s
user	0m9.412s
sys	0m1.203s


Saved Visualization to: 16S/denoised/dada-210_180-table.qzv



real	0m7.874s
user	0m6.424s
sys	0m1.208s


Saved Visualization to: 16S/denoised/dada-210_180-stats.qzv



real	0m5.911s
user	0m4.754s
sys	0m0.984s


In [5]:
%%bash

# DADA2 paired-end denoising of the 16S reads for one truncation setting,
# followed by visualizations of the three outputs.

# Stop if the artefact directory is missing so that nothing is written
# relative to an unexpected working directory.
cd /Users/svens/Documents/MicroTerroir/artefacts || exit 1

# Truncation lengths (forward / reverse) used in this run; they are also part
# of every output file name.
trunc_f=200
trunc_r=170
out_prefix="16S/denoised/dada-${trunc_f}_${trunc_r}"

# Make sure the output directory exists before the long-running step starts
mkdir -p 16S/denoised

# NOTE(review): no --p-n-threads is given here, unlike the other denoising
# cells in this notebook -- confirm whether that is intended.
time qiime dada2 denoise-paired \
    --i-demultiplexed-seqs 16S/16S-demux-paired-end.qza \
    --p-trunc-len-f "$trunc_f" \
    --p-trunc-len-r "$trunc_r" \
    --o-representative-sequences "${out_prefix}-rep-seqs.qza" \
    --o-table "${out_prefix}-table.qza" \
    --o-denoising-stats "${out_prefix}-stats.qza" \
    --verbose

# Representative sequences
time qiime feature-table tabulate-seqs \
    --i-data "${out_prefix}-rep-seqs.qza" \
    --o-visualization "${out_prefix}-rep-seqs.qzv"

# Feature table
time qiime feature-table summarize \
    --i-table "${out_prefix}-table.qza" \
    --o-visualization "${out_prefix}-table.qzv"

# Denoising statistics
time qiime metadata tabulate \
    --m-input-file "${out_prefix}-stats.qza" \
    --o-visualization "${out_prefix}-stats.qzv"

Running external command line application(s). This may print messages to stdout and/or stderr.
The command(s) being run are below. These commands cannot be manually re-run as they will depend on temporary files that no longer exist.

Command: run_dada.R --input_directory /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpetfmb2sw/forward --input_directory_reverse /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpetfmb2sw/reverse --output_path /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpetfmb2sw/output.tsv.biom --output_track /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpetfmb2sw/track.tsv --filtered_directory /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpetfmb2sw/filt_f --filtered_directory_reverse /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpetfmb2sw/filt_r --truncation_length 200 --truncation_length_reverse 170 --trim_left 0 --trim_left_reverse 0 --max_expected_errors 2.0 --max_expected_errors_reverse 2.0 --truncation_quality_score 2 --min_overlap 12 --p

Lade nötiges Paket: Rcpp


DADA2: 1.30.0 / Rcpp: 1.0.13.1 / RcppParallel: 5.1.9 


The filter removed all reads: /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpetfmb2sw/filt_f/364527_045-LP3-16S-0045_L001_R1_001.fastq.gz and /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpetfmb2sw/filt_r/364527_045-LP3-16S-0045_L001_R2_001.fastq.gz not written.
The filter removed all reads: /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpetfmb2sw/filt_f/364527_165-LP3-16S-0165_L001_R1_001.fastq.gz and /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpetfmb2sw/filt_r/364527_165-LP3-16S-0165_L001_R2_001.fastq.gz not written.
The filter removed all reads: /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpetfmb2sw/filt_f/364527_258-LP3-16S-0258_L001_R1_001.fastq.gz and /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpetfmb2sw/filt_r/364527_258-LP3-16S-0258_L001_R2_001.fastq.gz not written.
The filter removed all reads: /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpetfmb2sw/filt_f/364528_431-LP3-16S-1007_L001_R1_001.fastq.gz and /var/folders/qv/m0nf9nf10_3_dt4rl4

2) Filtering ...........................................x.......................................................................................................................x............................................................................................x..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................


real	258m2.751s
user	255m27.971s
sys	2m0.649s


Saved Visualization to: 16S/denoised/dada-200_170-rep-seqs.qzv



real	0m12.147s
user	0m9.711s
sys	0m1.366s


Saved Visualization to: 16S/denoised/dada-200_170-table.qzv



real	0m8.473s
user	0m6.797s
sys	0m1.404s


Saved Visualization to: 16S/denoised/dada-200_170-stats.qzv



real	0m6.478s
user	0m5.119s
sys	0m1.153s


In [1]:
%%bash

# DADA2 paired-end denoising of the 16S reads for one truncation setting,
# followed by visualizations of the three outputs.

# Stop if the artefact directory is missing so that nothing is written
# relative to an unexpected working directory.
cd /Users/svens/Documents/MicroTerroir/artefacts || exit 1

# Truncation lengths (forward / reverse) used in this run; they are also part
# of every output file name.
trunc_f=180
trunc_r=150
out_prefix="16S/denoised/dada-${trunc_f}_${trunc_r}"

# Make sure the output directory exists before the long-running step starts
mkdir -p 16S/denoised

# NOTE(review): no --p-n-threads is given here, unlike the other denoising
# cells in this notebook -- confirm whether that is intended.
time qiime dada2 denoise-paired \
    --i-demultiplexed-seqs 16S/16S-demux-paired-end.qza \
    --p-trunc-len-f "$trunc_f" \
    --p-trunc-len-r "$trunc_r" \
    --o-representative-sequences "${out_prefix}-rep-seqs.qza" \
    --o-table "${out_prefix}-table.qza" \
    --o-denoising-stats "${out_prefix}-stats.qza" \
    --verbose

# Representative sequences
time qiime feature-table tabulate-seqs \
    --i-data "${out_prefix}-rep-seqs.qza" \
    --o-visualization "${out_prefix}-rep-seqs.qzv"

# Feature table
time qiime feature-table summarize \
    --i-table "${out_prefix}-table.qza" \
    --o-visualization "${out_prefix}-table.qzv"

# Denoising statistics
time qiime metadata tabulate \
    --m-input-file "${out_prefix}-stats.qza" \
    --o-visualization "${out_prefix}-stats.qzv"

Running external command line application(s). This may print messages to stdout and/or stderr.
The command(s) being run are below. These commands cannot be manually re-run as they will depend on temporary files that no longer exist.

Command: run_dada.R --input_directory /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpqohwzsut/forward --input_directory_reverse /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpqohwzsut/reverse --output_path /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpqohwzsut/output.tsv.biom --output_track /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpqohwzsut/track.tsv --filtered_directory /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpqohwzsut/filt_f --filtered_directory_reverse /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpqohwzsut/filt_r --truncation_length 180 --truncation_length_reverse 150 --trim_left 0 --trim_left_reverse 0 --max_expected_errors 2.0 --max_expected_errors_reverse 2.0 --truncation_quality_score 2 --min_overlap 12 --p

Lade nötiges Paket: Rcpp


DADA2: 1.30.0 / Rcpp: 1.0.13.1 / RcppParallel: 5.1.9 


The filter removed all reads: /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpqohwzsut/filt_f/364527_165-LP3-16S-0165_L001_R1_001.fastq.gz and /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpqohwzsut/filt_r/364527_165-LP3-16S-0165_L001_R2_001.fastq.gz not written.
The filter removed all reads: /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpqohwzsut/filt_f/364527_258-LP3-16S-0258_L001_R1_001.fastq.gz and /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpqohwzsut/filt_r/364527_258-LP3-16S-0258_L001_R2_001.fastq.gz not written.
The filter removed all reads: /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpqohwzsut/filt_f/364528_431-LP3-16S-1007_L001_R1_001.fastq.gz and /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpqohwzsut/filt_r/364528_431-LP3-16S-1007_L001_R2_001.fastq.gz not written.
The filter removed all reads: /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpqohwzsut/filt_f/364528_432-LP3-16S-1008_L001_R1_001.fastq.gz and /var/folders/qv/m0nf9nf10_3_dt4rl4

2) Filtering ...................................................................................................................................................................x............................................................................................x..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................


real	282m0.538s
user	233m5.808s
sys	1m46.011s


Saved Visualization to: 16S/denoised/dada-180_150-rep-seqs.qzv



real	0m12.335s
user	0m10.132s
sys	0m1.335s


Saved Visualization to: 16S/denoised/dada-180_150-table.qzv



real	0m8.906s
user	0m7.121s
sys	0m1.380s


Saved Visualization to: 16S/denoised/dada-180_150-stats.qzv



real	0m6.638s
user	0m5.322s
sys	0m1.123s


### 4.2 Denoise ITS single-end, no cutadapt

In [None]:
%%bash

# Single-end DADA2 denoising of the ITS reads (no cutadapt step beforehand),
# repeated for a range of truncation lengths.

# NOTE(review): this directory differs from the
# /Users/svens/Documents/MicroTerroir/artefacts path used by the other cells,
# and the input artifact is read from the directory root rather than from
# ITS/ -- confirm both before re-running.
# Stop if the directory is missing so that nothing is written relative to an
# unexpected working directory.
cd /Users/svens/microterroir/artefacts || exit 1

out_dir=ITS/dada2-nocutadapt

# Make sure the output directory exists before the first run starts
mkdir -p "$out_dir"

# One denoising run per truncation length; the length is part of each output name
for trunc_len in 180 190 200 210 220; do
    time qiime dada2 denoise-single \
        --i-demultiplexed-seqs ITS-demux-paired-end.qza \
        --p-trunc-len "$trunc_len" \
        --p-n-threads 5 \
        --o-representative-sequences "${out_dir}/dada-rep-seqs-${trunc_len}.qza" \
        --o-table "${out_dir}/dada-table-${trunc_len}.qza" \
        --o-denoising-stats "${out_dir}/dada-stats-${trunc_len}.qza"
done

### 4.3 ITS Cutadapt without Primer + denoising w/o truncation

In [None]:
%%bash

# Run cutadapt on the single-end ITS reads without passing any adapter/primer
# sequence.

# NOTE(review): this directory differs from the
# /Users/svens/Documents/MicroTerroir/artefacts path used by the other cells --
# confirm before re-running.
# Stop if the directory is missing so that nothing is written relative to an
# unexpected working directory.
cd /Users/svens/microterroir/artefacts || exit 1

# Make sure the output directory exists
mkdir -p ITS/cutadapt-noprimer

time qiime cutadapt trim-single \
    --i-demultiplexed-sequences ITS-demux-single-end.qza \
    --o-trimmed-sequences ITS/cutadapt-noprimer/ITS-trimmed-noprimer.qza

In [2]:
%%bash

# Single-end DADA2 denoising of the cutadapt output (no primer given), without
# truncation, followed by visualizations of the three outputs.

# Stop if the artefact directory is missing so that nothing is written
# relative to an unexpected working directory.
cd /Users/svens/Documents/MicroTerroir/artefacts || exit 1

# Common stem of the input artifact and of all files produced here
stem=ITS-trimmed-noprimer

time qiime dada2 denoise-single \
    --i-demultiplexed-seqs "${stem}.qza" \
    --p-trunc-len 0 \
    --p-n-threads 5 \
    --o-representative-sequences "${stem}-denoised-rep-seqs.qza" \
    --o-table "${stem}-denoised-table.qza" \
    --o-denoising-stats "${stem}-denoised-stats.qza"

# Representative sequences
time qiime feature-table tabulate-seqs \
    --i-data "${stem}-denoised-rep-seqs.qza" \
    --o-visualization "${stem}-denoised-rep-seqs.qzv"

# Feature table
time qiime feature-table summarize \
    --i-table "${stem}-denoised-table.qza" \
    --o-visualization "${stem}-denoised-table.qzv"

# Denoising statistics
time qiime metadata tabulate \
    --m-input-file "${stem}-denoised-stats.qza" \
    --o-visualization "${stem}-denoised-stats.qzv"

Saved FeatureTable[Frequency] to: ITS-trimmed-noprimer-denoised-table.qza
Saved FeatureData[Sequence] to: ITS-trimmed-noprimer-denoised-rep-seqs.qza
Saved SampleData[DADA2Stats] to: ITS-trimmed-noprimer-denoised-stats.qza



real	8m48.422s
user	20m6.053s
sys	0m17.536s


Saved Visualization to: ITS-trimmed-noprimer-denoised-rep-seqs.qzv



real	0m13.296s
user	0m6.010s
sys	0m1.282s


Saved Visualization to: ITS-trimmed-noprimer-denoised-table.qzv



real	0m12.117s
user	0m5.515s
sys	0m1.272s


Saved Visualization to: ITS-trimmed-noprimer-denoised-stats.qzv



real	0m11.232s
user	0m4.903s
sys	0m1.133s


### 4.4 Cutadapt with primer + denoising w/o truncation

In [19]:
%%bash

# Trim the primer sequence from the single-end ITS reads with cutadapt.

# Stop if the artefact directory is missing so that nothing is written
# relative to an unexpected working directory.
cd /Users/svens/Documents/MicroTerroir/artefacts || exit 1

# --p-adapter is handed to cutadapt as `--adapter`, i.e. as a 3' adapter
time qiime cutadapt trim-single \
    --i-demultiplexed-sequences ITS/single-end/ITS-demux-single-end.qza \
    --p-adapter AACTTTARYCRAAGGATCTC \
    --o-trimmed-sequences ITS/single-end/ITS-revcomp-trimmed.qza \
    --verbose

Running external command line application. This may print messages to stdout and/or stderr.
The commands to be run are below. These commands cannot be manually re-run as they will depend on temporary files that no longer exist.

Command: cutadapt --cores 1 --error-rate 0.1 --times 1 --overlap 3 --minimum-length 1 -q 0,0 --quality-base 33 -o /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/qiime2/svens/processes/16494-1732629237.182453@svens/tmp/q2-OutPath-1_g76so9/364525_001-LP3-ITS-0001_318_L001_R1_001.fastq.gz --adapter AACTTTARYCRAAGGATCTC /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/qiime2/svens/data/57423b24-a117-4fa5-8467-c8ece8970919/data/364525_001-LP3-ITS-0001_318_L001_R1_001.fastq.gz

This is cutadapt 4.9 with Python 3.10.14
Command line parameters: --cores 1 --error-rate 0.1 --times 1 --overlap 3 --minimum-length 1 -q 0,0 --quality-base 33 -o /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/qiime2/svens/processes/16494-1732629237.182453@svens/tmp/q2-OutPath-1_g76so9/3645


real	9m20.738s
user	7m25.936s
sys	1m12.539s


In [20]:
%%bash

# Single-end DADA2 denoising of the primer-trimmed ITS reads, without
# truncation, followed by visualizations of the three outputs.

# Stop if the artefact directory is missing so that nothing is written
# relative to an unexpected working directory.
cd /Users/svens/Documents/MicroTerroir/artefacts || exit 1

# Common stem of the input artifact and of all files produced here
stem=ITS/single-end/ITS-revcomp-trimmed

time qiime dada2 denoise-single \
    --i-demultiplexed-seqs "${stem}.qza" \
    --p-trunc-len 0 \
    --p-n-threads 5 \
    --o-representative-sequences "${stem}-denoised-rep-seqs.qza" \
    --o-table "${stem}-denoised-table.qza" \
    --o-denoising-stats "${stem}-denoised-stats.qza"

# Representative sequences
time qiime feature-table tabulate-seqs \
    --i-data "${stem}-denoised-rep-seqs.qza" \
    --o-visualization "${stem}-denoised-rep-seqs.qzv"

# Feature table
time qiime feature-table summarize \
    --i-table "${stem}-denoised-table.qza" \
    --o-visualization "${stem}-denoised-table.qzv"

# Denoising statistics
time qiime metadata tabulate \
    --m-input-file "${stem}-denoised-stats.qza" \
    --o-visualization "${stem}-denoised-stats.qzv"

Saved FeatureTable[Frequency] to: ITS/single-end/ITS-revcomp-trimmed-denoised-table.qza
Saved FeatureData[Sequence] to: ITS/single-end/ITS-revcomp-trimmed-denoised-rep-seqs.qza
Saved SampleData[DADA2Stats] to: ITS/single-end/ITS-revcomp-trimmed-denoised-stats.qza



real	9m1.237s
user	19m14.519s
sys	0m19.803s


Saved Visualization to: ITS/single-end/ITS-revcomp-trimmed-denoised-rep-seqs.qzv



real	0m8.093s
user	0m5.835s
sys	0m1.308s


Saved Visualization to: ITS/single-end/ITS-revcomp-trimmed-denoised-table.qzv



real	0m7.212s
user	0m5.600s
sys	0m1.313s


Saved Visualization to: ITS/single-end/ITS-revcomp-trimmed-denoised-stats.qzv



real	0m6.313s
user	0m4.933s
sys	0m1.178s


In [23]:
%%bash

# Single-end DADA2 denoising of the primer-trimmed ITS reads with truncation at
# 190, followed by visualizations of the three outputs.

# Stop if the artefact directory is missing so that nothing is written
# relative to an unexpected working directory.
cd /Users/svens/Documents/MicroTerroir/artefacts || exit 1

# Common stem of all files produced here
out_prefix=ITS/single-end/Final/ITS-revcomp-trimmed-denoised-trunc190

# Make sure the output directory exists before the denoising run starts
mkdir -p ITS/single-end/Final

time qiime dada2 denoise-single \
    --i-demultiplexed-seqs ITS/single-end/revcomp-trimmed/ITS-revcomp-trimmed.qza \
    --p-trunc-len 190 \
    --p-n-threads 5 \
    --o-representative-sequences "${out_prefix}-rep-seqs.qza" \
    --o-table "${out_prefix}-table.qza" \
    --o-denoising-stats "${out_prefix}-stats.qza" \
    --verbose

# Representative sequences
time qiime feature-table tabulate-seqs \
    --i-data "${out_prefix}-rep-seqs.qza" \
    --o-visualization "${out_prefix}-rep-seqs.qzv"

# Feature table
time qiime feature-table summarize \
    --i-table "${out_prefix}-table.qza" \
    --o-visualization "${out_prefix}-table.qzv"

# Denoising statistics
time qiime metadata tabulate \
    --m-input-file "${out_prefix}-stats.qza" \
    --o-visualization "${out_prefix}-stats.qzv"

Running external command line application(s). This may print messages to stdout and/or stderr.
The command(s) being run are below. These commands cannot be manually re-run as they will depend on temporary files that no longer exist.

Command: run_dada.R --input_directory /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/qiime2/svens/data/401ed144-77b7-4d67-87f0-37485df4f98a/data --output_path /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpp7nacakv/output.tsv.biom --output_track /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpp7nacakv/track.tsv --filtered_directory /var/folders/qv/m0nf9nf10_3_dt4rl46kwy5r0000gn/T/tmpp7nacakv --truncation_length 190 --trim_left 0 --max_expected_errors 2.0 --truncation_quality_score 2 --max_length Inf --pooling_method independent --chimera_method consensus --min_parental_fold 1.0 --allow_one_off False --num_threads 5 --learn_min_reads 1000000 --homopolymer_gap_penalty NULL --band_size 16

R version 4.3.3 (2024-02-29) 


Lade nötiges Paket: Rcpp


DADA2: 1.30.0 / Rcpp: 1.0.13.1 / RcppParallel: 5.1.9 
2) Filtering .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................


real	8m49.882s
user	22m19.458s
sys	0m24.935s


Saved Visualization to: ITS/single-end/Final/ITS-revcomp-trimmed-denoised-trunc190-rep-seqs.qzv



real	0m11.751s
user	0m9.495s
sys	0m1.278s


Saved Visualization to: ITS/single-end/Final/ITS-revcomp-trimmed-denoised-trunc190-table.qzv



real	0m8.102s
user	0m6.512s
sys	0m1.314s


Saved Visualization to: ITS/single-end/Final/ITS-revcomp-trimmed-denoised-trunc190-stats.qzv



real	0m6.275s
user	0m4.897s
sys	0m1.186s


### 4.5 ITS Paired-End

In [8]:
%%bash

# Sweep DADA2 paired-end denoising of the ITS reads over several truncation
# lengths so the resulting tables and denoising stats can be compared.
# Every path below is relative to the artefacts directory, so stop right away
# if it cannot be entered instead of running qiime from the wrong place.
cd /Users/svens/Documents/MicroTerroir/artefacts || exit 1

demux_seqs=paired-end/ITS-demux-paired-end.qza

# The same truncation length is applied to the forward and the reverse reads.
for trunc_len in 180 190 200 210 220; do
    rep_seqs="paired-end/paired-dada-rep-seqs-${trunc_len}"
    table="paired-end/paired-dada-table-${trunc_len}"
    stats="paired-end/paired-dada-stats-${trunc_len}"

    # If denoising fails, skip the visualizations for this length: their
    # input artifacts would be missing or left over from an earlier run.
    time qiime dada2 denoise-paired \
        --i-demultiplexed-seqs "${demux_seqs}" \
        --p-trunc-len-f "${trunc_len}" \
        --p-trunc-len-r "${trunc_len}" \
        --p-n-threads 5 \
        --o-representative-sequences "${rep_seqs}.qza" \
        --o-table "${table}.qza" \
        --o-denoising-stats "${stats}.qza" \
        || { printf 'denoise-paired failed for trunc-len %s; skipping its visualizations\n' "${trunc_len}" >&2; continue; }

    # Visualize the representative sequences.
    time qiime feature-table tabulate-seqs \
        --i-data "${rep_seqs}.qza" \
        --o-visualization "${rep_seqs}.qzv"

    # Visualize the feature table.
    time qiime feature-table summarize \
        --i-table "${table}.qza" \
        --o-visualization "${table}.qzv"

    # Visualize the denoising statistics.
    time qiime metadata tabulate \
        --m-input-file "${stats}.qza" \
        --o-visualization "${stats}.qzv"
done

Saved FeatureTable[Frequency] to: paired-end/paired-dada-table-180.qza
Saved FeatureData[Sequence] to: paired-end/paired-dada-rep-seqs-180.qza
Saved SampleData[DADA2Stats] to: paired-end/paired-dada-stats-180.qza



real	18m29.574s
user	40m48.438s
sys	0m48.067s


Saved Visualization to: paired-end/paired-dada-rep-seqs-180.qzv



real	0m16.352s
user	0m9.106s
sys	0m1.257s


Saved Visualization to: paired-end/paired-dada-table-180.qzv



real	0m13.113s
user	0m6.510s
sys	0m1.322s


Saved Visualization to: paired-end/paired-dada-stats-180.qzv



real	0m11.574s
user	0m5.152s
sys	0m1.166s


Saved FeatureTable[Frequency] to: paired-end/paired-dada-table-190.qza
Saved FeatureData[Sequence] to: paired-end/paired-dada-rep-seqs-190.qza
Saved SampleData[DADA2Stats] to: paired-end/paired-dada-stats-190.qza



real	125m17.030s
user	42m6.044s
sys	0m48.548s


Saved Visualization to: paired-end/paired-dada-rep-seqs-190.qzv



real	0m17.662s
user	0m10.055s
sys	0m1.514s


Saved Visualization to: paired-end/paired-dada-table-190.qzv



real	0m13.474s
user	0m6.661s
sys	0m1.510s


Saved Visualization to: paired-end/paired-dada-stats-190.qzv



real	0m11.434s
user	0m4.967s
sys	0m1.255s


Saved FeatureTable[Frequency] to: paired-end/paired-dada-table-200.qza
Saved FeatureData[Sequence] to: paired-end/paired-dada-rep-seqs-200.qza
Saved SampleData[DADA2Stats] to: paired-end/paired-dada-stats-200.qza



real	19m51.698s
user	43m22.761s
sys	1m11.662s


Saved Visualization to: paired-end/paired-dada-rep-seqs-200.qzv



real	0m18.714s
user	0m10.742s
sys	0m1.800s


Saved Visualization to: paired-end/paired-dada-table-200.qzv



real	0m14.035s
user	0m6.976s
sys	0m1.739s


Saved Visualization to: paired-end/paired-dada-stats-200.qzv



real	0m11.694s
user	0m5.108s
sys	0m1.351s


Saved FeatureTable[Frequency] to: paired-end/paired-dada-table-210.qza
Saved FeatureData[Sequence] to: paired-end/paired-dada-rep-seqs-210.qza
Saved SampleData[DADA2Stats] to: paired-end/paired-dada-stats-210.qza



real	19m42.321s
user	41m59.870s
sys	1m11.082s


Saved Visualization to: paired-end/paired-dada-rep-seqs-210.qzv



real	0m18.339s
user	0m10.475s
sys	0m1.708s


Saved Visualization to: paired-end/paired-dada-table-210.qzv



real	0m13.834s
user	0m6.829s
sys	0m1.678s


Saved Visualization to: paired-end/paired-dada-stats-210.qzv



real	0m11.627s
user	0m5.031s
sys	0m1.344s


Saved FeatureTable[Frequency] to: paired-end/paired-dada-table-220.qza
Saved FeatureData[Sequence] to: paired-end/paired-dada-rep-seqs-220.qza
Saved SampleData[DADA2Stats] to: paired-end/paired-dada-stats-220.qza



real	17m51.453s
user	38m32.283s
sys	1m3.862s


Saved Visualization to: paired-end/paired-dada-rep-seqs-220.qzv



real	0m17.206s
user	0m9.514s
sys	0m1.614s


Saved Visualization to: paired-end/paired-dada-table-220.qzv



real	0m13.686s
user	0m6.718s
sys	0m1.607s


Saved Visualization to: paired-end/paired-dada-stats-220.qzv



real	0m11.766s
user	0m5.179s
sys	0m1.338s


## 5. Filtering "NOT-USE" samples

In [6]:
import pandas as pd

# Sample sheet for the ITS run; the code below reads its 'Project' and 'Id' columns.
csv_file = '/Users/svens/Documents/MicroTerroir/ITS_demux_lp.csv'
data = pd.read_csv(csv_file)

# Keep every row whose 'Project' value is anything other than 'NOT-USE'.
filtered_data = data.loc[data['Project'].ne('NOT-USE')]

# IDs of the retained samples, as a plain Python list.
sample_ids = filtered_data['Id'].tolist()

# Write the retained IDs to a text file, one per line.
with open('/Users/svens/Documents/MicroTerroir/filtered_sample_ids.txt', 'w') as output_file:
    output_file.writelines(f"{sample_id}\n" for sample_id in sample_ids)

# Echo the list in the notebook output for a quick visual check.
print(sample_ids)

[811547, 811426, 811305, 811548, 811427, 811306, 811549, 811428, 811307, 811429, 811308, 811543, 811422, 811301, 811544, 811423, 811302, 811545, 811424, 811303, 811546, 811425, 811304, 811309, 811550, 811551, 811430, 811431, 811310, 811553, 811432, 811311, 811536, 811415, 811537, 811416, 811538, 811417, 811418, 811532, 811411, 811533, 811412, 811534, 811413, 811535, 811414, 811419, 811540, 811541, 811420, 811542, 811421, 811300, 811569, 811327, 811206, 811328, 811207, 811329, 811208, 811565, 811444, 811323, 811202, 811566, 811445, 811203, 811567, 811568, 811326, 811205, 811572, 811330, 811573, 811331, 811574, 811332, 811211, 811575, 811454, 811212, 811570, 811571, 811558, 811316, 811559, 811438, 811317, 811439, 811318, 811319, 811554, 811312, 811555, 811313, 811556, 811314, 811557, 811315, 811561, 811440, 811562, 811441, 811320, 811563, 811442, 811321, 811200, 811564, 811443, 811322, 811201, 811560, 811624, 811503, 811625, 811504, 811626, 811505, 811627, 811506, 811620, 811621, 811500,

In [14]:
%%bash

# Remove the samples whose Project metadata value is 'NOT-USE' from the ITS
# single-end feature table (the "-190" table), then summarize the result.
# All paths are relative to the project root, so abort if it is unreachable.
cd /Users/svens/Documents/MicroTerroir || exit 1

its_dir=artefacts/ITS/single-end

# filter-samples keeps only the samples matching the --p-where expression.
time qiime feature-table filter-samples \
  --i-table "${its_dir}/dada-table-190.qza" \
  --m-metadata-file ITS-metadata.tsv \
  --p-where "[Project] != 'NOT-USE'" \
  --o-filtered-table "${its_dir}/dada-table-190-samfilt.qza"


time qiime feature-table summarize \
    --i-table "${its_dir}/dada-table-190-samfilt.qza" \
    --o-visualization "${its_dir}/dada-table-190-samfilt.qzv"

Saved FeatureTable[Frequency] to: artefacts/ITS/single-end/dada-table-190-samfilt.qza



real	0m6.024s
user	0m4.962s
sys	0m0.866s


Saved Visualization to: artefacts/ITS/single-end/dada-table-190-samfilt.qzv



real	0m7.990s
user	0m6.349s
sys	0m1.362s


In [15]:
%%bash

# Remove the samples whose Project metadata value is 'NOT-USE' from the
# denoised 16S feature table, then summarize the filtered table.
# All paths are relative to the project root, so abort if it is unreachable.
cd /Users/svens/Documents/MicroTerroir || exit 1

denoised_dir=artefacts/16S/denoised

# filter-samples keeps only the samples matching the --p-where expression.
time qiime feature-table filter-samples \
  --i-table "${denoised_dir}/dada-table.qza" \
  --m-metadata-file 16S-samfilt-metadata.tsv \
  --p-where "[Project] != 'NOT-USE'" \
  --o-filtered-table "${denoised_dir}/dada-table-samfilt.qza"


time qiime feature-table summarize \
    --i-table "${denoised_dir}/dada-table-samfilt.qza" \
    --o-visualization "${denoised_dir}/dada-table-samfilt.qzv"

Saved FeatureTable[Frequency] to: artefacts/16S/denoised/dada-table-samfilt.qza



real	0m7.070s
user	0m4.912s
sys	0m0.926s


Saved Visualization to: artefacts/16S/denoised/dada-table-samfilt.qzv



real	0m7.983s
user	0m6.363s
sys	0m1.382s


In [16]:
%%bash

# Summarize the unfiltered denoised 16S feature table.
# Paths are relative to the project root, so abort if it is unreachable.
cd /Users/svens/Documents/MicroTerroir || exit 1

time qiime feature-table summarize \
    --i-table artefacts/16S/denoised/dada-table.qza \
    --o-visualization artefacts/16S/denoised/dada-table.qzv

Saved Visualization to: artefacts/16S/denoised/dada-table.qzv



real	0m8.127s
user	0m6.423s
sys	0m1.351s
