### 0. Setup

Import packages and create folder for data

In [1]:
import os
import pandas as pd
from qiime2 import Visualization
import matplotlib.pyplot as plt

%matplotlib inline

In [3]:
# Location of this week's data and of every result produced by this notebook.
# - this should be a path relative to your working directory
data_dir = 'seq_data_new'

# exist_ok=True keeps the cell idempotent on re-runs and avoids the
# check-then-create race of a separate os.path.isdir() test.
os.makedirs(data_dir, exist_ok=True)

### 1. Data Import

Import Metadata 

In [14]:
# Sample metadata: tab-separated file, first column used as the index.
# No cell above fetches this file, so on a fresh checkout it has to be placed
# in data_dir by hand; fail early with an actionable message if it is missing.
metadata_path = f'{data_dir}/sample_meta_data.tsv'
if not os.path.isfile(metadata_path):
    raise FileNotFoundError(
        f"Sample metadata not found at '{metadata_path}'. "
        f"Copy sample_meta_data.tsv into '{data_dir}' before running this cell."
    )
metadata_df = pd.read_csv(metadata_path, sep='\t', index_col=0)

Import sequence data

In [9]:
! wget -nv -O $data_dir/seq_data.qza 'https://polybox.ethz.ch/index.php/s/AsLORlvUbwgBWTq/download'

2022-10-12 09:51:27 URL:https://polybox.ethz.ch/index.php/s/AsLORlvUbwgBWTq/download [1506379068/1506379068] -> "seq_data_new/seq_data.qza" [1]


### 2. Sequence loading and summary visualization

In [4]:
! qiime tools peek $data_dir/seq_data.qza

[32mUUID[0m:        32a1795b-d6fb-4ecc-9166-4fe29fb8206a
[32mType[0m:        SampleData[PairedEndSequencesWithQuality]
[32mData format[0m: SingleLanePerSamplePairedEndFastqDirFmt


In [4]:
! qiime demux summarize \
    --i-data $data_dir/seq_data.qza \
    --o-visualization $data_dir/seq_data.qzv

[32mSaved Visualization to: seq_data_new/seq_data.qzv[0m
[0m

In [5]:
# Load the demux summary; as the last expression it becomes the cell output.
Visualization.load(data_dir + '/seq_data.qzv')

### 3. Denoise sequence reads

In [8]:
! qiime dada2 denoise-paired \
    --i-demultiplexed-seqs $data_dir/seq_data.qza \
    --p-trunc-len-f 130 \
    --p-trunc-len-r 130 \
    --p-n-threads 3 \
    --o-table $data_dir/dada2_table.qza \
    --o-representative-sequences $data_dir/dada2_rep_set.qza \
    --o-denoising-stats $data_dir/dada2_stats.qza

[32mSaved FeatureTable[Frequency] to: seq_data_new/dada2_table.qza[0m
[32mSaved FeatureData[Sequence] to: seq_data_new/dada2_rep_set.qza[0m
[32mSaved SampleData[DADA2Stats] to: seq_data_new/dada2_stats.qza[0m
[0m

In [6]:
! qiime metadata tabulate \
    --m-input-file $data_dir/dada2_stats.qza \
    --o-visualization $data_dir/dada2_stats.qzv

[32mSaved Visualization to: seq_data_new/dada2_stats.qzv[0m
[0m

In [7]:
# Load the denoising-stats table; as the last expression it becomes the cell output.
Visualization.load(data_dir + '/dada2_stats.qzv')

In [11]:
! qiime feature-table tabulate-seqs \
    --i-data $data_dir/dada2_rep_set.qza \
    --o-visualization $data_dir/dada2_rep_set.qzv

[32mSaved Visualization to: seq_data_new/dada2_rep_set.qzv[0m
[0m

In [8]:
# Load the representative-sequence table; as the last expression it becomes the cell output.
Visualization.load(data_dir + '/dada2_rep_set.qzv')

In [14]:
! qiime feature-table summarize \
    --i-table $data_dir/dada2_table.qza \
    --m-sample-metadata-file $data_dir/sample_meta_data.tsv \
    --o-visualization $data_dir/dada2_table.qzv

[32mSaved Visualization to: seq_data_new/dada2_table.qzv[0m
[0m

In [9]:
# Load the feature-table summary; as the last expression it becomes the cell output.
Visualization.load(data_dir + '/dada2_table.qzv')

### 4. Quality filtering

In [None]:
! qiime quality-filter q-score \
    --i-demux $data_dir/seq_data.qza \
    --p-min-quality 30 \
    --p-min-length-fraction 0.9 \
    --o-filtered-sequences $data_dir/demux_seqs_qc.qza \
    --o-filter-stats $data_dir/demux_seqs_qc_stats.qza

In [10]:
! qiime demux summarize \
    --i-data $data_dir/demux_seqs_qc.qza \
    --o-visualization $data_dir/demux_seqs_qc.qzv

[32mSaved Visualization to: seq_data_new/demux_seqs_qc.qzv[0m
[0m

In [11]:
# Load the filtered-read summary; as the last expression it becomes the cell output.
Visualization.load(data_dir + '/demux_seqs_qc.qzv')