### 0. Setup

Import packages and create folder for data

In [16]:
import os
import pandas as pd
from qiime2 import Visualization
import matplotlib.pyplot as plt

%matplotlib inline

In [17]:
# location of this week's data and all the results produced by this notebook 
# - this should be a path relative to your working directory
data_dir = 'seq_data_new'

if not os.path.isdir(data_dir):
    os.makedirs(data_dir)

### 1. Data Import

Import Metadata 

In [18]:
metadata_df = pd.read_csv(f'{data_dir}/sample_meta_data.tsv', sep='\t', index_col=0)

Import sequence data

In [19]:
! wget -nv -O $data_dir/seq_data.qza 'https://polybox.ethz.ch/index.php/s/AsLORlvUbwgBWTq/download'

2022-10-18 11:46:25 URL:https://polybox.ethz.ch/index.php/s/AsLORlvUbwgBWTq/download [1506379068/1506379068] -> "seq_data_new/seq_data.qza" [1]


### 2. Sequence loading and summary visulazation

In [20]:
! qiime tools peek $data_dir/seq_data.qza

[32mUUID[0m:        32a1795b-d6fb-4ecc-9166-4fe29fb8206a
[32mType[0m:        SampleData[PairedEndSequencesWithQuality]
[32mData format[0m: SingleLanePerSamplePairedEndFastqDirFmt


In [21]:
! qiime demux summarize \
    --i-data $data_dir/seq_data.qza \
    --o-visualization $data_dir/seq_data.qzv

[32mSaved Visualization to: seq_data_new/seq_data.qzv[0m
[0m

In [22]:
Visualization.load(f'{data_dir}/seq_data.qzv')

### Quality filtering

In [28]:
! qiime quality-filter q-score \
    --i-demux $data_dir/seq_data.qza \
    --p-min-quality 30 \
    --p-min-length-fraction 0.9 \
    --o-filtered-sequences $data_dir/demux_seqs_qc.qza \
    --o-filter-stats $data_dir/demux_seqs_qc_stats.qza

[31m[1mError: QIIME 2 plugin 'quality-filter' has no action 'q-score-joined'.[0m


In [29]:
! qiime metadata tabulate \
    --m-input-file $data_dir/demux_seqs_qc_stats.qza \
    --o-visualization $data_dir/demux_seqs_qc_stats.qzv

[32mSaved Visualization to: seq_data_new/demux_seqs_qc_stats.qzv[0m
[0m

In [30]:
Visualization.load(f'{data_dir}/demux_seqs_qc_stats.qzv')

In [24]:
! qiime demux summarize \
    --i-data $data_dir/demux_seqs_qc.qza \
    --o-visualization $data_dir/demux_seqs_qc.qzv

[32mSaved Visualization to: seq_data_new/demux_seqs_qc.qzv[0m
[0m

In [25]:
Visualization.load(f'{data_dir}/demux_seqs_qc.qzv')

### Dereplication and Chimera removal

In [None]:
# Dereplication

! qiime vsearch dereplicate-sequences \
    --i-sequences $data_dir/demux_seqs_qc.qza \
    --o-dereplicated-sequences $data_dir/demux_seqs_derep.qza \
    --o-dereplicated-table $data_dir/demux_table_derep.qza

In [None]:
# Chimera removal

! qiime vsearch uchime-denovo \
    --i-sequences $data_dir/demux_seqs_derep.qza \
    --i-table $data_dir/demux_table_derep.qza \
    --o-chimeras $data_dir/demux_chimeras.qza \
    --o-nonchimeras $data_dir/demux_nonchimeras.qza \
    --o-stats $data_dir/demux_chimera_stats.qza


In [None]:
! qiime feature-table filter-features \
    --i-table $data_dir/demux_table_derep.qza \
    --m-metadata-file $data_dir/demux_nonchimeras.qza \
    --o-filtered-table $data_dir/demux_table_filtered.qza

! qiime feature-table filter-seqs \
    --i-data $data_dir/demux_seqs_derep.qza \
    --m-metadata-file $data_dir/demux_nonchimeras.qza \
    --o-filtered-data $data_dir/demux_seqs_filtered.qza

! qiime feature-table summarize \
    --i-table $data_dir/demux_table_filtered.qza \
    --o-visualization $data_dir/demux_table_filtered.qzv

In [None]:
Visualization.load(f'{data_dir}/demux_table_filtered.qzv')

### Clustering

a) De novo clustering

In [None]:
! qiime vsearch cluster-features-de-novo \
    --i-table $data_dir/demux_table_filtered.qza \
    --i-sequences $data_dir/demux_seqs_filtered.qza \
    --p-perc-identity 0.91 \
    --p-threads 3 \
    --o-clustered-table $data_dir/demux_table_de_novo_91.qza \
    --o-clustered-sequences $data_dir/demux_rep_set_de_novo_91.qza

In [None]:
! qiime feature-table tabulate-seqs \
    --i-data $data_dir/demux_rep_set_de_novo_91.qza \
    --o-visualization $data_dir/demux_rep_set_de_novo_91.qzv

! qiime feature-table summarize \
    --i-table $data_dir/demux_table_de_novo_91.qza \
    --m-sample-metadata-file $data_dir/metadata.tsv \
    --o-visualization $data_dir/demux_table_de_novo_91.qzv

In [None]:
Visualization.load(f'{data_dir}/demux_rep_set_de_novo_91.qzv')

In [None]:
Visualization.load(f'{data_dir}/demux_table_de_novo_91.qzv')

b) Open reference clustering

In [None]:
! qiime tools import \
    --type 'FeatureData[Sequence]' \
    --input-path $data_dir/91_otus.fasta \
    --output-path $data_dir/91_otus.qza

In [None]:
! qiime vsearch cluster-features-open-reference \
    --i-table $data_dir/demux_table_filtered.qza \
    --i-sequences $data_dir/demux_seqs_filtered.qza \
    --i-reference-sequences $data_dir/91_otus.qza \
    --p-perc-identity 0.91 \
    --p-threads 3 \
    --o-clustered-table $data_dir/demux_table_open_ref_91.qza \
    --o-clustered-sequences $data_dir/demux_seqs_open_ref_91.qza \
    --o-new-reference-sequences $data_dir/demux_seqs_open_ref_new_91.qza

In [None]:
! qiime feature-table tabulate-seqs \
    --i-data $data_dir/demux_seqs_open_ref_91.qza \
    --o-visualization $data_dir/demux_seqs_open_ref_91.qzv

! qiime feature-table summarize \
    --i-table $data_dir/demux_table_open_ref_91.qza \
    --m-sample-metadata-file $data_dir/metadata.tsv \
    --o-visualization $data_dir/demux_table_open_ref_91.qzv

In [None]:
Visualization.load(f'{data_dir}/demux_seqs_open_ref_91.qzv')

In [None]:
Visualization.load(f'{data_dir}/demux_table_open_ref_91.qzv')