# 0. Setup

In [1]:
import pandas as pd
from qiime2 import Visualization

# 1. Import datasets (downloaded by q2-fondue)

## 1.1 Metadata

In [4]:
! qiime tools extract --input-path data/metadata.qza --output-path data/
! mv data/7dab3233-f05d-4fa2-802c-250900314160/data/sra-metadata.tsv data/metadata.tsv
! rm -rf data/7dab3233-f05d-4fa2-802c-250900314160/
metadata = pd.read_csv("data/metadata.tsv", sep= "\t")

Extracted data/metadata.qza to directory data/7dab3233-f05d-4fa2-802c-250900314160


In [36]:
# Number of samples for each distinct sample description.
metadata.loc[:, 'Description [sample]'].value_counts()

liquid phase grass    6
rumen fluid corn      6
liquid phase hay      6
rumen fluid grass     6
liquid phase corn     6
rumen fluid hay       6
solid phase hay       6
solid phase corn      6
solid phase grass     6
Name: Description [sample], dtype: int64

In [5]:
# Split each description at its last space: the leading words become the sample
# fraction and the final word becomes the diet (e.g. "liquid phase grass").
metadata[['Sample fractions', 'Diets']] = (
    metadata['Description [sample]']
    .str.rsplit(n=1, expand=True)
)

In [15]:
# Keep only the columns used downstream, with the identifier column named 'Sample ID'.
# rename() returns a new frame here rather than mutating `metadata` in place, so the
# cell is a single pure reassignment.
metadata = (
    metadata
    .rename(columns={'ID': 'Sample ID'})
    [['Sample ID', 'Sample fractions', 'Diets']]
)

In [16]:
# Save the trimmed sample metadata as a tab-separated file without the pandas index
# (this replaces the raw table previously stored at the same path).
metadata.to_csv(path_or_buf='data/metadata.tsv', index=False, sep='\t')

In [17]:
# Re-read the saved tab-separated table from disk into `metadata`.
metadata = pd.read_table("data/metadata.tsv")

## 1.2 Sequence

In [22]:
! qiime tools export --input-path data/sequence.qza --output-path data/paired-sequence
! gunzip data/paired-sequence/*.fastq.gz

Exported data/sequence.qza as SingleLanePerSamplePairedEndFastqDirFmt to directory data/paired-sequence

In [45]:
# Sanity check: print the first 10 lines of one exported, decompressed R1 FASTQ file.
# FASTQ records span 4 lines each, so the last record shown is cut off part-way.
! head -n 10 data/paired-sequence/ERR1842970_00_L001_R1_001.fastq

@ERR1842970.1 M00214:420:000000000-AUC9M:1:1102:14964:1811 length=251
AAGCCTCAAGAGTTAGATCATGGCTCAGGATGAACGCTGGTGGCGTGTCTTATACATGCAAGTCGAGCGGTCCGCAAGGATAGCGGCGCTCGGGTGAGTACCGCCTAAGCAATCTGCCCCGCATATTGGGATAGCCGTGCCAACGCGCGGATAATACCCAATAACGTGGCCCCGCACAGCCGGGGTTGACGAAAGATTCATGGATGCCGGATGAGCTTGCGTCCGATGAGCTAGTTGGCGGGGCAACGGCC
+ERR1842970.1 M00214:420:000000000-AUC9M:1:1102:14964:1811 length=251
1A>?1@BFFFFFGG3FGGGGGGHHHHGHFHCGGHHFFGGGBFCGGGCEFGFHEGHFHFHF1GHHGEEECEE/E///EEE0FFF1>//////<///B/CGB1B///</1?0<01?F1B1/F@C?AGEH11<.<CGHFA<AA:00..:;---;-9;FF00/0.;;99.---@FA----9--9-@?9@;-99FBF@FBFFBFFF///BBB/--;-9;FF///---------/9///9://BF?@-;@-AF--@=
@ERR1842970.2 M00214:420:000000000-AUC9M:1:1102:14564:1863 length=251
AAGCCTCAAGGGTTCGATCATGGCTCAGGATGAACGCTAGCGACAGGCTTAACACATGCAAGTCGAGGGGCAGCGCGGAGGTAGCAATACCTCTGGCGGCGACCGGCGCACTGGTGAGTAACACGTATGCGACCTGCCCCGGACAGGGGGATAAACCCGGGAAACTGGGTCTAATACCCCATATGCCCTGGGGACGCATGTCCTTCGGGAGAAAGATCCGTCGGTCCGGGATCGGCATGCGGCCCATGAGC
+ERR1842970.2 M00214:420:000000000

# 2. Sequence Quality

In [28]:
# Summarize the imported reads in data/sequence.qza and save the result as a
# visualization; its quality plot is what the truncation lengths in the Denoise
# section are based on.
! qiime demux summarize \
    --i-data data/sequence.qza \
    --o-visualization data/demux_seqs.qzv

Saved Visualization to: data/demux_seqs.qzv

In [3]:
# Load the saved demux summary; as the cell's last expression it is displayed inline.
Visualization.load('data/demux_seqs.qzv')

# 3. Denoise

The truncation parameters `--p-trunc-len-f` and `--p-trunc-len-r` were chosen from the quality plot: quality scores drop sharply after position 221 in the forward reads and after position 172 in the reverse reads. The trim parameters `--p-trim-left-f` and `--p-trim-left-r` remove the non-biological bases at the start of each read, i.e. barcode (6 nt) + linker (2 nt) + primer (27f: 20 nt; 338r: 18 nt), which gives 6 + 2 + 20 = 28 nt for the forward reads and 6 + 2 + 18 = 26 nt for the reverse reads.

In [None]:
# Denoise the paired-end reads with DADA2, writing the feature table, the
# representative sequences and the denoising statistics to data/.
#   trunc-len-f / trunc-len-r : 221 / 172, taken from the demux quality plot.
#   trim-left-f : 28 = barcode (6) + linker (2) + 27f primer (20)
#   trim-left-r : 26 = barcode (6) + linker (2) + 338r primer (18)
#   n-threads   : 3, hardcoded here; adjust to the machine running the notebook.
! qiime dada2 denoise-paired \
    --i-demultiplexed-seqs data/sequence.qza \
    --p-trunc-len-f 221 \
    --p-trunc-len-r 172 \
    --p-trim-left-f 28 \
    --p-trim-left-r 26 \
    --p-n-threads 3 \
    --o-table data/dada2_table.qza \
    --o-representative-sequences data/dada2_rep_set.qza \
    --o-denoising-stats data/dada2_stats.qza

In [18]:
# Tabulate the DADA2 denoising statistics artifact into a viewable visualization.
! qiime metadata tabulate \
    --m-input-file data/dada2_stats.qza \
    --o-visualization data/dada2_stats.qzv

Saved Visualization to: data/dada2_stats.qzv

In [3]:
# Load the saved denoising-stats table; as the cell's last expression it is displayed inline.
Visualization.load('data/dada2_stats.qzv')

In [19]:
# Summarize the DADA2 feature table, attaching the sample metadata saved in
# data/metadata.tsv (columns: Sample ID, Sample fractions, Diets).
! qiime feature-table summarize \
    --i-table data/dada2_table.qza \
    --m-sample-metadata-file data/metadata.tsv \
    --o-visualization data/dada2_table.qzv

Saved Visualization to: data/dada2_table.qzv

In [20]:
# Load the saved feature-table summary; as the cell's last expression it is displayed inline.
Visualization.load('data/dada2_table.qzv')