# 1. Metadata handling

In [4]:
# Get the metadata file and store it into data/
!mkdir data/
!wget -nv -O 'data/metadata.tsv' \
	https://data.qiime2.org/2024.10/tutorials/pd-mice/sample_metadata.tsv

2024-11-14 02:49:51 URL:https://doc-00-60-sheets.googleusercontent.com/pub/54bogvaave6cua4cdnls17ksc4/kfkmgnll7qd9i6vvdfmmmr3ph4/1731570590000/105250506097979753968/*/e@2PACX-1vQ3g65vwLGsdtnARKVjuJlJ9wVPOUocJvoluuaB2PO8zA2Q2AwG9q84UvdUtcKkFGVK_32gcFKyqxbE?gid=1509704122&single=true&output=tsv [4431] -> "data/metadata.tsv" [1]


In [6]:
# Summarize and explore the metadata.
!qiime metadata tabulate \
  --m-input-file data/metadata.tsv \
  --o-visualization data/metadata.qzv

[32mSaved Visualization to: data/metadata.qzv[0m
[0m

In [7]:
# Visualize the output
!qiime tools view data/metadata.qzv

Press the 'q' key, Control-C, or Control-D to quit. This view may no longer be accessible or work correctly after quitting.Opening in existing browser session.

Press the 'q' key, Control-C, or Control-D to quit. This view may no longer be accessible or work correctly after quitting.

# 2. Importing data into QIIME 2¶


In [8]:
# Get the manifest file and store it into data/
!wget -nv -O 'data/manifest.tsv' \
	https://data.qiime2.org/2024.10/tutorials/pd-mice/manifest

2024-11-14 03:25:17 URL:https://s3-us-west-2.amazonaws.com/qiime2-data/2024.10/tutorials/pd-mice/manifest [4640/4640] -> "data/manifest.tsv" [1]


In [11]:
# Download sequences
!wget -nv -O "demultiplexed_seqs.zip" \
  "https://data.qiime2.org/2024.10/tutorials/pd-mice/demultiplexed_seqs.zip"

2024-11-14 03:29:45 URL:https://s3-us-west-2.amazonaws.com/qiime2-data/2024.10/tutorials/pd-mice/demultiplexed_seqs.zip [21508775/21508775] -> "demultiplexed_seqs.zip" [1]


In [12]:
# unzip and delete
!unzip demultiplexed_seqs.zip
!rm demultiplexed_seqs.zip

Archive:  demultiplexed_seqs.zip
   creating: demultiplexed_seqs/
  inflating: demultiplexed_seqs/10483.recip.539.ASO.PD4.D7_4_L001_R1_001.fastq.gz  
  inflating: demultiplexed_seqs/10483.recip.539.ASO.PD4.D14_5_L001_R1_001.fastq.gz  
  inflating: demultiplexed_seqs/10483.recip.413.WT.HC2.D7_12_L001_R1_001.fastq.gz  
  inflating: demultiplexed_seqs/10483.recip.220.WT.OB1.D7_30_L001_R1_001.fastq.gz  
  inflating: demultiplexed_seqs/10483.recip.458.ASO.HC3.D49_2_L001_R1_001.fastq.gz  
  inflating: demultiplexed_seqs/10483.recip.538.WT.PD4.D21_4_L001_R1_001.fastq.gz  
  inflating: demultiplexed_seqs/10483.recip.459.WT.HC3.D14_2_L001_R1_001.fastq.gz  
  inflating: demultiplexed_seqs/10483.recip.461.ASO.HC3.D7_20_L001_R1_001.fastq.gz  
  inflating: demultiplexed_seqs/10483.recip.465.ASO.PD3.D14_16_L001_R1_001.fastq.gz  
  inflating: demultiplexed_seqs/10483.recip.461.ASO.HC3.D21_11_L001_R1_001.fastq.gz  
  inflating: demultiplexed_seqs/10483.recip.540.ASO.HC4.D7_7_L001_R1_001.fastq.gz  
  i

In [13]:
# import the data
!qiime tools import \
  --type "SampleData[SequencesWithQuality]" \
  --input-format SingleEndFastqManifestPhred33V2 \
  --input-path ./data/manifest.tsv \
  --output-path ./data/demux_seqs.qza

[32mImported ./data/manifest.tsv as SingleEndFastqManifestPhred33V2 to ./data/demux_seqs.qza[0m
[0m

In [17]:
# Output visualization
!mkdir output
!qiime demux summarize \
  --i-data ./data/demux_seqs.qza \
  --o-visualization ./output/demux_seqs.qzv

mkdir: cannot create directory ‘output’: File exists
[32mSaved Visualization to: ./output/demux_seqs.qzv[0m
[0m

In [20]:
# Visualize results
!qiime tools view output/demux_seqs.qzv

Press the 'q' key, Control-C, or Control-D to quit. This view may no longer be accessible or work correctly after quitting.Opening in existing browser session.

Press the 'q' key, Control-C, or Control-D to quit. This view may no longer be accessible or work correctly after quitting.

# 3. Sequence quality control (denoising) and feature table

In [24]:
# Denoise the data
!mkdir denoised
!qiime dada2 denoise-single \
  --i-demultiplexed-seqs ./data/demux_seqs.qza \
  --p-trunc-len 150 \
  --o-table ./denoised/dada2_table.qza \
  --o-representative-sequences ./denoised/dada2_rep_set.qza \
  --o-denoising-stats ./denoised/dada2_stats.qza

[32mSaved FeatureTable[Frequency] to: ./denoised/dada2_table.qza[0m
[32mSaved FeatureData[Sequence] to: ./denoised/dada2_rep_set.qza[0m
[32mSaved SampleData[DADA2Stats] to: ./denoised/dada2_stats.qza[0m
[0m

In [25]:
# denoise the data statistically
!qiime metadata tabulate \
  --m-input-file ./denoised/dada2_stats.qza  \
  --o-visualization ./denoised/dada2_stats.qzv

[32mSaved Visualization to: ./denoised/dada2_stats.qzv[0m
[0m

In [26]:
# Visualize table
!qiime tools view denoised/dada2_stats.qzv

Press the 'q' key, Control-C, or Control-D to quit. This view may no longer be accessible or work correctly after quitting.Opening in existing browser session.

Press the 'q' key, Control-C, or Control-D to quit. This view may no longer be accessible or work correctly after quitting.

In [27]:
# Feature table summary
!qiime feature-table summarize \
  --i-table ./denoised/dada2_table.qza \
  --m-sample-metadata-file ./data/metadata.tsv \
  --o-visualization ./output/dada2_table.qzv

[32mSaved Visualization to: ./output/dada2_table.qzv[0m
[0m

# 4. Generating a phylogenetic tree for diversity analysis

In [None]:
# Fragment insertion plugin suggest that it can outperform traditional alignment based methods based on short Illumina reads by alignment against a reference tree built out of larger sequences
!mkdir tree
!wget -nv -O "./tree/sepp-refs-gg-13-8.qza" \
  "https://data.qiime2.org/classifiers/sepp-ref-dbs/sepp-refs-gg-13-8.qza"

mkdir: cannot create directory ‘tree’: File exists
2024-11-14 04:32:33 URL:https://s3-us-west-2.amazonaws.com/qiime2-data/classifiers/sepp-ref-dbs/sepp-refs-gg-13-8.qza [50161069/50161069] -> "./tree/sepp-refs-gg-13-8.qza" [1]


In [None]:
# create a fragment insertion tree by fragment insertion
!qiime fragment-insertion sepp \
  --i-representative-sequences ./denoised/dada2_rep_set.qza \
  --i-reference-database ./tree/sepp-refs-gg-13-8.qza \
  --o-tree ./tree/tree.qza \
  --o-placements ./tree/tree_placements.qza \
  --p-threads 2  # update to a higher number if you can

# explore outcome in iTOL

[32mSaved Phylogeny[Rooted] to: ./tree/tree.qza[0m
[32mSaved Placements to: ./tree/tree_placements.qza[0m
[0m

# 5. Alpha Rarefaction and Selecting a Rarefaction Depth

# 6. Diversity analysis


# 7. Taxonomic classification


In [None]:
# Download a pre-trained naive Bayes machine-learning classifier
!mkdir taxonomic_profile
!wget -nv -O "./taxonomic_profile/gg-13-8-99-515-806-nb-classifier.qza" \
  "https://data.qiime2.org/classifiers/sklearn-1.4.2/greengenes/gg-13-8-99-515-806-nb-classifier.qza"

# Naive Bayes classifiers perform best when they’re trained for the specific hypervariable region amplified.

2024-11-14 05:49:57 URL:https://s3-us-west-2.amazonaws.com/qiime2-data/classifiers/sklearn-1.4.2/greengenes/gg-13-8-99-515-806-nb-classifier.qza [2334033/2334033] -> "./taxonomic_profile/gg-13-8-99-515-806-nb-classifier.qza" [1]


In [34]:
#  train a classifier specific for your dataset
!qiime feature-classifier classify-sklearn \
  --i-reads ./denoised/dada2_rep_set.qza \
  --i-classifier ./taxonomic_profile/gg-13-8-99-515-806-nb-classifier.qza \
  --o-classification ./taxonomic_profile/taxonomy.qza

[32mSaved FeatureData[Taxonomy] to: ./taxonomic_profile/taxonomy.qza[0m
[0m

In [35]:
# Review the taxonomy associated with the sequences
!qiime metadata tabulate \
  --m-input-file ./taxonomic_profile/taxonomy.qza \
  --o-visualization ./taxonomic_profile/taxonomy.qzv

[32mSaved Visualization to: ./taxonomic_profile/taxonomy.qzv[0m
[0m

In [36]:
# Visualize table
!qiime tools view ./taxonomic_profile/taxonomy.qzv

Press the 'q' key, Control-C, or Control-D to quit. This view may no longer be accessible or work correctly after quitting.Opening in existing browser session.

Press the 'q' key, Control-C, or Control-D to quit. This view may no longer be accessible or work correctly after quitting.

In [37]:
# Visualize feature table
!qiime feature-table tabulate-seqs \
  --i-data ./denoised/dada2_rep_set.qza \
  --o-visualization ./taxonomic_profile/dada2_rep_set.qzv

[32mSaved Visualization to: ./taxonomic_profile/dada2_rep_set.qzv[0m
[0m

In [39]:
# Visualize table
!qiime tools view ./taxonomic_profile/dada2_rep_set.qzv

Press the 'q' key, Control-C, or Control-D to quit. This view may no longer be accessible or work correctly after quitting.Opening in existing browser session.

Press the 'q' key, Control-C, or Control-D to quit. This view may no longer be accessible or work correctly after quitting.

## Taxonomy Barchart

In [42]:
# filter out samples with fewer sequences than our rarefaction depth
!qiime feature-table filter-samples \
  --i-table ./denoised/dada2_table.qza \
  --p-min-frequency 2000 \
  --o-filtered-table ./taxonomic_profile/table_2k.qza

[32mSaved FeatureTable[Frequency] to: ./taxonomic_profile/table_2k.qza[0m
[0m

In [43]:
# Build interactive barplot
!qiime taxa barplot \
  --i-table ./taxonomic_profile/table_2k.qza \
  --i-taxonomy ./taxonomic_profile/taxonomy.qza \
  --m-metadata-file ./data/metadata.tsv \
  --o-visualization ./taxonomic_profile/taxa_barplot.qzv

[32mSaved Visualization to: ./taxonomic_profile/taxa_barplot.qzv[0m
[0m

In [44]:
# Visualize barplot
!qiime tools view ./taxonomic_profile/taxa_barplot.qzv

Press the 'q' key, Control-C, or Control-D to quit. This view may no longer be accessible or work correctly after quitting.Opening in existing browser session.

Press the 'q' key, Control-C, or Control-D to quit. This view may no longer be accessible or work correctly after quitting.