#### Adam Klie<br>05/07/2020
## Perform core microbiome analyses in Qiime2
1. Taxonomic classification
2. Rarefaction plots
3. Core diversity
4. Differential abundance
5. Compositionally aware diversity
6. Sample classification

In [139]:
import qiime2
import warnings
warnings.filterwarnings('ignore')
from qiime2.plugins import feature_table, taxa, diversity, songbird, metadata, deicode, emperor, qurro, sample_classifier

## Directory set-up and data loading

In [2]:
# Root folders: raw input data and the results tree this notebook reads from / writes to
data_dir = "../data/test/2020_05_05"
results_dir = "../results/test/2020_05_06"

In [3]:
# One sub-folder per analysis stage, all rooted at results_dir
tax_dir = f'{results_dir}/taxonomy'
table_dir = f'{results_dir}/feature_tables'
tree_dir = f'{results_dir}/fragment_insertion'
diversity_dir = f'{results_dir}/diversity'
diff_dir = f'{results_dir}/differential_abundance'

In [4]:
# Read the feature table artifact and the sample metadata from disk.
# NOTE: the import cell also binds the name `feature_table` (to the q2 feature-table
# plugin); this assignment rebinds it to the loaded artifact.
feature_table = qiime2.Artifact.load(f'{table_dir}/insertion-table.qza')
meta_data = qiime2.Metadata.load(f'{data_dir}/metadata.tsv')

In [5]:
# Restrict the metadata to a small set of columns so that plotting is faster
plot_columns = [
    "age_corrected",
    "sex",
    "body_site",
    "sample_type",
    "bmi_corrected",
    "exercise_frequency",
    "country",
    "race",
    "diet_type",
]
filtered_meta_data = qiime2.Metadata(meta_data.to_dataframe()[plot_columns])

## Taxonomy analysis
 - Inputs: 
     - insertion-table.qza (FeatureTable[Frequency] artifact)
     - bespoke-taxonomy.qza (FeatureData[Taxonomy] artifact)
     - metadata.tsv (tab-separated table)
 - Outputs: 
     - taxa-bar-plot.qzv (Visualization)
 - Plugins: 
     - taxa

In [36]:
# Read the taxonomy assignments artifact from the taxonomy results folder
taxonomic_class = qiime2.Artifact.load(f'{tax_dir}/bespoke-taxonomy.qza')

In [None]:
# Build the taxa bar plot from the feature table, the taxonomy and the full sample metadata
taxonomic_class_vis = taxa.visualizers.barplot(
    table=feature_table,
    taxonomy=taxonomic_class,
    metadata=meta_data,
)
# Uncomment to render the plot inline:
#taxonomic_class_vis.visualization

In [None]:
# Write the bar plot visualization next to the taxonomy artifact
taxonomic_class_vis.visualization.save(f'{tax_dir}/taxa-bar-plot.qzv')

## Rarefaction plots
 - Inputs: 
     - insertion-table.qza (FeatureTable[Frequency] artifact)
     - sepp-tree.qza (Phylogeny[Rooted] artifact)
     - metadata.tsv (tab-separated table) -- highly recommend using filtered metadata here
 - Outputs: 
     - alpha-rarefaction.qzv (Visualization)
 - Plugins: 
     - diversity

In [6]:
# Read the insertion tree artifact from the fragment-insertion results folder
tree = qiime2.Artifact.load(f'{tree_dir}/sepp-tree.qza')

In [33]:
# Alpha rarefaction curves up to a depth of 10000, using the filtered metadata.
# Author's timing note: about 4 minutes with ~30 samples and 9 metadata columns.
alpha_rarefaction_vis = diversity.visualizers.alpha_rarefaction(
    table=feature_table,
    max_depth=10000,
    phylogeny=tree,
    metadata=filtered_meta_data,
)
# Uncomment to render the plot inline:
#alpha_rarefaction_vis.visualization

CPU times: user 4min 1s, sys: 9.87 s, total: 4min 11s
Wall time: 4min 19s


In [41]:
# Write the rarefaction visualization into the diversity results folder
alpha_rarefaction_vis.visualization.save(f'{diversity_dir}/alpha-rarefaction.qzv')

'../results/test/2020_05_06/diversity/alpha-rarefaction.qzv'

## Core diversity
 - Inputs: 
     - insertion-table.qza (FeatureTable[Frequency] artifact)
     - sepp-tree.qza (Phylogeny[Rooted] artifact)
     - metadata.tsv (tab-separated table) -- highly recommend using filtered metadata here
 - Outputs: 
     - rarefied-table.qza (FeatureTable[Frequency])
     - rarefied-table.qzv (Visualization)
     - shannon-vector.qza (SampleData[AlphaDiversity])
     - faith-pd-vector.qza (SampleData[AlphaDiversity])
     - faith-pd-group-significance.qzv (Visualization)
     - uw-unifrac-exercise-significance.qzv (Visualization)
     - uw-unifrac-emperor.qzv (Visualization)
 - Plugins: 
     - diversity
     - feature-table

In [137]:
# Rarefaction depth: samples below this depth are lost, samples above it are subsampled
depth = 5_000

In [12]:
# Run the phylogenetic core-metrics pipeline at the chosen sampling depth
core_diversity = diversity.pipelines.core_metrics_phylogenetic(
    table=feature_table,
    phylogeny=tree,
    sampling_depth=depth,
    metadata=filtered_meta_data,
    n_jobs=1,
)

In [141]:
# Visualize the rarefied feature table used for analysis.
# The data-loading cell rebinds `feature_table` to the loaded artifact, so by the time
# this cell runs the name no longer refers to the q2 feature-table plugin and
# `feature_table.visualizers` fails on a fresh kernel. Import the plugin under its own
# alias so the cell works with Restart & Run All.
from qiime2.plugins import feature_table as q2_feature_table

rarefied_table_vis = q2_feature_table.visualizers.summarize(
    table=core_diversity.rarefied_table,
    sample_metadata=filtered_meta_data,
)

In [142]:
# Persist the rarefied table (artifact + summary) and the two alpha-diversity vectors
core_diversity.rarefied_table.save(f'{table_dir}/rarefied-table.qza')
rarefied_table_vis.visualization.save(f'{table_dir}/rarefied-table.qzv')
core_diversity.shannon_vector.save(f'{diversity_dir}/shannon-vector.qza')
core_diversity.faith_pd_vector.save(f'{diversity_dir}/faith-pd-vector.qza')

'../results/test/2020_05_06/diversity/faith-pd-vector.qza'

### A. Alpha diversity significance testing
As recommended by Bod, test alpha significance for shannon diversity and faith's pd
 - Input
     - shannon-vector.qza (SampleData[AlphaDiversity])
     - faith-pd-vector.qza (SampleData[AlphaDiversity])
 - Output
     - shannon-group-significance.qzv (Visualization)
     - faith-pd-group-significance.qzv (Visualization)
 - Plugin
     - diversity

In [45]:
# Shannon diversity: group-significance test against the filtered metadata
shannon_div = diversity.visualizers.alpha_group_significance(
    alpha_diversity=core_diversity.shannon_vector,
    metadata=filtered_meta_data,
)
# Uncomment to render the result inline:
#shannon_div.visualization

In [47]:
# Faith's PD: group-significance test against the filtered metadata
faiths_div = diversity.visualizers.alpha_group_significance(
    alpha_diversity=core_diversity.faith_pd_vector,
    metadata=filtered_meta_data,
)
# Uncomment to render the result inline:
#faiths_div.visualization

In [57]:
# Write both alpha group-significance visualizations into the diversity results folder
shannon_div.visualization.save(f'{diversity_dir}/shannon-group-significance.qzv')
faiths_div.visualization.save(f'{diversity_dir}/faith-pd-group-significance.qzv')

'../results/test/2020_05_06/diversity/faith-pd-group-significance.qzv'

### B. Beta diversity significance testing
Unweighted UniFrac on Emperor<br>
https://forum.qiime2.org/t/plugin-error-from-diversity-alpha-rarefaction/4548/4

In [79]:
# Replace '/' with '_' in the exercise_frequency categories
# (presumably to avoid the plugin error discussed in the forum thread linked above -- confirm).
# Only real strings are rewritten: the previous `str(x)` call turned missing values (NaN)
# into the literal category 'nan', which would then be treated as its own group downstream.
exercise_df = filtered_meta_data.to_dataframe()
exercise_df['exercise_frequency'] = exercise_df['exercise_frequency'].map(
    lambda value: value.replace('/', '_') if isinstance(value, str) else value
)
# Rebind filtered_meta_data; later cells read the sanitized column from it.
filtered_meta_data = qiime2.Metadata(exercise_df)

In [81]:
# Pairwise group-significance test on the unweighted UniFrac distance matrix,
# grouping samples by the exercise_frequency metadata column
exercise_column = filtered_meta_data.get_column('exercise_frequency')
uw_unifrac_div = diversity.visualizers.beta_group_significance(
    distance_matrix=core_diversity.unweighted_unifrac_distance_matrix,
    metadata=exercise_column,
    pairwise=True,
)
uw_unifrac_div.visualization

In [84]:
# Display the unweighted UniFrac Emperor visualization returned by the core-metrics pipeline
core_diversity.unweighted_unifrac_emperor

Visualization

In [86]:
# Write the beta group-significance result and the Emperor plot into the diversity folder
uw_unifrac_div.visualization.save(f'{diversity_dir}/uw-unifrac-exercise-significance.qzv')
core_diversity.unweighted_unifrac_emperor.save(f'{diversity_dir}/uw-unifrac-emperor.qzv')

'../results/test/2020_05_06/diversity/uw-unifrac-emperor.qzv'

# Songbird
 - More work needs to be put into specifying a formula
 - https://github.com/biocore/songbird/
 - https://github.com/biocore/songbird/issues/126
 - https://github.com/knightlab-analyses/reference-frames

In [105]:
# Formula mode
songbird_result = songbird.methods.multinomial(table=feature_table,
                                              metadata=filtered_meta_data,
                                              formula="exercise_frequency",
                                              epochs=1000,
                                              differential_prior=0.1,
                                              summary_interval=1)

100%|██████████| 5000/5000 [00:08<00:00, 556.75it/s]


In [106]:
# Null model
null_result = songbird.methods.multinomial(table=feature_table,
                                              metadata=filtered_meta_data,
                                              formula="1",
                                              epochs=1000,
                                              differential_prior=0.1,
                                              summary_interval=1)

100%|██████████| 5000/5000 [00:08<00:00, 609.95it/s]


In [107]:
# Compare the formula model against the null model and display the paired summary
songbird.visualizers.summarize_paired(
    regression_stats=songbird_result.regression_stats,
    baseline_stats=null_result.regression_stats,
).visualization

In [104]:
# Disabled: summary of the formula model on its own (uncomment to render inline)
#songbird.visualizers.summarize_single(regression_stats=songbird_result.regression_stats).visualization

In [91]:
# Disabled: tabulate the songbird differentials viewed as Metadata (uncomment to render inline)
#metadata.visualizers.tabulate(songbird_result.differentials.view(qiime2.Metadata)).visualization

## DEICODE
 - https://library.qiime2.org/plugins/deicode/19/
 - https://github.com/biocore/DEICODE
 - https://forum.qiime2.org/t/robust-aitchison-pca-beta-diversity-with-deicode/8333
 - https://nbviewer.jupyter.org/github/biocore/DEICODE/blob/master/ipynb/tutorials/moving-pictures.ipynb
 - https://nbviewer.jupyter.org/github/biocore/DEICODE/blob/master/ipynb/tutorials/moving-pictures-standalone-cli-and-api.ipynb

In [111]:
# Run DEICODE's rpca method on the feature table
deicode_result = deicode.methods.rpca(
    table=feature_table,
)

In [131]:
# Emperor biplot of the DEICODE ordination, displayed inline.
# The original cell passed `try_did`, a name that is never defined in this notebook
# (NameError on a fresh kernel). The taxonomy viewed as Metadata is used as the feature
# metadata instead -- presumably what `try_did` held; confirm with the author.
emperor.visualizers.biplot(
    biplot=deicode_result.biplot,
    sample_metadata=filtered_meta_data,
    feature_metadata=taxonomic_class.view(qiime2.Metadata),
).visualization

## Qurro
 - https://github.com/biocore/qurro
 - https://nbviewer.jupyter.org/github/biocore/qurro/blob/master/example_notebooks/moving_pictures/moving_pictures.ipynb

In [133]:
# Show the taxonomy assignments as a DataFrame (via the Metadata view)
(
    taxonomic_class
    .view(qiime2.Metadata)
    .to_dataframe()
)

Unnamed: 0_level_0,Taxon,Confidence
Feature ID,Unnamed: 1_level_1,Unnamed: 2_level_1
TACGTAGGTGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGCGTGTAGGCGGGACTGCAAGTCAGATGTGAAAACTATGGGCTCAACCCATAGCCTGCATTTGAAACTGTAGTTCTTGAGTGTCGGAGAGGCAATCGGAATTCCGTG,k__Bacteria; p__Firmicutes; c__Clostridia; o__...,0.9984142153389206
TACGTAGGTCACAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGAAGACAAGTTGGAAGTGAAATCTATGGGCTCAACCCATAGCCTGCATTTGAAACTGCAGAGCTTGAGTGAAGTAGAGGCAGGCGGAATTCCCCG,k__Bacteria; p__Firmicutes; c__Clostridia; o__...,0.8512967814067061
TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGACCGGCAAGTTGGAAGTGAAATCCATGGGCTCAACCCGTGAATTGCTTTCAAAACTGCTGGCCTCGAGTAGTGCAGAGGTAGGTGGAATTCCCGG,k__Bacteria; p__Firmicutes; c__Clostridia; o__...,0.9999999274329998
TACGTAGGTGGCGAGCGTTGTCCGGATTTACTGGGTGTAAAGGGCGTGTAGGCGGGCATTTAAGTCAGATGTGAAATACCGGGGCTCAACCCCGGGGCTGCATTTGAAACTGAAAGTCTTGAGTGCCGGAGAGGAAAGCGGAATTCCTAG,k__Bacteria; p__Firmicutes; c__Clostridia; o__...,0.7711965842506433
TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCGAGACAAGTCTGAAGTGAAAGCCCGGGGCTCAACCACGGGACTGCTTTGGAAACTGTGTTGCTAGAGTGCTGGAGAGGTAAGCGGAATTCCTAG,k__Bacteria; p__Firmicutes; c__Clostridia; o__...,0.9255721817322728
...,...,...
TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGCAGGCGGGCTCATAAGTCAGCGGTGAAATCCCAATGCTTAACATTGGAACTGCCGTTGAAACTGTCAGCCTTGAGTATAGATGAAGTAGGCGGAATTCGTTG,k__Bacteria; p__Bacteroidetes; c__Bacteroidia;...,0.936409222972631
TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGGCGGCTGAGCAAGTCAGAAGTGAAAGCCCGCGGCTTAACTGCGGGACTGCTTTTGAAACTGCCCAGCTTGATTGCCGGAGAAGTAAGTGGAATTCCTAG,k__Bacteria; p__Firmicutes; c__Clostridia; o__...,0.7492683012014975
TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGGAGATTAAGCGTGTTGTGAAATGTAGATGCTCAACATCTGCACTGCAGCGCGAACTGGTTTCCTTGAGTACGCACAAAGTGGGCGGAATTCGTGG,k__Bacteria; p__Bacteroidetes; c__Bacteroidia;...,0.9999997893655115
TACGTAGGTTGCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGCGTGTAGGCGGAGATGCAAGTTAGGAGTGAAATCTATGGGCTCAACCCATAAACTGCTTCTAAAACTGTGTCCCTTGAGTATCGGAGAGGCAAGCGGAATTCCTAG,k__Bacteria; p__Firmicutes; c__Clostridia; o__...,0.9999602697565383


In [134]:
# Qurro loading plot for the DEICODE biplot, displayed inline.
# The original cell passed `try_did`, a name that is never defined in this notebook
# (NameError on a fresh kernel). The taxonomy viewed as Metadata is used as the feature
# metadata instead -- presumably what `try_did` held; confirm with the author.
qurro.visualizers.loading_plot(
    deicode_result.biplot,
    table=feature_table,
    sample_metadata=filtered_meta_data,
    feature_metadata=taxonomic_class.view(qiime2.Metadata),
).visualization

2543 feature(s) in the BIOM table were not present in the feature rankings.
These feature(s) have been removed from the visualization.
65 sample(s) in the sample metadata file were not present in the BIOM table.
These sample(s) have been removed from the visualization.


## Sample classification

In [None]:
# TODO: choose and run a sample-classifier method.
# The original cell was an unfinished stub (`sample_classifier.methods.` followed by a
# stray backtick) that raised a SyntaxError. Until a method is chosen, list the public
# names available under sample_classifier.methods so the cell runs cleanly.
[name for name in dir(sample_classifier.methods) if not name.startswith('_')]

In [135]:
diversity.visualizers.adonis?

[0;31mCall signature:[0m
[0mdiversity[0m[0;34m.[0m[0mvisualizers[0m[0;34m.[0m[0madonis[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mdistance_matrix[0m[0;34m:[0m[0mDistanceMatrix[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmetadata[0m[0;34m:[0m[0mMetadata[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mformula[0m[0;34m:[0m[0mStr[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mpermutations[0m[0;34m:[0m[0mInt[0m [0;34m%[0m [0mRange[0m[0;34m([0m[0;36m1[0m[0;34m,[0m [0;32mNone[0m[0;34m)[0m[0;34m=[0m[0;36m999[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mn_jobs[0m[0;34m:[0m[0mInt[0m [0;34m%[0m [0mRange[0m[0;34m([0m[0;36m1[0m[0;34m,[0m [0;32mNone[0m[0;34m)[0m[0;34m=[0m[0;36m1[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m [0;34m->[0m [0;34m([0m[0mVisualization[0m[0;34m,[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mType:[0m           Visualizer
[0;31mString form:[0m    <visualizer qiime2.plugins.diversity.visua