In [1]:
import os

import networkx as nx
import pandas as pd
from gnpsdata import taskresult
from gnpsdata import workflow_fbmn
from qiime2 import Artifact
from qiime2 import Visualization

In [2]:
# GNPS task identifier; it is passed to the workflow_fbmn download helpers
# to select which job's QIIME 2 table and metadata are fetched.
task = "cf6e14abf5604f47b28b467a513d3532"

In [3]:
# Create the output directory first; exist_ok=True makes re-running this cell harmless
os.makedirs("../data", exist_ok=True)

# Fetch the GNPS QIIME 2 object and then the metadata table for `task`,
# each into its own file under ../data
for fetch, destination in (
    (workflow_fbmn.download_qiime2, "../data/qiime_table.qza"),
    (workflow_fbmn.download_metadata, "../data/unprocessed_metadata.tsv"),
):
    fetch(task, destination)

# Changing Metadata Column Name

In [4]:
# Load the downloaded metadata (index_col=False stops pandas from using the
# first column as the row index) and relabel the "filename" column as "#OTU ID"
metadata = pd.read_csv(
    "../data/unprocessed_metadata.tsv", sep="\t", index_col=False
).rename(columns={"filename": "#OTU ID"})

# Write the relabelled table back out as a tab-separated file, without the row index
metadata.to_csv("../data/metadata.tsv", sep="\t", index=False)

# ANOVA

In [5]:
# Run the QIIME 2 `longitudinal anova` action on the metadata file alone:
# the formula models ATTRIBUTE_Year by ATTRIBUTE_Sample_Area + ATTRIBUTE_Latitude
# with sums-of-squares type 'I'; the result is saved as ../data/metadata.qzv.
! qiime longitudinal anova \
  --m-metadata-file ../data/metadata.tsv \
  --p-formula "ATTRIBUTE_Year~ATTRIBUTE_Sample_Area+ATTRIBUTE_Latitude" \
  --p-sstype 'I' \
  --o-visualization ../data/metadata.qzv

[32mSaved Visualization to: ../data/metadata.qzv[0m
[0m

# Visualization
QIIME 2 visualizations do not work in headless environments; we can view them at https://view.qiime2.org/ instead.

In [6]:
# QIIME 2 manipulations happen after this point, mirroring the other notebooks

# Principal Coordinate Analysis (PCoA) & Distance Matrix

In [7]:
# Compute a beta-diversity distance matrix from the downloaded feature table
# using the canberra_adkins metric; saved as ../data/distance_matrix.qza.
! qiime diversity beta \
  --i-table ../data/qiime_table.qza \
  --p-metric canberra_adkins \
  --o-distance-matrix ../data/distance_matrix.qza

[32mSaved DistanceMatrix to: ../data/distance_matrix.qza[0m
[0m

## PCoA

In [8]:
# Run PCoA on the distance matrix produced by `qiime diversity beta`;
# the ordination is saved as ../data/pcoa.qza.
! qiime diversity pcoa \
  --i-distance-matrix ../data/distance_matrix.qza \
  --o-pcoa ../data/pcoa.qza

[32mSaved PCoAResults to: ../data/pcoa.qza[0m
[0m

# Emperor plot

In [9]:
# Build an Emperor plot from the PCoA result, annotated with the renamed
# metadata table; saved as ../data/emperor_plot.qzv.
! qiime emperor plot \
  --i-pcoa ../data/pcoa.qza \
  --m-metadata-file ../data/metadata.tsv \
  --o-visualization ../data/emperor_plot.qzv

[32mSaved Visualization to: ../data/emperor_plot.qzv[0m
[0m

# Visualization

In [10]:
# Load the Emperor visualization written by `qiime emperor plot`; as the last
# expression in the cell, the loaded object becomes the cell's output.
Visualization.load('../data/emperor_plot.qzv')

# Classifier Data/Heat Map

In [11]:
! qiime sample-classifier classify-samples \
  --i-table ../data/qiime_table.qza \
  --m-metadata-file ../data/metadata.tsv \
  --m-metadata-column ATTRIBUTE_Sample_Area \
  --p-optimize-feature-selection \
  --p-parameter-tuning \
  --p-estimator RandomForestClassifier \
  --p-n-estimators 500 \
  --p-random-state 123 \
  --o-accuracy-results ../data/accuracy_results.qzv \
  --o-feature-importance ../data/feature_importance.qza \
  --o-heatmap ../data/heatmap.qzv \
  --o-model-summary ../data/model_summary.qzv \
  --o-predictions ../data/predictions.qza \
  --o-probabilities ../data/probabilities.qza \
  --o-sample-estimator ../data/sample_estimator.qza \
  --o-test-targets ../data/test_targets.qza \
  --o-training-targets ../data/training_targets.qza 


[31m[1mPlugin error from sample-classifier:

  Missing samples in metadata: {'SD_01-2018_2_b.mzXML ', 'SD_12-2017_27_b.mzXML ', 'SD_01-2018_4_b.mzXML ', 'SD_12-2017_26_b.mzXML ', 'SD_01-2018_8_a.mzXML ', 'SD_12-2017_26_a.mzXML ', 'SD_12-2017_2_b.mzXML ', 'SD_01-2018_9_a.mzXML ', 'SD_01-2018_PPL_Bl_2.mzXML ', 'SD_01-2018_5_a.mzXML ', 'SD_10_2018_12_b.mzXML ', 'SD_10_2018_26_b.mzXML ', 'SD_12-2017_18_a.mzXML ', 'SD_12-2017_22_a.mzXML ', 'SD_01-2018_17_a.mzXML ', 'SD_10_2018_8_b.mzXML ', 'SD_10_2018_29_a.mzXML ', 'SD_12-2017_6_a.mzXML ', 'SD_12-2017_PPL_Bl_1.mzXML ', 'SD_01-2018_13_b.mzXML ', 'SD_01-2018_19_a.mzXML ', 'SD_10_2018_11_a.mzXML ', 'SD_10_2018_2_b.mzXML ', 'SD_01-2018_29_a.mzXML ', 'SD_01-2018_28_b.mzXML ', 'SD_10_2018_17_b.mzXML ', 'SD_12-2017_8_b.mzXML ', 'SD_12-2017_21_a.mzXML ', 'SD_10_2018_6_b.mzXML ', 'SD_12-2017_29_a.mzXML ', 'SD_01-2018_8_b.mzXML ', 'SD_10_2018_5_b.mzXML ', 'SD_10_2018_14_a.mzXML ', 'SD_10_2018_22_b.mzXML ', 'SD_10_2018_14_b.mzXML ', 'SD_01-2018_18_b

# Visualization

In [12]:
# Load the heatmap written by the classify-samples command (--o-heatmap).
# The file only exists if that command completed successfully.
Visualization.load('../data/heatmap.qzv')

# PermANOVA

In [13]:
# Test whether distances in the distance matrix differ between the groups in
# the ATTRIBUTE_Sample_Area metadata column; saved as ../data/permanova.qzv.
! qiime diversity beta-group-significance \
  --i-distance-matrix ../data/distance_matrix.qza \
  --m-metadata-file ../data/metadata.tsv \
  --m-metadata-column ATTRIBUTE_Sample_Area \
  --o-visualization ../data/permanova.qzv

[32mSaved Visualization to: ../data/permanova.qzv[0m
[0m

# Visualization

In [14]:
# Load the beta-group-significance visualization saved as permanova.qzv;
# as the last expression in the cell, it becomes the cell's output.
Visualization.load("../data/permanova.qzv")