# Download GNPS Data

In [1]:
import networkx as nx
from gnpsdata import taskresult
import os
from gnpsdata import workflow_fbmn
import pandas as pd
from qiime2 import Visualization

In [2]:
# GNPS task ID; passed to the workflow_fbmn download helpers to select which job's results are fetched
task = "cf6e14abf5604f47b28b467a513d3532"

In [3]:
# Create the output directory up front (no-op when it already exists)
os.makedirs("../data", exist_ok=True)

# Fetch, in order, the GNPS QIIME 2 object and the metadata table for `task`
for fetch, destination in (
    (workflow_fbmn.download_qiime2, "../data/qiime_table.qza"),
    (workflow_fbmn.download_metadata, "../data/unprocessed_metadata.tsv"),
):
    fetch(task, destination)

# Changing Metadata Column Name

In [4]:
# Read the raw GNPS metadata table (tab-separated; no column is used as the index)
# and rename its "filename" column to "#OTU ID".
# errors="raise" makes a missing "filename" column fail right here instead of
# silently writing an unchanged file.
metadata = pd.read_csv(
    "../data/unprocessed_metadata.tsv", sep="\t", index_col=False
).rename(columns={"filename": "#OTU ID"}, errors="raise")

# Write the renamed table back out as a .tsv, without the pandas row index
metadata.to_csv("../data/metadata.tsv", sep="\t", index=False)

# Longitudinal ANOVA

In [5]:
# Model formula handed to `qiime longitudinal anova` through --p-formula
p_formula = 'ATTRIBUTE_Year~ATTRIBUTE_Sample_Area+ATTRIBUTE_Latitude'

In [6]:
! qiime longitudinal anova \
  --m-metadata-file ../data/metadata.tsv \
  --p-formula $p_formula \
  --p-sstype 'I' \
  --o-visualization ../data/metadata.qzv

  @numba.jit()
  @numba.jit()
  @numba.jit()
  @numba.jit()
[31m[1mPlugin error from longitudinal:

  Value must be a nonnegative integer or None

Debug info has been saved to /tmp/qiime2-q2cli-err-7drwvng4.log[0m
[0m

In [8]:
# Load the visualization at the path written by the `qiime longitudinal anova` command
Visualization.load('../data/metadata.qzv')

# Distance Matrix

In [9]:
# Metric name handed to `qiime diversity beta` through --p-metric
p_metric = 'canberra_adkins'

In [10]:
! qiime diversity beta \
  --i-table ../data/qiime_table.qza \
  --p-metric $p_metric \
  --o-distance-matrix ../data/distance_matrix.qza

  @numba.jit()
  @numba.jit()
  @numba.jit()
  @numba.jit()
[32mSaved DistanceMatrix to: ../data/distance_matrix.qza[0m
[0m

# Principal Coordinate Analysis (PCoA)

In [11]:
# Run PCoA on the saved distance matrix and write the result to ../data/pcoa.qza
! qiime diversity pcoa \
  --i-distance-matrix ../data/distance_matrix.qza \
  --o-pcoa ../data/pcoa.qza

  @numba.jit()
  @numba.jit()
  @numba.jit()
  @numba.jit()
[32mSaved PCoAResults to: ../data/pcoa.qza[0m
[0m

# Emperor plot

In [12]:
! qiime emperor plot \
  --i-pcoa ../data/pcoa.qza \
  --m-metadata-file ../data/metadata.tsv \
  --o-visualization ../data/emperor_plot 


  @numba.jit()
  @numba.jit()
  @numba.jit()
  @numba.jit()
[32mSaved Visualization to: ../data/emperor_plot.qzv[0m
[0m

# Visualization

In [13]:
# Load the Emperor plot visualization saved by `qiime emperor plot`
Visualization.load('../data/emperor_plot.qzv')

# Classifier Data/Heat Map

In [14]:
# Settings for `qiime sample-classifier classify-samples`
metadata_column = "ATTRIBUTE_Sample_Area"  # passed as --m-metadata-column
estimator = "RandomForestClassifier"       # passed as --p-estimator
n_estimators, random_state = 500, 123      # --p-n-estimators / --p-random-state

In [15]:
! qiime sample-classifier classify-samples \
  --i-table ../data/qiime_table.qza \
  --m-metadata-file ../data/metadata.tsv \
  --m-metadata-column $metadata_column \
  --p-optimize-feature-selection \
  --p-parameter-tuning \
  --p-estimator $estimator \
  --p-n-estimators $n_estimators \
  --p-random-state $random_state \
  --o-accuracy-results ../data/accuracy_results.qzv \
  --o-feature-importance ../data/feature_importance.qza \
  --o-heatmap ../data/heatmap.qzv \
  --o-model-summary ../data/model_summary.qzv \
  --o-predictions ../data/predictions.qza \
  --o-probabilities ../data/probabilities.qza \
  --o-sample-estimator ../data/sample_estimator.qza \
  --o-test-targets ../data/test_targets.qza \
  --o-training-targets ../data/training_targets.qza 


  @numba.jit()
  @numba.jit()
  @numba.jit()
  @numba.jit()
[31m[1mPlugin error from sample-classifier:

  Value must be a nonnegative integer or None

Debug info has been saved to /tmp/qiime2-q2cli-err-q9akujfi.log[0m
[0m

# Visualization

In [16]:
# Load the heatmap visualization at the path given to classify-samples via --o-heatmap
Visualization.load('../data/heatmap.qzv')

# PERMANOVA

In [17]:
# Metadata column handed to `qiime diversity beta-group-significance` through --m-metadata-column
metadata_column_permanova = 'ATTRIBUTE_Sample_Area'

In [18]:
! qiime diversity beta-group-significance \
  --i-distance-matrix ../data/distance_matrix.qza \
  --m-metadata-file ../data/metadata.tsv \
  --m-metadata-column $metadata_column_permanova \
  --o-visualization ../data/permanova.qzv

  @numba.jit()
  @numba.jit()
  @numba.jit()
  @numba.jit()
[31m[1mPlugin error from diversity:

  Value must be a nonnegative integer or None

Debug info has been saved to /tmp/qiime2-q2cli-err-j1vtdwxp.log[0m
[0m

# Visualization

In [19]:
# Load the visualization at the path given to beta-group-significance via --o-visualization.
# This raises ValueError when that file does not exist, e.g. if the command did not succeed.
Visualization.load("../data/permanova.qzv")

ValueError: ../data/permanova.qzv does not exist.

In [23]:
# Normalize the downloaded feature table with the `normalization-plugin normalize-function`
# action and save the result as ../data/qiime_table_normalized.qza
! qiime normalization-plugin normalize-function \
    --i-input-artifact ../data/qiime_table.qza \
    --o-output-artifact ../data/qiime_table_normalized.qza

  @numba.jit()
  @numba.jit()
  @numba.jit()
  @numba.jit()
[32mSaved FeatureTable[RelativeFrequency] to: ../data/qiime_table_normalized.qza[0m
[0m

In [24]:
! qiime diversity beta \
  --i-table ../data/qiime_table_normalized.qza \
  --p-metric $p_metric \
  --o-distance-matrix ../data/distance_matrix.qza

  @numba.jit()
  @numba.jit()
  @numba.jit()
  @numba.jit()
[31m[1mPlugin error from diversity:

  Parameter 'table' requires an argument of type FeatureTable[Frequency]. An argument of type FeatureTable[RelativeFrequency] was passed.

Debug info has been saved to /tmp/qiime2-q2cli-err-_ooor_ae.log[0m
[0m

In [None]:
! qiime emperor plot \
  --i-pcoa ../data/pcoa.qza \
  --m-metadata-file ../data/metadata.tsv \
  --o-visualization ../data/emperor_plot 