In [1]:
import networkx as nx
from gnpsdata import taskresult
import os
from gnpsdata import workflow_fbmn
import pandas as pd
import csv

In [2]:
# GNPS task identifier; every download helper below fetches result files of this task
task = "cf6e14abf5604f47b28b467a513d3532"

In [3]:
# Downloading raw data from GNPS
def download_graphml(task, output_file):
    """Fetch the molecular-network GraphML result of a GNPS task.

    Args:
        task: GNPS task identifier.
        output_file: Destination path handed to the downloader.
    """
    resource = "gnps_molecular_network_graphml/"
    taskresult.download_task_resultfile(task, resource, output_file)

def get_graphml_network(task):
    """Download a GNPS task's molecular network and load it with networkx.

    The GraphML file is staged in a private temporary directory that is
    removed before returning, so nothing is left behind in the working
    directory and parallel runs cannot overwrite each other's download.

    Args:
        task: GNPS task identifier.

    Returns:
        The graph object produced by ``nx.read_graphml`` for the downloaded file.
    """
    import tempfile

    with tempfile.TemporaryDirectory() as scratch_dir:
        graphml_path = os.path.join(scratch_dir, "network.graphml")
        taskresult.download_task_resultfile(
            task, "gnps_molecular_network_graphml/", graphml_path
        )
        # read_graphml parses the whole file before returning, so the file
        # can safely disappear when the context manager exits.
        return nx.read_graphml(graphml_path)

def download_quantification(task, output_file):
    """Fetch the feature quantification table of a GNPS task.

    Args:
        task: GNPS task identifier.
        output_file: Destination path handed to the downloader.
    """
    resource = "quantification_table/"
    taskresult.download_task_resultfile(task, resource, output_file)

def download_metadata(task, output_file):
    """Fetch the merged metadata table of a GNPS task.

    Args:
        task: GNPS task identifier.
        output_file: Destination path handed to the downloader.
    """
    resource = "metadata_merged/"
    taskresult.download_task_resultfile(task, resource, output_file)

def download_mgf(task, output_file):
    """Fetch the reformatted spectra result of a GNPS task.

    Args:
        task: GNPS task identifier.
        output_file: Destination path handed to the downloader.
    """
    resource = "spectra_reformatted/"
    taskresult.download_task_resultfile(task, resource, output_file)
    
# Qiime2 Data
def download_qiime2(task, output_file):
    """Fetch the QIIME 2 feature-table artifact (``qiime2_table.qza``) of a GNPS task.

    Args:
        task: GNPS task identifier.
        output_file: Destination path handed to the downloader.
    """
    resource = "qiime2_output/qiime2_table.qza"
    taskresult.download_task_resultfile(task, resource, output_file)

def download_qiime2_manifest(task, output_file):
    """Fetch the QIIME 2 manifest (``qiime2_manifest.tsv``) of a GNPS task.

    Args:
        task: GNPS task identifier.
        output_file: Destination path handed to the downloader.
    """
    resource = "qiime2_output/qiime2_manifest.tsv"
    taskresult.download_task_resultfile(task, resource, output_file)

def download_qiime2_metadata(task, output_file):
    """Fetch the QIIME 2 metadata table (``qiime2_metadata.tsv``) of a GNPS task.

    Args:
        task: GNPS task identifier.
        output_file: Destination path handed to the downloader.
    """
    resource = "qiime2_output/qiime2_metadata.tsv"
    taskresult.download_task_resultfile(task, resource, output_file)

In [4]:
# The target directory must exist before any download writes into it
os.makedirs("../data", exist_ok=True)

# Quantification table and QIIME 2 manifest, fetched with the helpers defined above
quant_path = "../data/quant.csv"
download_quantification(task, quant_path)
manifest_path = "../data/manifest.csv"
download_qiime2_manifest(task, manifest_path)

# Metadata is fetched through the gnpsdata FBMN workflow helper rather than
# the local download_metadata wrapper
raw_metadata_path = "../data/unprocessed_metadata.tsv"
workflow_fbmn.download_metadata(task, raw_metadata_path)

# Changing Metadata and Manifest Column name

In [5]:
# Load the raw metadata table and relabel its "filename" column as "#OTU ID"
metadata = pd.read_csv(
    "../data/unprocessed_metadata.tsv", sep="\t", index_col=False
).rename(columns={"filename": "#OTU ID"})

# Write the relabelled table back out as a tab-separated file, without the row index
metadata.to_csv('../data/metadata.tsv', sep="\t", index=False)

# Import Into Qiime2
## Convert .tsv to .biom
! source activate qiime2-2019.4 && biom convert -i ../data/gnps_quant/quant.tsv -o quant.biom --to-hdf5

In [6]:
# Replace the following file names with your own
input_file = '../data/quant.csv'
output_file = '../data/biom_quant.tsv'


def convert_quant_csv_to_tsv(input_file, output_file, marker='Peak area'):
    """Rewrite a quantification CSV as the tab-separated table fed to ``biom convert``.

    The first three input columns are copied through under the headers
    ``#OTU ID``, ``sample_name`` and ``abundance``. Every column whose header
    contains ``marker`` is kept, with ``marker`` and any surrounding whitespace
    removed from its header; without the strip, a header such as
    ``"x.mzML Peak area"`` becomes ``"x.mzML "`` and the sample ID carries a
    trailing space. All other columns are dropped. Cell values are written
    through unchanged, and completely empty rows are skipped.

    NOTE(review): in a GNPS feature quantification export the second and third
    columns are presumably the row m/z and retention time rather than a sample
    name and an abundance - confirm. If so, ``biom convert`` will treat them
    as two additional samples.

    Args:
        input_file: Path of the comma-separated quantification table.
        output_file: Path of the tab-separated file to create or overwrite.
        marker: Header substring that identifies per-sample columns.

    Raises:
        ValueError: If ``input_file`` contains no header row.
        IndexError: If a non-empty data row is shorter than the columns it
            is expected to provide.
    """
    with open(input_file, 'r', newline='') as csv_file:
        reader = csv.reader(csv_file)
        header = next(reader, None)
        if header is None:
            raise ValueError(f"{input_file!r} is empty: no header row found")

        # Positions of the per-sample columns, in their original order
        sample_columns = [pos for pos, name in enumerate(header) if marker in name]
        sample_ids = [header[pos].replace(marker, '').strip() for pos in sample_columns]

        with open(output_file, 'w', newline='') as tsv_file:
            writer = csv.writer(tsv_file, delimiter='\t')
            writer.writerow(['#OTU ID', 'sample_name', 'abundance'] + sample_ids)

            for row in reader:
                if not row:
                    # csv.reader yields [] for blank lines; nothing to convert
                    continue
                leading = [row[0], row[1], row[2]]
                writer.writerow(leading + [row[pos] for pos in sample_columns])


# Inside a notebook kernel __name__ is "__main__", so the conversion runs when
# this cell executes; the guard only keeps the helper importable on its own.
if __name__ == "__main__":
    convert_quant_csv_to_tsv(input_file, output_file)


In [7]:
# Convert the tab-separated table written by the previous cell into an HDF5 BIOM file
! biom convert \
  -i ../data/biom_quant.tsv \
  -o ../data/quant.biom --to-hdf5

In [8]:
# Import the BIOM file as a QIIME 2 FeatureTable[Frequency] artifact (qiime_table.qza)
! qiime tools import \
  --input-path ../data/quant.biom \
  --type 'FeatureTable[Frequency]' \
  --input-format BIOMV210Format \
  --output-path ../data/qiime_table.qza

[32mImported ../data/quant.biom as BIOMV210Format to ../data/qiime_table.qza[0m
[0m

# ANOVA
! source activate qiime2-2019.4 && qiime longitudinal anova \
  --m-metadata-file <path-to-metadata-file.tsv> \
  --p-formula "dependent_variable~independent_variable+independent_variable" \
  --o-visualization <path-to-visualization.qzv>

In [9]:
# ANOVA on the metadata table: the formula puts ATTRIBUTE_Year on the left of "~"
# and ATTRIBUTE_Sample_Area + ATTRIBUTE_Latitude on the right; --p-sstype 'I'
# selects type I sums of squares. The result is saved as ../data/metadata.qzv.
! qiime longitudinal anova \
  --m-metadata-file ../data/metadata.tsv \
  --p-formula "ATTRIBUTE_Year~ATTRIBUTE_Sample_Area+ATTRIBUTE_Latitude" \
  --p-sstype 'I' \
  --o-visualization ../data/metadata.qzv

[32mSaved Visualization to: ../data/metadata.qzv[0m
[0m

# Visualization
QIIME 2 visualizations do not render in headless environments; upload the `.qzv` files to https://view.qiime2.org/ to view them.

In [10]:
# Qiime2 Manipulations Happen after this to mirror the other notebooks

# Bare `qiime` call: it only prints the CLI usage text, which also confirms the CLI is on PATH
! qiime

Usage: [94mqiime[0m [OPTIONS] COMMAND [ARGS]...

  QIIME 2 command-line interface (q2cli)
  --------------------------------------

  To get help with QIIME 2, visit https://qiime2.org.

  To enable tab completion in Bash, run the following command or add it to
  your .bashrc/.bash_profile:

      source tab-qiime

  To enable tab completion in ZSH, run the following commands or add them to
  your .zshrc:

      autoload -Uz compinit && compinit
      autoload bashcompinit && bashcompinit
      source tab-qiime

[1mOptions[0m:
  [94m--version[0m   Show the version and exit.
  [94m--help[0m      Show this message and exit.

[1mCommands[0m:
  [94minfo[0m                Display information about current deployment.
  [94mtools[0m               Tools for working with QIIME 2 files.
  [94mdev[0m                 Utilities for developers and advanced users.
  [94malignment[0m           Plugin for generating and manipulating alignments.
  [94mcomposition[0m         Plugin f

# Principal Coordinate Analysis (PCoA) & Distance Matrix
First create the distance matrix:

! source activate qiime2-2019.4 && qiime diversity beta \
  --i-table <path-to-feature-table.qza> \
  --p-metric canberra_adkins \
  --o-distance-matrix <path-to-distance-matrix.qza>

In [11]:
# Compute the beta-diversity distance matrix of the imported feature table
# using the canberra_adkins metric
! qiime diversity beta \
  --i-table ../data/qiime_table.qza \
  --p-metric canberra_adkins \
  --o-distance-matrix ../data/distance_matrix.qza

[32mSaved DistanceMatrix to: ../data/distance_matrix.qza[0m
[0m

## PCoA
! source activate qiime2-2019.4 && qiime diversity pcoa \
  --i-distance-matrix <path-to-distance-matrix.qza> \
  --p-number-of-dimensions INTEGER \
  --o-pcoa <path-to-artifact.qza>

`--p-number-of-dimensions` is optional and can be omitted.

In [12]:
# Run PCoA on the distance matrix produced by the previous cell
! qiime diversity pcoa \
  --i-distance-matrix ../data/distance_matrix.qza \
  --o-pcoa ../data/pcoa.qza

[32mSaved PCoAResults to: ../data/pcoa.qza[0m
[0m

# Emperor plot
! source activate qiime2-2019.4 && qiime emperor plot \
--i-pcoa <path_to_pcoa.qza> \
--m-metadata-file <path_to_metadata.tsv> \
--o-visualization emperor_qiime2

In [None]:
# Build the Emperor plot from the PCoA result and the renamed metadata table.
# NOTE(review): --p-ignore-missing-samples is presumably needed because some
# sample IDs in the PCoA are absent from metadata.tsv - confirm the IDs match.
! qiime emperor plot \
  --i-pcoa ../data/pcoa.qza \
  --m-metadata-file ../data/metadata.tsv \
  --o-visualization ../data/emperor_plot.qzv \
  --p-ignore-missing-samples

In [None]:
# Visualization

# Load the Emperor plot written by the previous cell; as the last expression
# of the cell, the loaded object is what the notebook displays.
from qiime2 import Visualization
Visualization.load('../data/emperor_plot.qzv')