In [None]:
import networkx as nx
from gnpsdata import taskresult
import os
from gnpsdata import workflow_fbmn
import pandas as pd
import csv
from qiime2 import Visualization

In [None]:
# GNPS task identifier; it is passed as the `task` argument to the download
# helpers called further down in this notebook.
task = "cf6e14abf5604f47b28b467a513d3532"

In [None]:
# Downloading raw data from GNPS
def download_graphml(task, output_file):
    """Save the task's ``gnps_molecular_network_graphml/`` result to ``output_file``.

    Thin wrapper around ``taskresult.download_task_resultfile``.
    """
    result_path = "gnps_molecular_network_graphml/"
    taskresult.download_task_resultfile(task, result_path, output_file)

def get_graphml_network(task):
    """Download the task's molecular network and return it as a NetworkX graph.

    The GraphML result (``gnps_molecular_network_graphml/``) is downloaded via
    ``taskresult.download_task_resultfile`` into a private temporary directory,
    parsed with ``nx.read_graphml`` and the directory is removed afterwards, so
    no ``temp.graphml`` is left behind in (or overwritten in) the working
    directory.

    Parameters
    ----------
    task : str
        GNPS task identifier.

    Returns
    -------
    The graph object returned by ``nx.read_graphml``.
    """
    import tempfile

    with tempfile.TemporaryDirectory() as scratch_dir:
        graphml_path = os.path.join(scratch_dir, "temp.graphml")
        taskresult.download_task_resultfile(task, "gnps_molecular_network_graphml/", graphml_path)

        # The graph is fully parsed before the directory is cleaned up.
        return nx.read_graphml(graphml_path)

def download_quantification(task, output_file):
    """Save the task's ``quantification_table/`` result to ``output_file``.

    Thin wrapper around ``taskresult.download_task_resultfile``.
    """
    result_path = "quantification_table/"
    taskresult.download_task_resultfile(task, result_path, output_file)

def download_metadata(task, output_file):
    """Save the task's ``metadata_merged/`` result to ``output_file``.

    Thin wrapper around ``taskresult.download_task_resultfile``.
    """
    result_path = "metadata_merged/"
    taskresult.download_task_resultfile(task, result_path, output_file)

def download_mgf(task, output_file):
    """Save the task's ``spectra_reformatted/`` result to ``output_file``.

    Thin wrapper around ``taskresult.download_task_resultfile``.
    """
    result_path = "spectra_reformatted/"
    taskresult.download_task_resultfile(task, result_path, output_file)
    
# Qiime2 Data
def download_qiime2(task, output_file):
    """Save the task's ``qiime2_output/qiime2_table.qza`` result to ``output_file``.

    Thin wrapper around ``taskresult.download_task_resultfile``.
    """
    result_path = "qiime2_output/qiime2_table.qza"
    taskresult.download_task_resultfile(task, result_path, output_file)

def download_qiime2_manifest(task, output_file):
    """Save the task's ``qiime2_output/qiime2_manifest.tsv`` result to ``output_file``.

    Thin wrapper around ``taskresult.download_task_resultfile``.
    """
    result_path = "qiime2_output/qiime2_manifest.tsv"
    taskresult.download_task_resultfile(task, result_path, output_file)

def download_qiime2_metadata(task, output_file):
    """Save the task's ``qiime2_output/qiime2_metadata.tsv`` result to ``output_file``.

    Thin wrapper around ``taskresult.download_task_resultfile``.
    """
    result_path = "qiime2_output/qiime2_metadata.tsv"
    taskresult.download_task_resultfile(task, result_path, output_file)

In [None]:
# Fetch the quantification table, the QIIME 2 manifest and the metadata into
# ../data (the directory is created if it does not exist yet).
os.makedirs("../data", exist_ok=True)
for fetch, destination in (
    (download_quantification, "../data/quant.csv"),
    (download_qiime2_manifest, "../data/manifest.csv"),
    (workflow_fbmn.download_metadata, "../data/unprocessed_metadata.tsv"),
):
    fetch(task, destination)

# Renaming the Metadata Identifier Column

In [None]:
# Read the downloaded, tab-separated metadata file.
metadata = pd.read_csv("../data/unprocessed_metadata.tsv", sep="\t", index_col=False)

# Rename the "filename" column to "#OTU ID" (the column is selected by name,
# not by position). errors="raise" turns a missing "filename" column into a
# KeyError here instead of silently writing the table back unchanged.
metadata = metadata.rename(columns={"filename": "#OTU ID"}, errors="raise")

# Write the result back out as a tab-separated file without the pandas index.
metadata.to_csv('../data/metadata.tsv', sep="\t", index=False)

# Import Into QIIME 2
## Convert the quantification .csv to .tsv, then to .biom

In [None]:
# Replace the following file names with your own
input_file = '../data/quant.csv'
output_file = '../data/biom_quant.tsv'

# Reduce the quantification CSV to a tab-separated table made of the first
# column (written under the header "#OTU ID") followed by every column whose
# header contains "Peak area".
# newline='' is what the csv module documentation recommends for files handed
# to csv.reader / csv.writer.
with open(input_file, 'r', newline='') as csv_file:
    reader = csv.reader(csv_file)
    header = next(reader)  # first row holds the column names

    # Positions of the per-sample columns to keep.
    peak_area_indexes = [i for i, name in enumerate(header) if 'Peak area' in name]

    with open(output_file, 'w', newline='') as tsv_file:
        writer = csv.writer(tsv_file, delimiter='\t')

        # Drop the "Peak area" marker and strip the whitespace it leaves
        # behind, so "sample.mzML Peak area" becomes "sample.mzML" rather
        # than "sample.mzML " (with a trailing space).
        # NOTE(review): presumably these names must equal the "filename"
        # values of the metadata table -- confirm.
        sample_names = [header[i].replace('Peak area', '').strip() for i in peak_area_indexes]
        writer.writerow(['#OTU ID'] + sample_names)

        for row in reader:
            if not row:
                # csv.reader yields an empty list for blank lines; skip them
                # instead of failing on row[0].
                continue

            # First column is the row identifier; the selected columns are
            # copied through unchanged.
            writer.writerow([row[0]] + [row[i] for i in peak_area_indexes])


In [None]:
# Shell escape: convert ../data/biom_quant.tsv into ../data/quant.biom,
# requesting HDF5 output (--to-hdf5).
! biom convert \
  -i ../data/biom_quant.tsv \
  -o ../data/quant.biom --to-hdf5

In [None]:
# Shell escape: import ../data/quant.biom (declared as BIOMV210Format) as a
# FeatureTable[Frequency] artifact written to ../data/qiime_table.qza.
! qiime tools import \
  --input-path ../data/quant.biom \
  --type 'FeatureTable[Frequency]' \
  --input-format BIOMV210Format \
  --output-path ../data/qiime_table.qza

# ANOVA

In [None]:
# Shell escape: run `qiime longitudinal anova` on ../data/metadata.tsv with
# ATTRIBUTE_Year on the left-hand side of the formula and ATTRIBUTE_Sample_Area
# plus ATTRIBUTE_Latitude on the right, using --p-sstype 'I'.
# The resulting visualization is written to ../data/metadata.qzv.
! qiime longitudinal anova \
  --m-metadata-file ../data/metadata.tsv \
  --p-formula 'ATTRIBUTE_Year~ATTRIBUTE_Sample_Area+ATTRIBUTE_Latitude' \
  --p-sstype 'I' \
  --o-visualization ../data/metadata.qzv

# Visualization

In [None]:
# Load the ANOVA visualization written by the previous cell; being the cell's
# last expression, the returned object is what the notebook displays.
Visualization.load('../data/metadata.qzv')

# Principal Coordinate Analysis (PCoA) & Distance Matrix

In [None]:
# Shell escape: compute a distance matrix from ../data/qiime_table.qza with
# the canberra_adkins metric and store it in ../data/distance_matrix.qza.
! qiime diversity beta \
  --i-table ../data/qiime_table.qza \
  --p-metric canberra_adkins \
  --o-distance-matrix ../data/distance_matrix.qza

## PCoA

In [None]:
# Shell escape: run `qiime diversity pcoa` on the distance matrix and write
# the result to ../data/pcoa.qza.
! qiime diversity pcoa \
  --i-distance-matrix ../data/distance_matrix.qza \
  --o-pcoa ../data/pcoa.qza

# Emperor plot

In [None]:
# Shell escape: build an Emperor plot from ../data/pcoa.qza and the metadata
# in ../data/metadata.tsv, written to ../data/emperor_plot.qzv.
# NOTE(review): --p-ignore-missing-samples presumably lets the command proceed
# when some samples have no metadata row -- confirm against the q2-emperor docs.
! qiime emperor plot \
  --i-pcoa ../data/pcoa.qza \
  --m-metadata-file ../data/metadata.tsv \
  --o-visualization ../data/emperor_plot.qzv \
  --p-ignore-missing-samples

# Visualization

In [None]:
# Load the Emperor plot written by the previous cell; being the cell's last
# expression, the returned object is what the notebook displays.
Visualization.load("../data/emperor_plot.qzv")