1. Setup and Imports <a name="1"></a>

First, set up the environment by importing necessary libraries and adjusting the system path to include the module directory.

In [1]:
import os
import sys
import pandas as pd

# Make the parent directory (the 'gfop' module root) importable from this notebook
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

# Import the FoodFlows class and the Sankey visualization helper
from foodflows import FoodFlows
from visualization import visualize_sankey

2. Loading GNPS Network Data <a name="2"></a>

Load a sample GNPS network file for demonstration purposes.

In [2]:
# Build the location of the bundled sample GNPS network table
gnps_network_path = os.path.join(module_path, "data", "sample_gnps_vegomn.tsv")
print(gnps_network_path)

# Stop early with an explicit error when the sample file is absent;
# otherwise parse the tab-separated table into a DataFrame
if os.path.exists(gnps_network_path):
    gnps_network_df = pd.read_csv(gnps_network_path, sep="\t")
else:
    raise FileNotFoundError(f"GNPS network file not found at {gnps_network_path}")

# Preview the first rows of the table
print("Sample GNPS Network Data:")
gnps_network_df.head()

c:\Users\amca2\Documents\GitHub\gnps_rdd\gfop\data\sample_gnps_vegomn.tsv
Sample GNPS Network Data:


Unnamed: 0,AllGroups,DefaultGroups,EvenOdd,G1,G2,G3,G4,G5,G6,GNPSLinkout_Cluster,...,SpectrumID,UniqueFileSources,UniqueFileSourcesCount,cluster index,componentindex,number of spectra,parent mass,precursor charge,precursor mass,sum(precursor intensity)
0,,G1,0,2,0,0,0,0,0,https://gnps.ucsd.edu//ProteoSAFe/result.jsp?t...,...,,NIST_POS_Samp_07-03.mzXML|NIST_POS_Samp_07-02....,2,2,-1,2,301.198,3,101.066,873428.0
1,,"G1,G4",0,56,0,0,26,0,0,https://gnps.ucsd.edu//ProteoSAFe/result.jsp?t...,...,,NIST_POS_Samp_11-01.mzXML|NIST_POS_Samp_10-03....,36,3,-1,82,203.061,2,102.03,285315000.0
2,,"G1,G4",0,54,0,0,13,0,0,https://gnps.ucsd.edu//ProteoSAFe/result.jsp?t...,...,,NIST_POS_Samp_11-01.mzXML|NIST_POS_Samp_10-03....,27,8,467,67,219.033,2,110.016,97144900.0
3,,G4,0,0,0,0,11,0,0,https://gnps.ucsd.edu//ProteoSAFe/result.jsp?t...,...,,G000073461_170522182039.mzXML|G000073462_17052...,3,9,467,11,221.071,2,111.035,45839100.0
4,,"G1,G4",0,1,0,0,25,0,0,https://gnps.ucsd.edu//ProteoSAFe/result.jsp?t...,...,,G86787_1x_RG10_01_23372.mzXML|RB5_RB5_01_49070...,26,10,-1,26,904.661,8,113.958,1133780.0


3. Initializing the FoodFlows Class <a name="3"></a>

Initialize the FoodFlows class by providing the GNPS network file path, the sample types, the group names, and the maximum hierarchy level.

In [3]:
# Build the FoodFlows object from the sample GNPS network file.
# NOTE(review): going by the section text, the positional arguments are
# presumably, in order: network file path, sample types, group names, and
# max hierarchy level -- confirm against the FoodFlows signature.
food_flows = FoodFlows(gnps_network_path, "simple", ["G1", "G4"], 4)

In [4]:
# Preview the first rows of the processes table; the captured output lists
# process ids together with a `level` column.
food_flows.processes.head()

Unnamed: 0_level_0,level
id,Unnamed: 1_level_1
algae_1,1
animal_1,1
plant_1,1
algae_2,2
animal_2,2


In [5]:
# Preview the first rows of the flows table; the captured output shows
# `source`, `target`, `value`, and `type` columns.
food_flows.flows.head()

Unnamed: 0,source,target,value,type
0,algae_1,algae_2,196,algae_2
1,animal_1,animal_2,7576,animal_2
2,animal_1,invertebrate_2,130,invertebrate_2
3,plant_1,fruit_2,14313,fruit_2
4,plant_1,vegetable/herb_2,5023,vegetable/herb_2


4. Visualize Sankey Diagram <a name="4"></a>

Pass the `food_flows` object to `visualize_sankey` to render the Sankey diagram.

In [6]:
# Path to the sample type hierarchy CSV in the module's data folder; it is
# handed to visualize_sankey in the next cell (presumably as the color mapping
# file -- confirm against the visualization module).
color_file_path = os.path.join(module_path, "data", "sample_type_hierarchy.csv")

In [7]:
# Render the Sankey diagram from the FoodFlows object and the CSV path above.
# NOTE(review): the captured output echoes a `pd.read_csv(..., sep="\;")` line
# from the visualization module; this looks like an invalid-escape-sequence
# warning for "\;" -- confirm and fix it in that module (e.g. use sep=";").
visualize_sankey(food_flows, color_file_path)

  color_df = pd.read_csv(color_mapping_file, sep="\;")
