In [1]:
import os
import sys

# The notebook is run from the 'notebooks' folder, so the project root ('gfop')
# and its 'utils' folder are resolved relative to the current working directory
# and made importable.
project_path = os.path.abspath('..')
utils_path = os.path.abspath('../utils')
sys.path.extend([project_path, utils_path])

# Project-local imports: the FoodCounts class plus the two helper functions
# used further down in this notebook.
from foodcounts import FoodCounts
from utils.utils import food_counts_to_wide, update_groups_from_metadata

In [2]:
# Third-party library import
import pandas as pd

# Relative path to the sample GNPS network file (TSV) inside the sibling 'data' folder
data_dir = os.path.join('..', 'data')
gnps_network_path = os.path.join(data_dir, 'sample_gnps_vegomn.tsv')

In [3]:
# Build the FoodCounts object from the sample GNPS network file.
# The keyword arguments are gathered first so they are easy to scan and tweak.
# NOTE(review): the exact meaning of sample_types / all_groups / some_groups / levels
# is defined by FoodCounts itself — confirm against its documentation.
food_counts_options = dict(
    gnps_network=gnps_network_path,
    sample_types='simple',
    all_groups=['G1'],
    some_groups=['G4'],
    levels=6,
)
fc = FoodCounts(**food_counts_options)

In [4]:
# Restrict the counts to the 'dairy' food type and display the filtered table.
dairy_food_types = ['dairy']
test = fc.filter_counts(food_types=dairy_food_types)
test

Unnamed: 0,filename,food_type,count,level,group
67284,NIST_POS_Samp_07-03.mzXML,dairy,1247,3,G1
67285,NIST_POS_Samp_07-02.mzXML,dairy,1207,3,G1
67286,NIST_POS_Samp_11-01.mzXML,dairy,1137,3,G1
67287,NIST_POS_Samp_10-03.mzXML,dairy,1101,3,G1
67288,NIST_POS_Samp_16-03.mzXML,dairy,1090,3,G1
67289,NIST_POS_Samp_17-02.mzXML,dairy,1052,3,G1
67290,NIST_POS_Samp_08-02.mzXML,dairy,1256,3,G1
67291,NIST_POS_Samp_13-03.mzXML,dairy,1247,3,G1
67292,NIST_POS_Samp_09-02.mzXML,dairy,1295,3,G1
67293,NIST_POS_Samp_08-03.mzXML,dairy,1230,3,G1


In [5]:
# Convert the filtered dairy counts to wide format and display the result.
dairy_wide = food_counts_to_wide(test)
dairy_wide

Unnamed: 0_level_0,dairy,group
filename,Unnamed: 1_level_1,Unnamed: 2_level_1
NIST_POS_Samp_07-01.mzXML,1457.0,G1
NIST_POS_Samp_07-02.mzXML,1207.0,G1
NIST_POS_Samp_07-03.mzXML,1247.0,G1
NIST_POS_Samp_08-01.mzXML,1351.0,G1
NIST_POS_Samp_08-02.mzXML,1256.0,G1
NIST_POS_Samp_08-03.mzXML,1230.0,G1
NIST_POS_Samp_09-01.mzXML,1124.0,G1
NIST_POS_Samp_09-02.mzXML,1295.0,G1
NIST_POS_Samp_09-03.mzXML,1183.0,G1
NIST_POS_Samp_10-01.mzXML,925.0,G1


In [6]:
# fc.counts contains rows for more than one level, and food_counts_to_wide rejects
# such input with a ValueError. The error is the point of this cell, so it is caught
# and its message printed instead of letting it abort a Restart & Run All.
try:
    all_levels_wide = food_counts_to_wide(fc.counts)
except ValueError as err:
    all_levels_wide = None
    print(f"{type(err).__name__}: {err}")
all_levels_wide

ValueError: Multiple levels found in the data. Please specify a level to convert to wide format.

In [8]:
# Relative path to the sample metadata CSV inside the sibling 'data' folder
metadata_filename = 'veg_omn_metadata.csv'
metadata_path = os.path.join('..', 'data', metadata_filename)

In [11]:
# Update the groups of the filtered dairy counts using the 'new_group' column of the
# metadata file. This calls update_groups_from_metadata, the helper imported in the
# first cell; the previously used name (update_group_with_metadata_column) is never
# imported or defined in this notebook and raises NameError on a fresh kernel.
update_groups_from_metadata(
    food_counts=test,
    metadata_file=metadata_path,
    merge_column='new_group',
)

Unnamed: 0,filename,food_type,count,level,group
0,NIST_POS_Samp_07-03.mzXML,dairy,1247,3,G1
1,NIST_POS_Samp_07-02.mzXML,dairy,1207,3,G1
2,NIST_POS_Samp_11-01.mzXML,dairy,1137,3,G2
3,NIST_POS_Samp_10-03.mzXML,dairy,1101,3,G2
4,NIST_POS_Samp_16-03.mzXML,dairy,1090,3,G2
5,NIST_POS_Samp_17-02.mzXML,dairy,1052,3,G2
6,NIST_POS_Samp_08-02.mzXML,dairy,1256,3,G1
7,NIST_POS_Samp_13-03.mzXML,dairy,1247,3,G1
8,NIST_POS_Samp_09-02.mzXML,dairy,1295,3,G1
9,NIST_POS_Samp_08-03.mzXML,dairy,1230,3,G1


In [12]:
# Same group update as for the dairy subset, applied to the full counts table.
update_groups_from_metadata(
    food_counts=fc.counts,
    metadata_file=metadata_path,
    merge_column='new_group',
)

Unnamed: 0,filename,food_type,count,level,group
0,NIST_POS_Samp_07-03.mzXML,11442.G72441,11,0,G1
1,NIST_POS_Samp_07-02.mzXML,11442.G72441,9,0,G1
2,NIST_POS_Samp_11-01.mzXML,11442.G72441,13,0,G2
3,NIST_POS_Samp_10-03.mzXML,11442.G72441,12,0,G2
4,NIST_POS_Samp_16-03.mzXML,11442.G72441,10,0,G2
...,...,...,...,...,...
92155,NIST_POS_Samp_09-03.mzXML,zuchini blossom,0,6,G1
92156,NIST_POS_Samp_15-02.mzXML,zuchini blossom,0,6,G1
92157,NIST_POS_Samp_10-01.mzXML,zuchini blossom,5,6,G2
92158,NIST_POS_Samp_09-01.mzXML,zuchini blossom,0,6,G1
