Imports

In [None]:
import os
import pandas as pd
import numpy as np

from micom.media import minimal_medium

from micom import Community

# Root directory for model files on the author's machine.
# NOTE(review): absolute, machine-specific path. A separately named lowercase
# `basepath` (the `xml` subdirectory) is defined in the taxonomy cell; do not
# confuse the two.
basePath = "/home/arog/models"

Dataset for Micom

In [None]:
# Build `merged_df`: the curated model list joined with the GFKB abundance
# statistics, plus a relative-abundance column normalised over the joined rows.
#
# NOTE(review): both paths are absolute and machine-specific; replace them with
# your own locations before running.
df_ids = pd.read_csv("/home/arog/Documents/GitHub/HGMB_Project/datasets/treated/models_final.csv")         # Dataset containing "Unique ID" column
df_data = pd.read_csv("/home/arog/Documents/GitHub/HGMB_Project/datasets/GFKB/GutFeelingKnowledgeBase-v4-Epilepsy_Data.csv")  # Dataset to filter based on "Unique ID"

# The GFKB export carries its real column names in the first data row:
# promote that row to the header, then discard it.
df_data.columns = df_data.iloc[0]
df_data = df_data[1:].reset_index(drop=True)

# Join keys: the same identifier is spelled differently in the two tables.
id_column = "Unique ID"
id2_column = "uniqueID"

# Inner join keeps only the models that have a row in the GFKB table.
# NOTE(review): the join is an exact string match, so an ID with stray
# whitespace (e.g. "UP000008199_ AP009240" in models_final.csv) only matches if
# the GFKB key has the same spacing - confirm that such rows are meant to drop.
merged_df = (
    df_ids.merge(df_data, left_on=id_column, right_on=id2_column, how="inner")
    # Both tables have a "UP name" column; keep the one from df_ids.
    .rename(columns={"UP name_x": "UP name"})
    .drop(columns=["UP name_y"])
    # Positional cut: keep the first 20 columns (up to and including
    # "100%_Control_Before_Diet" with the current column order of both CSVs).
    .iloc[:, :20]
    .copy()
)

# After the header promotion the GFKB columns hold strings; convert the mean
# abundance to numbers (unparseable strings become NaN).
abundance_column = "Mean_Control_Before_Diet"
merged_df[abundance_column] = pd.to_numeric(merged_df[abundance_column], errors="coerce")

# Normalise so that the abundances of the joined models sum to 1. `sum()` skips
# NaN, so an empty join or an all-NaN column gives a total of 0; fail loudly
# instead of silently writing NaN/inf abundances downstream.
total_abundance = merged_df[abundance_column].sum()
if not np.isfinite(total_abundance) or total_abundance == 0:
    raise ValueError(
        f"Cannot compute relative abundances: the sum of '{abundance_column}' "
        f"over {len(merged_df)} merged rows is {total_abundance!r}."
    )
merged_df["Relative_Abundance_Control_Before_Diet"] = (
    merged_df[abundance_column] / total_abundance
)


merged_df.columns

Index(['Unique ID', 'UP name', 'NCBI taxonomy name',
       'Assembly ID (UP matched)', 'Present in GFKB v3 (Y/N)',
       'Present in GFKB v5 (Y/N)', 'BiGG Model Link', 'BiGG ID', 'uniqueID',
       'Lineage( full )', 'Median_Control_Before_Diet',
       'Mean_Control_Before_Diet', 'Standard_Deviation_Control_Before_Diet',
       'Interquartile_Range_Control_Before_Diet', 'Range_Control_Before_Diet',
       '0%_Control_Before_Diet', '25%_Control_Before_Diet',
       '50%_Control_Before_Diet', '75%_Control_Before_Diet',
       '100%_Control_Before_Diet', 'Relative_Abundance_Control_Before_Diet'],
      dtype='object')

In [None]:
# Display the model-ID table loaded from models_final.csv for a visual check.
df_ids

Unnamed: 0,Unique ID,UP name,NCBI taxonomy name,Assembly ID (UP matched),Present in GFKB v3 (Y/N),Present in GFKB v5 (Y/N),BiGG Model Link,BiGG ID
0,UP000002032_CP010816,Escherichia coli (strain B / BL21-DE3),Escherichia coli,GCA_000023665.1,Y,N,http://bigg.ucsd.edu/models/iEC1356_Bl21DE3,iEC1356_Bl21DE3
1,UP000008199_ AP009240,Escherichia coli (strain SE11),Escherichia coli SE11,GCA_000010385.1,N,Y,http://bigg.ucsd.edu/models/iECSE_1348,iECSE_1348
2,UP000007011_CP000970,Escherichia coli (strain SMS-3-5 / SECEC),Escherichia coli SMS-3-5,GCA_000019645.1,Y,N,http://bigg.ucsd.edu/models/iEcSMS35_1347,iEcSMS35_1347
3,UP000001952_CP000243,Escherichia coli (strain UTI89 / UPEC),Escherichia coli UTI89,GCA_000013265.1,Y,N,http://bigg.ucsd.edu/models/iUTI89_1310,iUTI89_1310
4,UP000007097_CP007394,Escherichia coli O17:K52:H18 (strain UMN026 / ...,Escherichia coli,GCA_000026325.2,Y,N,http://bigg.ucsd.edu/models/iECUMN_1333,iECUMN_1333
5,UP000032727_AP009378,Escherichia coli O25b:H4-ST131,Escherichia coli SE15,GCA_000285655.3,Y,N,http://bigg.ucsd.edu/models/iECSF_1327,iECSF_1327
6,UP000001410_AE014075,Escherichia coli O6:H1 (strain CFT073 / ATCC 7...,Escherichia coli CFT073,GCA_000007445.1,Y,N,http://bigg.ucsd.edu/models/ic_1306,ic_1306
7,UP000006877_CP010371,Escherichia coli O78:H11 (strain H10407 / ETEC),Escherichia coli,GCA_000210475.1,Y,N,http://bigg.ucsd.edu/models/iETEC_1333,iETEC_1333
8,UP000008614_CU651637,Escherichia coli O83:H1 (strain NRG 857C / AIEC),Escherichia coli LF82,GCA_000183345.1,Y,N,http://bigg.ucsd.edu/models/iNRG857_1313,iNRG857_1313
9,UP000008464_CP002729,Escherichia coli UMNK88,Escherichia coli UMNK88,GCA_000212715.2,Y,N,http://bigg.ucsd.edu/models/iUMNK88_1353,iUMNK88_1353


In [None]:
# Write the taxonomy table (id, file, abundance) that the MICOM cell reads back.
#
# Directory holding the model `.xml` files. Derived from the shared `basePath`
# root (defined in the imports cell) rather than repeating the literal, so the
# two near-identically named variables cannot drift apart.
basepath = f"{basePath}/xml"

# One entry per merged model: its BiGG ID, the path of its `.xml` file (named
# after the BiGG ID), and its relative abundance.
bigg_ids = list(merged_df["BiGG ID"])
taxonomy = {
    "id": bigg_ids,
    "file": [f"{basepath}/{model_id}.xml" for model_id in bigg_ids],
    "abundance": list(merged_df["Relative_Abundance_Control_Before_Diet"]),
}

# Persist without the index so the CSV has exactly the three columns above.
# NOTE(review): the output path is absolute and machine-specific.
taxonomy_df = pd.DataFrame(taxonomy)
taxonomy_df.to_csv("/home/arog/Documents/GitHub/HGMB_Project/datasets/taxonomy.csv", index=False)

Micom Simulation

In [None]:
# Read the taxonomy table back from the CSV written in the dataset section and
# show it; from here on `taxonomy` is a DataFrame with id, file and abundance.
taxonomy = pd.read_csv(
    "/home/arog/Documents/GitHub/HGMB_Project/datasets/taxonomy.csv",
    sep=",",
)
taxonomy

Unnamed: 0,id,file,abundance
0,iEC1356_Bl21DE3,/home/arog/models/xml/iEC1356_Bl21DE3.xml,0.025104
1,iEcSMS35_1347,/home/arog/models/xml/iEcSMS35_1347.xml,0.543917
2,iUTI89_1310,/home/arog/models/xml/iUTI89_1310.xml,0.043361
3,iECUMN_1333,/home/arog/models/xml/iECUMN_1333.xml,0.171923
4,iECSF_1327,/home/arog/models/xml/iECSF_1327.xml,0.072649
5,ic_1306,/home/arog/models/xml/ic_1306.xml,0.023221
6,iETEC_1333,/home/arog/models/xml/iETEC_1333.xml,0.057054
7,iNRG857_1313,/home/arog/models/xml/iNRG857_1313.xml,0.047165
8,iUMNK88_1353,/home/arog/models/xml/iUMNK88_1353.xml,0.015606


In [None]:
# Build the MICOM community from the taxonomy table loaded above
# (columns: id, file, abundance).
# NOTE(review): presumably this loads every model listed under `file`, so the
# cell may take a while - confirm.
com = Community(taxonomy)

Output()