In [1]:
import pandas as pd
import numpy as np
import json
import networkx as nx
from tqdm import tqdm
from compound import Compound
from reaction import Reaction
from graph import Graph
from data import Data

'''Supress warnings'''
import warnings
warnings.filterwarnings('ignore')

# Load the input tables. The three "final" tables use their first column as
# the index; the compounds file is the copy that also contains toxicity.
cpds, rxns, pairs = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        '../GNN_toxic/data/raw/compounds_final.csv',
        'data/reactions_final.csv',
        'data/pairs_final.csv',
    )
)
# The cofactor list is read with the default integer index.
cofactors = pd.read_csv('data/original/cofactors_KEGG.csv')

# Project containers that the rest of the notebook fills in and queries.
data = Data()
graph = Graph(pairs=pairs)

# Create a Compound object for each row in the DataFrame and add it to the data.
# The cofactor IDs are gathered into a set once, up front, so the membership
# test inside the loop is a hash lookup instead of a scan of the whole
# cofactors column for every compound row.
cofactor_entries = set(cofactors['Entry'])

for index, row in cpds.iterrows():
    entry = row['Entry']
    # True when this compound's ID appears in the cofactor table
    is_cofactor = entry in cofactor_entries

    # Positional order: entry, name, formula, molecular weight, SMILES,
    # cofactor flag, toxicity flag.
    compound = Compound(
        entry,
        row['Names'],
        row['Formula'],
        row['mol_weight'],
        row['SMILES'],
        is_cofactor,
        row['toxic'],
    )
    data.add_element('compound', compound)

# Register the reactions: every row of `rxns` becomes one Reaction stored in
# `data` under the 'reaction' kind. The columns are passed positionally in
# the order entry, name, compounds, enzyme (EC number).
for row_label, rxn_row in rxns.iterrows():
    fields = [rxn_row[col] for col in ('Entry', 'Names', 'Compound', 'EC Number')]
    data.add_element('reaction', Reaction(*fields))

# CREATE GRAPH
# Build the graph from the elements registered in `data` and the `pairs`
# table (the same table that was passed to the Graph constructor).
graph.create_graph(data=data, pairs=pairs)

# Annotate the graph after it has been created. Judging by the method names,
# these attach a molecular-weight value and a SMILES-similarity value to the
# edges -- TODO confirm against graph.py. The path search further down asks
# for weight='mol_weight', which is presumably the attribute set here.
graph.calculate_edge_mol_weight(data)
graph.calculate_smiles_similarity(data)

# Graph size recorded from the last run (matches the progress-bar totals):
# nodes: 8591
# edges: 30026


100%|██████████| 8591/8591 [00:12<00:00, 682.45it/s]
100%|██████████| 30026/30026 [00:00<00:00, 301007.37it/s]
100%|██████████| 30026/30026 [01:20<00:00, 373.28it/s]


In [5]:
# KEGG compound ID used as the target of the path searches
butanol_KEGG = 'C06142'

# Source metabolites: short names and KEGG IDs, aligned by position
src_list_names = [
    'acald', 'accoa', 'pep', 'pyr', 'mal__D', 'mal__L',
    'fum', 'succ', 'oaa', 'succoa', 'dhap', 'cit',
]
src_list = [
    'C00084', 'C00024', 'C00074', 'C00022', 'C00497', 'C00149',
    'C00122', 'C00042', 'C00036', 'C00091', 'C00111', 'C00158',
]

# One row per source metabolite: KEGG ID first, then its short name
src = pd.DataFrame({'source': src_list}).assign(source_name=src_list_names)
src.head(3)

Unnamed: 0,source,source_name
0,C00084,acald
1,C00024,accoa
2,C00074,pep


In [12]:
# For every source metabolite, search for a path to the butanol target using
# the 'mol_weight' edge weight, and print the selected path.
for source_id in src['source']:
    path, idx_smi, idx_com = graph.constrained_shortest_path(source_id, butanol_KEGG, weight='mol_weight')

    # When no path exists, the recorded output shows the search printing its
    # own "No path found" notice; the lookup below then raises TypeError
    # (presumably because `path` or `idx_smi` is None -- TODO confirm in
    # graph.py). Only the lookup is guarded, so a TypeError raised anywhere
    # else is no longer silently swallowed.
    try:
        selected_path = path[idx_smi]
    except TypeError:
        continue

    print(selected_path)

['C00084', 'C00024', 'C00136', 'C01412', 'C06142']
['C00024', 'C00136', 'C01412', 'C06142']
***** No path found between C00074 and C06142 *****
['C00022', 'C00024', 'C00136', 'C01412', 'C06142']
***** No path found between C00497 and C06142 *****
['C00149', 'C00024', 'C00136', 'C01412', 'C06142']
['C00122', 'C00149', 'C00024', 'C00136', 'C01412', 'C06142']
['C00042', 'C00149', 'C00024', 'C00136', 'C01412', 'C06142']
['C00036', 'C00083', 'C00332', 'C00136', 'C01412', 'C06142']
['C00091', 'C00332', 'C00136', 'C01412', 'C06142']
***** No path found between C00111 and C06142 *****
['C00158', 'C00024', 'C00136', 'C01412', 'C06142']
