In [1]:
import sys

import cobra
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
from karateclub import Node2Vec

sys.path.append("../src/")
import GEMtoGRAPH as gg

In [2]:
# Load the constraint-based model from its JSON export.
model_path = 'redYeast_ST8943_fdp1.json'
model = cobra.io.load_json_model(model_path)

# Stoichiometric matrix as a labelled DataFrame
# (per cobra's convention: rows are metabolites, columns are reactions).
S = cobra.util.array.create_stoichiometric_matrix(
    model,
    array_type='DataFrame',
)

In [3]:
# Sanity check of the stoichiometric matrix dimensions
# (presumably metabolites x reactions — confirm against the model).
S.shape

(300, 373)

# MFG

#### Load TFA fluxes

In [37]:
# Read the flux table and keep only its first row (a single flux sample).
tfa = pd.read_csv('fluxes_for_graph.csv', index_col=0).head(1)

# Columns whose flux is exactly zero in the retained row.
is_zero_column = (tfa == 0).all(axis=0).to_numpy()
zero_flux = tfa.columns[is_zero_column].tolist()

print('Zero flux reactions:',len(zero_flux))

# Discard the zero-flux columns without mutating the frame in place.
tfa = tfa.drop(columns=zero_flux)
print("TFA fluxes:", tfa.shape[1])

# Drop everything from "_reverse_" onwards so each column name is the bare prefix.
tfa = tfa.rename(columns=lambda name: name.split("_reverse_")[0])

# Single-column frame of the remaining flux values, in the column order of `tfa`.
# NOTE(review): the result has a default integer index, so reaction labels are
# not carried along — confirm that the consumer matches fluxes by position.
flux_values = tfa.iloc[0].values
tfa_flux = pd.DataFrame(columns=['fluxes'], data=flux_values)

Zero flux reactions: 373
TFA fluxes: 373


### Create Graph

In [38]:
# Build the graph from the stoichiometric matrix, the model and the flux column
# using the project helper `gg.MFG`. `G` is used as a networkx graph in the
# following cells; `M` is not used again in the cells shown here.
# NOTE(review): the exact meaning of `M` and the expected layout of `tfa_flux`
# are defined in GEMtoGRAPH — confirm there.
M, G = gg.MFG(S, model, tfa_flux)

# nodes: 746 
# edges: 5495


In [39]:
# Prune nodes without any incident edge, then report the remaining graph size.
# The isolates are materialised first because G is modified while removing them.
isolated_nodes = [node for node in nx.isolates(G)]
G.remove_nodes_from(isolated_nodes)

n_nodes = G.number_of_nodes()
n_edges = G.number_of_edges()
print("# nodes:", n_nodes, "\n# edges:", n_edges)

# nodes: 324 
# edges: 5495


## Read ORACLE's data

In [40]:
# Load the three ORACLE tables; the first CSV column becomes the row index of each.
sigma, gamma, vmax = (
    pd.read_csv(csv_name, index_col=0)
    for csv_name in ('saturations.csv', 'gamma.csv', 'Vmax_matrix.csv')
)

In [69]:
# Keep only the first row of `gamma`, so later cells read one value per column.
# Overwrites the full table loaded above; re-running this cell is harmless.
gamma = gamma.head(1)

In [70]:
# A = node labels of the graph, B = column labels of the gamma table.
listA = list(G.nodes())
listB = gamma.columns

# Sets give fast membership tests; the comprehensions still walk the original
# sequences so the printed order is unchanged.
graph_labels = set(listA)
gamma_labels = set(listB)

only_in_graph = [label for label in listA if label not in gamma_labels]
only_in_gamma = [label for label in listB if label not in graph_labels]

print('In A but not in B:', only_in_graph)
print()
print('In B but not in A:', only_in_gamma)

In A but not in B: ['EX_akg_e', 'EX_ac_e', 'EX_co2_e', 'EX_etoh_e', 'EX_glx_e', 'EX_h_e', 'EX_id3acald_e', 'EX_oaa_e', 'EX_pacald_e', 'EX_na1_e', 'EX_ind3eth_e', 'GROWTH', 'EX_ccm_e', 'EX_pca_e']

In B but not in A: ['2PHETOHt', 'DHPTtm', 'ETOHtm', 'GCALDt']


#### Add `gamma` values as Graph node features

In [87]:
# Store the first-row gamma value of every column on the graph node of the same
# name. Columns without a matching node in G are skipped.
for node in gamma.columns:
    first_value = gamma[node].values[0]
    if node in G:
        G.nodes[node]['gamma'] = first_value

## Node2Vec 

In [97]:
# Copy of G whose nodes are relabelled 0..n-1. The defaults are spelled out
# because the embedding table built later pairs row i with the i-th node of
# G.nodes, which relies on the "default" ordering.
G_labels = nx.convert_node_labels_to_integers(
    G,
    first_label=0,
    ordering="default",
)

In [107]:
# Perform node embedding using Node2Vec.
# `Node2Vec` is imported in the notebook's top import cell.
N2vec_model = Node2Vec(
    walk_number=10,   # walks started per node
    walk_length=80,   # steps per walk
    p=0.9,            # return parameter of the biased walk
    q=0.1,            # in-out parameter of the biased walk
    dimensions=12,    # embedding width (second axis of the printed shape)
)
# Fit on the integer-labelled copy of the graph.
N2vec_model.fit(G_labels)
N2Vec_embedding = N2vec_model.get_embedding()
print('Embedding array shape (nodes x features):',N2Vec_embedding.shape )

Embedding array shape (nodes x features): (324, 12)


In [114]:
# Show the embedding with the original node names as row labels.
# NOTE(review): assumes row i of the embedding belongs to integer node i, i.e.
# the i-th node of G.nodes at the time G_labels was created — confirm that G
# was not modified in between.
pd.DataFrame(N2Vec_embedding, index=list(G.nodes))

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
D_LACDcm,0.152673,-1.582578,0.171431,0.870529,0.084798,-1.121653,0.199604,0.360031,1.975400,-0.159981,0.348107,-1.209086
D_LACDm,2.901061,-1.260872,2.773383,0.073585,0.103813,-0.527987,0.763400,-0.236138,1.266611,-0.193936,1.038656,-1.285063
L_LACD2cm,0.499322,-1.201463,0.609645,1.065481,0.501300,-1.012808,0.420052,0.004838,1.982200,0.609042,0.079180,-1.177679
AATA,0.887076,0.906228,-0.064284,1.684413,-0.117390,0.243749,-3.344004,0.178422,1.684238,2.139032,-2.921769,0.477364
DDPAm,-0.189950,-1.867302,1.486501,0.666555,0.499302,-0.132651,1.463656,0.710021,1.161970,1.168907,-1.802099,-1.624686
...,...,...,...,...,...,...,...,...,...,...,...,...
CaCatA,0.129344,0.237201,0.101826,-0.032090,-0.072518,0.226322,0.100276,0.088891,0.507237,0.045725,-0.159921,-0.114528
ccm2tp,0.164597,0.139785,0.102026,-0.026981,-0.299533,0.318383,0.154979,0.165683,0.621883,-0.041730,-0.272013,-0.216990
pca2tp,0.050759,0.100302,0.148977,-0.025947,-0.027465,0.028529,0.116237,0.125884,0.160650,0.103817,-0.092996,-0.095831
EX_ccm_e,0.523369,-0.028051,0.076301,0.480511,-2.649509,3.209574,2.601909,-1.450254,2.772089,-0.978717,-1.154149,-1.873556


In [None]:
# Place the four tables side by side, aligned on their row index.
# NOTE(review): `gamma` and `tfa` were reduced to one row earlier while `sigma`
# and `vmax` were not, and column names may repeat across tables — check that
# this is the intended join.
df = pd.concat((sigma, gamma, vmax, tfa), axis="columns")
df.head(n=1)

In [None]:
# Preview the first row of the saturation table to inspect its column names.
sigma.head(1)

In [None]:
# Preview the flux table (already reduced to one row and to non-zero columns above).
tfa.head(1)

In [None]:
# For every sigma column name, discard the first three underscore-separated
# tokens and re-join the remainder (names with three or fewer tokens give "").
# NOTE(review): presumably the remainder is the reaction id — confirm against
# the column naming of saturations.csv.
rxn = ["_".join(column.split("_")[3:]) for column in sigma.columns]

In [None]:
# Current number of nodes in G (after the isolated nodes were removed above).
G.number_of_nodes()

In [None]:
# First row of the flux table.
d = tfa.head(1)

# Per column: how many entries are truthy (i.e. non-zero) in that row.
non_zero_cols = d.astype(bool).sum(axis="index")

# Display only the columns that hold at least one non-zero entry.
d.loc[:, non_zero_cols.ne(0)]

In [None]:
# Names of the tfa columns selected by the `non_zero_cols` mask.
# NOTE: this relies on `non_zero_cols` having been computed from `tfa` in the
# preceding cell; a later cell reassigns `non_zero_cols` from `gamma`, so this
# cell must run before that one.
rxn_tfa = tfa.loc[:, non_zero_cols != 0].columns

In [None]:
# Flux column names that do not occur among the names derived from sigma's columns.
set(rxn_tfa.values).difference(rxn)

In [None]:
# First row of the gamma table.
# NOTE: this reuses (and overwrites) `d` and `non_zero_cols` from the tfa cell.
d = gamma.head(1)

# Per column: how many entries are truthy (i.e. non-zero) in that row.
non_zero_cols = d.astype(bool).sum(axis="index")

# Display only the columns that hold at least one non-zero entry.
d.loc[:, non_zero_cols.ne(0)]

In [None]:
# Display the Vmax table as loaded from 'Vmax_matrix.csv'.
# The whole frame is passed to the display; consider `vmax.head()` if it is large.
vmax