In [1]:
import pandas as pd
import numpy as np
import networkx as nx
import cobra
import matplotlib.pyplot as plt

import sys
sys.path.append("../src/")
import GEMtoGRAPH as gg

In [2]:
# Load the genome-scale metabolic model from a COBRA JSON file in the working directory.
model = cobra.io.load_json_model('redYeast_ST8943_fdp1.json')
# Stoichiometric matrix as a labelled DataFrame. The recorded output below shows
# shape (300, 373); later cells use S.columns as the reaction labels.
S = cobra.util.array.create_stoichiometric_matrix(model, array_type='DataFrame')
S.shape

(300, 373)

# Mass Flow Graph (MFG)

#### Load TFA fluxes

In [3]:
# Load the TFA flux table and keep only its first row (a single flux sample).
# .copy() makes `tfa` an independent frame, so the column edits below never
# operate on a slice of the frame returned by read_csv.
tfa = pd.read_csv('fluxes_for_graph.csv', index_col=0).head(1).copy()

# Columns whose flux is exactly zero in the retained sample.
zero_flux = [col for col in tfa.columns if (tfa[col] == 0).all()]

print('Zero flux reactions:',len(zero_flux))

# Rebind instead of dropping in place so re-running the cell stays predictable.
tfa = tfa.drop(columns=zero_flux)
print("TFA fluxes:", tfa.shape[1])

# For _reverse reactions we should change the sign of the flux to negative
for col in tfa.columns:
    if '_reverse' in col: tfa[col] = -tfa[col]

# Strip the "_reverse_<suffix>" tail so every column carries the plain reaction name.
tfa = tfa.rename(columns={col: col.split("_reverse_")[0] for col in tfa.columns})

flux_row = tfa.iloc[0]
if not flux_row.index.has_duplicates and set(flux_row.index) == set(S.columns):
    # The flux names are exactly the reactions of S: align by name, so the
    # result does not depend on the column order of the CSV file.
    flux_values = flux_row.reindex(S.columns).values
else:
    # Names cannot be matched one-to-one with S.columns; keep the previous
    # positional behaviour (the index assignment below still raises on a
    # length mismatch) but make the assumption visible.
    print('WARNING: TFA flux names do not match S.columns one-to-one; '
          'fluxes are assigned to reactions by position.')
    flux_values = flux_row.values

# One-column frame of fluxes, indexed by the reaction labels of S.
tfa_flux = pd.DataFrame(columns=['fluxes'], data=flux_values)
tfa_flux.index = S.columns

Zero flux reactions: 373
TFA fluxes: 373


### Create Graph

In [4]:
# Build the graph with the project helper gg.MFG from the stoichiometric matrix,
# the model and the per-reaction fluxes. G is the graph used by the cells below.
# NOTE(review): the meaning of M and S_2m is defined in GEMtoGRAPH — confirm there.
M, S_2m, G = gg.MFG(S, model, tfa_flux)

# nodes: 746 
# edges: 6157


In [5]:
# Drop every node of G that has no edges at all, then report the new graph size.
# The list is materialised first because G is modified right afterwards.
isolated_nodes = [node for node in nx.isolates(G)]
G.remove_nodes_from(isolated_nodes)

node_count = G.number_of_nodes()
edge_count = G.number_of_edges()
print("# nodes:", node_count, "\n# edges:", edge_count)

# nodes: 373 
# edges: 6157


## Read ORACLE's data

In [6]:
# Load the three ORACLE tables from the working directory; in each file the
# first CSV column becomes the row index.
sigma = pd.read_csv('saturations.csv', index_col=0)
gamma = pd.read_csv('gamma.csv', index_col=0)
vmax = pd.read_csv('Vmax_matrix.csv', index_col=0)

In [7]:
# Keep only the first row of gamma; later cells read a single value per column.
gamma = gamma.head(1)

In [8]:
# Collect the reaction names whose graph node carries the 'rev' marker,
# i.e. nodes named 'rev?<reaction>'.
rev_rxn = [node.split("?")[1] for node in G.nodes() if node.split("?")[0] == 'rev']

# Rename the matching gamma columns to 'rev?<reaction>' so they line up with the
# node names of G. A set gives constant-time lookups, and a single non-inplace
# rename replaces the repeated in-place renames on the head() slice.
rev_lookup = set(rev_rxn)
gamma = gamma.rename(
    columns={col: 'rev?' + col for col in gamma.columns if col in rev_lookup}
)

In [9]:
# A: node names of the graph; B: column labels of gamma.
listA = list(G.nodes())
listB = gamma.columns

# Order-preserving differences in both directions.
only_in_A = [item for item in listA if item not in listB]
only_in_B = [item for item in listB if item not in listA]

print('In A but not in B:', only_in_A)
print()
print('In B but not in A:', only_in_B)

In A but not in B: ['EX_lac__D_e', 'EX_mal__L_e', 'EX_akg_e', 'EX_2phetoh_e', 'EX_acald_e', 'EX_ac_e', 'EX_gam6p_e', 'EX_co2_e', 'EX_cit_e', 'EX_etoh_e', 'EX_fum_e', 'EX_gly_e', 'EX_gcald_e', 'EX_glx_e', 'EX_id3acald_e', 'EX_ala__L_e', 'EX_asn__L_e', 'EX_asp__L_e', 'EX_cys__L_e', 'EX_glu__L_e', 'EX_gln__L_e', 'EX_phe__L_e', 'EX_ser__L_e', 'EX_trp__L_e', 'EX_tyr__L_e', 'EX_oaa_e', 'EX_pacald_e', 'EX_pyr_e', 'EX_succ_e', 'EX_ind3eth_e', 'EX_h2o_e', 'EX_g6p_e', 'EX_g1p_e', 'EX_2pg_e', 'EX_pser__L_e', 'EX_ppi_e', 'EX_pep_e', 'EX_cbp_e', 'EX_6pgc_e', 'EX_3pg_e', 'EX_cmp_e', 'GROWTH', 'EX_ccm_e', 'EX_pca_e', 'rev?EX_nh4_e', 'rev?EX_glc__D_e', 'rev?EX_h_e', 'rev?EX_fe2_e', 'rev?EX_o2_e', 'rev?EX_pi_e', 'rev?EX_k_e', 'rev?EX_na1_e', 'rev?EX_so4_e', 'rev?EX_cl_e', 'rev?EX_cu2_e', 'rev?EX_mn2_e', 'rev?EX_zn2_e', 'rev?EX_mg2_e', 'rev?EX_ca2_e']

In B but not in A: []


#### Add `gamma` values as Graph node features

In [None]:
# Store the gamma value of each column on the graph node with the same name.
# Columns without a matching node are skipped through an explicit membership
# test, so an unrelated KeyError can no longer be swallowed silently.
for node in gamma.columns:
    if node in G:
        G.nodes[node]['gamma'] = gamma[node].values[0]

In [None]:
# Nodes that did not receive a 'gamma' value. The test looks for the attribute
# itself rather than for an empty attribute dict, so nodes that carry other
# attributes but no gamma are also found.
no_gamma_nodes = [node for node, data in G.nodes(data=True) if 'gamma' not in data]

# Give those nodes an explicit NaN so that every node has a 'gamma' attribute.
for node in no_gamma_nodes: G.nodes[node]['gamma'] = np.nan

## The resulting `NetworkX` graph G

In [None]:
# Report the current number of nodes and edges of G.
print(G.number_of_nodes(), G.number_of_edges())

In [None]:
def plot_degree_dist(G, bins=25):
    """Plot a histogram of the node degrees of ``G``.

    Parameters
    ----------
    G : networkx graph
        Graph whose node degrees are plotted.
    bins : int or sequence, default 25
        Forwarded unchanged to ``plt.hist``; the default keeps the previous
        fixed number of bins.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    # G.degree() yields (node, degree) pairs, one per node.
    degrees = [degree for _, degree in G.degree()]
    plt.hist(degrees, bins=bins)
    plt.title('Node degree Histogram')
    # Axis labels so the figure can be read on its own.
    plt.xlabel('Node degree')
    plt.ylabel('Number of nodes')
    plt.show()

plot_degree_dist(G)

In [None]:
# Degree centrality of every node of G, as a {node: score} mapping.
degree_centrality = nx.degree_centrality(G)

# Rank the (node, score) pairs from the highest to the lowest score.
sorted_degree_centrality = list(degree_centrality.items())
sorted_degree_centrality.sort(key=lambda pair: pair[1], reverse=True)

# Ten highest-ranked nodes.
sorted_degree_centrality[:10]

In [None]:
from operator import itemgetter

def orderDict(x: dict, desc=True):
    """Return the items of ``x`` as a list of ``(key, value)`` pairs sorted by value.

    Parameters
    ----------
    x : dict
        Mapping whose values are mutually comparable.
    desc : bool, default True
        Sort from the largest to the smallest value when true, ascending otherwise.

    Returns
    -------
    list of tuple
        ``(key, value)`` pairs; pairs with equal values keep their original
        relative order.
    """
    pairs = list(x.items())
    pairs.sort(key=lambda pair: pair[1], reverse=desc)
    return pairs

import pandas as pd

# Rank the nodes of G under each centrality measure, highest score first.
indg = orderDict(nx.in_degree_centrality(G))
ccen = orderDict(nx.closeness_centrality(G))
betcen = orderDict(nx.betweenness_centrality(G))
pgrk = orderDict(nx.pagerank(G))

# nx.hits returns two score mappings; they are bound to hubs and autr in that order.
hubs, autr = nx.hits(G)
hubs = orderDict(hubs)
autr = orderDict(autr)

col_names = ['in-degree', 'closeness', 'betweness', 'page rank', 'autr', 'hubs']

# One column per measure; each column lists the node names in ranking order,
# so row i holds the i-th ranked node under every measure.
rankings = [indg, ccen, betcen, pgrk, autr, hubs]
df = pd.DataFrame(
    {
        name: [node for node, _ in ranking]
        for name, ranking in zip(col_names, rankings)
    },
    columns=col_names,
)

print('Top:\n')
display(df.head(10))
print('Bottom:\n')
display(df.tail(10))


## Node2Vec 

In [None]:
# Integer-labelled version of G, used as the input of the Node2Vec fit below.
# The result is stored under a new name; G keeps its reaction-name labels.
G_labels = nx.convert_node_labels_to_integers(G)

In [None]:
from karateclub import Node2Vec

# Perform node embedding using Node2Vec
N2vec_model = Node2Vec(
    walk_number=10,
    walk_length=80,
    p=0.9,
    q=0.1,
    dimensions=12,
)
N2vec_model.fit(G_labels)

# Embedding matrix returned by the fitted model.
N2Vec_embedding = N2vec_model.get_embedding()
print('Embedding array shape (nodes x features):', N2Vec_embedding.shape)

In [None]:
# Show the embedding as a table whose rows are labelled with the node names of G.
# NOTE(review): assumes embedding row i belongs to the i-th node of G.nodes — confirm.
pd.DataFrame(index=list(G.nodes), data=N2Vec_embedding)

## NetworkX to PyTorch Geometric

In [None]:
import torch
from torch_geometric.utils.convert import from_networkx

# Select the GPU when one is available, otherwise the CPU.
# NOTE(review): `device` is not used anywhere in the visible cells.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Convert G into a torch_geometric data object, grouping node and edge attributes.
# NOTE(review): the Python builtin `all` is passed as the attribute selector;
# confirm the installed torch_geometric version accepts it.
# NOTE(review): nodes without a gamma value carry NaN from an earlier cell, so the
# grouped node features presumably contain NaN — confirm this is intended.
pyg_graph = from_networkx(G, group_node_attrs=all, group_edge_attrs=all)
print(pyg_graph)
pyg_graph.num_nodes ,pyg_graph.num_edges

In [None]:
# Peek at the first row of sigma (its column names are parsed a few cells below).
sigma.head(1)

In [None]:
# Peek at the first row of the TFA flux table.
tfa.head(1)

In [None]:
# For every sigma column name, discard the first three "_"-separated tokens and
# keep the remainder (re-joined with "_"); the result is compared with the TFA
# reaction names in a later cell.
rxn = ["_".join(col.split("_")[3:]) for col in sigma.columns]

In [None]:
# Current number of nodes in G.
G.number_of_nodes()

In [None]:
# First row of the TFA flux table.
d = tfa.head(1)

# Per column, the number of entries that are truthy after casting to bool.
# `non_zero_cols` is reused by the next cell.
non_zero_cols = d.astype(bool).sum(axis=0)

# Show only the columns with at least one such entry.
d.loc[:, non_zero_cols != 0]

In [None]:
# Names of the TFA columns selected by `non_zero_cols`.
# NOTE(review): `non_zero_cols` is reassigned from gamma in a later cell, so this
# cell depends on running the notebook in order.
rxn_tfa = tfa.loc[:, non_zero_cols != 0].columns

In [None]:
# TFA reaction names that do not occur among the names parsed from the sigma columns.
(set(rxn_tfa.values) - set(np.unique(rxn)))

In [None]:
# Same inspection as for the TFA fluxes, applied to the first row of gamma.
# This rebinds `d` and `non_zero_cols`.
d = gamma.head(1)

# Per column, the number of entries that are truthy after casting to bool.
# NOTE(review): NaN presumably casts to True here, so NaN columns would be kept — confirm.
non_zero_cols = d.astype(bool).sum(axis=0)

# Show only the columns with at least one such entry.
d.loc[:, non_zero_cols != 0]

In [None]:
# Display the Vmax table loaded from 'Vmax_matrix.csv'.
vmax