In [1]:
import pandas as pd
import networkx as nx
import pickle

In [2]:
# Load the pickled graph. The context manager closes the file handle, which a
# bare open() inside pickle.load() leaves open until garbage collection.
# NOTE: pickle.load can execute arbitrary code - only load files you trust.
with open('yeast_G.pickle', 'rb') as graph_file:
    G = pickle.load(graph_file)

print(f'# Nodes: {G.number_of_nodes()} \n# Edges: {G.number_of_edges()}')

# Nodes: 373 
# Edges: 6157


## Add node features

In [3]:
import cobra

# Load the COBRA model from its JSON file; later cells look reactions up by id in it.
cobra_model = cobra.io.load_json_model('redYeast_ST8943_fdp1.json')

In [4]:
# Store, on every graph node, the number of metabolites of its reaction as 'x'.
# Nodes named 'rev?<reaction id>' are looked up under the part after the '?'.
for node, attrs in G.nodes(data=True):
    rxn_id = node.split("?")[1] if "rev?" in node else node
    reaction = cobra_model.reactions.get_by_id(rxn_id)
    attrs['x'] = len(reaction.metabolites)

## Read ORACLE's data

In [5]:
# Read the three parameter tables (first CSV column becomes the index) and
# keep only the first row of each.
sigma = pd.read_csv('saturations.csv', index_col=0).head(1)
gamma = pd.read_csv('gamma.csv', index_col=0).head(1)
vmax = pd.read_csv('Vmax_matrix.csv', index_col=0).head(1)

In [6]:
# Collect the reaction ids of the graph nodes marked as reversible, i.e. nodes
# named 'rev?<reaction id>'.
rev_rxn = [node.split("?")[1] for node in G.nodes() if node.split("?")[0] == 'rev']

# Prefix the matching gamma columns with 'rev?' so they line up with the node
# names. One rename with a mapping replaces the per-column in-place renames,
# and the set makes each membership test O(1).
rev_rxn_ids = set(rev_rxn)
gamma = gamma.rename(
    columns={col: 'rev?' + col for col in gamma.columns if col in rev_rxn_ids}
)

In [7]:
# Compare the graph's node names with gamma's column names in both directions.
listA = list(G.nodes())
listB = gamma.columns.values

only_in_graph = [item for item in listA if item not in listB]
only_in_gamma = [item for item in listB if item not in listA]

print('In Graph but not in gamma:', only_in_graph)
print()
print('In gamma but not in Graph:', only_in_gamma)

In Graph but not in gamma: ['EX_lac__D_e', 'EX_mal__L_e', 'EX_akg_e', 'EX_2phetoh_e', 'EX_acald_e', 'EX_ac_e', 'EX_gam6p_e', 'EX_co2_e', 'EX_cit_e', 'EX_etoh_e', 'EX_fum_e', 'EX_gly_e', 'EX_gcald_e', 'EX_glx_e', 'EX_id3acald_e', 'EX_ala__L_e', 'EX_asn__L_e', 'EX_asp__L_e', 'EX_cys__L_e', 'EX_glu__L_e', 'EX_gln__L_e', 'EX_phe__L_e', 'EX_ser__L_e', 'EX_trp__L_e', 'EX_tyr__L_e', 'EX_oaa_e', 'EX_pacald_e', 'EX_pyr_e', 'EX_succ_e', 'EX_ind3eth_e', 'EX_h2o_e', 'EX_g6p_e', 'EX_g1p_e', 'EX_2pg_e', 'EX_pser__L_e', 'EX_ppi_e', 'EX_pep_e', 'EX_cbp_e', 'EX_6pgc_e', 'EX_3pg_e', 'EX_cmp_e', 'GROWTH', 'EX_ccm_e', 'EX_pca_e', 'rev?EX_nh4_e', 'rev?EX_glc__D_e', 'rev?EX_h_e', 'rev?EX_fe2_e', 'rev?EX_o2_e', 'rev?EX_pi_e', 'rev?EX_k_e', 'rev?EX_na1_e', 'rev?EX_so4_e', 'rev?EX_cl_e', 'rev?EX_cu2_e', 'rev?EX_mn2_e', 'rev?EX_zn2_e', 'rev?EX_mg2_e', 'rev?EX_ca2_e']

In gamma but not in Graph: []


In [8]:
# Remove every graph node that has no column in gamma, then report the new size.
# Relies on listA (node names) and listB (gamma column names) from the previous cell.
nodes_without_gamma = [item for item in listA if item not in listB]
G.remove_nodes_from(nodes_without_gamma)
print(f'# Nodes: {G.number_of_nodes()} \n# Edges: {G.number_of_edges()}')

# Nodes: 314 
# Edges: 6090


In [9]:
# Reactions whose gamma exceeds 1 in any row of the gamma table
rxn_bad_gamma = gamma.columns[(gamma > 1).any()].tolist()
gamma.drop(columns=rxn_bad_gamma, inplace=True)
print(gamma.shape)

# Drop the graph nodes of those reactions (gamma > 1)
G.remove_nodes_from(rxn_bad_gamma)
print(f'# Nodes: {G.number_of_nodes()} \n# Edges: {G.number_of_edges()}')

(1, 243)
# Nodes: 243 
# Edges: 3793


#### Add `gamma` values as Graph node targets (`y`)

In [14]:
# Attach each reaction's gamma value (first row of the table) to its graph
# node under the 'y' key.
for node in gamma.columns:
    # Columns with no matching node are skipped explicitly instead of
    # swallowing the KeyError raised by G.nodes[node].
    if node in G:
        G.nodes[node]['y'] = gamma[node].values[0]

# Nodes that received no gamma get NaN so that every node carries a 'y' key.
# The test must look for the 'y' key itself: nodes already hold other
# attributes (such as 'x'), so their attribute dict is never empty.
no_gamma_nodes = [node for node, attrs in G.nodes(data=True) if 'y' not in attrs]

# float('nan') avoids depending on numpy, which this notebook never imports.
for node in no_gamma_nodes:
    G.nodes[node]['y'] = float('nan')

#### Maybe the Graph is ready after all...

## Networkx to Torch Geometric

In [16]:
import torch
import torch.nn as nn
from torch_geometric.utils.convert import from_networkx

# Use the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Convert the NetworkX graph into a torch_geometric data object.
# NOTE(review): `all` is the Python builtin passed as a sentinel, not the
# string 'all' - confirm this matches the installed torch_geometric version.
data = from_networkx(G, group_edge_attrs=all)

print(data, end='\n\n')
print(data.num_nodes, data.num_edges)

Data(x=[243], edge_index=[2, 3793], y=[243], edge_attr=[3793, 1])

243 3793


In [34]:
# Build boolean node masks for a contiguous train / validation / test split:
# the first train_size nodes train, the next val_size validate, the rest test.
# NOTE(review): train_size and val_size are not defined in any cell visible in
# this notebook - they must be set before this cell runs on a fresh kernel.
num_nodes = len(data.x)
val_end = train_size + val_size

train_mask = torch.zeros(num_nodes, dtype=torch.bool)
train_mask[:train_size] = True

val_mask = torch.zeros(num_nodes, dtype=torch.bool)
val_mask[train_size:val_end] = True

test_mask = torch.zeros(num_nodes, dtype=torch.bool)
test_mask[val_end:] = True

data.train_mask, data.val_mask, data.test_mask = train_mask, val_mask, test_mask

## Create a GNN

In [None]:
import torch.nn.functional as F
from torch.optim import Optimizer
from torch_geometric.nn import GCNConv
from torch import Tensor

class GCN(torch.nn.Module):
    """Two-layer graph convolutional network returning per-node log-probabilities.

    Layer order: dropout -> GCNConv -> ReLU -> dropout -> GCNConv -> log-softmax.

    Args:
        num_node_features: Input channels of the first convolution.
        num_classes: Output channels of the second convolution.
        hidden_dim: Output channels of the first convolution.
        dropout_rate: Probability given to both dropout layers.
    """

    def __init__(self, num_node_features: int, num_classes: int,
                 hidden_dim: int = 16, dropout_rate: float = .5) -> None:
        super().__init__()
        self.dropout1 = torch.nn.Dropout(dropout_rate)
        self.conv1 = GCNConv(num_node_features, hidden_dim)
        self.relu = torch.nn.ReLU(inplace=True)
        self.dropout2 = torch.nn.Dropout(dropout_rate)
        self.conv2 = GCNConv(hidden_dim, num_classes)

    def forward(self, x: Tensor, edge_index: Tensor) -> torch.Tensor:
        """Run both convolutions and return log-softmax over dim 1.

        Args:
            x: Node feature tensor; must be 2-D because the softmax is taken
                over dim 1. (Presumably shaped (num_nodes, num_node_features)
                - TODO confirm against the data passed in.)
            edge_index: Edge index tensor forwarded to both convolutions.

        Returns:
            Tensor of log-probabilities, normalised along dim 1.
        """
        x = self.dropout1(x)
        x = self.conv1(x, edge_index)
        x = self.relu(x)
        # Use the second dropout layer here; the original reused dropout1 and
        # left dropout2 defined but never applied.
        x = self.dropout2(x)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)