In [1]:
import numpy as np
import pandas as pd
import pickle

### Load CSV

In [2]:
# Input CSV files, one entry per model; position i in both lists refers to the same model.
models_list = [
    'iMM904_Model_ver2.csv',
    'iYO844_Model.csv',
]
mvecs_list = [
    'iMM904_mvec_ver2.csv',
    'iYO844_mvec.csv',
]

In [3]:
# Read the model table and the mvec table of each of the two models.
model_1, model_2 = (pd.read_csv(models_list[i]) for i in (0, 1))
mvec_1, mvec_2 = (pd.read_csv(mvecs_list[i]) for i in (0, 1))

### Index enzyme

In [4]:
# Distinct enzyme ids of each model, in order of first appearance.
es_1 = model_1['Rule'].unique()     # enzymes 1
es_2 = model_2['BSU'].unique()      # enzymes 2
print(es_1.size, es_2.size)

# All enzymes. pd.unique removes any id that occurs in both models while keeping
# first-appearance order, so every enzyme gets exactly one row (and one integer id)
# in the enzyme index built from es_all. Without it, a shared id would become a
# repeated index label and the per-name `.loc` lookups would stop returning scalars.
es_all = pd.unique(np.concatenate((es_1, es_2), axis=0))
num_e = es_all.size
print(num_e)

430 391
821


In [5]:
# Lookup table: enzyme name (index) -> integer node id 0 .. num_e-1 (column 'index').
index_e = pd.DataFrame(
    {'enzyme_name': es_all, 'index': list(range(num_e))},
    columns=['enzyme_name', 'index'],
).set_index('enzyme_name')
index_e

Unnamed: 0_level_0,index
enzyme_name,Unnamed: 1_level_1
YAL012W,0
YAL022C,1
YAL054C,2
YAL060W,3
YAR015W,4
...,...
BSU40190,816
BSU40320,817
BSU40340,818
BSU40420,819


### Index molecule

In [6]:
# Molecule names are taken from the 'reac_meta_name' column of each model table
# (not from the mvec tables).
ms_1, ms_2 = (model['reac_meta_name'].unique() for model in (model_1, model_2))
print(ms_1.size, ms_2.size)

# Union of both name sets; np.unique sorts the names and keeps each one once.
ms_all = np.unique(np.concatenate([ms_1, ms_2], axis=0))
num_m = ms_all.size
print(num_m)

844 654
1233


In [7]:
# Lookup table: molecule name (index) -> integer node id (column 'index').
# Molecule ids start at num_e so they continue directly after the enzyme ids.
index_m = pd.DataFrame(
    {'mole_name': ms_all, 'index': list(range(num_e, num_e + num_m))},
    columns=['mole_name', 'index'],
).set_index('mole_name')
index_m

Unnamed: 0_level_0,index
mole_name,Unnamed: 1_level_1
10fthf_c,821
10fthf_m,822
12d3k5m_c,823
12dag3p_BS_c,824
12dgr_BS_c,825
...,...
zn2_c,2049
zn2_e,2050
zym_int1_c,2051
zym_int2_c,2052


### Edge: reaction

In [8]:
# Reaction rows of each model, renamed to one shared schema:
# enzyme_name / mole_name / reac_value.
reac_1 = model_1.loc[:, ['Rule', 'reac_meta_name', 'reac_meta_value']].rename(
    columns={'Rule': 'enzyme_name', 'reac_meta_name': 'mole_name', 'reac_meta_value': 'reac_value'}
)
reac_2 = model_2.loc[:, ['BSU', 'reac_meta_name', 'reac_meta_value']].rename(
    columns={'BSU': 'enzyme_name', 'reac_meta_name': 'mole_name', 'reac_meta_value': 'reac_value'}
)

# Each frame keeps its own row labels, so labels repeat across the two models.
reac = pd.concat((reac_1, reac_2), axis=0)

# Get the index of the enzyme and molecule.
# One vectorised label lookup per column replaces a Python-level `.loc` call per row.
# An unknown name still raises KeyError, and `.to_numpy()` makes the assignment
# positional so it does not depend on the repeated row labels.
reac['e_idx'] = index_e.loc[reac['enzyme_name'].to_numpy(), 'index'].to_numpy()
reac['m_idx'] = index_m.loc[reac['mole_name'].to_numpy(), 'index'].to_numpy()
reac

Unnamed: 0,enzyme_name,mole_name,reac_value,e_idx,m_idx
0,YAL012W,nh4_c,1.0,0,1692
1,YAL012W,cyst__L_c,-1.0,0,1246
2,YAL012W,cys__L_c,1.0,0,1242
3,YAL012W,2obut_c,1.0,0,900
4,YAL022C,cytd_e,-1.0,1,1250
...,...,...,...,...,...
1516,BSU40420,dcamp_c,1.0,819,1264
1517,BSU40420,imp_c,-1.0,819,1560
1518,BSU40850,accoa_c,-1.0,820,1050
1519,BSU40850,acmalt_c,1.0,820,1064


In [9]:
# Build the edge list from the reaction table using vectorised column operations
# (the row-wise `apply(axis=1)` versions ran one Python call per row).
# NOTE: this cell rebinds `reac` to the reduced frame, which no longer has
# 'reac_value'; re-running it requires re-running the previous cell first.

# Get edge_type_id: 1 when reac_value > 0, otherwise 0
positive = reac['reac_value'] > 0
reac['edge_type_id'] = positive.astype('int64')
# Get node_id_source: the enzyme for type-1 edges, the molecule for type-0 edges
reac['node_id_source'] = np.where(positive, reac['e_idx'], reac['m_idx'])
# Get node_id_target: the opposite endpoint of node_id_source
reac['node_id_target'] = np.where(positive, reac['m_idx'], reac['e_idx'])
# Get edge_weight: magnitude of reac_value
reac['edge_weight'] = reac['reac_value'].abs()
# Reorganize the dataframe: keep only the edge columns, ordered by type then source
reac = (
    reac.loc[:, ['node_id_source', 'node_id_target', 'edge_type_id', 'edge_weight']]
    .sort_values(by=['edge_type_id', 'node_id_source'])
)
links = reac
links

Unnamed: 0,node_id_source,node_id_target,edge_type_id,edge_weight
421,821,106,0,1.0
425,821,107,0,1.0
199,821,492,0,1.0
203,821,493,0,1.0
441,821,534,0,1.0
...,...,...,...,...
1510,817,2031,1,1.0
1512,818,1439,1,1.0
1513,818,1441,1,1.0
1516,819,1264,1,1.0


### Node: enzyme and molecule

In [10]:
# Node table: one row per enzyme followed by one row per molecule.
# The combined index is named explicitly before it is turned into a column, so the
# result does not depend on pandas falling back to the auto-generated 'level_0'
# name (which only happens because a column called 'index' already exists).
nodes = (
    pd.concat((index_e, index_m), axis=0)
    .rename_axis('node_name')
    .reset_index()
    .rename(columns={'index': 'node_id'})
    .loc[:, ['node_id', 'node_name']]
)
# node_type_id: 0 for names found in es_all (enzymes), 1 otherwise (molecules).
# `isin` does the membership test in one pass instead of scanning es_all per row.
nodes['node_type_id'] = (~nodes['node_name'].isin(es_all)).astype('int64')
nodes

Unnamed: 0,node_id,node_name,node_type_id
0,0,YAL012W,0
1,1,YAL022C,0
2,2,YAL054C,0
3,3,YAL060W,0
4,4,YAR015W,0
...,...,...,...
2049,2049,zn2_c,1
2050,2050,zn2_e,1
2051,2051,zym_int1_c,1
2052,2052,zym_int2_c,1


In [55]:
def conver_feature_to_list(x, first='0', last='127'):
    """Collect a contiguous run of labelled values from one row into a list.

    Parameters
    ----------
    x : pandas.Series
        One row whose index labels include `first` and `last`.
    first, last : label, optional
        Inclusive bounds of the label slice. The defaults ('0' and '127') keep the
        original behaviour of taking the columns named '0' through '127'.

    Returns
    -------
    list
        The values of `x` from label `first` to label `last`, both included.
    """
    # .loc makes the label-based (inclusive) slicing explicit.
    return list(x.loc[first:last])

def reorganize_mvec(mvec):
    """Turn a wide mvec table into a one-column frame of per-row feature lists.

    The first column of `mvec` becomes the index (renamed to 'mole_name'); every
    row is passed to `conver_feature_to_list` and the resulting list is stored in
    the single output column 'feature'. `mvec` itself is not modified.
    """
    name_column = mvec.columns.values.tolist()[0]
    indexed = mvec.set_index(name_column)
    # One list per row, built by the row-level helper.
    indexed['feature'] = indexed.apply(conver_feature_to_list, axis=1)
    feature_only = indexed.loc[:, ['feature']]
    return feature_only.rename_axis('mole_name')

# Build the feature table of each model from its mvec table.
mvec_feature_1, mvec_feature_2 = (reorganize_mvec(mvec) for mvec in (mvec_1, mvec_2))

In [56]:
# Inspect the feature table built from mvec_1 (indexed by 'mole_name').
mvec_feature_1

Unnamed: 0_level_0,feature
mole_name,Unnamed: 1_level_1
urdglyc_c,"[0.04952139, -0.042879358, -0.040000286, 0.059..."
mi145p_n,"[0.08404267, -0.08703269, -0.09115698, 0.12334..."
mi145p_c,"[0.09349185, -0.10191302, -0.1009429, 0.140002..."
mi1456p_n,"[0.08794882, -0.10000939, -0.09175062, 0.11922..."
mi1345p_n,"[0.098794535, -0.11275593, -0.10735026, 0.1402..."
...,...
ind3eth_m,"[0.07928238, -0.102943406, -0.09499293, 0.1285..."
2phetoh_m,"[0.06461795, -0.07423491, -0.07374017, 0.09198..."
ppi_x,"[0.037581164, -0.0392159, -0.046081524, 0.0619..."
ppi_n,"[0.05182495, -0.057094637, -0.046423513, 0.070..."


In [57]:
# Move 'mole_name' from the index into a regular column so the following cells can
# address it as mvec_feature_1['mole_name'].
# The guard makes the cell safe to re-run: an unconditional in-place reset_index
# would insert a stray 'index' column on the second execution.
if 'mole_name' not in mvec_feature_1.columns:
    mvec_feature_1 = mvec_feature_1.reset_index()
mvec_feature_1

Unnamed: 0,mole_name,feature
0,urdglyc_c,"[0.04952139, -0.042879358, -0.040000286, 0.059..."
1,mi145p_n,"[0.08404267, -0.08703269, -0.09115698, 0.12334..."
2,mi145p_c,"[0.09349185, -0.10191302, -0.1009429, 0.140002..."
3,mi1456p_n,"[0.08794882, -0.10000939, -0.09175062, 0.11922..."
4,mi1345p_n,"[0.098794535, -0.11275593, -0.10735026, 0.1402..."
...,...,...
691,ind3eth_m,"[0.07928238, -0.102943406, -0.09499293, 0.1285..."
692,2phetoh_m,"[0.06461795, -0.07423491, -0.07374017, 0.09198..."
693,ppi_x,"[0.037581164, -0.0392159, -0.046081524, 0.0619..."
694,ppi_n,"[0.05182495, -0.057094637, -0.046423513, 0.070..."


In [58]:
def in_2(x):
    """Return True when `x` is one of the index labels of `mvec_feature_2`."""
    names_2 = mvec_feature_2.index
    return x in names_2

# Flag the molecules of model 1 whose name is also an index label of mvec_feature_2.
# Series.isin performs the same membership test as applying in_2 row by row, in one call.
mvec_feature_1['in_2'] = mvec_feature_1['mole_name'].isin(mvec_feature_2.index)

In [60]:
# Rows of model 1 whose molecule also appears in model 2 ('in_2' is already boolean).
mvec_feature_1.loc[mvec_feature_1['in_2']]

Unnamed: 0,mole_name,feature,in_2
19,25aics_c,"[0.095791824, -0.10178878, -0.10371979, 0.1387...",True
20,aicar_c,"[0.08177507, -0.08963004, -0.08735339, 0.12911...",True
21,dcdp_c,"[0.08189033, -0.1071362, -0.09535844, 0.136899...",True
22,fpram_c,"[0.076754905, -0.083748095, -0.082220845, 0.12...",True
23,pphn_c,"[0.07549488, -0.088979244, -0.07476784, 0.1141...",True
...,...,...,...
684,xu5p__D_c,"[0.06001385, -0.07027658, -0.07238399, 0.09575...",True
687,dhap_c,"[0.06260448, -0.07662991, -0.061600506, 0.0960...",True
688,glyc_e,"[0.054583374, -0.06382145, -0.05664242, 0.0882...",True
689,glyc_c,"[0.049524646, -0.07242145, -0.05241161, 0.0784...",True


In [35]:
# Stack the feature tables of both models into one frame indexed by 'mole_name'.
# mvec_feature_1 may already have had its index reset (and an 'in_2' helper column
# added) by the exploratory cells above. Both inputs are therefore normalised first;
# otherwise a top-to-bottom run would concatenate a RangeIndex frame with a
# name-indexed one and produce NaN-filled columns.
mvec_feature = pd.concat(
    [
        frame.set_index('mole_name') if 'mole_name' in frame.columns else frame
        for frame in (mvec_feature_1, mvec_feature_2)
    ],
    axis=0,
).loc[:, ['feature']]
# NOTE(review): molecules present in both models appear twice in this index —
# confirm whether downstream code expects de-duplicated names.
mvec_feature

Unnamed: 0_level_0,feature
mole_name,Unnamed: 1_level_1
urdglyc_c,"[0.04952139, -0.042879358, -0.040000286, 0.059..."
mi145p_n,"[0.08404267, -0.08703269, -0.09115698, 0.12334..."
mi145p_c,"[0.09349185, -0.10191302, -0.1009429, 0.140002..."
mi1456p_n,"[0.08794882, -0.10000939, -0.09175062, 0.11922..."
mi1345p_n,"[0.098794535, -0.11275593, -0.10735026, 0.1402..."
...,...
cd2_c,"[0.013493312, 0.033150654, -0.02846567, 0.0330..."
cd2_e,"[0.018029932, 0.024222009, -0.023714174, 0.042..."
ca2_c,"[0.013637214, 0.03299028, -0.03517258, 0.04593..."
ca2_e,"[0.016641954, 0.029946955, -0.027672498, 0.050..."


### Node Label: km and kcat

In [11]:
# Per-row Km / Kcat values of each model under a shared schema
# (model_2 spells the columns in lower case).
labels_1 = model_1.loc[:, ['Rule', 'Km', 'Kcat']].rename(columns={'Rule': 'enzyme_name'})
labels_2 = model_2.loc[:, ['BSU', 'km', 'kcat']].rename(
    columns={'BSU': 'enzyme_name', 'km': 'Km', 'kcat': 'Kcat'}
)
labels = pd.concat([labels_1, labels_2], axis=0)

In [12]:
def check_nan(x):
    """Return True when at least one of x['Km'] and x['Kcat'] is not null."""
    both_missing = pd.isnull(x['Km']) and pd.isnull(x['Kcat'])
    return not both_missing

# Keep rows where at least one of Km / Kcat is present (the same rule as check_nan,
# evaluated column-wise). `.copy()` gives an independent frame, so adding a column
# below does not raise SettingWithCopyWarning.
labels = labels[labels[['Km', 'Kcat']].notna().any(axis=1)].copy()
# Attach the enzyme's node id with one vectorised label lookup; an unknown enzyme
# name still raises KeyError, and `.to_numpy()` assigns positionally.
labels['node_id'] = index_e.loc[labels['enzyme_name'].to_numpy(), 'index'].to_numpy()

In [13]:
# Display the labelled enzyme rows (enzyme_name, Km, Kcat, node_id).
labels

Unnamed: 0,enzyme_name,Km,Kcat,node_id
1,YAL012W,0.45000,1.41,0
2,YAL012W,0.27500,0.15,0
10,YAL054C,0.13650,,2
11,YAL054C,0.13650,,2
12,YAL054C,1.20000,,2
...,...,...,...,...
1494,BSU39760,0.12000,,812
1495,BSU39760,0.06000,,812
1497,BSU39760,0.06000,1.10,812
1501,BSU39980,0.00428,,813
