In [1]:
import pandas as pd
import re
import html
import numpy as np

### Load and clean original data

In [None]:
df_act = pd.read_excel('Allosteric_interactions_BRENDA.xlsx', sheet_name='Activators', names=['EC', 'Enz', 'Met', 'Org', 'Mode'], usecols='A,B,C,E,I')
df_inh = pd.read_excel('Allosteric_interactions_BRENDA.xlsx', sheet_name='Inhibitors', names=['EC', 'Enz', 'Met', 'Org', 'Mode'], usecols='A,B,C,E,I')

#Remove metabolites named "additional information" and data from viruses, and limit species name to two names
for ind, row in df_act.iterrows():
    if re.search('virus', row['Org']) or re.search('additional information', row['Met']):
        df_act.drop(index=ind, inplace=True)
    df_act['Org'][ind] = ' '.join(row['Org'].split()[:2])

for ind, row in df_inh.iterrows():
    if re.search('virus', row['Org']) or re.search('additional information', row['Met']):
        df_inh.drop(index=ind, inplace=True)
    df_inh['Org'][ind] = ' '.join(row['Org'].split()[:2])

#Save the cleaned dataframes to csv
df_act.to_csv('activators_clean.csv')
df_inh.to_csv('inhibitors_clean.csv')

In [29]:
df_act = pd.read_excel('Allosteric_interactions_BRENDA.xlsx', sheet_name='Activators', names=['EC', 'Enz', 'Met', 'Org', 'Mode'], usecols='A,B,C,E,I')

In [77]:
activators_no_viruses = df_act[~df_act['Org'].str.contains('virus')]
activators_no_addinf = activators_no_viruses[~activators_no_viruses['Met'].str.contains('additional information')]

activators_no_addinf['Org'] = activators_no_addinf['Org'].apply(lambda x: ' '.join(x.split()[:2]))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  activators_no_addinf['Org'] = activators_no_addinf['Org'].apply(lambda x: ' '.join(x.split()[:2]))


In [78]:
display(activators_no_addinf)

Unnamed: 0,EC,Enz,Met,Org,Mode
0,1.1.1.1,alcohol dehydrogenase,"1,4-dioxane",Sulfolobus acidocaldarius,+
1,1.1.1.1,alcohol dehydrogenase,2-mercaptoethanol,Sulfolobus acidocaldarius,+
2,1.1.1.1,alcohol dehydrogenase,2-propanol,Sulfolobus acidocaldarius,+
3,1.1.1.1,alcohol dehydrogenase,acetonitrile,Sulfolobus acidocaldarius,+
4,1.1.1.1,alcohol dehydrogenase,acetonitrile,Aeropyrum pernix,+
...,...,...,...,...,...
35975,7.6.2.9,ABC-type quaternary amine transporter,glycerol,Lactococcus lactis,+
35980,7.6.2.9,ABC-type quaternary amine transporter,negatively charged lipid,Lactococcus lactis,+
35981,7.6.2.9,ABC-type quaternary amine transporter,phosphate ion-containing buffer,Lactococcus lactis,+
35982,7.6.2.9,ABC-type quaternary amine transporter,phosphatidylethanolamine,Lactococcus lactis,+


### Integrate ChEBI and InChI Ids

In [1]:
#Load chebi and inchi IDs
df_chebi_BRENDA = pd.read_csv('brenda_compounds.tsv', sep='\t', names=['Met', 'Inchi', 'ChEBI'], header=0)

#Integrate chebi and inchi IDs into BRENDA data
df_act_chebi = pd.merge(df_act, df_chebi_BRENDA[['Met', 'ChEBI', 'Inchi']], how="left", on='Met')
df_inh_chebi = pd.merge(df_inh, df_chebi_BRENDA[['Met', 'ChEBI', 'Inchi']], how="left", on='Met')

#Save dataframes as csv
df_act_chebi.to_csv('activators_clean_chebi_inhci.csv')
df_inh_chebi.to_csv('inhibitors_clean_chebi_inchi.csv')

NameError: name 'pd' is not defined

### Remove inorganic compounds

In [4]:
#Load data
df_act = pd.read_csv('activators_clean_chebi_inhci.csv', usecols=[1,2,3,4,5,6,7], header=0).drop_duplicates()
df_inh = pd.read_csv('inhibitors_clean_chebi_inchi.csv', usecols=[1,2,3,4,5,6,7], header=0).drop_duplicates()

org = ['H', 'C', 'O']

#Remove inorganic compounds
for ind, row in df_act.iterrows():
    is_org = all([char in str(row['Inchi'])[5:] for char in org])
    if not is_org:
        df_act.drop(index=ind, inplace=True)

for ind, row in df_inh.iterrows():
    is_org = all([char in str(row['Inchi'])[5:] for char in org])
    if not is_org:
        df_inh.drop(index=ind, inplace=True)

#Save the filtered datafiles
df_act.to_csv('activators_organic.csv')
df_inh.to_csv('inhibitors_organic.csv')

KeyboardInterrupt: 

#### Integrate ChEBI Ids

In [11]:
#Load and clean data from BRENDA
df_act = pd.read_csv('activators_organic.csv', names=['EC', 'Enz', 'Met', 'Org', 'Mode'], usecols=[1,2,3,4,5], header=0)
df_inh = pd.read_csv('inhibitors_organic.csv', names=['EC', 'Enz', 'Met', 'Org', 'Mode'], usecols=[1,2,3,4,5], header=0)

#Load chebi IDs
df_chebi_BRENDA = pd.read_csv('brenda_compounds.tsv', sep='\t', names=['Met', 'Inchi', 'ChEBI'], header=0)

#Integrate chebi IDs into BRENDA data
df_act = pd.merge(df_act, df_chebi_BRENDA[['Met', 'ChEBI']], how="left", on='Met')
df_inh = pd.merge(df_inh, df_chebi_BRENDA[['Met', 'ChEBI']], how="left", on='Met')

#Save dataframes to csv
df_act.to_csv('activators_organic_chebi.csv')
df_inh.to_csv('inhibitors_organic_chebi.csv')

### Isolate intracellular compounds

#### Load and clean BiGG data

In [2]:
df_bigg = pd.read_excel('bigg_metabolites.xlsx')

# Get one database link per row
df_bigg_split = df_bigg.assign(database_links=df_bigg['database_links'].str.split(';')).explode('database_links')

In [3]:
df_bigg_split.to_csv('bigg_metabolites_split.csv')

In [4]:
display(df_bigg_split)

Unnamed: 0,bigg_id,universal_bigg_id,name,model_list,database_links,old_bigg_ids
0,15dap_c,15dap,"1,5-Diaminopentane",iECUMN_1333; iLF82_1304; iETEC_1333; iECSF_132...,KEGG Compound: http://identifiers.org/kegg.com...,15dap; 15dap[c]; 15dap_c
0,15dap_c,15dap,"1,5-Diaminopentane",iECUMN_1333; iLF82_1304; iETEC_1333; iECSF_132...,CHEBI: http://identifiers.org/chebi/CHEBI:13928,15dap; 15dap[c]; 15dap_c
0,15dap_c,15dap,"1,5-Diaminopentane",iECUMN_1333; iLF82_1304; iETEC_1333; iECSF_132...,CHEBI: http://identifiers.org/chebi/CHEBI:18127,15dap; 15dap[c]; 15dap_c
0,15dap_c,15dap,"1,5-Diaminopentane",iECUMN_1333; iLF82_1304; iETEC_1333; iECSF_132...,CHEBI: http://identifiers.org/chebi/CHEBI:22974,15dap; 15dap[c]; 15dap_c
0,15dap_c,15dap,"1,5-Diaminopentane",iECUMN_1333; iLF82_1304; iETEC_1333; iECSF_132...,CHEBI: http://identifiers.org/chebi/CHEBI:3288,15dap; 15dap[c]; 15dap_c
...,...,...,...,...,...,...
5261,23dhb_p,23dhb,"2,3-Dihydroxybenzoate",iYS1720,InChI Key: https://identifiers.org/inchikey/G...,23dhb; 23dhb_p
5261,23dhb_p,23dhb,"2,3-Dihydroxybenzoate",iYS1720,Human Metabolome Database: http://identifiers...,23dhb; 23dhb_p
5261,23dhb_p,23dhb,"2,3-Dihydroxybenzoate",iYS1720,BioCyc: http://identifiers.org/biocyc/META:2-...,23dhb; 23dhb_p
5261,23dhb_p,23dhb,"2,3-Dihydroxybenzoate",iYS1720,MetaNetX (MNX) Chemical: http://identifiers.o...,23dhb; 23dhb_p


In [5]:
# Isolate the chebi id rows from the bigg dataframe
bigg_chebis = df_bigg_split[df_bigg_split['database_links'].str.contains('chebi','CHEBI')]

In [6]:
# Extract the chebi id (ChEBI:12345) from the database links
def extract_chebi(link):
    for i in range(1,7):
        if link[-i].isdigit():
            continue
        else:
            index = i+5
            return link[-index:]

In [7]:
# Make list of the chebi ids
bigg_chebi_list = bigg_chebis['database_links'].apply(lambda x: extract_chebi(x)).tolist()

In [8]:
bigg_chebi_list

['CHEBI:13928',
 'CHEBI:18127',
 'CHEBI:22974',
 'CHEBI:3288',
 'CHEBI:44370',
 'CHEBI:58384',
 'CHEBI:11420',
 'CHEBI:11423',
 'CHEBI:15941',
 'CHEBI:19311',
 'CHEBI:48968',
 'CHEBI:57576',
 'CHEBI:58764',
 'CHEBI:877',
 'CHEBI:10846',
 'CHEBI:15572',
 'CHEBI:170',
 'CHEBI:18521',
 'CHEBI:57417',
 'CHEBI:12428',
 'CHEBI:17455',
 'CHEBI:21467',
 'CHEBI:58154',
 'CHEBI:7084',
 'CHEBI:13192',
 'CHEBI:16026',
 'CHEBI:21428',
 'CHEBI:21429',
 'CHEBI:47031',
 'CHEBI:57609',
 'CHEBI:6341',
 None,
 None,
 'CHEBI:13048',
 'CHEBI:16944',
 'CHEBI:21195',
 'CHEBI:35229',
 'CHEBI:40668',
 'CHEBI:40673',
 'CHEBI:6156',
 'CHEBI:78948',
 'CHEBI:1057',
 'CHEBI:11548',
 'CHEBI:17860',
 'CHEBI:19528',
 'CHEBI:58298',
 'CHEBI:1071',
 'CHEBI:11561',
 'CHEBI:17094',
 'CHEBI:19545',
 'CHEBI:61873',
 'CHEBI:1123',
 'CHEBI:11583',
 'CHEBI:15194',
 'CHEBI:16992',
 'CHEBI:19605',
 'CHEBI:57978',
 'CHEBI:1233',
 'CHEBI:62730',
 'CHEBI:60655',
 'CHEBI:1235',
 'CHEBI:1236',
 'CHEBI:40398',
 'CHEBI:40407',
 'CHEBI:

### Integrate with BRENDA data

In [9]:
# Load BRENDA data
df_act = pd.read_csv('activators_clean_chebi_inhci.csv', header=0, names=['EC','Enz','Met','Org','Mode','ChEBI'], usecols=[1,2,3,4,5,6]).drop_duplicates()
df_inh = pd.read_csv('inhibitors_clean_chebi_inchi.csv', header=0, names=['EC','Enz','Met','Org','Mode','ChEBI'], usecols=[1,2,3,4,5,6]).drop_duplicates()

In [10]:
display(df_act)

Unnamed: 0,EC,Enz,Met,Org,Mode,ChEBI
0,1.1.1.1,alcohol dehydrogenase,"1,4-dioxane",Sulfolobus acidocaldarius,+,CHEBI:47032
1,1.1.1.1,alcohol dehydrogenase,2-mercaptoethanol,Sulfolobus acidocaldarius,+,CHEBI:41218
2,1.1.1.1,alcohol dehydrogenase,2-propanol,Sulfolobus acidocaldarius,+,CHEBI:17824
3,1.1.1.1,alcohol dehydrogenase,acetonitrile,Sulfolobus acidocaldarius,+,CHEBI:38472
4,1.1.1.1,alcohol dehydrogenase,acetonitrile,Aeropyrum pernix,+,CHEBI:38472
...,...,...,...,...,...,...
29738,7.6.2.9,ABC-type quaternary amine transporter,glycerol,Lactococcus lactis,+,CHEBI:17754
29739,7.6.2.9,ABC-type quaternary amine transporter,negatively charged lipid,Lactococcus lactis,+,
29740,7.6.2.9,ABC-type quaternary amine transporter,phosphate ion-containing buffer,Lactococcus lactis,+,
29741,7.6.2.9,ABC-type quaternary amine transporter,phosphatidylethanolamine,Lactococcus lactis,+,-


In [12]:
df_act_intracellular = df_act[df_act['ChEBI'].isin(bigg_chebi_list)]
#Those with NaN or '-' (without CHEBI id) were not included in the intracellular 

In [13]:
df_inh_intracellular = df_inh[df_inh['ChEBI'].isin(bigg_chebi_list)]

In [14]:
df_act_intracellular

Unnamed: 0,EC,Enz,Met,Org,Mode,ChEBI
2,1.1.1.1,alcohol dehydrogenase,2-propanol,Sulfolobus acidocaldarius,+,CHEBI:17824
13,1.1.1.1,alcohol dehydrogenase,ethanol,Saccharomyces cerevisiae,+,CHEBI:16236
17,1.1.1.1,alcohol dehydrogenase,Isopropanol,Saccharomyces cerevisiae,+,CHEBI:17824
26,1.1.1.1,alcohol dehydrogenase,Urea,Thermus sp.,+,CHEBI:16199
29,1.1.1.101,acylglycerone-phosphate reductase,dihydroxyacetone,Saccharomyces cerevisiae,+,CHEBI:16016
...,...,...,...,...,...,...
29723,7.6.2.3,ABC-type glutathione-S-conjugate transporter,glutathione,Homo sapiens,+,CHEBI:16856
29724,7.6.2.3,ABC-type glutathione-S-conjugate transporter,glutathione,Rattus norvegicus,+,CHEBI:16856
29725,7.6.2.3,ABC-type glutathione-S-conjugate transporter,glutathione,Saccharomyces cerevisiae,+,CHEBI:16856
29730,7.6.2.3,ABC-type glutathione-S-conjugate transporter,taurocholate,Leucoraja erinacea,+,CHEBI:28865


In [15]:
df_act_intracellular.to_csv('activators_intracellular.csv')

In [16]:
df_inh_intracellular

Unnamed: 0,EC,Enz,Met,Org,Mode,ChEBI
56,1.1.1.1,alcohol dehydrogenase,2-propanol,Sulfolobus acidocaldarius,-,CHEBI:17824
58,1.1.1.1,alcohol dehydrogenase,2-thioacetate,Equus caballus,-,CHEBI:30066
107,1.1.1.1,alcohol dehydrogenase,6-thioguanine,Equus caballus,-,CHEBI:9555
112,1.1.1.1,alcohol dehydrogenase,acetaldehyde,Saccharomyces cerevisiae,-,CHEBI:15343
113,1.1.1.1,alcohol dehydrogenase,acetaldehyde,Meyerozyma guilliermondii,-,CHEBI:15343
...,...,...,...,...,...,...
260700,7.6.2.9,ABC-type quaternary amine transporter,Ca2+,Lactococcus lactis,-,CHEBI:29108
260701,7.6.2.9,ABC-type quaternary amine transporter,carnitine,Lactococcus lactis,-,CHEBI:3424
260702,7.6.2.9,ABC-type quaternary amine transporter,choline,Aphanothece halophytica,-,CHEBI:15354
260706,7.6.2.9,ABC-type quaternary amine transporter,proline,Lactococcus lactis,-,CHEBI:60039


In [17]:
df_inh_intracellular.to_csv('inhibitors_intracellular.csv')

### Remove inorganic compounds from intracellular interactions

In [2]:
df_act_intracellular = pd.read_csv('activators_intracellular.csv', names=['EC', 'Enz', 'Met', 'Org', 'Mode', 'ChEBI'], usecols=[1,2,3,4,5,6], header=0)
df_inh_intracellular = pd.read_csv('inhibitors_intracellular.csv', names=['EC', 'Enz', 'Met', 'Org', 'Mode', 'ChEBI'], usecols=[1,2,3,4,5,6], header=0)

In [18]:
#Load chebi and inchi IDs
df_inchi_BRENDA = pd.read_csv('brenda_compounds.tsv', sep='\t', names=['Met', 'Inchi', 'ChEBI'], header=0)

#Integrate chebi and inchi IDs into BRENDA data
df_act_inchi = pd.merge(df_act_intracellular, df_inchi_BRENDA[['Met', 'Inchi']], how="left", on='Met')
df_inh_inchi = pd.merge(df_inh_intracellular, df_inchi_BRENDA[['Met', 'Inchi']], how="left", on='Met')

In [19]:
df_act_inchi['Inchi'] = df_act_inchi['Inchi'].str.split('/').str[1]

In [20]:
df_inh_inchi['Inchi'] = df_inh_inchi['Inchi'].str.split('/').str[1]

In [21]:
df_inh_inchi

Unnamed: 0,EC,Enz,Met,Org,Mode,ChEBI,Inchi
0,1.1.1.1,alcohol dehydrogenase,2-propanol,Sulfolobus acidocaldarius,-,CHEBI:17824,C3H8O
1,1.1.1.1,alcohol dehydrogenase,2-thioacetate,Equus caballus,-,CHEBI:30066,C2H4O2S
2,1.1.1.1,alcohol dehydrogenase,6-thioguanine,Equus caballus,-,CHEBI:9555,C5H5N5S
3,1.1.1.1,alcohol dehydrogenase,acetaldehyde,Saccharomyces cerevisiae,-,CHEBI:15343,C2H4O
4,1.1.1.1,alcohol dehydrogenase,acetaldehyde,Meyerozyma guilliermondii,-,CHEBI:15343,C2H4O
...,...,...,...,...,...,...,...
54202,7.6.2.9,ABC-type quaternary amine transporter,Ca2+,Lactococcus lactis,-,CHEBI:29108,Ca
54203,7.6.2.9,ABC-type quaternary amine transporter,carnitine,Lactococcus lactis,-,CHEBI:3424,C7H15NO3
54204,7.6.2.9,ABC-type quaternary amine transporter,choline,Aphanothece halophytica,-,CHEBI:15354,C5H14NO
54205,7.6.2.9,ABC-type quaternary amine transporter,proline,Lactococcus lactis,-,CHEBI:60039,C5H9NO2


In [22]:
import re
from warnings import warn


ELEMENT_RE = re.compile(r'(?P<atom>[A-Z][a-z]?)(?P<coeff>\d*)')


def parse_formula(formula):
    """ Convert compound formula from string to dictionary.

    For example, C6H12O6 (glucose) becomes {C:6, H:12, O:6}.

    Args:
        formula (str): compound formula

    Returns:
        dict: formula as a dictionary
    """
    return {atom: (int(coeff) if coeff else 1)
            for atom, coeff in re.findall(ELEMENT_RE, formula)}



In [23]:
df_act_inchi['formula'] = df_act_inchi['Inchi'].apply(lambda x: parse_formula(x))

In [24]:
df_inh_inchi['formula'] = df_inh_inchi['Inchi'].apply(lambda x: parse_formula(x))

In [50]:
df_inh_inchi.loc[df_inh_inchi['Met'] == 'CN-']

Unnamed: 0,EC,Enz,Met,Org,Mode,ChEBI,Inchi,formula,organic
1589,1.1.1.284,S-(hydroxymethyl)glutathione dehydrogenase,CN-,[Candida] boidinii,-,CHEBI:18407,CHN,"{'C': 1, 'H': 1, 'N': 1}",1
1590,1.1.1.284,S-(hydroxymethyl)glutathione dehydrogenase,CN-,Komagataella pastoris,-,CHEBI:18407,CHN,"{'C': 1, 'H': 1, 'N': 1}",1
2419,1.1.1.41,isocitrate dehydrogenase (NAD+),CN-,Bos taurus,-,CHEBI:18407,CHN,"{'C': 1, 'H': 1, 'N': 1}",1
2420,1.1.1.41,isocitrate dehydrogenase (NAD+),CN-,Pisum sativum,-,CHEBI:18407,CHN,"{'C': 1, 'H': 1, 'N': 1}",1
2421,1.1.1.41,isocitrate dehydrogenase (NAD+),CN-,Saccharomyces cerevisiae,-,CHEBI:18407,CHN,"{'C': 1, 'H': 1, 'N': 1}",1
...,...,...,...,...,...,...,...,...,...
53749,7.1.1.9,cytochrome-c oxidase,CN-,Magnetospirillum magnetotacticum,-,CHEBI:18407,CHN,"{'C': 1, 'H': 1, 'N': 1}",1
53750,7.1.1.9,cytochrome-c oxidase,CN-,Bacillus sp.,-,CHEBI:18407,CHN,"{'C': 1, 'H': 1, 'N': 1}",1
53751,7.1.1.9,cytochrome-c oxidase,CN-,Nitrosomonas europaea,-,CHEBI:18407,CHN,"{'C': 1, 'H': 1, 'N': 1}",1
53752,7.1.1.9,cytochrome-c oxidase,CN-,Rattus norvegicus,-,CHEBI:18407,CHN,"{'C': 1, 'H': 1, 'N': 1}",1


In [25]:
df_act_inchi

Unnamed: 0,EC,Enz,Met,Org,Mode,ChEBI,Inchi,formula
0,1.1.1.1,alcohol dehydrogenase,2-propanol,Sulfolobus acidocaldarius,+,CHEBI:17824,C3H8O,"{'C': 3, 'H': 8, 'O': 1}"
1,1.1.1.1,alcohol dehydrogenase,ethanol,Saccharomyces cerevisiae,+,CHEBI:16236,C2H6O,"{'C': 2, 'H': 6, 'O': 1}"
2,1.1.1.1,alcohol dehydrogenase,Isopropanol,Saccharomyces cerevisiae,+,CHEBI:17824,C3H8O,"{'C': 3, 'H': 8, 'O': 1}"
3,1.1.1.1,alcohol dehydrogenase,Urea,Thermus sp.,+,CHEBI:16199,CH4N2O,"{'C': 1, 'H': 4, 'N': 2, 'O': 1}"
4,1.1.1.101,acylglycerone-phosphate reductase,dihydroxyacetone,Saccharomyces cerevisiae,+,CHEBI:16016,C3H6O3,"{'C': 3, 'H': 6, 'O': 3}"
...,...,...,...,...,...,...,...,...
8718,7.6.2.3,ABC-type glutathione-S-conjugate transporter,glutathione,Homo sapiens,+,CHEBI:16856,C10H17N3O6S,"{'C': 10, 'H': 17, 'N': 3, 'O': 6, 'S': 1}"
8719,7.6.2.3,ABC-type glutathione-S-conjugate transporter,glutathione,Rattus norvegicus,+,CHEBI:16856,C10H17N3O6S,"{'C': 10, 'H': 17, 'N': 3, 'O': 6, 'S': 1}"
8720,7.6.2.3,ABC-type glutathione-S-conjugate transporter,glutathione,Saccharomyces cerevisiae,+,CHEBI:16856,C10H17N3O6S,"{'C': 10, 'H': 17, 'N': 3, 'O': 6, 'S': 1}"
8721,7.6.2.3,ABC-type glutathione-S-conjugate transporter,taurocholate,Leucoraja erinacea,+,CHEBI:28865,C26H45NO7S,"{'C': 26, 'H': 45, 'N': 1, 'O': 7, 'S': 1}"


In [47]:
def is_organic(formula):
    if all(key in formula for key in ('C', 'H', 'O')):
        return 1
    return 0

In [48]:
def is_organic2(formula):
    if all(key in formula for key in ('C', 'H')):
        return 0
    return 1

In [54]:
is_organic({'C': 1, 'H': 1, 'O': 1})

1

In [56]:
df_act_inchi['organic'] = df_act_inchi['formula'].apply(lambda x: is_organic(x))

In [57]:
df_inh_inchi['organic'] = df_inh_inchi['formula'].apply(lambda x: is_organic(x))

In [58]:
df_act_intra_org = df_act_inchi[df_act_inchi.organic != 0]

In [59]:
df_inh_intra_org = df_inh_inchi[df_inh_inchi.organic != 0]

In [60]:
display(df_inh_intra_org)

Unnamed: 0,EC,Enz,Met,Org,Mode,ChEBI,Inchi,formula,organic
0,1.1.1.1,alcohol dehydrogenase,2-propanol,Sulfolobus acidocaldarius,-,CHEBI:17824,C3H8O,"{'C': 3, 'H': 8, 'O': 1}",1
1,1.1.1.1,alcohol dehydrogenase,2-thioacetate,Equus caballus,-,CHEBI:30066,C2H4O2S,"{'C': 2, 'H': 4, 'O': 2, 'S': 1}",1
3,1.1.1.1,alcohol dehydrogenase,acetaldehyde,Saccharomyces cerevisiae,-,CHEBI:15343,C2H4O,"{'C': 2, 'H': 4, 'O': 1}",1
4,1.1.1.1,alcohol dehydrogenase,acetaldehyde,Meyerozyma guilliermondii,-,CHEBI:15343,C2H4O,"{'C': 2, 'H': 4, 'O': 1}",1
5,1.1.1.1,alcohol dehydrogenase,acetaldehyde,Crocus sativus,-,CHEBI:15343,C2H4O,"{'C': 2, 'H': 4, 'O': 1}",1
...,...,...,...,...,...,...,...,...,...
54201,7.6.2.9,ABC-type quaternary amine transporter,Betaine aldehyde,Aphanothece halophytica,-,CHEBI:15710,C5H12NO,"{'C': 5, 'H': 12, 'N': 1, 'O': 1}",1
54203,7.6.2.9,ABC-type quaternary amine transporter,carnitine,Lactococcus lactis,-,CHEBI:3424,C7H15NO3,"{'C': 7, 'H': 15, 'N': 1, 'O': 3}",1
54204,7.6.2.9,ABC-type quaternary amine transporter,choline,Aphanothece halophytica,-,CHEBI:15354,C5H14NO,"{'C': 5, 'H': 14, 'N': 1, 'O': 1}",1
54205,7.6.2.9,ABC-type quaternary amine transporter,proline,Lactococcus lactis,-,CHEBI:60039,C5H9NO2,"{'C': 5, 'H': 9, 'N': 1, 'O': 2}",1


In [36]:
display(df_act_intra_org)

Unnamed: 0,EC,Enz,Met,Org,Mode,ChEBI,Inchi,formula,organic
0,1.1.1.1,alcohol dehydrogenase,2-propanol,Sulfolobus acidocaldarius,+,CHEBI:17824,C3H8O,"{'C': 3, 'H': 8, 'O': 1}",1
1,1.1.1.1,alcohol dehydrogenase,ethanol,Saccharomyces cerevisiae,+,CHEBI:16236,C2H6O,"{'C': 2, 'H': 6, 'O': 1}",1
2,1.1.1.1,alcohol dehydrogenase,Isopropanol,Saccharomyces cerevisiae,+,CHEBI:17824,C3H8O,"{'C': 3, 'H': 8, 'O': 1}",1
3,1.1.1.1,alcohol dehydrogenase,Urea,Thermus sp.,+,CHEBI:16199,CH4N2O,"{'C': 1, 'H': 4, 'N': 2, 'O': 1}",1
4,1.1.1.101,acylglycerone-phosphate reductase,dihydroxyacetone,Saccharomyces cerevisiae,+,CHEBI:16016,C3H6O3,"{'C': 3, 'H': 6, 'O': 3}",1
...,...,...,...,...,...,...,...,...,...
8718,7.6.2.3,ABC-type glutathione-S-conjugate transporter,glutathione,Homo sapiens,+,CHEBI:16856,C10H17N3O6S,"{'C': 10, 'H': 17, 'N': 3, 'O': 6, 'S': 1}",1
8719,7.6.2.3,ABC-type glutathione-S-conjugate transporter,glutathione,Rattus norvegicus,+,CHEBI:16856,C10H17N3O6S,"{'C': 10, 'H': 17, 'N': 3, 'O': 6, 'S': 1}",1
8720,7.6.2.3,ABC-type glutathione-S-conjugate transporter,glutathione,Saccharomyces cerevisiae,+,CHEBI:16856,C10H17N3O6S,"{'C': 10, 'H': 17, 'N': 3, 'O': 6, 'S': 1}",1
8721,7.6.2.3,ABC-type glutathione-S-conjugate transporter,taurocholate,Leucoraja erinacea,+,CHEBI:28865,C26H45NO7S,"{'C': 26, 'H': 45, 'N': 1, 'O': 7, 'S': 1}",1


In [61]:
df_act_intra_org.to_csv('activators_intracellular.csv')

In [62]:
df_inh_intra_org.to_csv('inhibitors_intracellular.csv')