In [250]:
# import data
import pandas as pd
df = pd.read_csv('ccm.csv')
df

Unnamed: 0,From,To,Enzyme
0,Glucose [c],Glucose-6-phosphate [c],Enzyme1
1,Glucose-6-phosphate [c],Fructose-6-phosphate [c],Enzyme2
2,Fructose-6-phosphate [c],Glucose-6-phosphate [c],Enzyme2
3,Fructose-6-phosphate [c],Fructose-1-6-phosphate [c],Enzyme3
4,Fructose-1-6-phosphate [c],Dihydroxyacetone phosphate [c],Enzyme4
...,...,...,...
87,3-Phosphoglycerate [c],Cysteine,PseudoEnzymes
88,3-Phosphoglycerate [c],Glycine,PseudoEnzymes
89,2-Oxoglutarate [m],Glutamine,PseudoEnzymes
90,2-Oxoglutarate [m],Glutamate,PseudoEnzymes


In [251]:
metabolites_to_enzymes = {}

for i in range(len(df)):

    from_metabolite = df.iloc[i]['From']
    to_metabolite = df.iloc[i]['To']
    enzyme = df.iloc[i]['Enzyme']
    
    if from_metabolite not in metabolites_to_enzymes:
        metabolites_to_enzymes[from_metabolite] = [(enzyme, to_metabolite)]
    else:
        metabolites_to_enzymes[from_metabolite].append((enzyme, to_metabolite))

""" print(metabolites_to_enzymes) """

""" for metabolite in metabolites_to_enzymes:
    print(metabolite, metabolites_to_enzymes[metabolite]) """

enzymes = []
for metabolite in metabolites_to_enzymes:
    for enzyme in metabolites_to_enzymes[metabolite]:
        enzymes.append(enzyme[0])
enzymes = list(set(enzymes))
print(enzymes)

# endpoints are "to" elements in the dataframe without [...] in the end of the string
endpoints = []
for i in range(len(df)):
    if df.iloc[i]['To'][-1:] != ']':
        endpoints.append(df.iloc[i]['To'])
endpoints = list(set(endpoints))
print(endpoints)


['Enzyme12', 'TKT', 'Enzyme4', 'Transporter1', 'Enzyme23', 'Enzyme32', 'Enzyme18', 'Enzyme20', 'Enzyme22', 'Enzyme3', 'Enzyme15', 'Enzyme6', 'Enzyme10', 'Enzyme21', 'Enzyme28', 'Enzyme35', 'Enzyme19', 'Enzyme26', 'Enzyme31', 'Enzyme16', 'Enzyme14', 'Enzyme33', 'Enzyme2', 'Enzyme11', 'Enzyme1', 'Enzyme5', 'PseudoEnzymes', 'Enzyme8', 'Enzyme30', 'TALDO', 'Enzyme7', 'Enzyme17', 'Enzyme24', 'Enzyme13', 'Enzyme25', 'Enzyme9', 'Transporter2', 'Enzyme34', 'Enzyme27', 'Enzyme29']
['Aspartate', 'Alanine', 'Asparagine', 'Glutamate', 'Fatty acids', 'Serine', 'Glutamine', 'Glycine', 'Cysteine', 'Nucleotides']


In [252]:
# for the given data, lets find all the paths from Glucose to Pyruvate

# create a visited dictionary
# if a metabolite is visited, then we don't need to visit it again

def find_paths(metabolite, path, paths, visited):
    # create a new visited list
    visited = visited.copy()
    if (metabolite in visited) and (visited[metabolite]):
        return
    """ print("metabolite and visited", metabolite, visited) """
    if metabolite in endpoints:
        if metabolite not in paths:
            paths[metabolite] = [path]
        else:
            paths[metabolite].append(path)
    elif metabolite not in metabolites_to_enzymes:
        return
    else:
        if visited[metabolite]:
            return

            
        for enzyme, product in metabolites_to_enzymes[metabolite]:
            visited[metabolite] = True
            find_paths(product, path + [(enzyme, product)], paths, visited)



In [253]:
def find_non_essential_enzymes(paths):
    non_used_enzymes_for_endpoint = {}
    for endpoint in paths:
        #find intersection of all the enzymes used in the paths to the endpoint
        essential_enzymes = []
        for element in paths[endpoint][0]:
            essential_enzymes.append(element[0])

        for path in paths[endpoint]:
            new_essential_enzymes = []

            for element in path:
                new_essential_enzymes.append(element[0])

            essential_enzymes = list(set(essential_enzymes).intersection(set(new_essential_enzymes)))

        non_used_enzymes_for_endpoint[endpoint] = list(set(enzymes) - set(essential_enzymes))
    return non_used_enzymes_for_endpoint

In [254]:
def find_all_non_essential_enzymes(non_used_enzymes_for_endpoint):
    all_non_used_enzymes = []

    for endpoint in non_used_enzymes_for_endpoint:
        all_non_used_enzymes += non_used_enzymes_for_endpoint[endpoint]
    all_non_used_enzymes = list(set(all_non_used_enzymes))

    for endpoint in non_used_enzymes_for_endpoint:
        for e in all_non_used_enzymes:
            if e not in non_used_enzymes_for_endpoint[endpoint]:
                all_non_used_enzymes.remove(e)
    return all_non_used_enzymes

In [257]:
paths = {}

visited = {}
for metabolite in metabolites_to_enzymes:
    visited[metabolite] = False

find_paths('Glucose [c]', [], paths, visited)

non_essential_enzymes_for_endpoint = find_non_essential_enzymes(paths)

non_essential_enzymes = find_all_non_essential_enzymes(non_essential_enzymes_for_endpoint)


print("all non essential enzymes: ", non_essential_enzymes)
print("all essential enzymes are: ", sorted(list(set(enzymes) - set(non_essential_enzymes))))

all non essential enzymes:  ['Enzyme12', 'TKT', 'Enzyme4', 'Transporter1', 'Enzyme23', 'Enzyme32', 'Enzyme18', 'Enzyme20', 'Enzyme22', 'Enzyme3', 'Enzyme15', 'Enzyme21', 'Enzyme28', 'Enzyme35', 'Enzyme19', 'Enzyme26', 'Enzyme31', 'Enzyme14', 'Enzyme33', 'Enzyme2', 'Enzyme11', 'Enzyme5', 'Enzyme30', 'TALDO', 'Enzyme7', 'Enzyme17', 'Enzyme24', 'Enzyme13', 'Enzyme25', 'Transporter2', 'Enzyme34', 'Enzyme27', 'Enzyme29']
all essential enzymes are:  ['Enzyme1', 'Enzyme10', 'Enzyme16', 'Enzyme6', 'Enzyme8', 'Enzyme9', 'PseudoEnzymes']


How it's done:

- Find every possible path for every single product
  
- Find all the non essential enzymes for each product
  
- Mash all the non essential enzymes together
  
- Find the intersection of all the non essential enzymes (an enzyme may be essential for one product but not for another)
  
- Remove the non essential enzymes from the list of all enzymes and get the **ESSENTIAL** ones