# Gapfilling with Sco Model

In [None]:
import cobra
from cobra.core import Metabolite, Reaction
from cobra.io import read_sbml_model, load_json_model
from cobra.flux_analysis import gapfill
import os, re
import pandas as pd

# Every path below is resolved against the current working directory, so show
# it first; a wrong launch location is then obvious from the output.
working_dir = os.getcwd()
print(working_dir)

# Data folders, written as suffixes that are appended to the working directory.
phenomics = '/../../phenomics'
ext_dir = '/../../../data/external'

# Target model that will be gap-filled. The earlier draft is kept for reference:
# model = read_sbml_model(f"{os.getcwd()}/{ext_dir}/Salb-GEM.xml")
target_model_path = f"{working_dir}/{ext_dir}/Salb-GEM-Updated.xml"
model = read_sbml_model(target_model_path)

# Sco-GEM, used later in this notebook as the reference model for gapfilling.
reference_model_path = f"{working_dir}/{ext_dir}/Sco-GEM.xml"
sco_model = read_sbml_model(reference_model_path)

# 1. Get all untestable and false negative compounds 

In [None]:
# Load the table that pairs Biolog observations with model predictions.
bio_data_path = f"{os.getcwd()}{phenomics}/agreed_bio_data_Salb.csv"
agreed_bio_data = pd.read_csv(bio_data_path)

# Store the prediction column with pandas' nullable 'boolean' dtype so that
# missing predictions stay missing instead of being coerced.
simulation_column = 'model_simulation_0.05'
agreed_bio_data[simulation_column] = agreed_bio_data[simulation_column].astype('boolean')

# # Mapping between bigg and metax names
# bigg_mnmx = pd.read_json(f"{os.getcwd()}/{ext_dir}/metanetx.json")

# # Mapping files for bigg compound to all other names
# name_map = pd.read_csv(f"{os.getcwd()}/{ext_dir}/biggmodels_metabolites.txt",sep='\t')
# name_map = name_map.drop(['bigg_id', 'model_list', 'old_bigg_ids'], axis=1)

agreed_bio_data

In [None]:
print(f"\nFalse negative (actual true, predicted false):\n")

# Rows without a model prediction cannot be classified, so drop them first.
has_prediction = agreed_bio_data['model_simulation_0.05'].notna()
agreed_bio_data_nan_drop = agreed_bio_data[has_prediction]

# A false negative is a row whose "activity" is true while the model's
# prediction is false.
observed = agreed_bio_data_nan_drop["activity"]
predicted = agreed_bio_data_nan_drop["model_simulation_0.05"]
report_columns = ["bigg", "chemical", "moa", "exchange", "model_simulation_0.05"]
false_negatives = agreed_bio_data_nan_drop.loc[observed & ~predicted, report_columns]

false_negatives

# Keep a copy on disk next to the notebook (row index included).
false_negatives.to_csv(os.getcwd() + "/" + 'Salb_false_negative.csv', index=True)

# 2. Try Gapfilling Function on false negatives

In [None]:
def basic_gapfill(bigg, model, reference, objective='growth', iter=1, solver='cplex'):
    """
    Gapfill a model with reactions offered by a reference model.

    Parameters:
    -----------
    bigg: String
        Identifier of the compound under test; only used in the progress message.
    model: Model
        Model to gapfill. Its solver and objective are set by this function.
    reference: Model
        Model whose reactions are offered to the gapfilling algorithm.
    objective: String
        Value assigned to ``model.objective`` before gapfilling.
    iter: int
        Number of gapfilling iterations (passed on as ``iterations``).
    solver: String
        Solver name assigned to both models. Defaults to 'cplex', which was
        previously hard-coded; pass another installed solver to run without CPLEX.

    Returns:
    --------
    Whatever ``gapfill`` returns; the callers in this file iterate it as a
    list of reaction lists.

    Any exception raised while configuring the models or by ``gapfill`` itself
    propagates to the caller.
    """
    print(f"\nGapfilling {bigg} with: ")
    # The reference model is configured inside its own context manager.
    with reference:
        reference.solver = solver
        model.solver = solver
        model.objective = objective
        # demand_reactions=False: do not let gapfill propose demand reactions.
        solution = gapfill(model, reference, demand_reactions=False, iterations=iter)
        print(solution)
    return solution

def gapfill_medium(model, reference, bigg, type, exchange):
    """
    Gapfill the model for growth on one compound of a Biolog-style medium.

    The compound's exchange reaction is given a medium value of 0.8 and,
    depending on the first letter of `type`, one default source is set to 0:
    C -> EX_glc__D_e, N -> EX_nh4_e, P -> EX_pi_e, S -> EX_so4_e.
    EX_co2_e is then bounded to (0, 1000) and `basic_gapfill` is run with the
    'growth' objective and 4 iterations. All of this happens inside a
    ``with model:`` context.

    Parameters:
    -----------
    model (Model): model to gapfill; must already contain `exchange`
    reference (Model): model supplying candidate reactions
    bigg (str): identifier of the compound, used in messages
    type (str): plate / mode-of-action label; only its first letter is inspected
    exchange (str): id of the exchange reaction of the compound

    Returns:
    --------
    The result of `basic_gapfill`, or None when any step raised an error
    (the error is printed instead of being propagated).
    """
    # Default nutrient source that the tested compound replaces, keyed by the
    # leading letter of the plate type.
    replaced_source = {
        "C": "EX_glc__D_e",
        "N": "EX_nh4_e",
        "P": "EX_pi_e",
        "S": "EX_so4_e",
    }

    try:
        with model:
            medium = model.medium
            medium[exchange] = 0.8
            for prefix, default_exchange in replaced_source.items():
                if type.startswith(prefix):
                    medium[default_exchange] = 0
                    break

            model.medium = medium
            model.reactions.EX_co2_e.bounds = (0, 1000)

            return basic_gapfill(bigg, model, reference, 'growth', 4)
    # `Exception` rather than a bare `except:` so that Ctrl-C (KeyboardInterrupt)
    # and SystemExit can still stop a long gapfilling loop.
    except Exception as error:
        print(f"Gapfilling of the compound {bigg} - {type} - {exchange} is failed\n")
        # `type` is shadowed by the parameter, hence __class__ instead of type().
        print(f"Reason: {error.__class__.__name__}: {error}\n")
        return None


In [None]:
# Gapfill on a copy of the loaded model.
_model = model.copy()

# Each compound produces a three-line record in the output file:
# a header line, a comma-separated line of reaction ids (possibly empty),
# and a blank line.
with open('gapfill_reactions.txt', 'w') as file:
    compounds = zip(false_negatives['bigg'], false_negatives['moa'], false_negatives['exchange'])
    for bigg_id, moa, exchange_id in compounds:
        file.write(f"Gapfilling {bigg_id}, {moa} with:\n")
        solution = gapfill_medium(_model, sco_model, bigg_id, moa, exchange_id)

        reactions_str = ''
        if solution:
            # Merge the reaction sets of all iterations, dropping repeats.
            unique_solution = {reaction for reaction_set in solution for reaction in reaction_set}
            reactions_str = ', '.join(str(reaction.id) for reaction in unique_solution)
            print(reactions_str)
        file.write(reactions_str + '\n\n')

In [None]:
# Read back the gapfilling report written above.
with open('gapfill_reactions.txt', 'r') as file:
    lines = file.readlines()

# Records span three lines (header, reaction ids, blank), so the reaction ids
# sit on every third line starting with the second one.
gapfill_lists = [
    item.strip()
    for id_line in lines[1::3]
    for item in id_line.strip().split(',')
]

# Collapse to unique ids; records without a solution contribute an empty string.
merged_lists = set(gapfill_lists)
merged_lists.discard('')

# Print the extracted reaction ids
print(merged_lists)

# 3. Get a short list by testing new reactions

In [None]:
# Test every gapfilling candidate on its own and keep those whose optimum
# stays below 1 once the reaction has been added.
short_list = []
for reaction_id in merged_lists:
    # Look the candidate up in the reference model
    candidate = sco_model.reactions.get_by_id(reaction_id)
    model_temp = model.copy()

    with model_temp:
        # The candidate is copied before being added to the scratch model
        model_temp.add_reactions([candidate.copy()])
        model_temp.solver = 'cplex'
        model_temp.reactions.EX_co2_e.bounds = (0, 1000)

        solution = model_temp.optimize()
        print(f"------\nTest with {reaction_id} \nSolution: {solution}\n--------")

        below_threshold = solution.objective_value < 1
        if below_threshold:
            short_list.append(reaction_id)

short_list

In [None]:
import requests, re

def check_annotation_uniprot(reaction, taxonomy_id):
    """
    Look up the reaction's EC numbers in UniProtKB for one taxon.

    Parameters:
    -----------
    reaction: object with an ``annotation`` mapping; its 'ec-code' entry may be
        a list of EC numbers or a single EC number given as a string
    taxonomy_id: value inserted into the ``taxonomy_id:`` field of the query

    Returns:
    --------
    List with one entry per EC number that produced hits; each entry is the raw
    response body (requested in 'list' format). Empty when nothing was found
    or when the reaction has no fully specified EC number.

    Network errors raised by ``requests`` (including the 30 s timeout)
    propagate to the caller. Responses with a non-success HTTP status are
    reported and skipped instead of being counted as hits.
    """
    ec_numbers = reaction.annotation.get('ec-code', [])
    # A lone annotation stored as a bare string must not be iterated character
    # by character, which would never yield a valid EC number.
    if isinstance(ec_numbers, str):
        ec_numbers = [ec_numbers]

    result = []
    for ec in ec_numbers:
        # Ensure the EC number has only digits and dots
        if not re.match(r'^\d+(\.\d+)+$', ec):
            continue
        print(ec)
        query = f'(taxonomy_id:{taxonomy_id}) AND (ec:{ec})'
        url = 'https://rest.uniprot.org/uniprotkb/search?'
        params = {
            'query': query,
            'format': 'list',
        }
        # Without a timeout a stalled connection would block the notebook forever.
        response = requests.get(url, params=params, timeout=30)
        if not response.ok:
            # An error body is not evidence that the enzyme exists.
            print(f"UniProt request for EC {ec} failed with HTTP status {response.status_code}; ignored")
            continue
        if response.text.strip() != '':
            result.append(response.text)
    print(result)
    return result

def check_transport_reaction(reaction):
    """
    Report whether the reaction's metabolites lie in more than one compartment.

    The verdict is printed and returned as a bool.
    """
    seen_compartments = set()
    for metabolite in reaction.metabolites:
        seen_compartments.add(metabolite.compartment)

    is_transport = len(seen_compartments) >= 2
    print(f"transport reaction: {is_transport}")
    return is_transport

In [None]:
# Fact checking: a short-listed reaction is kept only when it is not a
# transport reaction, carries annotation, and at least one of its EC numbers
# is found in UniProt for the taxon below. A reaction that cannot be found in
# UniProt is considered unlikely to exist in the organism.
# NOTE(review): 1886 is presumably the NCBI taxonomy id of the target organism - confirm.
taxonomy_id = 1886

final_list = []
for r in short_list:
    reaction = sco_model.reactions.get_by_id(r)
    print(f'--------\n{reaction}')

    # Checks run in this order so the UniProt request is only made when needed.
    if check_transport_reaction(reaction):
        continue
    if not reaction.annotation:
        continue
    if check_annotation_uniprot(reaction, taxonomy_id):
        final_list.append(r)

final_list  

In [None]:
# Add the short-listed reactions to the model so they can be tested below.
# NOTE(review): this iterates `short_list`; the fact-checked `final_list`
# built in the previous cell is not used here - confirm that is intended.
for reaction in map(sco_model.reactions.get_by_id, short_list):
    # A copy is added so the reference model keeps its own reaction object.
    model.add_reactions([reaction.copy()])

In [None]:
def optimize_medium(bigg, type, exchange):
    """
    Optimize the notebook-level `model` for growth on one compound.

    The compound's exchange reaction is given a medium value of 0.8 and,
    depending on the first letter of `type`, one default source is set to 0
    (C: EX_glc__D_e, N: EX_nh4_e, P: EX_pi_e, S: EX_so4_e). The medium and the
    resulting objective value are printed. Everything happens inside a
    ``with model:`` context.

    Parameters:
    -----------
    bigg (str): identifier of the compound, used in the printed message
    type (str): plate / mode-of-action label; only its first letter is inspected
    exchange (str): id of the exchange reaction; it must already be in the model

    Returns:
    --------
    The solution returned by ``model.optimize()``.
    """
    # Default source replaced by the tested compound, by leading letter.
    default_sources = (
        ("C", "EX_glc__D_e"),
        ("N", "EX_nh4_e"),
        ("P", "EX_pi_e"),
        ("S", "EX_so4_e"),
    )

    with model:
        medium = model.medium
        medium[exchange] = 0.8
        closed_source = next(
            (rxn_id for prefix, rxn_id in default_sources if type.startswith(prefix)),
            None,
        )
        if closed_source is not None:
            medium[closed_source] = 0

        print(f"\n Optimize {bigg}, {type} with: ")
        model.medium = medium
        print(model.medium)
        model.solver = 'cplex'
        model.objective = 'growth'
        solution = model.optimize()
        print(solution.objective_value)
    return solution
            
            
# Re-run every false-negative condition against the extended model.
for bigg_id, moa, exchange_id in zip(
    false_negatives['bigg'], false_negatives['moa'], false_negatives['exchange']
):
    optimize_medium(bigg_id, moa, exchange_id)



# 4. Gapfill Untestables with Sco model

In [None]:
def add_exchange(model, reference, metabolite_bigg, metabolite_name):
    """
    Make sure the target model has an exchange reaction ``EX_<bigg>_e``.

    Resolution order:
      1. the reaction already exists in `model`: nothing is added;
      2. the reaction exists in `reference`: a copy of it is added;
      3. `reference` has the metabolite ``<bigg>_c``: a copy of it is renamed
         to ``<bigg>_e``, moved to compartment 'e' and consumed (-1) by a new
         reaction;
      4. `reference` has the metabolite ``<bigg>_e``: a copy of it is consumed
         (-1) by a new reaction;
      5. otherwise nothing can be added.
    In cases 1-4, if the resulting reaction is not a boundary reaction,
    ``model.add_boundary(..., type="exchange")`` is called on ``<bigg>_e``.

    Parameters:
    -----------
    model: Cobra Model
        Target model; modified in place.
    reference: Cobra Model
        Model searched for the exchange reaction or the metabolite.
    metabolite_bigg: String
        BiGG-style base id of the metabolite (without compartment suffix).
    metabolite_name: String
        Human-readable name, used for the name of a newly created reaction.

    Returns:
    --------
    (model, exchange reaction id) on success, (model, None) when the metabolite
    is unknown to the reference model or any step raised an error (the error is
    printed instead of being propagated).
    """
    ex_rxn_id = f"EX_{metabolite_bigg}_e"
    ex_met_id = f"{metabolite_bigg}_e"
    cy_met_id = f"{metabolite_bigg}_c"
    print(f"Try to add {ex_rxn_id} reaction")

    try:
        # Membership tests use the exact id. The regex/substring matching of
        # `query()` can report a hit for a longer id, after which get_by_id
        # fails and the metabolite-based fallback is never tried.
        if ex_rxn_id not in model.reactions:
            if ex_rxn_id in reference.reactions:
                ex_rxn = reference.reactions.get_by_id(ex_rxn_id)
                model.add_reactions([ex_rxn.copy()])
            else:
                if cy_met_id in reference.metabolites:
                    # Reuse the cytosolic species as a template for the external one.
                    met = reference.metabolites.get_by_id(cy_met_id).copy()
                    met.id = ex_met_id
                    met.compartment = 'e'
                elif ex_met_id in reference.metabolites:
                    met = reference.metabolites.get_by_id(ex_met_id).copy()
                else:
                    print(f'{metabolite_bigg} is not in reference model\n')
                    return model, None

                ex_rxn = Reaction(id=ex_rxn_id, name=f"{metabolite_name}-exchange")
                model.add_reactions([ex_rxn])
                ex_rxn.add_metabolites({met: -1})
            print(f"Exchange: {ex_rxn_id} is added")

        # Final check to see if the boundary is added. The metabolite is looked
        # up by its id so this works on every path, including the one that
        # copies the reaction from the reference model.
        if not model.reactions.get_by_id(ex_rxn_id).boundary:
            model.add_boundary(model.metabolites.get_by_id(ex_met_id), type="exchange")
        return model, ex_rxn_id
    # `Exception` rather than a bare `except:` so Ctrl-C still interrupts, and
    # the reason is reported instead of being lost.
    except Exception as error:
        print(f"Could not add {ex_rxn_id}: {error.__class__.__name__}: {error}")
        return model, None

In [None]:
# Build the table of untestable growth conditions: metabolites read from
# untestable_metabolites.csv, joined with the Biolog annotations.
print(f"\nUntestable conditions:\n")

# Read the metabolite list (one directory above the working directory).
# NOTE(review): the converter is keyed on 'Ignore' while the code below reads
# the column 'ignore' - confirm the CSV header; if the header is lowercase the
# converter presumably never runs.
untestables = pd.read_csv(f"{os.getcwd()}/../untestable_metabolites.csv", converters={'Ignore': lambda x: True if x == 'TRUE' else False})

# Left-join on the BiGG id so every untestable metabolite is kept, with the
# Biolog columns filled in where a match exists.
untestables = untestables.merge(agreed_bio_data[['activity', 'index', 'moa', 'bigg']], on='bigg', how='left')
# Normalize the 'ignore' flag.
# NOTE(review): this maps None to False and the string 'FALSE' to True; the
# second mapping looks inverted - confirm against the data.
untestables['ignore'] = untestables['ignore'].replace({None: False, 'FALSE': True})
# Keep only the columns used by the gapfilling loop.
untestables = untestables[["bigg","name", "moa", "ignore"]]
# untestables.to_csv('untestables.csv')
untestables

In [None]:
# Gapfill on a copy of the loaded model. Exchange reactions added for one
# compound stay in the copy for the following compounds.
_model = model.copy()

# Each tested compound produces a three-line record in the output file:
# a header line, a comma-separated line of reaction ids (possibly empty),
# and a blank line.
with open('gapfill_reactions_sco_untestable.txt', 'w') as file:
    for ind in untestables.index:
        bigg = untestables['bigg'][ind]
        # Named `moa`, not `type`: assigning to `type` at notebook level would
        # replace the builtin for every cell executed afterwards.
        moa = untestables['moa'][ind]
        name = untestables['name'][ind]

        # Only test ones that needs to be added. The explicit comparison is
        # kept on purpose: missing values (NaN/None) must not count as False.
        if untestables['ignore'][ind] == False:  # noqa: E712
            file.write(f"Gapfilling {bigg}, {moa} with:\n")
            print(f"---------------\nGapfilling {bigg}, {moa} test:\n")
            # Try to add exchange reactions before gapfilling
            _model, ex_rxn = add_exchange(_model, sco_model, bigg, name)
            solution = None
            if ex_rxn:
                solution = gapfill_medium(_model, sco_model, bigg, moa, ex_rxn)

            # Write gapfilling results
            if solution:
                # Merge the reaction sets of all iterations, dropping repeats.
                unique_solution = {reaction for reaction_set in solution for reaction in reaction_set}
                reactions_str = ', '.join(str(reaction.id) for reaction in unique_solution)
                file.write(reactions_str + '\n\n')
            else:
                file.write('\n\n')

In [None]:
# Read back the report written by the untestable-compound gapfilling cell.
# The file name must match the one written there
# ('gapfill_reactions_sco_untestable.txt'); the ids are looked up in sco_model
# afterwards, so a report produced with another reference model would not fit.
with open('gapfill_reactions_sco_untestable.txt', 'r') as file:
    lines = file.readlines()

# Records span three lines (header, reaction ids, blank), so the reaction ids
# sit on every third line starting with the second one.
gapfill_lists = []
for id_line in lines[1::3]:
    gapfill_lists.extend(item.strip() for item in id_line.strip().split(','))

# Collapse to unique ids; records without a solution contribute an empty string.
merged_lists = set(gapfill_lists)
merged_lists.discard('')

# Print the extracted reaction ids
print(merged_lists)

In [None]:
# Add every gapfilling reaction found for the untestable compounds to the model.
for reaction in map(sco_model.reactions.get_by_id, merged_lists):
    # A copy is added so the reference model keeps its own reaction object.
    model.add_reactions([reaction.copy()])


# 5. Compare model before and after

In [None]:
# Compare two models by reaction numbers and metabolites.
# Load the gap-filled model from the working directory for the comparison.
# NOTE(review): 'Salb-GEM-Sco-gapfill.xml' is written by the last cell of this
# notebook, so on a fresh top-to-bottom run this presumably reads a file from
# an earlier session (or fails if none exists) - confirm the intended order.
updated_model = read_sbml_model(f"{os.getcwd()}/Salb-GEM-Sco-gapfill.xml")



In [None]:
# Display the in-memory model (with the reactions added in this notebook).
model

In [None]:
# Display the model loaded from 'Salb-GEM-Sco-gapfill.xml'.
updated_model

In [None]:
# Display the Sco-GEM reference model.
sco_model

In [None]:
# Bound the CO2 exchange to (0, 1000) before saving.
co2_exchange = model.reactions.get_by_id("EX_co2_e")
co2_exchange.bounds = (0, 1000)

# Save the extended model as SBML in the working directory.
cobra.io.write_sbml_model(model, 'Salb-GEM-Sco-gapfill.xml')