# Get the main Seed2LP solution analyses
This notebook presents the global data of seed2lp, such as number of networks having solutions, number of networks having at least one solution validating FBA, number of networks with all solution validating FBA.

To run correctly this notebook and have the same results as the paper, you must first download the raw results here: [https://doi.org/10.57745/OS1JND](https://doi.org/10.57745/OS1JND)

This notebook is written with the hierarchy of downloaded files, if you want to try it with the test form the run notebooks, it is needed to first restructure your data to match the hierarchy of downloaded files.

We suppose here that the downloaded files are in a directory named "analyses", this directory path can be changed to your directory path where the data are saved.

# Variable to change (if wanted)

In [1]:
analyse_dir = "../../analyses"

# Initialisation and functions

In [2]:
import pandas as pd
import os

In [3]:
s2lp_results_reas_dir = os.path.join(analyse_dir, "results", "s2lp_reasoning")
s2lp_results_hyb_cobra_dir = os.path.join(analyse_dir, "results", "s2lp_hyb_cobra")

In [4]:
def get_fluxes(directory:str, mode:str, optim:str=None):
    flux_all=pd.DataFrame(columns=['species', 'biomass_reaction', 'solver_type', 'search_mode',
                                     'search_type', 'accumulation', 'model', 'size', 'lp_flux', 'cobra_flux_init',
                                     'cobra_flux_no_import', 'cobra_flux_seeds', 'cobra_flux_demands',
                                     'has_flux', 'has_flux_seeds', 'has_flux_demands', 'timer'])
    flux_all['accumulation'] = flux_all['accumulation'].astype('bool')
    flux_all['has_flux'] = flux_all['has_flux'].astype('bool')
    flux_all['has_flux_seeds'] = flux_all['has_flux_seeds'].astype('bool')
    flux_all['has_flux_demands'] = flux_all['has_flux_demands'].astype('bool')

    for dirpath, _, filenames in os.walk(directory):
        for filename in [f for f in filenames if (f.endswith("_fluxes.tsv") or f.endswith("_fluxes_from_result.tsv"))]:
            # By default in this notebook we want the no accumulation mode for seed2lp results
            if  "_no_accu_" in filename \
                and   ((mode == "full" and "_fn_" in filename) \
                    or (mode == "target" and "_tgt_" in filename))\
                or mode == "netseed":
                file_path=os.path.join(dirpath, filename)
                current_df = pd.read_csv(file_path, sep='\t', lineterminator='\n')
                current_df['accumulation'] = current_df['accumulation'].astype('bool')
                current_df['has_flux'] = current_df['has_flux'].astype('bool')
                current_df['has_flux_seeds'] = current_df['has_flux_seeds'].astype('bool')
                current_df['has_flux_demands'] = current_df['has_flux_demands'].astype('bool')
                flux_all=pd.concat([flux_all if not flux_all.empty else None, current_df], ignore_index=True)
    flux_all = flux_all[flux_all["model"]!="model_one_solution"]
    flux_all = flux_all[flux_all["model"]!="model_one_solution"]
    if optim=="submin":
        return flux_all[flux_all["search_mode"]=="Subset Minimal"]
    elif optim=="min":
        return flux_all[flux_all["search_mode"]=="Minimize"]
    else:
        return flux_all

In [5]:
def get_all_same_validation_fba(table, type):
    count=0
    total=0
    for _,line in table.iterrows():
        if line[type] == line["Total_flux"]:
            count += 1
            total += 1
        else:
            total += 1
    return total, count

def get_mixed(table):
    count=0
    for _,line in table.iterrows():
        if line["False_flux"] != line["Total_flux"] and line["True_flux"] != line["Total_flux"] :
            count += 1
    return count

In [6]:
def get_separate_data(table_reasoning, table_hybrid):
    table_hybrid["solver_type"] = table_hybrid["solver_type"].str.replace('REASONING  GUESS-CHECK-DIVERSITY', 'REASONING GUESS-CHECK DIVERSITY')
    table_hybrid["solver_type"] = table_hybrid["solver_type"].str.replace('REASONING  GUESS-CHECK', 'REASONING GUESS-CHECK')
    table_hybrid["solver_type"] = table_hybrid["solver_type"].str.replace('REASONING  FILTER', 'REASONING FILTER')

    # CLASSIC
    table_reasoning = table_reasoning[table_reasoning["solver_type"]=="REASONING"]
    
    # FILTER
    table_filter = table_hybrid[table_hybrid["solver_type"]=="REASONING FILTER"]

    # GUESS_CHECK
    table_gc = table_hybrid[table_hybrid["solver_type"]=="REASONING GUESS-CHECK"]

    # GUESS_CHECK_DIV
    table_gcd = table_hybrid[table_hybrid["solver_type"]=="REASONING GUESS-CHECK DIVERSITY"]

    return table_reasoning, table_filter, table_gc, table_gcd

In [7]:
def create_table_plot(table,column_name):
    new_table = table.groupby(['species'])[column_name].agg('count').reset_index()
    new_table=new_table.rename(columns={column_name: "Total_flux"})
    new_true = table[table[column_name]==True].groupby(['species'])[column_name].agg('count').reset_index()
    new_true=new_true.rename(columns={column_name: "True_flux"})
    new_false = table[table[column_name]==False].groupby(['species'])[column_name].agg('count').reset_index()
    new_false=new_false.rename(columns={column_name: "False_flux"})
    new_table=pd.merge(new_table,new_true, how='left', on=['species'])
    new_table=pd.merge(new_table,new_false, how='left', on=['species'])
    new_table=new_table.fillna(0)
    new_table=new_table.fillna(0)
    new_table['True_flux']=new_table['True_flux'].astype(int)
    new_table['False_flux']=new_table['False_flux'].astype(int)
    return new_table

In [8]:
def get_sol_FBA_data(table_reasoning, table_filter, table_gc, table_gcd):
    # CLASSIC
    nb_networks_reasoning, all_true_reasoning,  = get_all_same_validation_fba(table_reasoning, "True_flux")
    mixed_reasoning = get_mixed(table_reasoning)
    
    # FILTER
    nb_networks_filter, all_true_filter,  = get_all_same_validation_fba(table_filter, "True_flux")
    mixed_filter = get_mixed(table_filter)

    # GUESS_CHECK
    nb_networks_gc, all_true_gc,  = get_all_same_validation_fba(table_gc, "True_flux")
    mixed_gc = get_mixed(table_gc)

    # GUESS_CHECK_DIV
    nb_networks_gcd, all_true_gcd,  = get_all_same_validation_fba(table_gcd, "True_flux")
    mixed_gcd = get_mixed(table_gcd)

    df = pd.DataFrame([["Reasoning", nb_networks_reasoning, mixed_reasoning, all_true_reasoning],
                       ["Hybrid-filter", nb_networks_filter, mixed_filter, all_true_filter],
                       ["Hybrid-GC", nb_networks_gc, mixed_gc, all_true_gc],
                       ["Hybrid-GC-Div", nb_networks_gcd, mixed_gcd, all_true_gcd]],
                       columns=["Solving mode", "Nb. of net. with sol.", "Nb. of net. with ≥ 1 sol. FBA", "Nb of net with all sol. FBA"])
    return df
    


# Get data

In [9]:
flux_reasoning_target_submin = get_fluxes(s2lp_results_reas_dir, "target", "submin")

In [10]:
flux_hyb_cobra_target_submin = get_fluxes(s2lp_results_hyb_cobra_dir, "target", "submin")

  flux_all=pd.concat([flux_all if not flux_all.empty else None, current_df], ignore_index=True)
  flux_all=pd.concat([flux_all if not flux_all.empty else None, current_df], ignore_index=True)
  flux_all=pd.concat([flux_all if not flux_all.empty else None, current_df], ignore_index=True)
  flux_all=pd.concat([flux_all if not flux_all.empty else None, current_df], ignore_index=True)
  flux_all=pd.concat([flux_all if not flux_all.empty else None, current_df], ignore_index=True)
  flux_all=pd.concat([flux_all if not flux_all.empty else None, current_df], ignore_index=True)
  flux_all=pd.concat([flux_all if not flux_all.empty else None, current_df], ignore_index=True)
  flux_all=pd.concat([flux_all if not flux_all.empty else None, current_df], ignore_index=True)
  flux_all=pd.concat([flux_all if not flux_all.empty else None, current_df], ignore_index=True)
  flux_all=pd.concat([flux_all if not flux_all.empty else None, current_df], ignore_index=True)
  flux_all=pd.concat([flux_all if not fl

In [11]:
flux_reasoning, flux_filter, flux_gc, flux_gcd = get_separate_data(flux_reasoning_target_submin, flux_hyb_cobra_target_submin)

In [12]:
reasoning_flux=create_table_plot(flux_reasoning,'has_flux')
filter_flux=create_table_plot(flux_filter,'has_flux')
gc_flux=create_table_plot(flux_gc,'has_flux')
gcd_flux=create_table_plot(flux_gcd,'has_flux')

## Number of networks satisfying FBA constraints

In [13]:
data = get_sol_FBA_data(reasoning_flux, filter_flux, gc_flux, gcd_flux)
data

Unnamed: 0,Solving mode,Nb. of net. with sol.,Nb. of net. with ≥ 1 sol. FBA,Nb of net with all sol. FBA
0,Reasoning,107,73,10
1,Hybrid-filter,71,0,71
2,Hybrid-GC,90,0,90
3,Hybrid-GC-Div,98,0,98


In [15]:
_, all_false  = get_all_same_validation_fba(reasoning_flux, "False_flux")
print("Reasoning - nb of networks with no sol FBA: ",all_false)

Reasoning - nb of networks with no sol FBA:  24
