# PETab Scheme

### Condition Table ✅

- ConditionId
- Species (p1,p2,..)
 
### Observable Table ✅

- ObservableId
- ObservableFormula

### Measurement Table ✅

- ObservableId
- SimulationConditionId
- Time
- Measurement

### Parameter Table ✅

- ParameterId
- Estimate
- NominalValue

In [1]:
import pandas as pd
import pyenzyme as pe
import yaml

### Condition Table

In [2]:
def _build_condition_table(enzmldoc):
    """Builds the condition table that is necessary for the PETab format"""
    condition_table = []
    for measurement in enzmldoc.measurement_dict.values():
        all_species = measurement._getAllSpecies()
        init_concs = {id: meas_data.init_conc for id, meas_data in all_species.items()}

        condition_name = {"conditionName": measurement.name}
        
        condition_table.append(
            {"conditionId": measurement.id, **condition_name, **init_concs}
        )
    
    condition_table = pd.DataFrame(condition_table)

    # Check if all values in a column are same. If the values are all the same, delete the column
    for key, _ in init_concs.items():
        if (condition_table[key] == condition_table[key][0]).all():
            del condition_table[key]
    
    return condition_table

### Measurement Table

In [3]:
def _build_measurement_table(enzmldoc):
    """Builds the measurement table that is necessary for the PETab format"""
    meas_table = []
    observable_ids, species_ids = [], []
    for measurement in enzmldoc.measurement_dict.values():
        all_species = measurement._getAllSpecies()

        for meas_data in all_species.values():
            for replicate in meas_data.replicates:
                observable_id, species_id = _add_replicate_to_table(replicate, meas_table)
                observable_ids.append(observable_id)
                species_ids.append(species_id)
            
    return pd.DataFrame(meas_table), observable_ids, species_ids

def _add_replicate_to_table(replicate, meas_table):
    """Parses a Replicate object and adds it to a measurement table"""
    
    observable_id = replicate.species_id + f"_{replicate.data_type.value}"
    condition_id = replicate.measurement_id
    replicate_id = replicate.id
    species_id = replicate.species_id
    
    for time_i, data_i in zip(replicate.time, replicate.data):
        meas_table.append({
            "observableId": observable_id,
            "simulationConditionId": condition_id,
            "replicateId": replicate_id,
            "measurement": data_i,
            "time": time_i,
            "noiseParameters": "1;sd_" + species_id
        })
        
    return (observable_id, replicate.species_id)

### Parameters Table

In [4]:
PARAMETER_MAPPING = {
    "name": "parameterId",
    "constant": "estimate",
    "upper": "upperBound",
    "lower": "lowerBound",
    "initial_value": "nominalValue",
    "value": "parameterScale"
}

In [5]:
def _build_parameters_table(enzmldoc, parameter_scale: str): 
    """Builds the parameters table that is necessary for the PETab format"""

    assert isinstance(parameter_scale, str), 'parameter_scale is not a string'
    
    parameters = enzmldoc.exportKineticParameters(as_dataframe=False)

    parameter_table = []
    
    for _, row in parameters.iterrows():
        new_row = {}
        for key, value in row.items():

            if value is None or key not in PARAMETER_MAPPING:
                continue

            name = PARAMETER_MAPPING[key]

            if name == "parameterScale":
                value = parameter_scale # Parameterscale

            if name == "nominalValue":
                nominalVal = value
                nominalName = name

            if name == "estimate":
                # Turn booleans into integers
                value = int(not value)
                
                # Add nominalValue if estimate is 0
                if value == 0:
                    new_row[nominalName] = nominalVal
                else: 
                    new_row[nominalName] = ""
                    
            # So nominalValue is not in second place in resulting table
            if name != "nominalValue":
                new_row[name] = value
            
        parameter_table.append(new_row)
    
    # Add noiseParameter for noiseFormula -> how to choose upperbound/lowerbound?
    reactants = enzmldoc.reactant_dict
    for reactant in reactants:
        new_row = {}
        observable_name = ("sd_" + reactants[reactant].id)  
        new_row["parameterId"] = observable_name
        new_row["parameterScale"] = parameter_scale
        new_row["estimate"] = 1
        new_row["nominalValue"] = ""
        parameter_table.append(new_row)


        
    return pd.DataFrame(parameter_table).drop_duplicates()

### Observable Table

In [6]:
def _build_observable_table(enzmldoc, observable_ids, species_ids):
    """Builds the observable table that is necessary for the PETab format"""
    
    observable_name = []
    reactants = enzmldoc.reactant_dict
    for reactant in reactants:
        observable_name.append(reactants[reactant].name)    
    
    return pd.DataFrame([
        {
            "observableId": observable_id,
            "observableName": observable_name,
            "observableFormula": species_id,
            "noiseFormula": "noiseParameter2_" + observable_id + " + noiseParameter1_" + observable_id + " * " + observable_name,
            "noiseDistribution": "normal"
        }
        for observable_id, species_id, observable_name in zip(observable_ids, species_ids, observable_name)
    ]).drop_duplicates()

### Create SBML

In [7]:
def _build_sbml_file(enzmldoc):
    enzmldoc = enzmldoc.toXMLString()
    enzmldoc = enzmldoc.split(">")
    editedDoc = ""
    for row in enzmldoc:
        print(row)
    return enzmldoc

In [None]:
sbml_file = _build_sbml_file(synth)
print(sbml_file)

### Workflow

In [8]:
model4 = pe.EnzymeMLDocument.fromFile("../synthetic_data/Model_4.omex")
synth = pe.EnzymeMLDocument.fromFile("../synthetic_data/synthetic_data.omex")
synth_below_km = pe.EnzymeMLDocument.fromFile("../synthetic_data/synthetic_data_below_Km.omex")
synth_above_km = pe.EnzymeMLDocument.fromFile("../synthetic_data/synthetic_data_above_Km.omex")

In [9]:
model4.applyModelInitialization("init_values.yaml", True)
synth.applyModelInitialization("../synthetic_data/synthetic_data_init_values.yaml", True)
synth_below_km.applyModelInitialization("../synthetic_data/synthetic_data_init_values.yaml", True)
synth_above_km.applyModelInitialization("../synthetic_data/synthetic_data_init_values.yaml", True)

In [10]:
condition_table = _build_condition_table(synth_below_km)
display(condition_table)

Unnamed: 0,conditionId,conditionName,s0
0,m0,Substrate measurement 1,0.5
1,m1,Substrate measurement 2,1.0


In [11]:
parameter_table = _build_parameters_table(synth_below_km, parameter_scale="lin")
display(parameter_table)

Unnamed: 0,parameterId,parameterScale,upperBound,lowerBound,nominalValue,estimate
0,vmax,lin,150.0,0.01,,1
1,Km,lin,40.0,0.001,,1
2,sd_s0,lin,,,,1
3,sd_s1,lin,,,,1


In [12]:
measurement_table, ob_ids, spec_ids = _build_measurement_table(synth_below_km)
display(measurement_table)

Unnamed: 0,observableId,simulationConditionId,replicateId,measurement,time,noiseParameters
0,s0_conc,m0,repl_substrate_1,0.5,0.0,1;sd_s0
1,s0_conc,m0,repl_substrate_1,0.046,1.0,1;sd_s0
2,s0_conc,m0,repl_substrate_1,0.004,2.0,1;sd_s0
3,s0_conc,m0,repl_substrate_1,0.0,3.0,1;sd_s0
4,s0_conc,m0,repl_substrate_1,0.0,4.0,1;sd_s0
5,s0_conc,m0,repl_substrate_1,0.0,5.0,1;sd_s0
6,s0_conc,m0,repl_substrate_1,0.0,6.0,1;sd_s0
7,s0_conc,m0,repl_substrate_1,0.0,7.0,1;sd_s0
8,s0_conc,m0,repl_substrate_1,0.0,8.0,1;sd_s0
9,s0_conc,m0,repl_substrate_1,-0.0,9.0,1;sd_s0


In [13]:
observable_table = _build_observable_table(synth_below_km, ob_ids, spec_ids)
display(observable_table)

Unnamed: 0,observableId,observableName,observableFormula,noiseFormula,noiseDistribution
0,s0_conc,Substrate,s0,noiseParameter2_s0_conc + noiseParameter1_s0_c...,normal
1,s0_conc,Product,s0,noiseParameter2_s0_conc + noiseParameter1_s0_c...,normal


In [None]:
sbml_file = _build_sbml_file(synth_below_km)
print(sbml_file)

In [14]:
petab_yaml = {
    "format_version": 1,
    "parameter_file": "parameter_table.tsv",
    "problems": [
        {
            "sbml_files": ["model.xml"],
            "measurement_files": ["measurement_table.tsv"],
            "condition_files": ["condition_table.tsv"],
            "observable_files": ["observable_table.tsv"]
        }
    ]
}

In [21]:
content = [
    ("condition_table.tsv", condition_table.to_csv(sep="\t", index=False)),
    ("measurement_table.tsv", measurement_table.to_csv(sep="\t", index=False)),
    ("observable_table.tsv", observable_table.to_csv(sep="\t", index=False)),
    ("parameter_table.tsv", parameter_table.to_csv(sep="\t", index=False)),
    ("model.xml", synth_below_km.toXMLString),#sbml_file),
    ("problems.yaml", yaml.safe_dump(petab_yaml))
]

In [22]:
import io
import zipfile

zip_buffer = io.BytesIO()

with zipfile.ZipFile(zip_buffer, "a",
                     zipfile.ZIP_DEFLATED, False) as zip_file:
    for file_name, data in content:
        zip_file.writestr(file_name, data)

with open('synth_data_below_km.zip', 'wb') as f:
    f.write(zip_buffer.getvalue())

TypeError: object of type 'method' has no len()