# 1: Dataset generation component

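This notebook generates the training and test datasets. Each dataset has the shape _(n_init_conditions * n_sims_per_init_condition, n_steps + 1, n_species + time)_, i.e. one trajectory per simulation, one row per recorded time point, and one column per species plus a time column.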

## Step 0: Setup

In [21]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [31]:
from simulation_manager import SimulationManager

import pickle
import os
from time import time

In [23]:
# Absolute path of the working directory; the CRN model paths in the
# following sections are resolved relative to it.
current_dir = os.path.abspath(os.curdir)

In [24]:
def _simulate_and_save(sm, split, label, config_name, zero_perturb_prob, zero_species_range):
    """
    Simulate one dataset split, pickle it under "ssa_datasets/", and report its shape.

    Parameters:
    - sm: the simulation manager used to draw initial conditions and simulate
    - split: file-name suffix of the split ("train" or "test")
    - label: human-readable split name used in the progress message
    - config_name: file-name prefix identifying the generation configuration
    - zero_perturb_prob: float forwarded to `get_randomized_initial_conditions`
    - zero_species_range: tuple forwarded as `zero_perturb_range`

    Returns:
    - (data, elapsed): the simulated data and the seconds spent inside `sm.simulate`
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments during a long simulation.
    from time import perf_counter

    init_conditions = sm.get_randomized_initial_conditions(
        zero_perturb_prob=zero_perturb_prob,
        zero_perturb_range=zero_species_range
    )

    started = perf_counter()
    data = sm.simulate(init_conditions)
    elapsed = perf_counter() - started

    with open(f"ssa_datasets/{config_name}_{split}.pickle", "wb") as f:
        pickle.dump(data, f)
    print(f"Generated {label} data of shape {data.shape}.")
    return data, elapsed


def gen_per_config(name, path, configs, zero_perturb_prob=0.9, zero_species_range=(0, 10), plot=False):
    """
    Generate train and test datasets for the provided CRN with a set of configurations.
    Example `configs` dict:
        {"case_1": {
            "end_time": 32,
            "n_steps": 16,
            "n_init_conditions": 100,
            "n_sims_per_init_condition": 500
            }
        }

    Datasets are stored in the "ssa_datasets" directory as
    "{name}_{n_steps}_{end_time}_{n_init_conditions}_{n_sims_per_init_condition}_train.pickle"
    and "..._test.pickle". The time spent simulating the *training* data of each
    case (in seconds) is stored in "{name}_ssa_simulation_times.pickle" in the
    current working directory; the test simulation is not timed.

    Parameters:
    - name: string representing the name of the CRN
    - path: string representing the filepath to the CRN definition (Antimony .txt or SBML .xml)
    - configs: dict containing the different generation configurations
    - zero_perturb_prob: float representing the probability of perturbing 0-valued concentrations
    - zero_species_range: tuple representing the range of perturbation
    - plot: boolean representing whether to plot the generated trajectories (1 plot per initial condition)

    Raises:
    - KeyError: if any configuration lacks a required key. All configurations
      are checked before the first simulation starts, so a typo in a later case
      does not waste the time spent simulating the earlier ones.
    """
    required_keys = ("end_time", "n_steps", "n_init_conditions", "n_sims_per_init_condition")
    for case, config in configs.items():
        missing = [key for key in required_keys if key not in config]
        if missing:
            raise KeyError(
                f"Configuration '{case}' is missing required key(s): {', '.join(missing)}"
            )

    # exist_ok avoids the check-then-create race and makes re-runs a no-op.
    os.makedirs("ssa_datasets", exist_ok=True)

    simulation_times = dict()
    for case, config in configs.items():
        end_time = config["end_time"]
        n_steps = config["n_steps"]
        n_init_conditions = config["n_init_conditions"]
        n_sims_per_init_condition = config["n_sims_per_init_condition"]

        config_name = f"{name}_{n_steps}_{end_time}_{n_init_conditions}_{n_sims_per_init_condition}"

        sm = SimulationManager(
            path_to_sbml=path,
            model_name=name,
            n_init_conditions=n_init_conditions,
            n_sims_per_init_condition=n_sims_per_init_condition,
            end_time=end_time,
            n_steps=n_steps
        )

        # training data (the only split whose simulation time is recorded)
        train_data, simulation_times[case] = _simulate_and_save(
            sm, "train", "training", config_name, zero_perturb_prob, zero_species_range
        )

        if plot:
            print("Plotting...")
            sm.plot_simulations(
                f"plots/{config_name}__ssa",
                train_data,
                n_init_conditions,
                n_sims_per_init_condition,
                sm.get_column_names()
            )

        # test data, drawn from a fresh set of randomized initial conditions
        _simulate_and_save(
            sm, "test", "test", config_name, zero_perturb_prob, zero_species_range
        )

        print(f"\nFinished simulation for {case}.\n")

    with open(f"{name}_ssa_simulation_times.pickle", "wb") as f:
        pickle.dump(simulation_times, f)

## Step 1: Multifeedback model

In [25]:
# CRN name used in output file names, and the absolute path to its definition file.
name = "multifeedback"
relative_path = "crn_models/1_multifeedback.txt"
path = os.path.join(current_dir, relative_path)

In [26]:
# Two generation set-ups with the same time grid and the same total number of
# simulations (20,000), split differently between initial conditions and
# repeats per initial condition.
gen_config = dict(
    # depth: fewer initial conditions, more simulations for each
    case_1=dict(
        end_time=32,
        n_steps=16,
        n_init_conditions=100,
        n_sims_per_init_condition=200,
    ),
    # breadth: more initial conditions, fewer simulations for each
    case_2=dict(
        end_time=32,
        n_steps=16,
        n_init_conditions=200,
        n_sims_per_init_condition=100,
    ),
)

In [27]:
# Generate and pickle the train/test datasets for every multifeedback case,
# plotting the training trajectories as well.
gen_per_config(name=name, path=path, configs=gen_config, plot=True)

>> Performing stochastic simulation for initial condition 1 / 100.
>> Performing stochastic simulation for initial condition 2 / 100.
>> Performing stochastic simulation for initial condition 3 / 100.
>> Performing stochastic simulation for initial condition 4 / 100.
>> Performing stochastic simulation for initial condition 5 / 100.
>> Performing stochastic simulation for initial condition 6 / 100.
>> Performing stochastic simulation for initial condition 7 / 100.
>> Performing stochastic simulation for initial condition 8 / 100.
>> Performing stochastic simulation for initial condition 9 / 100.
>> Performing stochastic simulation for initial condition 10 / 100.
>> Performing stochastic simulation for initial condition 11 / 100.
>> Performing stochastic simulation for initial condition 12 / 100.
>> Performing stochastic simulation for initial condition 13 / 100.
>> Performing stochastic simulation for initial condition 14 / 100.
>> Performing stochastic simulation for initial condition

## Step 2: Repressilator model

In [28]:
# CRN name used in output file names, and the absolute path to its definition file.
name = "repressilator"
relative_path = "crn_models/2_repressilator.txt"
path = os.path.join(current_dir, relative_path)

In [29]:
# Single generation set-up for the repressilator.
gen_config = dict(
    # breadth: many initial conditions, fewer simulations for each
    case_1=dict(
        end_time=128,
        n_steps=32,
        n_init_conditions=200,
        n_sims_per_init_condition=100,
    ),
)

In [30]:
# Generate and pickle the train/test datasets for the repressilator case,
# plotting the training trajectories as well.
gen_per_config(name=name, path=path, configs=gen_config, plot=True)

>> Performing stochastic simulation for initial condition 1 / 200.
>> Performing stochastic simulation for initial condition 2 / 200.
>> Performing stochastic simulation for initial condition 3 / 200.
>> Performing stochastic simulation for initial condition 4 / 200.
>> Performing stochastic simulation for initial condition 5 / 200.
>> Performing stochastic simulation for initial condition 6 / 200.
>> Performing stochastic simulation for initial condition 7 / 200.
>> Performing stochastic simulation for initial condition 8 / 200.
>> Performing stochastic simulation for initial condition 9 / 200.
>> Performing stochastic simulation for initial condition 10 / 200.
>> Performing stochastic simulation for initial condition 11 / 200.
>> Performing stochastic simulation for initial condition 12 / 200.
>> Performing stochastic simulation for initial condition 13 / 200.
>> Performing stochastic simulation for initial condition 14 / 200.
>> Performing stochastic simulation for initial condition