# Using Tellurium-generated dataset as input for StochNetV2

We replace the `build_simulation_dataset` utility of StochNetV2 with a custom Tellurium-based dataset generation utility.

In [256]:
import tellurium as te
import numpy as np
import pandas as pd

from importlib import import_module
from pathlib import Path
import json

from stochnet_v2.utils.file_organisation import ProjectFileExplorer
from stochnet_v2.dataset.simulation_gillespy import build_simulation_dataset
from stochnet_v2.dataset.dataset import DataTransformer
from stochnet_v2.static_classes.trainer import ToleranceDropLearningStrategy
from stochnet_v2.dynamic_classes.model import StochNet
from stochnet_v2.static_classes.trainer import Trainer

## Tellurium dataset generator

In [257]:
class TeDatasetGenerator:
    
    def __init__(
        self,
        filename="example.xml",
        n_initial_settings=10,
        n_simulations_per_setting=10,
        steps=11,
        endtime=1.0
    ):
        self.filename = filename
        self.n_initial_settings = n_initial_settings
        self.n_simulations_per_setting = n_simulations_per_setting
        self.steps = steps
        self.endtime = endtime
        
        self.model = self.set_up_model()
        self.init_concentrations = self.randomize_species_concentrations()
        self.randomized_parameters = self.randomize_parameters()
        self.variable_names = self.get_variable_names()
        
    def simulate(self):
        results = []
        for init_setting in range(self.n_initial_settings):
            for sim_iteration in range(self.n_simulations_per_setting):
                self.model.reset()
                self.assign_custom_values_to_model(self.init_concentrations[init_setting])
                self.assign_custom_values_to_model(self.randomized_parameters[init_setting])
                sim = self.model.simulate(0.0, self.endtime, self.steps)

                # Add the randomized parameters as new columns
                for param_name, param_value in self.randomized_parameters[init_setting].items():
                    param_column = np.full((sim.shape[0], 1), param_value)
                    sim = np.hstack((sim, param_column))

                results.append(sim)

        return np.concatenate([np.expand_dims(a, axis=0) for a in results], axis=0)
        
        
    def set_up_model(self):
        model = te.loads(self.filename)
        model.integrator = "gillespie"
        model.integrator.seed = 42
        
        return model
    
    
    def randomize_species_concentrations(self):
        """
        Returns: list of n dictionaries, where keys are species.
        """
        species_names = self.model.getFloatingSpeciesConcentrationIds()
        species_values = self.model.getFloatingSpeciesConcentrations()

        random_concentrations = []
        for _ in range(self.n_initial_settings):
            iteration_concentrations = {}

            for name, value in zip(species_names, species_values):
                low = max(0, int(value / 2))
                high = int(value * 2)
                iteration_concentrations[name] = float(np.random.randint(low, high))

            random_concentrations.append(iteration_concentrations)

        return random_concentrations
    
        
    def randomize_parameters(self, sigma=0.1):
        """
        Returns: list of n dictionaries, where keys are parameters.
        """
        parameter_names = self.model.getGlobalParameterIds()
        parameter_values = self.model.getGlobalParameterValues()

        random_parameters = []
        num_parameters = len(parameter_names)

        for i in range(self.n_initial_settings):
            iteration_parameters = {}

            for j, (name, value) in enumerate(zip(parameter_names, parameter_values)):
                if i % num_parameters == j:
                    shift = np.random.uniform(-sigma, sigma) * value
                    iteration_parameters[name] = value + shift
                else:
                    # keep the default value for other parameters
                    iteration_parameters[name] = value

            random_parameters.append(iteration_parameters)

        return random_parameters


    def assign_custom_values_to_model(self, value_dict):
        """
        value_dict: can be species concentrations, parameters values, etc.
        Returns: the model with assigned values.
        """
        for prop_name, prop_value in value_dict.items():
            self.model[prop_name] = prop_value


    def get_variable_names(self):
        names = ["time"]
        names.extend(self.model.getFloatingSpeciesConcentrationIds())
        names.extend(self.model.getGlobalParameterIds())

        return names
    
    def get_initial_concentrations_array(self):
        return np.array([list(d.values()) for d in self.init_concentrations])
    
    def get_randomized_params_dict(self):
        params = {}
        for d in self.randomized_parameters:
            for k, v in d.items():
                if k not in params:
                    params[k] = []
                params[k].append(v)
        return params

    def get_merged_concentrations_and_params(self):
        keys = list(self.randomized_parameters[0].keys())
        new_rows = []
        
        for param_dict, row in zip(self.randomized_parameters, self.get_initial_concentrations_array()):
            new_row = np.append(row, [param_dict[key] for key in keys])
            new_rows.append(new_row)

        new_concentrations = np.array(new_rows)

        return new_concentrations

## Dataset generation and transformation

In [260]:
name = "SIR"
timestep = 0.1
endtime = 10.0
n_initial_settings = 100
n_simulations_per_setting = 10

params = ["gamma", "beta"]

model_name = name
dataset_id = name
model_id = name

In [261]:
project_folder = Path("").parent.resolve() / model_name
project_explorer = ProjectFileExplorer(project_folder)
dataset_explorer = project_explorer.get_dataset_file_explorer(timestep, dataset_id)
model_explorer = project_explorer.get_model_file_explorer(timestep, model_id)

### Generate initial settings

In [242]:
# SN settings generation using custom network definition via GillesPy2
CRN_module = import_module(model_name)
CRN_class = getattr(CRN_module, model_name)

settings = CRN_class.get_initial_settings(n_initial_settings)
print(settings)

[[ 99. 196.  48.]
 [ 53. 155.  66.]
 [ 87. 142.  79.]
 [121. 150.  98.]
 [144.  98. 187.]
 [ 81. 144. 108.]
 [137. 157. 180.]
 [ 56. 161.  72.]
 [104.  80.  72.]
 [161.  84. 139.]]


In [264]:
# Tellurium settings generation
te_gen = TeDatasetGenerator(
    n_initial_settings=100,
    n_simulations_per_setting=10,
    steps=11,
    endtime=10.0
)
te_settings = te_gen.get_initial_concentrations_array()
print(te_settings)

[[ 73. 131. 125.]
 [152. 188. 105.]
 [155.  71. 173.]
 [ 97. 116. 134.]
 [ 56. 189.  61.]
 [ 60. 199. 114.]
 [121. 159.  82.]
 [103.  51.  96.]
 [134. 191.  69.]
 [182. 127. 124.]
 [155. 171. 174.]
 [ 51. 161. 155.]
 [ 57. 120.  72.]
 [146. 163. 154.]
 [184. 152. 141.]
 [ 75. 190.  82.]
 [172.  65.  59.]
 [170.  62. 114.]
 [ 54. 182. 180.]
 [186. 107.  69.]
 [130. 128. 107.]
 [ 92.  91. 191.]
 [116. 194.  54.]
 [161. 178. 127.]
 [115. 140. 146.]
 [110. 197. 108.]
 [195.  71. 139.]
 [188. 174.  62.]
 [113.  97. 195.]
 [ 81. 102. 143.]
 [107.  67.  67.]
 [ 91.  96. 146.]
 [181. 187.  93.]
 [ 68. 178. 183.]
 [191. 102. 138.]
 [ 64. 112.  99.]
 [138.  69.  62.]
 [133. 179. 123.]
 [ 76.  83.  96.]
 [145. 142. 124.]
 [157. 175. 192.]
 [ 74. 163.  69.]
 [132. 189. 187.]
 [ 61. 175.  88.]
 [146.  68.  55.]
 [193. 174. 116.]
 [142. 125.  64.]
 [ 78.  87. 163.]
 [118. 129. 128.]
 [176. 142. 132.]
 [ 54. 174.  91.]
 [171. 106. 193.]
 [ 79. 168. 164.]
 [164. 173.  66.]
 [144. 114. 122.]
 [169. 137

In [265]:
# save the Tellurium-generated settings
np.save(dataset_explorer.settings_fp, te_settings)

### Generate dataset

In [124]:
# SN dataset generation
dataset = build_simulation_dataset(
    model_name,
    n_initial_settings,
    n_simulations_per_setting,
    timestep,
    endtime,
    dataset_explorer.dataset_folder,
    params_to_randomize=params,
)

            Model.set_parameter has been deprecated.  Future releases of GillesPy2 may
            not support this feature.  Parameter.expression should only be set in the constructor.
            
            Model.set_parameter has been deprecated.  Future releases of GillesPy2 may
            not support this feature.  Parameter.expression should only be set in the constructor.
            
            Model.set_parameter has been deprecated.  Future releases of GillesPy2 may
            not support this feature.  Parameter.expression should only be set in the constructor.
            
            Model.set_parameter has been deprecated.  Future releases of GillesPy2 may
            not support this feature.  Parameter.expression should only be set in the constructor.
            
            Model.set_parameter has been deprecated.  Future releases of GillesPy2 may
            not support this feature.  Parameter.expression should only be set in the constructor.
            
     

In [129]:
print(f"dataset shape: {dataset.shape}\n")
print(f"first simulation:\n{dataset[0]}")

dataset shape: (100, 11, 6)

first simulation:
[[  0.          89.         186.          50.           1.01743801
    3.        ]
 [  0.1         88.         196.          41.           1.01743801
    3.        ]
 [  0.2         83.         204.          38.           1.01743801
    3.        ]
 [  0.3         75.         216.          34.           1.01743801
    3.        ]
 [  0.4         69.         223.          33.           1.01743801
    3.        ]
 [  0.5         71.         225.          29.           1.01743801
    3.        ]
 [  0.6         66.         232.          27.           1.01743801
    3.        ]
 [  0.7         59.         240.          26.           1.01743801
    3.        ]
 [  0.8         55.         246.          24.           1.01743801
    3.        ]
 [  0.9         52.         250.          23.           1.01743801
    3.        ]
 [  1.          48.         256.          21.           1.01743801
    3.        ]]


In [266]:
# Tellurium dataset generation
te_dataset = te_gen.simulate()

In [274]:
print(f"tellurium dataset shape: {te_dataset.shape}\n")
print(f"first simulation:\n{te_dataset[0]}")

tellurium dataset shape: (1000, 11, 6)

first simulation:
[[  0.        73.       131.       125.         2.984349   1.      ]
 [  1.        65.       197.        67.         2.984349   1.      ]
 [  2.        35.       252.        42.         2.984349   1.      ]
 [  3.        14.       278.        37.         2.984349   1.      ]
 [  4.         5.       290.        34.         2.984349   1.      ]
 [  5.         2.       294.        33.         2.984349   1.      ]
 [  6.         0.       296.        33.         2.984349   1.      ]
 [  7.         0.       296.        33.         2.984349   1.      ]
 [  8.         0.       296.        33.         2.984349   1.      ]
 [  9.         0.       296.        33.         2.984349   1.      ]
 [ 10.         0.       296.        33.         2.984349   1.      ]]


In [275]:
# save the Tellurium-generated dataset
np.save(dataset_explorer.dataset_fp, te_dataset)

### Dataset transformation and preparation for NN input

In [276]:
dt = DataTransformer(
    dataset_explorer.dataset_fp,
    with_timestamps=True,
    nb_randomized_params=len(params),
)

In [277]:
dt.save_data_for_ml_hdf5(
    dataset_folder=dataset_explorer.dataset_folder,
    nb_past_timesteps=1,
    test_fraction=0.2,
    keep_timestamps=False,
    rescale=True,
    positivity=False,
    shuffle=True,
    slice_size=100,
    force_rewrite=True,
)

dataset.dataset - INFO - Fitting scaler, positivity=False


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 1021.43it/s]

dataset.dataset - INFO - Train data saved to /Users/ivan/Developer/git/deep-abstractions/tool_migration/stochnetv2/SIR/dataset/data/0.1/SIR/train_rescaled.hdf5, 
Shapes: x: (8000, 1, 5), y: (8000, 3)
dataset.dataset - INFO - Test data saved to /Users/ivan/Developer/git/deep-abstractions/tool_migration/stochnetv2/SIR/dataset/data/0.1/SIR/test_rescaled.hdf5, 
Shapes: x: (2000, 1, 5), y: (2000, 3)





## NN

### Training

In [278]:
# NN Architecture Configuration
body_config_path = model_explorer.body_config_fp
mixture_config_path = model_explorer.mixture_config_fp

body_hidden_size = 30
body_n_blocks = 2
body_activation = "relu"
body_regularizer = "none"
body_kernel_constraint = "none"
body_bias_constraint = "none"
body_kernel_regularizer = "l2"
body_bias_regularizer = "l2"


components_hidden_size = "none"
n_normal_diag = 6
n_normal_tril = 0
n_log_normal_tril = 0
components_activation = "none"
components_regularizer = "none"
components_kernel_constraint = "none"
components_bias_constraint = "none"
components_kernel_regularizer = "l2"
components_bias_regularizer = "l2"

body_config = {
    "body_fn_name": "body_b",
    "block_name": "a",
    "hidden_size": body_hidden_size,
    "n_blocks": body_n_blocks,
    "use_batch_norm": False,
    "activation": body_activation,
    "activity_regularizer": body_regularizer,
    "kernel_constraint": body_kernel_constraint,
    "kernel_regularizer": body_kernel_regularizer,
    "bias_constraint": body_bias_constraint,
    "bias_regularizer": body_bias_regularizer,
}

categorical_config = {
    "hidden_size": components_hidden_size,
    "activation": components_activation,
    "coeff_regularizer": "none",
    "kernel_constraint": components_kernel_constraint,  # "maxnorm"
    "kernel_regularizer": components_kernel_regularizer,
    "bias_constraint": components_bias_constraint,  # "maxnorm"
    "bias_regularizer": components_bias_regularizer,
}

normal_diag_config = {
    "hidden_size": components_hidden_size,
    "activation": components_activation,
    "mu_regularizer": components_regularizer,
    "diag_regularizer": "l2",
    "kernel_constraint": components_kernel_constraint,
    "kernel_regularizer": components_kernel_regularizer,
    "bias_constraint": components_bias_constraint,
    "bias_regularizer": components_bias_regularizer,
}

normal_tril_config = {
    "hidden_size": components_hidden_size,
    "activation": components_activation,
    "mu_regularizer": components_regularizer,
    "diag_regularizer": components_regularizer,
    "sub_diag_regularizer": components_regularizer,
    "kernel_constraint": components_kernel_constraint,
    "kernel_regularizer": components_kernel_regularizer,
    "bias_constraint": components_bias_constraint,
    "bias_regularizer": components_bias_regularizer,
}

log_normal_tril_config = {
    "hidden_size": components_hidden_size,
    "activation": components_activation,
    "mu_regularizer": components_regularizer,
    "diag_regularizer": components_regularizer,
    "sub_diag_regularizer": components_regularizer,
    "kernel_constraint": components_kernel_constraint,
    "kernel_regularizer": components_kernel_regularizer,
    "bias_constraint": components_bias_constraint,
    "bias_regularizer": components_bias_regularizer,
}

# Write the configurations to disk
mixture_config = (
    [["categorical", categorical_config]]
    + [["normal_diag", normal_diag_config] for i in range(n_normal_diag)]
    + [["normal_tril", normal_tril_config] for i in range(n_normal_tril)]
    + [["log_normal_tril", log_normal_tril_config] for i in range(n_log_normal_tril)]
)

with open(body_config_path, "w+") as f:
    json.dump(body_config, f, indent="\t")

with open(mixture_config_path, "w+") as f:
    json.dump(mixture_config, f, indent="\t")

learning_strategy = ToleranceDropLearningStrategy(
    optimizer_type="adam",
    initial_lr=1e-4,
    lr_decay=0.3,
    epochs_tolerance=7,
    minimal_lr=1e-7,
)

In [279]:
# Training configuration
n_epochs = 100
batch_size = 256
add_noise = True
stddev = 0.01
dataset_kind = "hdf5"

# NN definition and training
nn = StochNet(
    nb_past_timesteps=1,
    nb_features=3,
    nb_randomized_params=len(params),
    project_folder=project_folder,
    timestep=timestep,
    dataset_id=dataset_id,
    model_id=model_id,
)

ckpt_path = None

ckpt_path = Trainer().train(
    nn,
    n_epochs=n_epochs,
    batch_size=batch_size,
    learning_strategy=learning_strategy,
    ckpt_path=ckpt_path,
    dataset_kind=dataset_kind,
    add_noise=add_noise,
    stddev=stddev,
)

with open("best_ckpt.txt", "w") as f:
    f.write(ckpt_path)

static_classes.nn_bodies - INFO - 
 ** Building 'body_b' body, hidden size: 30 
    with 2 of 'a' block 
    activation: relu 
    activity_regularizer: none 
    kernel_constraint: none 
    kernel_regularizer: l2 
    bias_constraint: none 
    bias_regularizer: l2 
    use BatchNorm: False 
 ** 

static_classes.top_layers - DEBUG - Mixture components share nn outputs
static_classes.top_layers - DEBUG - base shape: [None, 30]
static_classes.top_layers - DEBUG - Using non-nnegative elu activation for diagonal
static_classes.top_layers - DEBUG - Using non-nnegative elu activation for diagonal
static_classes.top_layers - DEBUG - Using non-nnegative elu activation for diagonal
static_classes.top_layers - DEBUG - Using non-nnegative elu activation for diagonal
static_classes.top_layers - DEBUG - Using non-nnegative elu activation for diagonal
static_classes.top_layers - DEBUG - Using non-nnegative elu activation for diagonal
static_classes.model - INFO - Model's graph keys saved at /Users

static_classes.trainer - INFO - 
Epoch: 36
static_classes.trainer - INFO -  = Minimal loss value = 2.416475296020508,
 - 31 steps took 5.2 seconds, avg_step_time=0.167
 - test time: 0.8 seconds
static_classes.trainer - INFO - 
Epoch: 37
static_classes.trainer - INFO -  = Minimal loss value = 2.05922269821167,
 - 31 steps took 5.3 seconds, avg_step_time=0.170
 - test time: 0.8 seconds
static_classes.trainer - INFO - 
Epoch: 38
static_classes.trainer - INFO -  = Minimal loss value = 1.7058104276657104,
 - 31 steps took 5.3 seconds, avg_step_time=0.171
 - test time: 0.9 seconds
static_classes.trainer - INFO - 
Epoch: 39
static_classes.trainer - INFO -  = Minimal loss value = 1.2477200031280518,
 - 31 steps took 5.3 seconds, avg_step_time=0.172
 - test time: 0.9 seconds
static_classes.trainer - INFO - 
Epoch: 40
static_classes.trainer - INFO -  = Minimal loss value = 0.9137452840805054,
 - 31 steps took 5.2 seconds, avg_step_time=0.168
 - test time: 0.9 seconds
static_classes.trainer - INF

static_classes.trainer - INFO - 
Epoch: 78
static_classes.trainer - INFO -  = Minimal loss value = -5.74231481552124,
 - 31 steps took 4.7 seconds, avg_step_time=0.150
 - test time: 0.8 seconds
static_classes.trainer - INFO - 
Epoch: 79
static_classes.trainer - INFO -  = Minimal loss value = -5.74231481552124,
 - 31 steps took 4.5 seconds, avg_step_time=0.147
 - test time: 0.8 seconds
static_classes.trainer - INFO - 
Epoch: 80
static_classes.trainer - INFO -  = Minimal loss value = -5.74231481552124,
 - 31 steps took 4.6 seconds, avg_step_time=0.150
 - test time: 0.9 seconds
static_classes.trainer - INFO - 
Epoch: 81
static_classes.trainer - INFO -  = Minimal loss value = -5.74231481552124,
 - 31 steps took 4.6 seconds, avg_step_time=0.149
 - test time: 0.8 seconds
static_classes.trainer - INFO - 
Epoch: 82
static_classes.trainer - INFO -  = Minimal loss value = -5.74231481552124,
 - 31 steps took 4.6 seconds, avg_step_time=0.149
 - test time: 0.9 seconds
static_classes.trainer - INFO 

### Inference

In [280]:
checkpoint = ""
with open("best_ckpt.txt", "r") as f:
    checkpoint = f.readline()

nn = StochNet(
    nb_past_timesteps=1,
    nb_features=3,
    nb_randomized_params=len(params),
    project_folder=project_folder,
    timestep=timestep,
    dataset_id=dataset_id,
    model_id=model_id,
    mode="inference",
)

# n_settings = 9
traj_per_setting = 10
n_steps = 10

settings = te_gen.get_merged_concentrations_and_params()
setting_idx = 0
curr_state = settings[setting_idx : setting_idx + 1, np.newaxis, :]

next_state_samples = nn.next_state(
    curr_state_values=curr_state,
    curr_state_rescaled=False,
    scale_back_result=True,
    round_result=False,
    n_samples=10000,
)

# print(next_state_samples.shape)
# first_sample = next_state_samples[0]
# first_setting = first_sample[0]
# print(first_setting)

nn_traces = nn.generate_traces(
    settings[:, np.newaxis, :],
    n_steps=n_steps,
    n_traces=traj_per_setting,
    curr_state_rescaled=False,
    scale_back_result=True,
    round_result=True,
    add_timestamps=True,
)

# np.save("nn_traces.npy", nn_traces)

static_classes.model - INFO - Model created in inference mode.


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:01<00:00,  6.81it/s]


In [281]:
print(te_dataset.shape)
print(nn_traces.shape)

(1000, 11, 6)
(100, 10, 11, 4)


In [282]:
# get the first initial setting, the first simulation
print(nn_traces[0][0])

[[  0.   73.  131.  125. ]
 [  0.1  71.  192.   72. ]
 [  0.2  44.  258.   47. ]
 [  0.3  21.  300.   40. ]
 [  0.4  11.  303.   31. ]
 [  0.5   5.  311.   30. ]
 [  0.6   4.  316.   29. ]
 [  0.7   1.  318.   27. ]
 [  0.8   1.  321.   27. ]
 [  0.9   2.  321.   27. ]
 [  1.    0.  325.   29. ]]
