# (Unit?) Testing for the whole pipeline
Let's give the whole pipeline a try.

In [2]:
from create_dataset_from_ode_system import get_df_from_ode_system, parse_ode_from_text
from explicit_euler_method import apply_euler_method

from pysr import PySRRegressor

In [3]:
 # this is just a test with Lotka-Volterra
random_seed = 42
ode_system_file_name = "../data/lotka-volterra.txt"

print("Obtaining data for ODE system in file \"%s\"..." % ode_system_file_name)
equations, ordered_variables, initial_conditions, time_step, max_time = parse_ode_from_text(ode_system_file_name)
df = get_df_from_ode_system(equations, ordered_variables, initial_conditions, time_step, max_time)
print(df)

print("Applying explicit Euler method...")
df_euler = apply_euler_method(df)
print(df_euler)

target_names = [c for c in df_euler.columns if c.startswith("F_")]
dictionary_equations = {}

for target in target_names :
    print("Now running symbolic regression for variable \"%s\"..." % target)

    # create dataframes with selection: TODO split data? cross-validation?
    y = df_euler[target]
    X = df_euler[[c for c in df_euler.columns if c != target and not c.startswith("F_")]]

    # initialize PySRRegressor
    symbolic_regressor = PySRRegressor(
        batching=True, # use batches instead of the whole dataset
        batch_size=50, # 50 is the default value for the batches
        model_selection="best",  # Result is mix of simplicity+accuracy
        niterations=1000,
        binary_operators=["+", "*", "/", "-", ],
        unary_operators=["sin", "cos", "exp", "sqrt", ],
        early_stop_condition=("stop_if(loss, complexity) = loss < 1e-6 && complexity < 10"), # stop early if we find a good and simple equation
        verbosity=1,
        )

    symbolic_regressor.fit(X, y)
    dictionary_equations[target] = symbolic_regressor.equations

Obtaining data for ODE system in file "../data/lotka-volterra.txt"...
                x          y       t
0       20.000000  20.000000     0.0
1       20.060210  19.761672     0.1
2       20.120839  19.526656     0.2
3       20.181887  19.294904     0.3
4       20.243353  19.066370     0.4
...           ...        ...     ...
9996   112.599985  66.201453   999.6
9997   112.672987  67.881078   999.7
9998   112.736443  69.605218   999.8
9999   112.790084  71.374822   999.9
10000  112.833635  73.190839  1000.0

[10001 rows x 3 columns]
Applying explicit Euler method...
            F_x       F_y           x          y      t  delta_t
0      0.000000  0.000000   20.000000  20.000000    0.0      0.0
1      0.060210 -0.238328   20.000000  20.000000    0.0      0.1
2      0.000000  0.000000   20.060210  19.761672    0.1      0.0
3      0.060629 -0.235016   20.060210  19.761672    0.1      0.1
4      0.000000  0.000000   20.120839  19.526656    0.2      0.0
...         ...       ...         ..

  if X.columns.is_object() and X.columns.str.contains(" ").any():
  if X.columns.is_object() and X.columns.str.contains(" ").any():


Now running symbolic regression for variable "F_y"...




In [4]:
print(dictionary_equations)

{'F_x':     complexity      loss         score  \
0            1  0.047350  0.000000e+00   
1            2  0.047350  4.857462e-07   
2            3  0.034777  3.086038e-01   
3            5  0.022178  2.249253e-01   
4            6  0.022178  3.156257e-07   
5            7  0.008525  9.560565e-01   
6            9  0.006742  1.173441e-01   
7           11  0.006471  2.052558e-02   
8           12  0.006240  3.634201e-02   
9           13  0.006240  2.179532e-05   
10          17  0.006154  3.471029e-03   
11          18  0.006154  1.612034e-05   
12          19  0.005523  1.081606e-01   

                                             equation  \
0                                        0.0055041015   
1                                     cos(-7.8484645)   
2                                 (-0.0008292302 * y)   
3                      ((y * -0.016591342) * delta_t)   
4                  ((y * sin(-0.01659152)) * delta_t)   
5        (((y * -0.024969868) + 2.0042782) * delta_t)   
6   

In [1]:
# next step, take all equations, compute derivative in delta_t and set delta_t to zero
# (replicating lim delta_t -> 0)


NameError: name 'dictionary_equations' is not defined