In [1]:
import numpy as np
import pandas as pd

import utils

#### 1) Defining the dataset generation parameters

These parameters define the true dynamics of the system, as well as how many samples will be available in our training set.

In [2]:
from parameters import h, v, u

# DYNAMICS
# h, v and u are used exactly as imported from `parameters`; they are bundled
# into one list because `utils.systemDynamics` receives them as one argument.
params = [h, v, u]


# INITIAL DISTRIBUTION
# Lower and upper bounds handed to `utils.getInitialState`.
inf_box, sup_box = -3, 3

# LENGTH OF HORIZON
# Number of times the dynamics are applied to each trajectory.
n_steps_ahead = 20

# NUMBER OF TRAJECTORY SIMULATIONS
# Count argument handed to `utils.getInitialState`.
n_simulations = 10_000


#### 2) Build trajectories and store the pairs $(x_{t}, x_{t+1})$

In [3]:
# Accumulates one [x_t, x_{t+1}] entry per simulation and per transition.
pairs = []

for step in range(n_steps_ahead + 1):
    if step == 0:
        # Step 0 only creates the starting states; there is no transition yet.
        states = utils.getInitialState(inf_box, sup_box, n_simulations)
        continue

    # Keep a snapshot of the states before advancing them
    # (guards against `systemDynamics` modifying its input -- TODO confirm).
    previous_states = states.copy()
    states = utils.systemDynamics(states, params)

    # Pair each state with its successor, row by row.
    pairs.extend(
        [before, after] for before, after in zip(previous_states, states)
    )

#### 3) Build dataset (using DataFrame type)

In [4]:
# Name the columns at construction time so that an empty `pairs` list still
# yields a well-formed (x, y) frame instead of failing on a length mismatch
# when the column labels are assigned afterwards.
df = pd.DataFrame(pairs, columns=['x', 'y'])
df

Unnamed: 0,x,y
0,"[0.3101802085903067, -2.7617156216425283, -0.9...","[-0.8985558258519621, -1.873482994995219, -0.3..."
1,"[1.2107662316485737, -0.518452332717056, -2.02...","[-0.13627455041796743, -1.1783622746406195, -1..."
2,"[-1.8964583531063366, -2.3254043134674305, -2....","[-2.708374788860225, -3.586670225697164, -1.96..."
3,"[-2.5967448061008858, -2.726547485741719, 0.68...","[-1.6451114237123, -1.5670674705954208, 1.2872..."
4,"[-2.4125823366086436, 2.487834736931684, -1.95...","[-3.8019700085300654, 1.9225012980544394, -1.3..."
...,...,...
199995,"[-1.7718618511684616, -0.16709753618010514, 9....","[-1.6389094212325979, -1.6611938019455175, 9.9..."
199996,"[-5.158593854942994, -1.1046755483937805, 14.3...","[-3.6791207988856507, -1.3519799688968193, 14...."
199997,"[4.217546153103841, 0.20471517990648258, 11.12...","[2.7292585161854768, 0.3917978081645609, 11.72..."
199998,"[-1.894542999955521, -5.24556191514504, 12.435...","[-2.0898540046790064, -3.7583317340923887, 13...."


#### 4) Store dataset in CSV

In this step, we also serialize the arrays into strings so that they can be stored in the CSV file. In the data-processing step of the training notebook, we convert those strings back into arrays.

In [5]:
# Serialize the arrays into strings
df['x'] = df['x'].apply(lambda x: ' '.join(map(str, x)))
df['y'] = df['y'].apply(lambda x: ' '.join(map(str, x)))

# Save the DataFrame to a CSV file
df.to_csv('data.csv', index=False)