In [1]:
from time_series.data_generators import LorenzGenerator
from time_series.time_series_models import KernelRidgeRegression, MovingAverageEstimator
from time_series.kernels import GaussianKernel
from time_series.evaluators.mse_one_step import MeanSquaredError

[32m2025-09-16 11:31:26.444[0m | [1mINFO    [0m | [36mtime_series.config[0m:[36m<module>[0m:[36m13[0m - [1mPROJ_ROOT path is: /home/james/Repo/PhD Repo/time_series_clustering[0m


In [2]:
import numpy as np
import itertools

In [3]:
import yaml

# Load the experiment configuration. The encoding is given explicitly so the
# file is decoded the same way regardless of the platform's locale default.
with open("experiment.yaml", "r", encoding="utf-8") as file:
    # safe_load only builds plain Python objects (dicts, lists, scalars).
    config = yaml.safe_load(file)

In [4]:
# Split the config into its two top-level sections.
definitions_conf = config["definitions"]
# Misspelled alias of `definitions_conf`, kept because later cells read it.
defintitions_conf = definitions_conf
experiments_conf = config["experiments"]

In [5]:
# Resolve the names used in the config's "definitions" section into the classes
# imported at the top of the notebook.
#
# Section name -> (key that names the class inside each entry,
#                  {name used in the config: class}).
_DEFINITION_REGISTRY = {
    "datasets": ("generator", {"Lorenz": LorenzGenerator}),
    "models": (
        "model",
        {
            "KernelRidgeRegression": KernelRidgeRegression,
            # NOTE(review): assumes the config refers to this model by its
            # class name, like the other models/kernels/evaluators - confirm.
            "MovingAverageEstimator": MovingAverageEstimator,
        },
    ),
    "kernels": ("kernel", {"GaussianKernel": GaussianKernel}),
    "evaluators": ("evaluator", {"MeanSquaredError": MeanSquaredError}),
}


def _resolve_definitions(section_name, section_conf):
    """Turn one definitions section into ``{entry: {<class key>, "parameters"}}``.

    Parameters
    ----------
    section_name : str
        A key of ``_DEFINITION_REGISTRY`` ("datasets", "models", ...).
    section_conf : dict or None
        Maps an entry name to its config, which must contain the section's
        class key and may contain "parameters". ``None`` (an empty section in
        the YAML) is treated as no entries.

    Returns
    -------
    dict
        One dict per entry holding the resolved class under the section's
        class key and a "parameters" dict (empty when none were given). An
        entry whose class name is not registered is kept without the class
        key, and a warning is emitted.

    Raises
    ------
    KeyError
        If an entry does not contain the section's class key.
    """
    import warnings

    class_key, known_classes = _DEFINITION_REGISTRY[section_name]
    resolved = dict()
    for entry_name, entry_conf in (section_conf or {}).items():
        entry = dict()
        class_name = entry_conf[class_key]
        if class_name in known_classes:
            entry[class_key] = known_classes[class_name]
        else:
            # Surface the problem here instead of as a bare KeyError when the
            # entry is first used.
            warnings.warn(
                f"Unknown {class_key} {class_name!r} for {section_name} entry "
                f"{entry_name!r}; expected one of {sorted(known_classes)}. "
                f"The entry is kept without a {class_key}."
            )
        # `or {}` also covers a bare "parameters:" key, which loads as None.
        entry["parameters"] = entry_conf.get("parameters") or {}
        resolved[entry_name] = entry
    return resolved


definitions = {
    section_name: _resolve_definitions(section_name, section_conf)
    for section_name, section_conf in defintitions_conf.items()
    if section_name in _DEFINITION_REGISTRY  # unrecognised sections are ignored
}

# Per-section names, kept so cells that read them directly keep working.
datasets = definitions.get("datasets", {})
models = definitions.get("models", {})
kernels = definitions.get("kernels", {})
evaluators = definitions.get("evaluators", {})

In [6]:
class TimeSeriesData:
    """Container bundling series inputs, optional targets and split settings.

    Attributes
    ----------
    X
        The inputs, stored as passed in.
    y
        The targets, or None when omitted.
    indices
        ``np.arange(len(X))``.
    tvt_split
        The ``train_val_test_split`` argument, stored unchanged.

    Any extra keyword argument is attached as an attribute of the same name.
    """

    def __init__(self, X, y=None, train_val_test_split=None, **kwargs):
        # Extras go in first so the named attributes below win on a name clash.
        vars(self).update(kwargs)

        self.X, self.y = X, y
        self.indices = np.arange(0, len(X))
        self.tvt_split = train_val_test_split

In [12]:
def generate_data(generator, parameters):
    """Instantiate ``generator`` with ``parameters`` and return what it produces.

    Parameters
    ----------
    generator : callable
        Generator class (or factory); it is called as ``generator(**parameters)``
        and the resulting object is then called with no arguments.
    parameters : dict
        Keyword arguments for ``generator``.

    Returns
    -------
    object
        Whatever calling the generator instance returns. Elsewhere in this
        notebook that result is unpacked as ``t, data``.
    """
    data_generator = generator(**parameters)
    # Previously the instance was built and dropped, so callers always got None.
    return data_generator()

def _build_dataset_record(dataset_name, dataset_def, parameters, tvt_split):
    """Generate one dataset and wrap it, with its settings, in a result record."""
    # Instantiating the generator and calling the instance returns (t, data).
    t, data = dataset_def["generator"](**parameters)()
    return dict(
        dataset=dataset_name,
        # NOTE(review): this key says "train_test_val" while every other name
        # says "train_val_test"; left as is because consumers may read it.
        train_test_val_split=tvt_split,
        # NOTE(review): never populated, not even for sweep records.
        sweep_vals=None,
        parameters=parameters,
        # One-step-ahead pairs: y[i] is the sample that follows X[i].
        data=TimeSeriesData(
            t=t,
            X=data[:-1],
            y=data[1:],
            train_val_test_split=tvt_split,
        ),
    )


def iterate_datasets(datasets):
    """Yield one generated-dataset record per dataset or per sweep combination.

    Parameters
    ----------
    datasets : dict
        Maps a dataset name (a key of ``definitions["datasets"]``) to its
        per-experiment config. Recognised config keys:

        * ``"train_val_test_split"`` - stored on the record and the data
          object; defaults to ``[1]``.
        * ``"parameters"`` - overrides for the definition's parameters.
        * ``"sweeps"`` - ``{sweep name: {parameter: {"min", "max", "N_steps"}}}``.
          Each sweep yields one record per element of the Cartesian product
          of its parameters' ``np.linspace(min, max, N_steps)`` grids. When
          this key is present, no un-swept record is yielded.

        A config of ``None`` is treated as empty.

    Yields
    ------
    dict
        Keys "dataset", "train_test_val_split", "sweep_vals", "parameters"
        and "data" (a ``TimeSeriesData``). Every record is a separate dict
        with its own "parameters" dict, so records stay valid after the
        iterator advances and sweeps never leak values into each other.

    Raises
    ------
    KeyError
        If a dataset name has no entry in ``definitions["datasets"]``, or the
        entry has no "generator".
    """
    for dataset_name, dataset_confs in datasets.items():
        # A dataset listed without a body in the YAML loads as None.
        dataset_confs = dataset_confs or {}
        dataset_def = definitions["datasets"][dataset_name]
        tvt_split = dataset_confs.get("train_val_test_split", [1])

        # Copy so that overrides never reach the shared definition.
        base_parameters = dict(dataset_def["parameters"] or {})
        base_parameters.update(dataset_confs.get("parameters") or {})

        if "sweeps" not in dataset_confs:
            yield _build_dataset_record(
                dataset_name, dataset_def, base_parameters, tvt_split
            )
            continue

        # Process sweeps
        for sweep_conf in (dataset_confs["sweeps"] or {}).values():
            swept_names = list(sweep_conf)
            swept_axes = [
                np.linspace(
                    float(axis["min"]),
                    float(axis["max"]),
                    int(axis["N_steps"]),
                )
                for axis in sweep_conf.values()
            ]

            # Combine the sweep values
            for combination in itertools.product(*swept_axes):
                # Fresh dict per combination: the record handed out must not
                # change when the next combination is produced.
                parameters = dict(base_parameters)
                parameters.update(zip(swept_names, combination))
                yield _build_dataset_record(
                    dataset_name, dataset_def, parameters, tvt_split
                )

In [13]:
# One lazy dataset iterator per experiment, keyed by experiment name. The loop
# used to overwrite `datasets` on every pass, so only the last experiment's
# iterator could be reached.
experiment_datasets = dict()
for experiment_name, experiment in experiments_conf.items():
    # Process datasets
    datasets = iterate_datasets(experiment["datasets"])
    experiment_datasets[experiment_name] = datasets

# `datasets` is still the last experiment's iterator, as before. It is the same
# object as the last entry of `experiment_datasets`, so consuming one advances
# the other.


In [16]:
# Pull only the first record from the iterator. With a default, `d` becomes
# None when the iterator is empty instead of silently keeping whatever `d` was
# bound to by an earlier cell.
d = next(iter(datasets), None)

In [15]:
# Display the record bound to `d` by the cell above.
d

{'dataset': 'dataset1',
 'train_test_val_split': [0.6, 0.2, 0.2],
 'sweep_vals': None,
 'parameters': {'noise_mean': [0, 0, 0],
  'x0': [10, 10, 10],
  'dt': 0.001,
  'T': 10,
  'rho': 28,
  'sigma': 10,
  'beta': 2.4,
  'noise_covariance': np.float64(0.01)},
 'data': <__main__.TimeSeriesData at 0x7f1b77dc6180>}

In [11]:
# Raises ZeroDivisionError.
# NOTE(review): presumably a manual stop so that "Run All" halts before the
# unfinished cells below - confirm, and remove once those cells are implemented.
1/0

ZeroDivisionError: division by zero

In [None]:
def parse_configs():
    """Stub for the config-parsing step; currently does nothing and returns None."""

def load_data():
    """Stub for the data-loading step; currently does nothing and returns None."""

def load_model():
    """Stub for the model-loading step; currently does nothing and returns None."""

def tune_parameters():
    """Stub for the parameter-tuning step; currently does nothing and returns None."""

def train_final_model():
    """Stub for the final-training step; currently does nothing and returns None."""

def evaluate_model():
    """Stub for the evaluation step; currently does nothing and returns None."""

def generate_reports():
    """Stub for the reporting step; currently does nothing and returns None."""

In [None]:
class Experiment:
    """Skeleton for an end-to-end experiment run; nothing is implemented yet."""

    def run(self):
        """Run the experiment pipeline (not implemented yet).

        The planned stages are listed, in order, by the comments in the body.

        Raises
        ------
        NotImplementedError
            Always, until the stages below are filled in.
        """
        # Parse configs

        # Load data

        # Load model

        # Tune parameters

        # Train final model

        # Evaluate model

        # Generate reports

        # A body made only of comments does not compile; fail explicitly so a
        # call cannot be mistaken for a completed run.
        raise NotImplementedError("Experiment.run is not implemented yet")

In [None]:
# Scratch input: a single sweep axis of 10 evenly spaced values on [0, 1].
x = [np.linspace(0.0, 1.0, num=10)]

In [None]:
# Scratch check: the Cartesian product of the axes in `x`, collected into a list.
[*itertools.product(*x)]