# Splitting SEPP Simulated Data into Train, Test, and Validation Sets
This notebook processes simulated SEPP datasets by splitting them based on timestamp intervals into training, test, and validation subsets.

## 0. Import libraries and configure paths

In [None]:
import sys, os
from pathlib import Path

# Locations of the local libraries and the experiment scripts
path_opencp = Path('Librerias/PredictCode')
path_fairness = Path('Librerias')
path_exp = Path('.../EXP0/SCRIPTS')

# Prepend each location as an absolute path; because every insert goes to
# position 0, the last path listed ends up first on sys.path.
for local_dir in (path_opencp, path_fairness, path_exp):
    sys.path.insert(0, str(local_dir.resolve()))

## 1. Set working directory and import core packages

In [None]:
import pickle as pkl
import datetime

# Switch the process working directory to the folder where the data is stored
work_dir = '.../EXP0'
os.chdir(work_dir)

## 2. Load global variables for time ranges and file directories

In [None]:
from global_vars import f_inicial, f_final, f_final_train, f_final_test, f_final_val
from global_vars import dir_sims, dir_split

# Marker used to pick out simulation files by name
prefix_data: str = 'Data_'

## 3. Split each simulation into Train, Test, and Validation

In [None]:
# Iterate through simulation files and perform time-based splits.
# Each subset covers a half-open interval [start, end): an event whose
# timestamp equals a boundary falls into exactly one subset, and events
# before f_inicial or at/after f_final_val are not written anywhere.
for i in os.listdir(dir_sims):
    # Skip anything that is not a simulation file (substring match on the name)
    if prefix_data not in i:
        continue

    # NOTE(review): unpickling can execute arbitrary code; only load
    # simulation files from a trusted source.
    with open(os.path.join(dir_sims, i), "rb") as sim_file:
        timed_points_NM = pkl.load(sim_file)

    # Compute the timestamps once and reuse them for all three masks
    event_times = timed_points_NM.times_datetime()

    # Split data based on timestamp intervals
    data_train = timed_points_NM[(event_times >= f_inicial) &
                                 (event_times < f_final_train)]
    data_test = timed_points_NM[(event_times >= f_final_train) &
                                (event_times < f_final_test)]
    data_val = timed_points_NM[(event_times >= f_final_test) &
                               (event_times < f_final_val)]

    # Save each subset to file; the context manager guarantees every output
    # file is flushed and closed before the next one is opened.
    for subset_prefix, subset in (("Train_", data_train),
                                  ("Test_", data_test),
                                  ("Val_", data_val)):
        with open(os.path.join(dir_split, subset_prefix + i), "wb") as out_file:
            pkl.dump(subset, out_file)