# Generate and store Sachs environments.

In this notebook we generate instances of the Sachs dataset with different random splits.

In [None]:
%load_ext autoreload
%autoreload 2
import os
import shutil
from random import randint

from src.environments.sachs import Sachs


Generate the environments.

In [None]:
# ---------------------------------------------------------------------------
# Generation setup
# ---------------------------------------------------------------------------
data_file = '../data/sachs.csv'  # CSV file handed to the Sachs constructor
normalise_data = True  # forwarded unchanged to the Sachs constructor
split = (500, 353)  # (num_train_samples, num_test_samples); None disables the split
num_instances = 5  # number of environments to generate

delete_existing = False  # delete existing benchmarks
env_dir = '../data/Sachs/'  # dir where to store the generated envs

# NOTE(review): this second assignment overrides the directory above, so the
# environments are actually written below '../test/'. It looks like a leftover
# debugging override -- confirm which target directory is intended.
env_dir = '../test/'  # dir where to store the generated envs

# Seeds are drawn from [0, max_seed]; the upper bound matches the six-digit,
# zero-padded seed that is embedded in the file name below.
max_seed = 999999

# Sample counts used to name the output folder and the files in it.
# Without a split all 853 samples are counted as training samples
# (853 == 500 + 353, presumably the size of the full dataset -- TODO confirm).
num_train_samples = 853 if split is None else split[0]
num_test_samples = 0 if split is None else split[1]

# generate/empty folder for envs of same type
setup_dir = os.path.join(env_dir, f'{num_train_samples}_train_{num_test_samples}_test/')
if os.path.isdir(setup_dir) and not delete_existing:
    # Never touch existing benchmarks unless explicitly asked to.
    print(f"\nDirectory '{setup_dir}' already exists, not generating environments...")
else:
    if os.path.isdir(setup_dir):
        print(f"\nDirectory '{setup_dir}' already exists, delete existing environments...")
        # Snapshot the top-level entries before deleting anything, so the
        # directory is never mutated while it is being iterated.
        with os.scandir(setup_dir) as scan:
            stale_entries = list(scan)
        for entry in stale_entries:
            if entry.is_dir(follow_symlinks=False):
                shutil.rmtree(entry.path)
            else:
                # Regular files and symlinks (including symlinks to directories,
                # which shutil.rmtree refuses to handle) are simply unlinked.
                os.remove(entry.path)

    os.makedirs(setup_dir, exist_ok=True)

    # The file name is keyed on the seed, so every instance needs its own seed.
    if num_instances > max_seed + 1:
        raise ValueError(
            f'Cannot draw {num_instances} distinct seeds from [0, {max_seed}].'
        )

    # generate benchmark envs
    used_seeds = set()
    for i in range(num_instances):
        # Redraw on collision: a repeated seed would silently overwrite the
        # '.pth' file of an earlier instance and leave fewer environments on
        # disk than requested.
        seed = randint(0, max_seed)
        while seed in used_seeds:
            seed = randint(0, max_seed)
        used_seeds.add(seed)

        # Presumably the seed determines the random train/test split of this
        # instance (see the notebook introduction) -- verify against Sachs.
        env = Sachs(split, normalise_data, data_file, seed)
        env_path = os.path.join(setup_dir, f'sachs-{num_train_samples}-{num_test_samples}-{seed:06d}.pth')
        env.save(env_path)
        env.export_to_csv(setup_dir)
        # '\r' rewrites the same output line, giving a simple progress counter.
        print(f'\rGenerated {i + 1}/{num_instances} environments.', end='')
