In [3]:
import re
import pandas as pd
import numpy as np
import os
import sys
from sktime.datasets import load_from_ucr_tsv_to_dataframe, load_from_tsfile
import yaml


def extract_metadata(path):
    """Read series length, dimension count and class count from a ``.ts`` header.

    Only the header (the lines before the ``@data`` marker) is scanned, so the
    data section of the file is never loaded into memory.

    Parameters
    ----------
    path : str
        Path to the ``.ts`` file to inspect.

    Returns
    -------
    tuple
        ``(series_length, dimensions, num_classes)``. An element is ``None``
        when the corresponding ``@seriesLength``, ``@dimensions`` or
        ``@classLabel true ...`` tag is not present in the header.

    Raises
    ------
    OSError
        If the file cannot be opened.
    """

    def _parse_label(token):
        # Numeric labels are kept as floats; any other label stays a string.
        try:
            return float(token)
        except ValueError:
            return token

    # Collect header lines only; everything after "@data" is time-series values.
    header_lines = []
    with open(path, 'r') as file:
        for line in file:
            if line.strip().lower() == "@data":
                break
            header_lines.append(line)
    file_content = "".join(header_lines)

    # Default to None so a missing tag no longer leaves a name unbound at return.
    series_length = None
    dimensions = None
    num_classes = None

    # Extract the seriesLength value (tag matched case-insensitively)
    series_length_match = re.search(r"@seriesLength\s+(\d+)", file_content, re.IGNORECASE)
    if series_length_match:
        series_length = int(series_length_match.group(1))
        print("Series Length:", series_length)
    else:
        print("Series Length not found")

    # Extract the dimensions value
    dimensions_match = re.search(r"@dimensions\s+(\d+)", file_content, re.IGNORECASE)
    if dimensions_match:
        dimensions = int(dimensions_match.group(1))
        print("Dimensions:", dimensions)
    else:
        print("Dimensions not found")

    # Extract the labels listed on the @classLabel line
    class_label_match = re.search(r"@classLabel\s+true\s+(.+)", file_content, re.IGNORECASE)
    if class_label_match:
        class_labels = [_parse_label(label) for label in class_label_match.group(1).split()]
        num_classes = len(class_labels)
        print("Unique labels:", class_labels)
    else:
        print("Class labels not found")

    return series_length, dimensions, num_classes


def read_yaml_config(CONFIG):
    """Load the YAML file at ``CONFIG`` with ``yaml.safe_load``.

    Returns the parsed content, or ``None`` (after printing the error) when
    the file is not valid YAML. Errors from opening the file propagate.
    """
    with open(CONFIG, 'r') as stream:
        try:
            parsed = yaml.safe_load(stream)
        except yaml.YAMLError as parse_error:
            # Best effort: report the parse problem and signal failure with None
            print(parse_error)
            parsed = None
    return parsed

In [3]:
# The configuration file is expected in the current working directory
CONFIG = os.path.join(os.getcwd(), "config.yaml")
config_data = read_yaml_config(CONFIG)
# Show the resolved path followed by the parsed configuration
print(CONFIG, config_data, sep="\n")


c:\Users\nati\Desktop\Implementations\FinalProject\FinalProject\config.yaml
{'datageneration': {'apply_example_scaling': True, 'apply_feature_scaling': True, 'attribute_discriminator_beta1': 0.5, 'attribute_discriminator_learning_rate': 0.001, 'attribute_gradient_penalty_coef': 10.0, 'attribute_loss_coef': 1.0, 'attribute_noise_dim': 10, 'attribute_num_layers': 3, 'attribute_num_units': 100, 'batch_size': 1024, 'binary_encoder_cutoff': 150, 'discriminator_beta1': 0.5, 'discriminator_learning_rate': 0.001, 'discriminator_rounds': 1, 'epochs': 1500, 'feature_noise_dim': 10, 'feature_num_layers': 1, 'feature_num_units': 100, 'forget_bias': False, 'generate_n_sample': 550, 'generator_beta1': 0.5, 'generator_learning_rate': 0.001, 'generator_rounds': 1, 'gradient_penalty_coef': 10.0, 'max_sequence_len': 144, 'mixed_precision_training': False, 'normalization': False, 'percentage_of_original_data': 0.7, 'sample_length': 144, 'use_attribute_discriminator': True}, 'experiment_params': {'dataset

In [16]:
# Load the full experiment grid
data = pd.read_csv(r"C:\Users\nati\Downloads\all_experiments_param (2).csv")
# Keep one row per (dataset, synthetic sample multiplier, original-data ratio)
unique_combinations = data.loc[
    :, ['dataset_name', 'synthetic_num_samples', 'dgan_original_data_ratio']
].drop_duplicates()
display(unique_combinations)
# Persist the distinct combinations without the index column
unique_combinations.to_csv('synthetic_data_generation.csv', index=False)

Unnamed: 0,dataset_name,synthetic_num_samples,dgan_original_data_ratio
0,ArticularyWordRecognition,2,0.7
2,ArticularyWordRecognition,2,0.9
3,ArticularyWordRecognition,4,0.7
5,ArticularyWordRecognition,4,0.9
48,AtrialFibrillation,2,0.7
...,...,...,...
869,PEMS-SF,4,0.9
912,PenDigits,2,0.7
914,PenDigits,2,0.9
915,PenDigits,4,0.7


In [7]:
import sys
import subprocess

# Launch each experiment in a child process using the interpreter running this notebook
python_executable = sys.executable
script_path = r'C:\Users\nati\Desktop\Implementations\FinalProject\FinalProject\run_single_experiment_new.py'
# Datasets are expected in a "Datasets" folder beside the current working directory
datasets_root = os.path.join(os.path.dirname(os.getcwd()), 'Datasets')

# Read the experiment grid from the csv file
data = pd.read_csv(r"C:\Users\nati\Downloads\all_experiments_param (2).csv")
display(data)
# For each row of the grid: derive the dataset-dependent values, rewrite the
# shared yaml config in place and run the experiment script against it.
# Relies on `config_data` and `CONFIG` defined by an earlier cell.
for index, row in data.iterrows():
    # Get the benchmark config
    pretraining_epochs = row['epochs']
    learning_rate = row['learning_rate']
    dataset_name = row['dataset_name']
    model_type = row['model_name']
    # os.path.join avoids hand-written separators and invalid "\D" style escapes
    dataset_dir = os.path.join(datasets_root, dataset_name)
    train_path = os.path.join(dataset_dir, f'{dataset_name}_TRAIN.ts')
    test_path = os.path.join(dataset_dir, f'{dataset_name}_TEST.ts')
    X, _ = load_from_tsfile(train_path)
    X_test, _ = load_from_tsfile(test_path)
    series_length, features_num, num_classes = extract_metadata(train_path)
    train_samples = X.shape[0]
    test_samples = X_test.shape[0]
    if model_type == 'LSTM':
        # These columns are empty for non-LSTM rows, so pandas reads them as
        # floats (128.0, 3.0); cast back to int before writing the config.
        hidden_dim = int(row['hidden_dim'])
        num_layers = int(row['num_layers'])
    else:
        # NOTE(review): this is the string 'Null', not a YAML null; kept as-is
        # because the experiment script may rely on it - confirm.
        hidden_dim = 'Null'
        num_layers = 'Null'
    # The csv stores a multiplier; scale it by the training-set size
    synthetic_num_samples = row['synthetic_num_samples']*train_samples
    batchsize = int(np.floor(row['BM_batch_size_ratio']*train_samples))
    percentage_of_original_data = row['dgan_original_data_ratio']

    epochs = row['epochs']
    #Print all of the above
    print(f'''pretraining_epochs: {pretraining_epochs}
    learning_rate: {learning_rate}
    dataset_name: {dataset_name}
    model_type: {model_type}
    train_path: {train_path}
    test_path: {test_path}
    series_length: {series_length}
    features_num: {features_num}
    num_classes: {num_classes}
    train_samples: {train_samples}
    hidden_dim: {hidden_dim}
    num_layers: {num_layers}
    synthetic_num_samples: {synthetic_num_samples}
    batchsize: {batchsize}
    percentage_of_original_data: {percentage_of_original_data}
    epochs: {epochs}''')
    # NOTE(review): read but never written to the config below - confirm
    # whether a config key is missing for it.
    finetuning_original_data_ratio = row['finetuning_original_data_ratio']
    # Edit yaml file
    config_data['datageneration']['generate_n_sample'] = synthetic_num_samples
    config_data['datageneration']['percentage_of_original_data'] = percentage_of_original_data
    config_data['datageneration']['max_sequence_len'] = series_length
    config_data['datageneration']['sample_length'] = series_length
    # Experiment Params
    config_data['experiment_params']['dataset_name'] = dataset_name
    config_data['experiment_params']['num_classes'] = num_classes
    config_data['experiment_params']['num_features'] = features_num
    config_data['experiment_params']['sequence_length'] = series_length
    # Finetuning
    config_data['finetuning']['batch_size'] = batchsize
    config_data['finetuning']['epochs'] = epochs
    config_data['finetuning']['learning_rate'] = learning_rate
    config_data['finetuning']['model_type'] = model_type
    # Pretraining
    config_data['pretraining']['hidden_size'] = hidden_dim
    config_data['pretraining']['num_layers_layers_stacked'] = num_layers
    config_data['pretraining']['batch_size'] = batchsize
    config_data['pretraining']['epochs'] = pretraining_epochs
    config_data['pretraining']['learning_rate'] = learning_rate
    config_data['pretraining']['model_type'] = model_type
    # Save the yaml file (overwrites the config that was loaded earlier)
    with open(CONFIG, 'w') as file:
        yaml.dump(config_data, file)
    # Run the experiment and surface a failing exit code instead of ignoring it
    return_code = subprocess.call([python_executable, script_path])
    if return_code != 0:
        print(f'Experiment {index} ({dataset_name}, {model_type}) exited with code {return_code}')
    # NOTE(review): this break stops after the first row, so only one
    # experiment runs - presumably a smoke-test leftover; remove to run all rows.
    break
    


Unnamed: 0,dataset_name,model_name,BM_batch_size_ratio,hidden_dim,num_layers,epochs,learning_rate,synthetic_num_samples,dgan_original_data_ratio,finetuning_original_data_ratio
0,ArticularyWordRecognition,inceptionTime,0.1,,,10,0.0010,2,0.7,0.7
1,ArticularyWordRecognition,inceptionTime,0.1,,,10,0.0010,2,0.7,0.9
2,ArticularyWordRecognition,inceptionTime,0.1,,,10,0.0010,2,0.9,0.9
3,ArticularyWordRecognition,inceptionTime,0.1,,,10,0.0010,4,0.7,0.7
4,ArticularyWordRecognition,inceptionTime,0.1,,,10,0.0010,4,0.7,0.9
...,...,...,...,...,...,...,...,...,...,...
4795,PenDigits,LSTM,0.2,128.0,3.0,20,0.0001,2,0.7,0.9
4796,PenDigits,LSTM,0.2,128.0,3.0,20,0.0001,2,0.9,0.9
4797,PenDigits,LSTM,0.2,128.0,3.0,20,0.0001,4,0.7,0.7
4798,PenDigits,LSTM,0.2,128.0,3.0,20,0.0001,4,0.7,0.9


Series Length: 144
Dimensions: 9
Unique labels: [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0]
pretraining_epochs: 10
    learning_rate: 0.001
    dataset_name: ArticularyWordRecognition
    model_type: inceptionTime
    train_path: c:\Users\nati\Desktop\Implementations\FinalProject\Datasets\ArticularyWordRecognition\ArticularyWordRecognition_TRAIN.ts
    test_path: c:\Users\nati\Desktop\Implementations\FinalProject\Datasets\ArticularyWordRecognition\ArticularyWordRecognition_TEST.ts
    series_length: 144
    features_num: 9
    num_classes: 25
    train_samples: 275
    hidden_dim: Null
    num_layers: Null
    synthetic_num_samples: 550
    batchsize: 27
    percentage_of_original_data: 0.7
    epochs: 10
