In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import Dataset

from methods.toy_model_selection_method import ToyModelSelectionMethod # change input dimension! (f model(s) for X, g model for Y)
from scenarios.abstract_scenario import AbstractScenario

In [None]:
# Experiment configuration read by the training cell below.
runs = 10  # how many times the model is fitted for each alpha value
alpha_values = [0, 1, 5]  # alpha suffixes of the sec5.4 train/test CSV files

In [None]:
# Define the dataset
class MyDataset(Dataset):
    """Thin ``Dataset`` wrapper around an indexable container.

    The wrapped object is stored unchanged on ``self.data``; length and item
    lookups are forwarded straight to it.
    """

    def __init__(self, data):
        """Keep a reference to ``data`` (no copy is made)."""
        self.data = data

    def __getitem__(self, idx):
        """Return ``self.data[idx]``."""
        sample = self.data[idx]
        return sample

    def __len__(self):
        """Return the number of entries in the wrapped container."""
        n_items = len(self.data)
        return n_items

In [None]:
# For every alpha: load the train/test CSVs, split the training rows into a
# shuffled train/validation partition, fit the DeepGMM model `runs` times and
# save the test-set predictions of each run as one column of a CSV file.

train_ratio = 0.9  # 90% of the rows for training, 10% for validation
split_seed = 0     # fixed seed so the train/validation split is reproducible
results_dir = Path('results/sec5.4')

# Create the output directory up front so a missing folder cannot make
# DataFrame.to_csv fail after all training runs have already finished.
results_dir.mkdir(parents=True, exist_ok=True)

for alpha in alpha_values:
    train_data_path = f'data/sec5.4/train_Zcont_g_fct_f_softplusalpha{alpha}.csv'
    test_data_path = f'data/sec5.4/test_Zcont_g_fct_f_softplusalpha{alpha}.csv'

    # Both files are read as comma-separated values, skipping one header row.
    train_tensor = torch.tensor(
        np.genfromtxt(train_data_path, delimiter=',', skip_header=1),
        dtype=torch.float32,
    )
    test_data = np.genfromtxt(test_data_path, delimiter=',', skip_header=1)

    data_train_length = train_tensor.shape[0]
    print(f'Train data size: {data_train_length}')

    # Wrap the rows in a Dataset so random_split can partition them.
    dataset = MyDataset(train_tensor)
    train_size = int(train_ratio * len(dataset))
    val_size = len(dataset) - train_size

    train_subset, val_subset = torch.utils.data.random_split(
        dataset,
        [train_size, val_size],
        generator=torch.Generator().manual_seed(split_seed),
    )

    # Use the shuffled row indices chosen by random_split. Slicing the
    # underlying tensor with [:train_size] / [train_size:] would ignore the
    # shuffle and always yield the first 90% / last 10% of the file.
    train_idx = torch.as_tensor(train_subset.indices, dtype=torch.long)
    val_idx = torch.as_tensor(val_subset.indices, dtype=torch.long)
    train_rows = train_tensor[train_idx]
    val_rows = train_tensor[val_idx]

    # Column 0 -> Z, column 1 -> X, column 2 -> Y, each as an (n, 1) tensor.
    Z_train = train_rows[:, 0].reshape(-1, 1)
    X_train = train_rows[:, 1].reshape(-1, 1)
    Y_train = train_rows[:, 2].reshape(-1, 1)

    Z_val = val_rows[:, 0].reshape(-1, 1)
    X_val = val_rows[:, 1].reshape(-1, 1)
    Y_val = val_rows[:, 2].reshape(-1, 1)

    # Test inputs are taken from column 0 of the test file.
    X_test = torch.tensor(test_data[:, 0].astype(np.float32)).squeeze()

    # NOTE(review): the same ToyModelSelectionMethod instance is re-fitted on
    # every run; this presumes fit() starts from scratch each time -- confirm,
    # otherwise construct a new instance inside the run loop.
    deepGMM = ToyModelSelectionMethod()

    # Predictions of each run, keyed by the column name used in the CSV.
    predictions_by_run = {}

    for i in range(runs):
        # The float32 tensors are widened to float64 on every call, so each
        # fit receives freshly created tensors.
        deepGMM.fit(X_train.double(), Z_train.double(), Y_train.double(),
                    X_val.double(), Z_val.double(), Y_val.double(),
                    g_dev=None, verbose=True)

        y_hat_deepGMM = deepGMM.predict(X_test.double()).flatten().detach().numpy()
        predictions_by_run[f'Run_{i+1}'] = y_hat_deepGMM

    # One column per run (Run_1 ... Run_<runs>), one row per test point.
    df_mse = pd.DataFrame(predictions_by_run)

    # Save the results for the current alpha
    df_mse.to_csv(results_dir / f'deepgmm_result_mse_alpha{alpha}.csv', index=False)