# Evaluation pipeline

## 0 Imports and Constants

In [11]:
import sys
import os

# Add the directory two levels above the current working directory to sys.path
# so that modules located there can be imported by the cells below.
# NOTE: this depends on the kernel's working directory at execution time.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), '../../'))

# Only prepend when it is not already the first entry, so re-running this cell
# does not keep stacking duplicate entries onto sys.path.
if sys.path[:1] != [parent_dir]:
    sys.path.insert(0, parent_dir)

In [12]:
import pandas as pd
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import seaborn as sns
from copy import deepcopy as dc

from utilities import split_data_into_sequences, load_sequential_time_series, reconstruct_sequential_data, Scaler, extract_features_and_targets_reg, get_discriminative_test_performance
from visual.visual_evaluation import visualize
from predictive_evaluation import predictive_evaluation

from discriminative.discriminative_model import LSTMClassification

In [13]:
# Root of the data directory, relative to the notebook's working directory.
DATA_FOLDER = Path("../../data")

# Sub-folders for the real and the synthetic data sets.
REAL_DATA_FOLDER = DATA_FOLDER.joinpath("real")
SYNTHETIC_DATA_FOLDER = DATA_FOLDER.joinpath("synthetic")

## 1 Load and Visualize Data

### Ways of loading data
- Laden der Originaldaten: als pd dataframe 
- Laden der synthetischen, sequentiellen Daten: als np array (GAN, (V)AE)
- Laden der synthetischen, sequentiellen Daten: als pd dataframe (brownian, algorithmisch)

In [14]:
# Selects which synthetic data set the cells below load and evaluate.
# 'all' is a placeholder branch: the cells below currently do nothing for it.
SYN_DATA_TYPES = ('timegan_lstm', 'timegan_gru', 'jitter', 'timewarp', 'autoencoder', 'all')
syn_data_type = 'jitter'

# Fail early on a typo instead of hitting an undefined variable in a later cell.
if syn_data_type not in SYN_DATA_TYPES:
    raise ValueError(f"Unknown syn_data_type {syn_data_type!r}; expected one of {SYN_DATA_TYPES}")

In [15]:
if syn_data_type == 'all':
    # TODO: load all previously stored results (not implemented yet)
    pass

else:

    # Load the real time series and keep an independent numpy copy of it
    data_real_df = pd.read_csv(REAL_DATA_FOLDER/'metro_interstate_traffic_volume_label_encoded_no_categorical.csv')
    data_real_numpy = dc(data_real_df).to_numpy()

    # Load the synthetic data that belongs to the selected generation method
    if syn_data_type == 'timegan_lstm':
        # Sequential data, restored to the given 3-D shape.
        # NOTE(review): the file name says "unscaled" - confirm whether scaling is still required.
        data_syn_numpy = load_sequential_time_series(SYNTHETIC_DATA_FOLDER/'mitv_28499_12_5_lstm_unscaled.csv', shape=(28499, 12, 5))

    elif syn_data_type == 'timegan_gru':
        data_syn_numpy = load_sequential_time_series(SYNTHETIC_DATA_FOLDER/'mitv_28499_12_5_gru_unscaled.csv', shape=(28499, 12, 5))

    elif syn_data_type == 'autoencoder':
        data_syn_numpy = load_sequential_time_series(SYNTHETIC_DATA_FOLDER/'mitv_28478_12_5_autoencoder_unscaled.csv', shape=(28478, 12, 5))

    elif syn_data_type == 'jitter':
        # The factor is part of the file name with the dot removed (0.1 -> "01")
        jitter_factor = 0.1
        data_syn_df = pd.read_csv(SYNTHETIC_DATA_FOLDER/f'mitv_jittered_{str(jitter_factor).replace(".", "")}.csv')
        data_syn_numpy = dc(data_syn_df).to_numpy()

    elif syn_data_type == 'timewarp':
        data_syn_df = pd.read_csv(SYNTHETIC_DATA_FOLDER/'mitv_time_warped.csv')
        data_syn_numpy = dc(data_syn_df).to_numpy()

    else:
        # Without this branch an unknown type would leave data_syn_numpy undefined
        raise ValueError(f"Unknown syn_data_type: {syn_data_type!r}")

    # Look at real and synthetic data: flatten the synthetic data to 2-D
    # (rows x features) so it can reuse the real data's column names.
    df = pd.DataFrame(data_syn_numpy.reshape(-1, data_syn_numpy.shape[-1]), columns=data_real_df.columns)

    # Expressions inside an if/else block are not auto-displayed by Jupyter,
    # so the summaries have to be shown explicitly (display is an IPython builtin).
    display(df.describe())
    display(data_real_df.describe())

## 2 Predictive Evaluation

### Set Hyperparameters and Features

In [16]:
# Settings handed to predictive_evaluation(); 'seq_len' is also reused by the
# visual evaluation when splitting the data into sequences.
predictive_model_hyperparameters = dict(
    seq_len=12,
    lr=0.0001,
    batch_size=32,
    hidden_size=4,
    num_layers=1,
    bidirectional=True,
    num_evaluation_runs=10,
    num_epochs=200,
    # use the GPU when one is available, otherwise fall back to the CPU
    device='cuda' if torch.cuda.is_available() else 'cpu',
)

In [17]:
### Load old data and plot it ###
if syn_data_type == 'all':
    # TODO: load all previously stored results (not implemented yet)
    pass

### Get new data and plot it ###
else:
    # evaluate predictive performance
    predictive_results = predictive_evaluation(data_real_numpy, data_syn_numpy, predictive_model_hyperparameters, include_baseline=True, verbose=True)

    # save results; create the target folder first so to_csv cannot fail on a fresh checkout
    results_folder = DATA_FOLDER / "results"
    results_folder.mkdir(parents=True, exist_ok=True)
    predictive_results.to_csv(results_folder / f"results_{syn_data_type}_{predictive_model_hyperparameters['num_epochs']}_{predictive_model_hyperparameters['num_evaluation_runs']}.csv", index=False)

    # split in mse and mae results
    mse_results = predictive_results.loc[predictive_results['Metric'] == 'MSE']
    mae_results = predictive_results.loc[predictive_results['Metric'] == 'MAE']

    # title fragments shared by both figures; jitter_factor only exists for the jitter data type
    num_runs = predictive_model_hyperparameters["num_evaluation_runs"]
    jitter_suffix = (" | jitter factor = " + str(jitter_factor)) if syn_data_type == "jitter" else ""

    # plot results: one figure per metric, box plot with the individual runs overlaid
    for metric_name, error_label, metric_results in (
        ('MSE', 'Mean Squared Error', mse_results),
        ('MAE', 'Mean Absolute Error', mae_results),
    ):
        plt.figure(figsize=(14, 8))
        sns.boxplot(x='Model', y='Error', hue='Model', data=metric_results)
        sns.stripplot(x='Model', y='Error', hue='Metric', data=metric_results, dodge=True, jitter=True, palette='dark:black', alpha=0.7)
        plt.ylabel(error_label)
        # the x-axis shows the 'Model' column, so label it accordingly
        plt.xlabel('Model')
        plt.title(f'{metric_name} | {syn_data_type} | {num_runs} Training Runs {jitter_suffix}')
        plt.legend()

HYPERPARAMETERS:
seq_len :  12
lr :  0.0001
batch_size :  32
hidden_size :  4
num_layers :  1
bidirectional :  True
num_evaluation_runs :  10
num_epochs :  200
device :  cpu
Synthetic Data is sequential: False
Shape of the data after splitting into sequences: (22797, 12, 5)
Shape of the data after splitting into sequences: (2841, 12, 5)
Shape of the data after splitting into sequences: (2840, 12, 5)


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1
Training Loss: 0.07869292058050632
Training Loss: 0.07186358736827969
Training Loss: 0.06868705555796623
Validation Loss: 0.06108858172645729
Validation Accuracy: 0.0
**************************************************
Epoch: 2
Training Loss: 0.06266070073470473
Training Loss: 0.05839964272454381


  0%|          | 0/10 [00:04<?, ?it/s]

Training Loss: 0.0554901968035847





KeyboardInterrupt: 

## 3 Discriminative Evaluation

In [None]:
# Settings used to build the discriminative model and to locate its stored
# weights ('type' is part of the checkpoint file name).
discriminative_model_hyperparameters = dict(
    lr=0.0001,
    batch_size=32,
    hidden_size=4,
    num_layers=1,
    bidirectional=True,
    output_logits=True,
    num_epochs=1000,
    type='lstm',
    # use the GPU when one is available, otherwise fall back to the CPU
    device='cuda' if torch.cuda.is_available() else 'cpu',
)

In [None]:
# Build the discriminative model with the configured architecture.
# The input size is taken from the feature dimension (last axis) of the real data.
discriminative_device = discriminative_model_hyperparameters['device']
discriminative_model = LSTMClassification(
    device=discriminative_device,
    batch_size=discriminative_model_hyperparameters['batch_size'],
    input_size=data_real_numpy.shape[-1],
    hidden_size=discriminative_model_hyperparameters['hidden_size'],
    num_stacked_layers=discriminative_model_hyperparameters['num_layers'],
    bidirectional=discriminative_model_hyperparameters['bidirectional'],
    output_logits=discriminative_model_hyperparameters['output_logits']
)

# Load the stored model parameters.
# - The file name is built with double quotes on the outside: reusing the same
#   quote type inside an f-string expression is a SyntaxError before Python 3.12.
# - map_location remaps the stored tensors to the selected device, so a checkpoint
#   saved on a GPU can still be loaded on a CPU-only machine.
discriminative_model_path = f"discriminative_model_{discriminative_model_hyperparameters['type']}.pth"
discriminative_model.load_state_dict(torch.load(discriminative_model_path, map_location=discriminative_device))

In [None]:
# Load the discriminative test sets (one per generation method).
# All files live in the same folder and are restored to the same 3-D shape.
discriminative_test_folder = SYNTHETIC_DATA_FOLDER / 'discriminative_test'
discriminative_test_shape = (3000, 13, 5)

autoencoder_test = load_sequential_time_series(
    discriminative_test_folder / 'discriminative_test_autoencoder_3000_13_5.csv',
    shape=discriminative_test_shape,
)
jitt_01_test = load_sequential_time_series(
    discriminative_test_folder / 'discriminative_test_jitt_01_3000_13_5.csv',
    shape=discriminative_test_shape,
)
jitt_02_test = load_sequential_time_series(
    discriminative_test_folder / 'discriminative_test_jitt_02_3000_13_5.csv',
    shape=discriminative_test_shape,
)
jitt_005_test = load_sequential_time_series(
    discriminative_test_folder / 'discriminative_test_jitt_005_3000_13_5.csv',
    shape=discriminative_test_shape,
)
timegan_gru_test = load_sequential_time_series(
    discriminative_test_folder / 'discriminative_test_timegan_gru_3000_13_5.csv',
    shape=discriminative_test_shape,
)
timegan_lstm_test = load_sequential_time_series(
    discriminative_test_folder / 'discriminative_test_timegan_lstm_3000_13_5.csv',
    shape=discriminative_test_shape,
)
timewarp_test = load_sequential_time_series(
    discriminative_test_folder / 'discriminative_test_timewarp_3000_13_5.csv',
    shape=discriminative_test_shape,
)

In [None]:
# Start from an empty results table with the columns 'Method' and 'Accuracy'.
# It is passed to (and reassigned from) get_discriminative_test_performance below;
# re-run this cell before re-running the evaluation to start from a clean table.
discriminative_results = pd.DataFrame(columns=['Method', 'Accuracy'])

In [None]:
# Evaluate the discriminative model on every test set, in a fixed order.
# Each call receives the current results table and its return value replaces it.
evaluation_device = discriminative_model_hyperparameters['device']
labelled_test_sets = (
    (autoencoder_test, 'Autoencoder'),
    (jitt_01_test, 'Jitter 0.1'),
    (jitt_02_test, 'Jitter 0.2'),
    (jitt_005_test, 'Jitter 0.05'),
    (timegan_gru_test, 'TimeGAN GRU'),
    (timegan_lstm_test, 'TimeGAN LSTM'),
    (timewarp_test, 'Timewarp'),
)

for test_sequences, method_label in labelled_test_sets:
    discriminative_results = get_discriminative_test_performance(
        discriminative_model,
        evaluation_device,
        test_sequences,
        method_label,
        discriminative_results,
    )

In [None]:
# Bar chart: one bar per method showing the accuracy stored in the results table
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(discriminative_results['Method'], discriminative_results['Accuracy'], color='skyblue')

# Label the chart
ax.set_xlabel('Method')
ax.set_ylabel('Accuracy')
ax.set_title('Accuracy per Method')
plt.xticks(rotation=45)  # rotate the x-axis labels by 45 degrees
fig.tight_layout()

# Show the chart
plt.show()

## 4 Visual Evaluation

In [None]:
# Nothing happens for 'all' yet (placeholder for loading all stored results).
if syn_data_type != 'all':

    # Split the real data into sequences before feeding it into the visual evaluation
    visual_seq_len = predictive_model_hyperparameters['seq_len']
    data_real_seq = split_data_into_sequences(data_real_numpy, seq_len=visual_seq_len, shuffle_data=True)

    # Synthetic data that is already 3-dimensional is used as-is;
    # anything else is split the same way as the real data.
    if data_syn_numpy.ndim != 3:
        data_syn_seq = split_data_into_sequences(data_syn_numpy, seq_len=visual_seq_len, shuffle_data=True)
    else:
        data_syn_seq = data_syn_numpy

    # Evaluate visual performance, first with PCA and then with t-SNE
    for visual_metric in ('pca', 'tsne'):
        visualize(data_real_seq, data_syn_seq, metric=visual_metric)

Shape of the data after splitting into sequences: (28500, 12, 5)
Shape of the data after splitting into sequences: (28500, 12, 5)


### Beispielaussage:
PCA-Analyse von Realen und Synthetischen Daten
Um die Ähnlichkeit zwischen den realen und synthetischen Daten zu bewerten, wurde eine Principal Component Analysis (PCA) durchgeführt. Die PCA reduziert die Dimensionalität der Daten und projiziert sie auf eine zweidimensionale Ebene, wobei die Hauptkomponenten beibehalten werden, die den größten Teil der Varianz erklären.

Abbildung X zeigt den PCA-Plot der realen (rote Punkte) und synthetischen Daten (blaue Punkte). Die folgenden Beobachtungen können gemacht werden:

**Verteilung und Clusterbildung:**

- Die roten Punkte, die die realen Daten repräsentieren, sind in einem spezifischen Bereich konzentriert.
- Die blauen Punkte, die die synthetischen Daten darstellen, zeigen eine größere Streuung und decken einen breiteren Bereich ab.

**Ähnlichkeit und Unterschiede:**

- Die Tatsache, dass die synthetischen Daten eine größere Variabilität aufweisen, könnte darauf hinweisen, dass sie eine breitere Vielfalt an Mustern generieren.
- Die Cluster der realen und synthetischen Daten überlappen sich teilweise, was darauf hindeutet, dass die synthetischen Daten einige der Eigenschaften der realen Daten gut nachahmen. Allerdings gibt es auch Bereiche, in denen die synthetischen Daten stark von den realen Daten abweichen, was auf Unterschiede in den zugrunde liegenden Verteilungen hinweist.

**Schlussfolgerung:**

- Die PCA-Analyse zeigt, dass die synthetischen Daten in gewissem Maße die Struktur der realen Daten einfangen, jedoch eine größere Variabilität aufweisen.
- Weitere Untersuchungen und Anpassungen am Generierungsprozess der synthetischen Daten könnten notwendig sein, um deren Genauigkeit und Übereinstimmung mit den realen Daten zu verbessern.