In [1]:
import pandas as pd
from typing import List, Tuple
import numpy as np

from preprocess import create_min_max_df, scale_data, downsample_timeseries_data, slice_timeseries_data, masked_expand
from postprocess import generate_predictions

import importlib
import time

# Builder modules to benchmark. Each name is imported dynamically in the
# benchmark cell, which reads `build_cerberus` and `train_cerberus` from it.
# To benchmark a single variant, shrink the list,
# e.g. modules = ['cerberus_builder_noknown'].
modules = [
    'cerberus_builder',
    'cerberus_builder_attention',
    'cerberus_builder_attention_noneck',
    'cerberus_builder_noneck',
    'cerberus_builder_attention_hybrid',
    'cerberus_builder_noknown',
]

# Setup
# Read the timestamp column as plain strings and parse it exactly once with
# the explicit day-first format. Letting read_csv guess (parse_dates=...) does
# not know the dates are day-first, so ambiguous values such as "01.02.2009"
# can be interpreted month-first, and the explicit format below would then be
# applied to an already-parsed index and have no effect.
df = pd.read_csv(r"../data/jena_climate_2009_2016.csv",
                 index_col=['Date Time'])
df.index = pd.to_datetime(df.index, format='%d.%m.%Y %H:%M:%S')
# Keep only the first 5000 rows for this experiment.
df = df.iloc[:5000, :]

# Resampling windows and number of steps for each model input/output head.
context_windows = ['1H', '2H', '6H']
context_sizes = [24, 12, 6]
call_window = '10T'
call_size = 24
response_window = '10T'
response_size = 8

# Column positions fed to each head: all 14 columns for call and every
# context, columns 0, 1 and 4 for the response.
call_feature_index = range(0, 14)
context_feature_index = [range(0, 14) for _ in context_windows]
response_feature_index = [0, 1, 4]

# Derive the per-head sizes from the values above instead of repeating the
# literals, so the dict cannot drift out of sync with context_sizes etc.
# Resulting keys: 'call', 'response', 'context_0', 'context_1', 'context_2'.
sizes = {'call': call_size, 'response': response_size}
sizes.update({f'context_{i}': size for i, size in enumerate(context_sizes)})

# Same 0.7 threshold for every head; passed to slice_timeseries_data below.
thresholds = dict.fromkeys(sizes, 0.7)

# --- Scaling -----------------------------------------------------------
# Build the min/max table from df, then scale df with it into the (0, 1) range.
min_max_df = create_min_max_df(df)
scaled_df = scale_data(df, min_max_df, feature_range=(0, 1))

# --- Downsampling ------------------------------------------------------
# Resample the scaled frame for the context, call and response windows,
# selecting the configured feature columns for each.
downsampled_data = downsample_timeseries_data(
    scaled_df,
    context_windows,
    call_window,
    response_window,
    call_feature_index,
    context_feature_index,
    response_feature_index,
)

# --- Slicing -----------------------------------------------------------
# Cut the downsampled series into samples using the per-head sizes and thresholds.
sliced_data, selected_timestamps = slice_timeseries_data(
    downsampled_data,
    sizes,
    thresholds,
)

# --- Expansion ---------------------------------------------------------
# Produces the model inputs (expanded_dict) and the training target (response_data).
expanded_dict, response_data = masked_expand(sliced_data, sizes)

# Accumulators filled by the benchmark loop: one metrics dict and one trained
# model per module, appended in the same order.
results = []
models = []


In [2]:
# Loop through each module and perform tests
# Start from empty accumulators so that re-running this cell does not append
# duplicate rows/models to the lists created in the setup cell.
results.clear()
models.clear()

# Position of the single sliced sample used for the generation check.
sample_index = 400

for module_name in modules:
    print(module_name)
    # Dynamically import the module and pick up its build/train functions
    module = importlib.import_module(module_name)
    build_cerberus = module.build_cerberus
    train_cerberus = module.train_cerberus

    # Time only model construction and training; stopping the clock here keeps
    # prediction/generation time out of the value reported as 'train_time'.
    tic = time.perf_counter()
    # 64 is forwarded to the builder as-is (meaning defined by build_cerberus);
    # 30 matches the "Epoch n/30" training log, i.e. the number of epochs.
    model = build_cerberus(expanded_dict, response_data, 64)
    model = train_cerberus(model, expanded_dict, response_data, 30)
    train_seconds = time.perf_counter() - tic

    # Model inputs in the order call, contexts..., response; contexts follow
    # the insertion order of expanded_dict.
    train_call = expanded_dict['call']
    train_contexts = [expanded_dict[key] for key in expanded_dict if 'context' in key]
    train_response = expanded_dict['response']

    predicted = model.predict([train_call] + train_contexts + [train_response])

    # RMSE of the model's predictions against the training targets
    training_rmse = np.sqrt(np.mean((predicted - response_data) ** 2))

    # Check individual generation on one sample; the slice keeps the leading axis
    selected_data = {key: value[sample_index:sample_index + 1, :]
                     for key, value in sliced_data.items()}
    responses_generated = generate_predictions(model, selected_data)
    observed = selected_data['response'][0, :, :]

    generated_rmse = np.sqrt(np.mean((responses_generated - observed) ** 2))

    # Record results; `results` and `models` stay aligned index-for-index
    results.append({
        'module': module_name,
        'train_time': train_seconds,
        'training_rmse': training_rmse,
        'generated_rmse': generated_rmse
    })
    models.append(model)


cerberus_builder
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30

In [None]:
# One row per benchmarked module, built from the metric dicts collected in `results`.
results_df = pd.DataFrame(data=results)
results_df

In [None]:
# Load in the best model
# results_df is built from a plain list, so it has the default 0..n-1 index, and
# `results`/`models` are appended together in the benchmark loop. The label
# returned by idxmin is therefore also the position of the matching model.
min_index = results_df['generated_rmse'].idxmin()
# Use the computed index rather than always taking the first model.
model = models[min_index]

In [None]:
# Pull the single sample at position 400 out of every sliced array (the
# one-element slice keeps the leading axis), then generate a response for it
# with the currently selected model.
selected_data = {
    name: array[400:401, :]
    for name, array in sliced_data.items()
}
responses_generated = generate_predictions(model, selected_data)

# Show the observed response next to the generated one.
print(selected_data['response'])
print(responses_generated)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Observed response of the selected sample vs. the response generated by the model
observed = selected_data['response'][0, :, :]
modeled = responses_generated

# Rows index the plotted x-axis positions, columns are the response features
num_rows, num_cols = observed.shape

# Draw one figure per feature (column), overlaying observed and modeled series
for i in range(num_cols):
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(observed[:, i], label='Observed - Feature {}'.format(i+1))
    ax.plot(modeled[:, i], label='Modeled - Feature {}'.format(i+1))
    ax.set_title(f'Feature {i+1} Comparison')
    ax.set_xlabel('Time')
    ax.set_ylabel('Value')
    ax.legend()
    plt.show()
