In [4]:
# Import python libraries
#
import importlib
import os
import pickle

import holidays
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import pytz
from joblib import Parallel, delayed
from tqdm import tqdm

# Import own modules.
# All imports are resolved relative to the root of the project.
# Therefore, please add the project root to your environment variables.
#
import sys
sys.path.append('../')
import data.weather_data as weather_data
import data.demandprofiles_readout as demandprofiles
import data.standardprofiles_readout as standardprofiles
import LstmAdapter as LstmAdapter
import visualization as visualization

# Number of power profiles (counted from the start of the loaded list) that are
# preprocessed and written to disk by the preprocessing loop below.
num_of_recs = 1


In [5]:
# Reload the project modules so that edits made to them are picked up
# without restarting the kernel.
importlib.reload(weather_data)
importlib.reload(demandprofiles)
importlib.reload(LstmAdapter)
importlib.reload(standardprofiles)

# Read the aggregated characteristic power profiles.
#
pickle_path = '../data/london_loadprofiles.pkl'
powerProfiles = pd.read_pickle(pickle_path)

# Read the weather data for the time span covered by the first power profile.
# The boundary timestamps are made timezone-naive before they are passed on.
#
startDate = powerProfiles[0].index[0].to_pydatetime().replace(tzinfo=None)
endDate = powerProfiles[0].index[-1].to_pydatetime().replace(tzinfo=None)
weather_measurements = weather_data.WeatherMeasurements()
weatherData = weather_measurements.get_data(
            startDate = startDate, 
            endDate = endDate,
            lat = 51.5085,      # Location:
            lon = -0.1257,      # London Heathrow,
            alt = 25,           # Weatherstation   
            sample_periode = 'hourly', 
            tz = 'UTC',
            )
# Keep only the columns that contain at least one non-zero entry.
# Note: NaN compares unequal to 0, so columns consisting only of NaN are kept.
weatherData = weatherData.loc[:, (weatherData != 0).any(axis=0)]

# Load the public holiday calendar for England (2010-2014) as UTC timestamps.
# NOTE(review): recent `holidays` releases document
# `holidays.country_holidays(..., subdiv=...)` as the replacement for
# `CountryHoliday(..., prov=...)` - confirm against the installed version.
public_holidays_dict = holidays.CountryHoliday('GB', prov='ENG', years=range(2010, 2015))
public_holidays_timestamps = [pd.Timestamp(date, tzinfo=pytz.utc) for date in public_holidays_dict.keys()]

# Bring the power profiles to the format needed by the model and store them.
# The output directory is created up front so that the first run on a fresh
# checkout does not fail with FileNotFoundError.
#
output_dir = 'outputs'
os.makedirs(output_dir, exist_ok=True)
selected_profiles = powerProfiles[:num_of_recs]
for i, powerProfile in tqdm(enumerate(selected_profiles), total=len(selected_profiles)):

    # Preprocess data to get X and Y for the model
    filename = f'{output_dir}/file_{i}.pkl'
    lstmAdapter = LstmAdapter.LstmAdapter(public_holidays_timestamps, train_size = 466, dev_size = 0, 
                                            add_tda_features=False, addLaggedPower=True, shuffle_data=False,
                                            use_persistent_entropy = False, seed=0)

    X, Y = lstmAdapter.transformData(powerProfile, weatherData)

    # The adapter is stored next to X and Y; later cells unpickle all three together.
    with open(filename, 'wb') as file:
        pickle.dump((X, Y, lstmAdapter), file)


100%|██████████| 1/1 [00:01<00:00,  1.82s/it]


In [60]:
import ModelTrainer as modelTrainer
importlib.reload(modelTrainer)

# Train every configured model type on every preprocessed data file.
# The file list is derived from num_of_recs so that it always matches the
# files written by the preprocessing loop (outputs/file_<i>.pkl).
#
test_config = {}
test_config['models'] = ['KNN',] # ['xLSTM', 'LSTM', 'Transformer', ]   
test_config['data'] = ['outputs/file_' + str(i) + '.pkl' for i in range(num_of_recs)]
myModelTrainer = modelTrainer.ModelTrainer(test_config, use_multiprocessing=False)

# Later cells index both results by the tuple (model name, data file path).
train_histories, myModels = myModelTrainer.run()


100%|██████████| 1/1 [00:00<00:00, 78.76it/s]


In [61]:
# import os
# filename = 'outputs/model_evaluation_20240922.pkl'   
# if not os.path.exists(filename):  # Check if the file already exists
#     with open(filename, 'wb') as file:
#         pickle.dump((train_histories, myModels), file)
#     print("New file written!")
# with open(filename, 'rb') as file:
#     (train_histories, myModels) = pickle.load(file)


In [62]:
import plotly.io as pio
import ModelTrainer as modelTrainer
importlib.reload(modelTrainer)
importlib.reload(visualization)

# Inspect the trained KNN model for the first preprocessed profile
# in the project's Plotly app.
#
test_profile = "outputs/file_0.pkl"
with open(test_profile, 'rb') as f:
    # Only unpickle files that were written by this notebook.
    X, Y, lstmAdapter = pickle.load(f)

# Trained models are keyed by (model name, data file path).
knn_model = myModels[("KNN", test_profile)]
plotlyApp = visualization.PlotlyApp(X, Y, knn_model, lstmAdapter, None)
plotlyApp.run(myport=8055)


In [70]:
# Evaluate the results
#

# Take the last recorded training and validation loss of every run
# from its history.
train_losses = {run_id: history['loss'][-1] for run_id, history in train_histories.items()}
val_losses = {run_id: history['val_loss'][-1] for run_id, history in train_histories.items()}

def print_metric(metric, ref=0):
    """Print each entry of a metric dict together with its offset from a reference.

    One line is printed per entry, showing the absolute value, the
    difference ``ref - value`` and the identifier of the run.

    Args:
        metric: Mapping from a run identifier to a numeric metric value.
        ref: Reference value the metric values are compared against.
            Defaults to 0, in which case the difference is the negated value.
            Pass e.g. the mean of a baseline group to compare against it.
    """
    for test_id, value in metric.items():
        diff = ref - value
        print(f"Abs.: {value:.4f}, Diff.: {diff:.4f}. With {test_id}")

num_of_recs = 1
# print("\nMean with    TDA: ", np.mean(list(val_losses.values())[:num_of_recs]))
# print("Mean else       : ", np.mean(list(val_losses.values())[num_of_recs:]))
# print("Median with  TDA: ", np.median(list(val_losses.values())[:num_of_recs]))
# print("Median else     : ", np.median(list(val_losses.values())[num_of_recs:]))

# Report the final losses per run; the "Test-MSE" section shows the
# validation losses, the "Train-MSE" section the training losses.
for heading, losses in (("\n\nTest-MSE:", val_losses), ("\n\nTrain-MSE:", train_losses)):
    print(heading)
    print_metric(losses)




Test-MSE:
Abs.: 0.3512, Diff.: -0.3512. With ('KNN', 'outputs/file_0.pkl')


Train-MSE:
Abs.: 0.0000, Diff.: 0.0000. With ('KNN', 'outputs/file_0.pkl')


In [71]:
import plotly.express as px

# Flatten all training histories into (run, epoch, loss) records.
# Epochs are numbered starting at 1.
combined_loss_data = [
    (run_id, epoch, loss)
    for run_id, history in train_histories.items()
    for epoch, loss in enumerate(history['loss'], start=1)
]

# Put the records into a long-format DataFrame, one row per run and epoch
import pandas as pd
df = pd.DataFrame(combined_loss_data, columns=['Run', 'Epoch', 'Loss'])

# Draw one line per run into a single figure; the y-axis is fixed to [0, 1]
fig = px.line(df, x='Epoch', y='Loss', color='Run', labels={'Loss': 'Training Loss', 'Epoch': 'Epoch'})
fig.update_layout(title='Training Loss Over Epochs for Different Runs')
fig.update_yaxes(range=[0, 1])
fig.show()
