# Wgrywanie bibliotek

Import necessary libraries

In [59]:
import csv
import glob
import os
# from PIL import Image
import time
import pickle
import math
import numpy as np
import pandas as pd
import openpyxl
import zipfile
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from tbats import TBATS

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from statsmodels.tsa.statespace.sarimax import SARIMAX

# import tensorflow as tf
# from keras import backend as K
# from tensorflow.keras.utils import plot_model
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import LSTM, Dense, Dropout

# Ustawienia

Settings

In [2]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', lambda x: '%.2f' % x)

Output folders to save files

In [3]:
# Get the path to the current folder where the notebook is located
current_folder = os.path.dirname(os.path.abspath("__file__"))

images_output_folder = os.path.join(current_folder, "../02_paper/out_figures/")
tables_output_folder = os.path.join(current_folder, "../02_paper/out_tables/")
models_output_folder = os.path.join(current_folder, "../../models/models")

# Wgranie danych
* Get the path to the "data.zip" file
* Unpacking the file
* Uploading the CSV (both: **_data_** and **_data_dict_**)

In [4]:
# Get the path to the "data" folder inside the repository
data_folder = os.path.join(current_folder, "..", "00_data")

# Get the path to the "data.zip" file inside the "data" folder
data_zip_path = os.path.join(data_folder, "data.zip")

# Check if both entsoe_country_file and entsoe_country_dict_file exist in the target location
entsoe_country_file = os.path.join(data_folder, "entsoe_country.csv")
entsoe_country_dict_file = os.path.join(data_folder, "entsoe_country_dict.csv")

if not (os.path.exists(entsoe_country_file) and os.path.exists(entsoe_country_dict_file)):
    # Extract the data.zip file
    with zipfile.ZipFile(data_zip_path, 'r') as zip_ref:
        zip_ref.extractall(data_folder)
    
# Read the CSV files and create DataFrames
data = pd.read_csv(entsoe_country_file, sep=';')
data_dict = pd.read_csv(entsoe_country_dict_file, sep=',')

In [5]:
sample_data_tab_00 = data[data["Variable"] == "BZN_PL"].dropna().head(5)
sample_data_tab_00 = sample_data_tab_00.rename(columns={
    'TotalLoad_Actual_MW': 'TotalLoad\_Actual\_MW',
    'TotalLoad_Forecast_MW': 'TotalLoad\_Forecast\_MW'
})
sample_data_tab_00['Variable'] = sample_data_tab_00['Variable'].replace({'BZN_PL': 'BZN\_PL'}, regex=True)
(sample_data_tab_00).style.hide(axis = 0).to_latex(os.path.join(tables_output_folder, "tab_01.tex"), hrules=True)

Liczba wierszy

In [6]:
data.shape[0]

3811848

# Przygotowanie danych (część 1)

In [7]:
data = data.drop_duplicates()

# Country name mapping
data['CountryCode'] = data['Variable'].map(lambda x: x.lstrip('BZN_'))
data = pd.merge(data, data_dict, on="CountryCode")
data = data.drop(['Variable', 'CountryCode'], axis=1)

data['Timestamp'] = pd.to_datetime(data['Timestamp'])

In [8]:
(data_dict).style.hide(axis = 0).to_latex(os.path.join(tables_output_folder, "tab_02.tex"), hrules=True)

In [9]:
# Select data only for Romania from 2021-01-31 00:15:00
romania_data = data[(data['Country'] == 'Romania') & (data['Timestamp'] >= '2021-01-31 00:15:00')]

# Create a column with full hours (rounded down to the nearest hour)
romania_data.loc[:, 'Timestamp'] = romania_data['Timestamp'].dt.floor('H')

# Group the data by full hours and calculate the mean of four measurements
aggregated_romania_data = romania_data.groupby('Timestamp')['TotalLoad_Actual_MW'].mean().reset_index()
aggregated_romania_data["Country"] = "Romania"

# Select the indices of the original data for Romania that meet the conditions
indices_to_remove = data[(data['Country'] == 'Romania') & (data['Timestamp'] >= '2021-01-31 00:15:00')].index

# Remove the original data for Romania that meets the conditions
data = data.drop(indices_to_remove)
data = data.drop(columns=["TotalLoad_Forecast_MW"])

# Concatenate the updated data for Romania
data = pd.concat([data, aggregated_romania_data], ignore_index=True)

In [10]:
sample_data_tab_03 = data[data["Country"] == "Poland"].dropna().head(5)
sample_data_tab_03['TotalLoad_Actual_MW'] = sample_data_tab_03['TotalLoad_Actual_MW'].apply(lambda x: '{:.2f}'.format(x))
sample_data_tab_03 = sample_data_tab_03.rename(columns={
    'TotalLoad_Actual_MW': 'TotalLoad\_Actual\_MW'
})
(sample_data_tab_03).style.hide(axis = 0).to_latex(os.path.join(tables_output_folder, "tab_03.tex"), hrules=True)

## Analiza brakujących danych

Liczba wierszy po wstępnym przygotowaniu danych

In [11]:
data.shape[0]

2161177

Liczba wierszy z NaN

In [12]:
data['TotalLoad_Actual_MW'].replace(0, np.nan, inplace=True)
data["TotalLoad_Actual_MW"].isna().sum()

41902

In [13]:
nan_values_per_country = []

# Unique countries in the 'Country' column
countries = data['Country'].unique()

# Loop through each country
for country in countries:
    # Select rows only for the current country
    df_country = data[data['Country'] == country]
    
    # Number of all rows in the country
    num_rows = len(df_country)
    
    # Number of NaN in the 'TotalLoad_Actual_MW' column
    num_nan = df_country['TotalLoad_Actual_MW'].isna().sum()
    
    # Percentage of NaN for all rows in the country
    percent_nan = (num_nan / num_rows) * 100
    percent_nan = '{:.2f}'.format(percent_nan)
    
    # Add results to the list
    nan_values_per_country.append({
        'Country': country,
        'NumRows': num_rows,
        'NumNaN': num_nan,
        'PercentNaN': percent_nan
    })

# Create a DataFrame from the results
nan_values_per_country = pd.DataFrame(nan_values_per_country)

nan_values_per_country.style.hide(axis = 0).to_latex(os.path.join(tables_output_folder, "tab_04.tex"), hrules=True)

In [14]:
def find_missing_value_date_ranges(dataframe):
    dataframe['Timestamp'] = pd.to_datetime(dataframe['Timestamp'])

    # Creation the resulting DataFrame.
    result = []

    # Iteration over unique countries.
    for country in dataframe['Country'].unique():
        country_data = dataframe[dataframe["Country"] == country][["Timestamp", "TotalLoad_Actual_MW"]]  
        country_data['Timestamp'] = pd.to_datetime(country_data['Timestamp'])

        # Aggregation data to full days and sorting.
        agg_df = country_data.resample('D', on='Timestamp').sum().reset_index()

        for metric in ["TotalLoad_Actual_MW"]:  
            start_date = None
            end_date = None

            for index, row in agg_df.iterrows():
                if row[metric] == 0.00:
                    if start_date is None:
                        start_date = row['Timestamp']
                elif start_date is not None:
                    end_date = row['Timestamp']
                    number_of_days = (end_date - start_date).days + 1
                    result.append({'Country': country, 'Metric': metric, 'start_date': start_date, 'end_date': end_date, 'number_of_days': number_of_days})
                    start_date = None
                    end_date = None
            
            # Handling missing end_date - if we have reached the last record
            if end_date is None and start_date is not None:
                end_date = agg_df['Timestamp'].iloc[-1]
                number_of_days = (end_date - start_date).days + 1
                result.append({'Country': country, 'Metric': metric, 'start_date': start_date, 'end_date': end_date, 'number_of_days': number_of_days})

    missing_value_date_ranges = pd.DataFrame(result)
    missing_value_date_ranges = missing_value_date_ranges.sort_values(by=['Country', 'Metric']).reset_index(drop=True)

    missing_value_date_ranges.drop(columns=['Metric'], inplace=True) 
    
    return missing_value_date_ranges

missing_value_date_ranges = find_missing_value_date_ranges(data)

column_mapping = {
    'Country': 'Country',
    'start_date': 'Start Date',
    'end_date': 'End Date',
    'number_of_days': 'Number of Days'
}

missing_value_date_ranges.columns = [column_mapping[col] for col in missing_value_date_ranges.columns]

In [15]:
missing_value_date_ranges['Start Date'] = missing_value_date_ranges['Start Date'].dt.date
missing_value_date_ranges['End Date'] = missing_value_date_ranges['End Date'].dt.date

missing_value_date_ranges.style.hide(axis = 0).to_latex(os.path.join(tables_output_folder, "tab_05.tex"), hrules=True)

### Wykresy

#### Przygotowanie danych do wykresów (agregacja do tygodnia)

In [16]:
data_prepared = data.set_index('Timestamp')

def remove_first_last(group):
    return group.iloc[1:-1]

# By default, in pandas, a week is defined as a calendar week starting from Monday and ending on Sunday.
# If a week spans across two years, it will be assigned to the year in which the majority of weekdays fall.
weekly_data = data_prepared.groupby('Country')[['TotalLoad_Actual_MW']].resample('W').sum().reset_index()
weekly_data = weekly_data.rename(columns={'Timestamp': 'Date'})
weekly_data = weekly_data.groupby('Country').apply(remove_first_last).reset_index(drop=True)

sample_data_tab_06 = weekly_data.head(5)
sample_data_tab_06.loc[:, 'TotalLoad_Actual_MW'] = sample_data_tab_06['TotalLoad_Actual_MW'].apply(lambda x: '{:.2f}'.format(x))
sample_data_tab_06 = sample_data_tab_06.rename(columns={
    'TotalLoad_Actual_MW': 'TotalLoad\_Actual\_MW'
})
(sample_data_tab_06).style.hide(axis = 0).to_latex(os.path.join(tables_output_folder, "tab_06.tex"), hrules=True)

#### _TotalLoad_Actual_MW dla kraju **przed imputacją**._

In [17]:
country_list = weekly_data['Country'].unique()

for country in country_list:
    electricity_consumption_per_country = weekly_data[weekly_data['Country'] == country].copy()
    electricity_consumption_per_country['Date'] = pd.to_datetime(electricity_consumption_per_country['Date'])
    electricity_consumption_per_country['TotalLoad_Actual_MW'].replace(0, np.nan, inplace=True)

    # Find the index of the first non-NaN value in the 'TotalLoad_Actual_MW' column
    first_non_nan_index = electricity_consumption_per_country['TotalLoad_Actual_MW'].first_valid_index()

    # Trim the DataFrame from that position
    if first_non_nan_index is not None:
        electricity_consumption_per_country = electricity_consumption_per_country.loc[first_non_nan_index:]
    
    plt.style.use('seaborn-v0_8')
    plt.rcParams['font.family'] = 'Times New Roman'
    
    plt.figure(figsize=(20, 5))
    plt.ylabel("Electricity Consumption (Terawatts)", fontsize=14)
    plt.title(f"Actual Electricity Consumption in Terawatts for Country: {country}", fontsize=16)
    
    formatter = ticker.ScalarFormatter(useMathText=True)
    formatter.set_scientific(False)
    formatter.set_powerlimits((-6, 6))
    plt.gca().yaxis.set_major_formatter(formatter)
    
    plt.plot(electricity_consumption_per_country["Date"], electricity_consumption_per_country["TotalLoad_Actual_MW"], color='steelblue')
    
    plt.xlim(electricity_consumption_per_country["Date"].iloc[0], electricity_consumption_per_country["Date"].iloc[-1])
    
    ticks = plt.gca().get_yticks()
    tick_labels = [f'{int(tick) / 1000000:.1f}' for tick in ticks]
    plt.gca().yaxis.set_major_locator(ticker.FixedLocator(ticks))
    plt.gca().set_yticklabels(tick_labels)

    output_file_path = os.path.join(images_output_folder, f"actual_electricity_consumption_{country}.png")
    if not os.path.exists(output_file_path):
        plt.savefig(output_file_path)
    
    plt.close()

## Imputacja

Imputation was performed using a weighted average with a 7-day window.

Currently, due to a significant amount of missing data for Cyprus, I am refraining from performing imputation for this country. This method is not efficient for Cyprus.

In [18]:
# Create a function for imputation
def impute_missing_values(row):
    if pd.isnull(row['TotalLoad_Imputed_MW']):
        # Select the last 30 measurements for the same weekday and hour
        recent_data = country_data[
            (country_data['Timestamp'].dt.weekday == row['Timestamp'].weekday()) &
            (country_data['Timestamp'].dt.hour == row['Timestamp'].hour)
        ].tail(30)

        # Fill the missing field with the mean of the last 30 measurements
        imputed_value = recent_data['TotalLoad_Actual_MW'].mean()
        return imputed_value
    else:
        return row['TotalLoad_Imputed_MW']

# Create a copy of the DataFrame data
data_imputed = data.copy()

# Add a column TotalLoad_Imputed_MW with the original data
data_imputed['TotalLoad_Imputed_MW'] = data_imputed['TotalLoad_Actual_MW']

# Unique countries in the DataFrame
unique_countries = data['Country'].unique()

# Loop for each country
for country in unique_countries:
    # Select data only for the specific country
    country_data = data[data['Country'] == country]
    # Find the index of the first non-NaN value in the 'TotalLoad_Actual_MW' column
    first_non_nan_index = country_data['TotalLoad_Actual_MW'].first_valid_index()
    # Trim the DataFrame to that position
    if first_non_nan_index is not None:
        country_data = country_data.loc[first_non_nan_index:]

    # Sort the DataFrame by the Timestamp column
    country_data = country_data.sort_values(by='Timestamp')

    # Apply the imputation function and save the results in the TotalLoad_Imputed_MW column
    data_imputed.loc[data_imputed['Country'] == country, 'TotalLoad_Imputed_MW'] = data_imputed[data_imputed['Country'] == country].apply(impute_missing_values, axis=1)

# Restore the original order of the DataFrame
data_imputed = data_imputed.sort_values(by='Timestamp')

### Wykresy

In [19]:
# Setting the index to 'Timestamp'.
data_imputed.set_index('Timestamp', inplace=True)

# Resampling and grouping by weeks
weekly_imputed_data = data_imputed.groupby('Country')[['TotalLoad_Actual_MW', 'TotalLoad_Imputed_MW']].resample('W').sum().reset_index()  
weekly_imputed_data = weekly_imputed_data.rename(columns={'Timestamp': 'Date'})
weekly_imputed_data = weekly_imputed_data.groupby('Country').apply(remove_first_last).reset_index(drop=True)
weekly_imputed_data = weekly_imputed_data.reset_index()

_TotalLoad_Actual_MW per country **after imputation**._

In [20]:
country_list = weekly_imputed_data['Country'].unique()
for country in country_list:
    electricity_consumption_per_country = weekly_imputed_data[weekly_imputed_data['Country'] == country]
    electricity_consumption_per_country = electricity_consumption_per_country.reset_index(drop=True)
    electricity_consumption_per_country.drop(columns="index", inplace=True)

    electricity_consumption_per_country = weekly_imputed_data[weekly_imputed_data['Country'] == country].copy()
    electricity_consumption_per_country['Date'] = pd.to_datetime(electricity_consumption_per_country['Date'])
    electricity_consumption_per_country['TotalLoad_Actual_MW'].replace(0, np.nan, inplace=True)

    # Find the index of the first non-NaN value in the 'TotalLoad_Actual_MW' column
    first_non_nan_index = electricity_consumption_per_country['TotalLoad_Actual_MW'].first_valid_index()

    # Trim the DataFrame from that position
    if first_non_nan_index is not None:
        electricity_consumption_per_country = electricity_consumption_per_country.loc[first_non_nan_index:]
    
    plt.style.use('seaborn-v0_8')
    plt.rcParams['font.family'] = 'Times New Roman'
    
    plt.figure(figsize=(20, 5))
    plt.ylabel("Electricity Consumption (Terawatts)", fontsize=14)
    plt.title(f"Imputed Electricity Consumption in Terawatts for Country: {country}", fontsize=16)
    
    formatter = ticker.ScalarFormatter(useMathText=True)
    formatter.set_scientific(False)
    formatter.set_powerlimits((-6, 6))
    plt.gca().yaxis.set_major_formatter(formatter)
    
    plt.plot(electricity_consumption_per_country["Date"], electricity_consumption_per_country["TotalLoad_Actual_MW"], color='firebrick', label='Actual')
    plt.plot(electricity_consumption_per_country["Date"], electricity_consumption_per_country["TotalLoad_Imputed_MW"], color='steelblue', label='Imputed')
    
    plt.xlim(electricity_consumption_per_country["Date"].iloc[0], electricity_consumption_per_country["Date"].iloc[-1])
    plt.legend()
    
    ticks = plt.gca().get_yticks()
    tick_labels = [f'{int(tick) / 1000000:.1f}' for tick in ticks]
    plt.gca().yaxis.set_major_locator(ticker.FixedLocator(ticks))
    plt.gca().set_yticklabels(tick_labels)

    output_file_path = os.path.join(images_output_folder, f"imputed_electricity_consumption_{country}.png")
    if not os.path.exists(output_file_path):
        plt.savefig(output_file_path)
    
    plt.close()

# Ostateczne przygotowanie danych do trenowania/testowania modeli

In [21]:
data_for_the_model = weekly_imputed_data[['Country', 'Date', 'TotalLoad_Imputed_MW']]
data_for_the_model.loc[:, 'Date'] = pd.to_datetime(data_for_the_model['Date']).dt.date

sample_data_tab_07 = data_for_the_model[data_for_the_model["Country"] == "Poland"].head(5)
sample_data_tab_07['TotalLoad_Imputed_MW'] = sample_data_tab_07['TotalLoad_Imputed_MW'].apply(lambda x: '{:.2f}'.format(x))
sample_data_tab_07 = sample_data_tab_07.rename(columns={
    'TotalLoad_Imputed_MW': 'TotalLoad\_Imputed\_MW'
})
(sample_data_tab_07).style.hide(axis = 0).to_latex(os.path.join(tables_output_folder, "tab_07.tex"), hrules=True)

# PREDYKCJE

In [60]:
country_list = sorted(data_for_the_model['Country'].unique())

## METRYKI

In [23]:
def MAPE(y, y_pred):
    mape = np.mean(np.abs((y - y_pred)/y))*100
    return round(mape, 2)

def ME(y, y_pred):
    me = np.mean(y_pred - y)
    return round(me, 2)

def lstm_me(y_true, y_pred):
    return K.mean(y_pred - y_true, axis=-1)

def RMSE(MSE):
    rmse = math.sqrt(MSE)
    return round(rmse, 2)

## MODELE

### SARIMA

#### Szukanie optymalnych parametrów

The **sarima_split_best_params_search_fit_predict_plot()** function was employed to discover the optimal parameters for SARIMA models for each country, as well as to evaluate the model performance using the selected parameters. Commented out to avoid re-searching parameters.

In [24]:
# def sarima_split_best_params_search_fit_predict_plot(country_name, data):
    
#     dataset = data.values
#     train_data = data[data.index <= '2019-12-31']
#     test_data = data[data.index >= '2020-01-01']
    
#     p = range(0, 2)
#     d = range(0, 2)
#     q = range(0, 2)
#     P = range(0, 2)
#     D = range(1, 2)
#     Q = range(0, 2)
#     s = 12

#     best_aic = float("inf")
#     best_params = None

#     for p_val in p:
#         for d_val in d:
#             for q_val in q:
#                 for P_val in P:
#                     for D_val in D:
#                         for Q_val in Q:
#                             try:
#                                 model = SARIMAX(train_data, order=(p_val, d_val, q_val), seasonal_order=(P_val, D_val, Q_val, s))
#                                 fit_model = model.fit()
#                                 aic = fit_model.aic
#                                 if aic < best_aic:
#                                     best_aic = aic
#                                     best_params = (p_val, d_val, q_val, P_val, D_val, Q_val)
#                             except:
#                                 continue

#     print(f"Best SARIMA parameters for {country_name}:", best_params)

#     results = {
#         'country': country_name,
#         'best_params': best_params,
#     }
    
#     try:
#         p_val, d_val, q_val, P_val, D_val, Q_val = best_params
#         s = 52 
#         model = SARIMAX(train_data, order=(p_val, d_val, q_val), seasonal_order=(P_val, D_val, Q_val, s))
#         fit_model = model.fit()
#         yhat = fit_model.predict(start=len(train_data), end=(len(dataset)-1))
    
#         pd.DataFrame(yhat).plot()
#         data.plot(figsize=(20, 5))
#         plt.legend()
#     except TypeError:
#         pass
    
#     return results

# results_list = []

# for country in data_for_the_model['Country'].unique():
#     try: 
#         print(f'Evaluation for country: {country}')
#         country_data = data_for_the_model[data_for_the_model['Country'] == country].set_index('Date').asfreq('W')
#         results = sarima_split_best_params_search_fit_predict_plot(country, country_data["TotalLoad_Imputed_MW"])
#         results_list.append(results)
#     except Exception as e:
#         print(f"An error occurred for country {country}: {e}")

# sarima_best_params = {results['country']: results['best_params'] for results in results_list}

# sarima_best_params_df = pd.DataFrame(results_list)
# display(sarima_best_params_df)
# (sarima_best_params_df).style.hide(axis = 0).to_latex(os.path.join(tables_output_folder, "tab_08.tex"), hrules=True)

#### Trenowanie, predykcja, wykresy i ewaluacja modelu

In [36]:
def sarima_fit_predict_plot_evaluate(country_name, country_data, best_params):
    dataset = country_data.values
    train_data = country_data[country_data.index <= '2019-12-31']
    test_data = country_data[country_data.index >= '2020-01-01']
    
    s = 52

    p_val, d_val, q_val, P_val, D_val, Q_val = best_params

    model = SARIMAX(train_data, order=(p_val, d_val, q_val), seasonal_order=(P_val, D_val, Q_val, s))
    fit_model = model.fit()
    yhat = fit_model.predict(start=len(train_data), end=(len(dataset)-1))

    model_filename = f"sarima_model_{country_name}.pkl"
    model_file_path = os.path.join(models_output_folder, model_filename)

    if not os.path.exists(model_file_path):
        with open(model_file_path, 'wb') as model_file:
            pickle.dump(fit_model, model_file)

    plt.style.use('seaborn-v0_8')
    plt.rcParams['font.family'] = 'Times New Roman'
    
    plt.figure(figsize=(20, 5))
    plt.ylabel("Electricity Consumption (Terawatts)", fontsize=14)
    plt.title(f"SARIMA Prediction for Electricity Consumption in Terawatts for Country: {country_name}", fontsize=16)

    yhat_df = pd.DataFrame(yhat)
    country_data_subset = country_data.iloc[1:-1]  
    yhat_df_subset = yhat_df.iloc[1:-1]
    
    formatter = ticker.ScalarFormatter(useMathText=True)
    formatter.set_scientific(False)  
    formatter.set_powerlimits((-6, 6))  
    plt.gca().yaxis.set_major_formatter(formatter)
    
    plt.plot(yhat_df_subset, color="firebrick", label='Predicted')
    plt.plot(country_data_subset, color='steelblue', label='Actual')
    
    plt.xlim(country_data_subset.index[0], country_data_subset.index[-1])
    plt.legend()
    
    # Changing the Y-axis value labels from 5000000 to 5.0
    plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])

    output_file_path = os.path.join(images_output_folder, f"sarima_predictions_{country}.png")
    if not os.path.exists(output_file_path):
        plt.savefig(output_file_path)
    
    plt.close()
    
    MAPE_metric = MAPE(test_data, yhat)
    ME_metric = ME(test_data, yhat)
    MAE_metric = round(mean_absolute_error(test_data, yhat), 2)
    MSE_metric = round(mean_squared_error(test_data, yhat), 2)
    RMSE_metric = RMSE(MSE_metric)

    results = {
        'Model': 'sarima',
        'Country': country_name,
        'MAPE': MAPE_metric,
        'ME': ME_metric,
        'MAE': MAE_metric,
        'MSE': MSE_metric,
        'RMSE': RMSE_metric
    }
    
    return results

# Dictionary of best SARIMA parameters obtained using sarima_split_best_params_search_fit_predict_plot() function.
sarima_best_params = {
    'Austria': (0, 1, 0, 1, 1, 1),
    'Croatia': (0, 1, 0, 1, 1, 1),
    'Cyprus': (0, 1, 0, 1, 1, 1),
    'Czech Republic': (0, 1, 0, 1, 1, 1),
    'Denmark': (0, 1, 0, 1, 1, 1),
    'Estonia': (0, 1, 0, 1, 1, 1),
    'Finland': (0, 1, 0, 1, 1, 1),
    'France': (0, 1, 0, 1, 1, 1),
    'Germany': (0, 1, 0, 1, 1, 1),
    'Greece': (0, 1, 0, 1, 1, 1),
    'Hungary': (0, 1, 0, 1, 1, 1),
    'Ireland': (0, 1, 0, 1, 1, 1),
    'Italy': (0, 1, 0, 1, 1, 1),
    'Latvia': (0, 1, 0, 1, 1, 1),
    'Lithuania': (0, 1, 0, 1, 1, 1),
    'Luxembourg': (0, 1, 0, 1, 1, 1),
    'Poland': (0, 1, 0, 1, 1, 1),
    'Portugal': (0, 1, 0, 1, 1, 1),
    'Romania': (0, 1, 0, 1, 1, 1),
    'Slovakia': (0, 1, 0, 1, 1, 1),
    'Slovenia': (0, 1, 0, 1, 1, 1),
    'Spain': (0, 1, 0, 1, 1, 1),
    'Sweden': (0, 1, 0, 1, 1, 1)
}

results_list = []

for country in data_for_the_model['Country'].unique():
    try: 
        print(f'Evaluation for country: {country}')
        country_data = data_for_the_model[data_for_the_model['Country'] == country].set_index('Date').asfreq('W')
        best_params = sarima_best_params.get(country)
        if best_params is None:
            print(f"No parameters found for country: {country}")
            continue
        results = sarima_fit_predict_plot_evaluate(country, country_data["TotalLoad_Imputed_MW"], best_params)
        results_list.append(results)
    except Exception as e:
        print(f"An error occurred for country {country}: {e}")

sarima_results = pd.DataFrame(results_list)

Evaluation for country: Austria


  warn('Non-invertible starting seasonal moving average'
  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])
  warn('Non-invertible starting seasonal moving average'


Evaluation for country: Croatia


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])
  warn('Non-invertible starting seasonal moving average'


Evaluation for country: Cyprus


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])
  warn('Non-invertible starting seasonal moving average'


Evaluation for country: Czech Republic


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])
  warn('Non-invertible starting seasonal moving average'


Evaluation for country: Denmark


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])
  warn('Non-invertible starting seasonal moving average'


Evaluation for country: Estonia


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])
  warn('Non-invertible starting seasonal moving average'


Evaluation for country: Finland


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])
  warn('Non-invertible starting seasonal moving average'


Evaluation for country: France


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])
  warn('Non-invertible starting seasonal moving average'


Evaluation for country: Germany


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])
  warn('Non-invertible starting seasonal moving average'


Evaluation for country: Greece


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])
  warn('Non-invertible starting seasonal moving average'


Evaluation for country: Hungary


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])
  warn('Non-invertible starting seasonal moving average'


Evaluation for country: Ireland


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])
  warn('Non-invertible starting seasonal moving average'


Evaluation for country: Italy


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])
  warn('Non-invertible starting seasonal moving average'


Evaluation for country: Latvia


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])
  warn('Non-invertible starting seasonal moving average'


Evaluation for country: Lithuania


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])
  warn('Non-invertible starting seasonal moving average'


Evaluation for country: Luxembourg


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])
  warn('Non-invertible starting seasonal moving average'


Evaluation for country: Poland


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])
  warn('Non-invertible starting seasonal moving average'


Evaluation for country: Portugal


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])
  warn('Non-invertible starting seasonal moving average'


Evaluation for country: Romania


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])
  warn('Non-invertible starting seasonal moving average'


Evaluation for country: Slovakia


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])
  warn('Non-invertible starting seasonal moving average'


Evaluation for country: Slovenia


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])
  warn('Non-invertible starting seasonal moving average'


Evaluation for country: Spain


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])
  warn('Non-invertible starting seasonal moving average'


Evaluation for country: Sweden


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


In [26]:
sarima_best_params_df = pd.DataFrame(list(sarima_best_params.items()), columns=['Country', 'Params'])

# Rozdzielenie kolumny Params na osobne kolumny
sarima_best_params_df[['p', 'd', 'q', 'P', 'D', 'Q']] = pd.DataFrame(sarima_best_params_df['Params'].tolist(), index=sarima_best_params_df.index)

# Usunięcie kolumny Params
sarima_best_params_df = sarima_best_params_df.drop('Params', axis=1)
sarima_best_params_df

(sarima_best_params_df).style.hide(axis = 0).to_latex(os.path.join(tables_output_folder, "tab_08.tex"), hrules=True)

### TBATS

#### Szukanie optymalnych parametrów

The **tbats_split_best_params_search_fit_predict()** function was employed to discover the optimal parameters for TBATS models for each country, as well as to evaluate the model performance using the selected parameters. Commented out to avoid re-searching parameters.

In [27]:
# tbats_best_params = {}

# def tbats_split_best_params_search_fit_predict(country_name, data):
    
#     dataset = data.values
#     train_data = data[data.index <= '2019-12-31']
#     test_data = data[data.index >= '2020-01-01']
    
#     seasonal_periods = 52
#     use_arma_errors = True
#     use_box_cox_options = [True, False]
#     use_trend_options = [True, False]
#     n_jobs_option = os.cpu_count()
#     use_damped_trend = True
    
#     best_aic = float("inf")
#     best_params = None
#     best_result_dict = None

#     for use_box_cox in use_box_cox_options:
#         for use_trend in use_trend_options:
#             try:
#                 model = TBATS(seasonal_periods=[seasonal_periods],
#                               use_arma_errors=use_arma_errors,
#                               use_box_cox=use_box_cox,
#                               use_trend=use_trend,
#                               n_jobs=n_jobs_option,
#                               use_damped_trend=use_damped_trend
#                              )
#                 fit_model = model.fit(train_data)
#                 aic = fit_model.aic
#                 if aic < best_aic:
#                     best_aic = aic
#                     best_params = (
#                         seasonal_periods, 
#                         use_arma_errors, 
#                         use_box_cox,
#                         use_trend, 
#                         n_jobs_option,
#                         use_damped_trend
#                         )
#                     best_result_dict = {
#                     "country": country_name,
#                     "seasonal_period": seasonal_periods,
#                     "use_arma_errors": use_arma_errors,
#                     "use_box_cox": use_box_cox,
#                     "use_trend": use_trend,
#                     "use_damped_trend": use_damped_trend
#                 }
#             except:
#                 continue

#     tbats_best_params[country_name] = best_result_dict

#     print(f'Best params for country {country_name}: {best_params}')
    
#     results = {
#         'country': country_name,
#         'best_params': best_params,
#     }
    
#     return(results)

# results_list = []

# for country in data_for_the_model['Country'].unique():
#     try: 
#         print(f'Evaluation for country: {country}')
#         country_data = data_for_the_model[data_for_the_model['Country'] == country].set_index('Date').asfreq('W')
#         results = tbats_split_best_params_search_fit_predict(country, country_data["TotalLoad_Imputed_MW"])
#         results_list.append(results)
#     except Exception as e:
#         print(f"An error occurred for country {country}: {e}")

# tbats_best_params = {results['country']: results['best_params'] for results in results_list}

#### Trenowanie, predykcja, wykresy i ewaluacja modelu

In [37]:
def tbats_fit_predict_plot_evaluate(country_name, country_data, best_params):

    dataset = country_data.values
    train_data = country_data[country_data.index <= '2019-12-31']
    test_data = country_data[country_data.index >= '2020-01-01']

    (
    seasonal_period, 
    use_arma_errors, 
    use_box_cox,
    use_trend, 
    n_jobs_option,
    use_damped_trend
    ) = best_params
    
    model = TBATS(seasonal_periods=[seasonal_period],
                                    use_arma_errors=use_arma_errors,
                                    use_box_cox=use_box_cox,
                                    use_trend=use_trend,
                                    n_jobs=n_jobs_option,
                                    use_damped_trend=use_damped_trend
                                    )
    fit_model = model.fit(train_data)
    yhat = fit_model.forecast(steps=len(test_data))

    model_filename = f"tbats_model_{country_name}.pkl"
    model_file_path = os.path.join(models_output_folder, model_filename)

    if not os.path.exists(model_file_path):
        with open(model_file_path, 'wb') as model_file:
            pickle.dump(fit_model, model_file)

    start_date = '2020-01-07'
    end_date = '2021-09-05'
    date_range = pd.date_range(start=start_date, end=end_date, freq='7D')

    yhat_df = pd.DataFrame(yhat, index=date_range)
    yhat_df_subset = yhat_df.iloc[1:-1]

    country_data_subset = country_data.iloc[1:-1] 

    plt.style.use('seaborn-v0_8')
    plt.rcParams['font.family'] = 'Times New Roman'
    
    plt.figure(figsize=(20, 5))
    plt.ylabel("Electricity Consumption (Terawatts)", fontsize=14)
    plt.title(f"TBATS Prediction for Electricity Consumption in Terawatts for Country: {country_name}", fontsize=16)
    
    formatter = ticker.ScalarFormatter(useMathText=True)
    formatter.set_scientific(False)
    formatter.set_powerlimits((-6, 6))
    plt.gca().yaxis.set_major_formatter(formatter)
    
    plt.plot(yhat_df_subset, color="firebrick", label='Predicted')
    plt.plot(country_data_subset, color='steelblue', label='Actual')
    
    plt.xlim(country_data_subset.index[0], country_data_subset.index[-1])
    plt.legend()
    
    plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])

    output_file_path = os.path.join(images_output_folder, f"tbats_predictions_{country}.png")
    if not os.path.exists(output_file_path):
        plt.savefig(output_file_path)
    
    plt.close()

    MAPE_metric = MAPE(test_data, yhat)
    ME_metric = ME(test_data, yhat)
    MAE_metric = round(mean_absolute_error(test_data, yhat), 2)
    MSE_metric = round(mean_squared_error(test_data, yhat), 2)
    RMSE_metric = RMSE(MSE_metric)

    results = {
        'Model': 'tbats',
        'Country': country_name,
        'MAPE': MAPE_metric,
        'ME': ME_metric,
        'MAE': MAE_metric,
        'MSE': MSE_metric,
        'RMSE': RMSE_metric
    }

    return results

# Dictionary of best TBATS parameters obtained using tbats_split_best_params_search_fit_predict() function
tbats_best_params = {
    'Austria': (52, True, False, False, 4, True),
    'Croatia': (52, True, True, False, 4, True),
    'Cyprus': (52, True, False, False, 4, True),
    'Czech Republic': (52, True, True, True, 4, True),
    'Denmark': (52, True, True, False, 4, True),
    'Estonia': (52, True, True, False, 4, True),
    'Finland': (52, True, True, False, 4, True),
    'France': (52, True, True, False, 4, True),
    'Germany': (52, True, False, False, 4, True),
    'Greece': (52, True, True, False, 4, True),
    'Hungary': (52, True, False, False, 4, True),
    'Ireland': (52, True, False, False, 4, True),
    'Italy': (52, True, True, False, 4, True),
    'Latvia': (52, True, True, False, 4, True),
    'Lithuania': (52, True, False, True, 4, True),
    'Luxembourg': (52, True, False, False, 4, True),
    'Poland': (52, True, False, False, 4, True),
    'Portugal': (52, True, False, False, 4, True),
    'Romania': (52, True, True, True, 4, True),
    'Slovakia': (52, True, True, True, 4, True),
    'Slovenia': (52, True, False, False, 4, True),
    'Spain': (52, True, False, False, 4, True),
    'Sweden': (52, True, True, False, 4, True)
}

results_list = []

for country in data_for_the_model['Country'].unique():
    try: 
        print(f'Evaluation for country: {country}')
        country_data = data_for_the_model[data_for_the_model['Country'] == country].set_index('Date').asfreq('W')
        best_params = tbats_best_params.get(country)
        if best_params is None:
            print(f"No parameters found for country: {country}")
            continue
        results = tbats_fit_predict_plot_evaluate(country, country_data["TotalLoad_Imputed_MW"], best_params)
        results_list.append(results)
    except Exception as e:
        print(f"An error occurred for country {country}: {e}")

tbats_results = pd.DataFrame(results_list)



Evaluation for country: Austria


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Evaluation for country: Croatia


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Evaluation for country: Cyprus


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Evaluation for country: Czech Republic


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Evaluation for country: Denmark


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Evaluation for country: Estonia


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Evaluation for country: Finland


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Evaluation for country: France


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Evaluation for country: Germany


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Evaluation for country: Greece


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Evaluation for country: Hungary


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Evaluation for country: Ireland


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Evaluation for country: Italy


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Evaluation for country: Latvia


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Evaluation for country: Lithuania


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Evaluation for country: Luxembourg


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Evaluation for country: Poland


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Evaluation for country: Portugal


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Evaluation for country: Romania


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Evaluation for country: Slovakia


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Evaluation for country: Slovenia


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Evaluation for country: Spain


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Evaluation for country: Sweden


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


In [79]:
tbats_best_params_df = pd.DataFrame(list(tbats_best_params.items()), columns=['Country', 'Params'])

# Splitting the 'Params' column into separate columns.
tbats_best_params_df[['seasonal\_period', 
    'use\_arma\_errors', 
    'use\_box\_cox',
    'use\_trend', 
    'n\_jobs\_option',
    'use\_damped\_trend']] = pd.DataFrame(tbats_best_params_df['Params'].tolist(), index=tbats_best_params_df.index)
tbats_best_params_df = tbats_best_params_df.drop('Params', axis=1)
tbats_best_params_df.loc[tbats_best_params_df['use\_trend'] == False, 'use\_damped\_trend'] = False
tbats_best_params_df
(tbats_best_params_df).style.hide(axis = 0).to_latex(os.path.join(tables_output_folder, "tab_09.tex"), hrules=True)

### LSTM

#### Trenowanie, predykcja, wykresy i ewaluacja modelu

In [61]:
def prepare_data(data, look_back=10):
    X, y = [], []
    for i in range(len(data) - look_back):
        X.append(data[i:i + look_back])
        y.append(data[i + look_back])
    return np.array(X), np.array(y)

def lstm_fit_predict_plot_evaluate(country_name, country_data):
    
    train_data = country_data[country_data.index <= pd.to_datetime('2019-12-31', format='%Y-%m-%d')]
    test_data = country_data[country_data.index >= pd.to_datetime('2020-01-01', format='%Y-%m-%d')]
    
    look_back = 10

    scaler = MinMaxScaler(feature_range=(-1, 1))
    train_data = scaler.fit_transform(train_data.values.reshape(-1, 1))
    test_data_rescaled = scaler.transform(test_data.values.reshape(-1, 1))

    X_train, y_train = prepare_data(train_data, look_back)
    X_test, y_test = prepare_data(test_data_rescaled, look_back)

    model = Sequential()
    model.add(LSTM(128, input_shape=(look_back, 1), return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(64))
    model.add(Dropout(0.2))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', metrics=["mape", lstm_me], optimizer='adam')
    
    fit_model = model.fit(X_train, y_train, epochs=500, batch_size=1, verbose=2)

    yhat = model.predict(X_test)
    predictions_rescaled = scaler.inverse_transform(yhat)
    yhat_df = pd.DataFrame(predictions_rescaled, index=test_data.index[look_back:])

    model_filename = f"lstm_model_{country_name}.pkl"
    model_file_path = os.path.join(models_output_folder, model_filename)

    if not os.path.exists(model_file_path):
        with open(model_file_path, 'wb') as model_file:
            pickle.dump(fit_model, model_file)

    country_data_subset = country_data.iloc[1:-1] 

    plt.style.use('seaborn-v0_8')
    plt.rcParams['font.family'] = 'Times New Roman'
    
    plt.figure(figsize=(20, 5))
    plt.ylabel("Electricity Consumption (Terawatts)", fontsize=14)
    plt.title(f"LSTM Prediction for Electricity Consumption in Terawatts for Country: {country_name}", fontsize=16)
    
    country_data_subset = country_data.iloc[1:-1] 
    yhat_df_subset = yhat_df.iloc[1:-1]
    
    formatter = ticker.ScalarFormatter(useMathText=True)
    formatter.set_scientific(False)
    formatter.set_powerlimits((-6, 6))
    plt.gca().yaxis.set_major_formatter(formatter)
    
    plt.plot(yhat_df_subset, color="firebrick", label='Predicted')
    plt.plot(country_data_subset, color='steelblue', label='Actual')
    
    plt.xlim(country_data_subset.index[0], country_data_subset.index[-1])
    plt.legend()
    
    plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])

    output_file_path = os.path.join(images_output_folder, f"lstm_predictions_{country}.png")
    if not os.path.exists(output_file_path):
        plt.savefig(output_file_path)
    
    plt.close()

    MAE_metric = round(mean_absolute_error(test_data.iloc[look_back:], yhat_df), 2)
    MSE_metric = round(mean_squared_error(test_data.iloc[look_back:], yhat_df), 2)
    RMSE_metric = RMSE(MSE_metric)

    results = {
        'Model': 'lstm',
        'Country': country_name,
        'MAE': MAE_metric,
        'MSE': MSE_metric,
        'RMSE': RMSE_metric
    }
    
    return fit_model, results

lstm_fit_history_results = pd.DataFrame(columns=["Country", "Epoch", "Loss", "MAPE", "ME"])
results_list = []

for country in country_list:
    try: 
        print(f'Evaluation for country: {country}')
        country_data = data_for_the_model[data_for_the_model['Country'] == country].set_index('Date').asfreq('W')
        country_history, country_results = lstm_fit_predict_plot_evaluate(country, country_data["TotalLoad_Imputed_MW"])
        locals()[f'history_{country}'] = country_history
        epochs = country_history.epoch
        loss = country_history.history['loss']
        mape = country_history.history['mape']
        me = country_history.history['lstm_me']

        temp_df = pd.DataFrame({
            "Country": [country] * len(epochs),
            "Epoch": epochs,
            "Loss": loss,
            "MAPE": mape,
            "ME": me
        })

        plt.style.use('seaborn-v0_8')
        plt.rcParams['font.family'] = 'Times New Roman'

        plt.figure(figsize=(20, 5))
        plt.ylabel("Mean Squared Error", fontsize=14)
        plt.title(f"LSTM Loss Function: {country}", fontsize=16)

        formatter = ticker.ScalarFormatter(useMathText=True)
        formatter.set_scientific(False)
        formatter.set_powerlimits((-6, 6))
        plt.gca().yaxis.set_major_formatter(formatter)

        # Loss function generation
        plt.plot(temp_df["Epoch"], temp_df["Loss"], color="steelblue", label='Loss Function')

        plt.legend()

        output_file_path = os.path.join(images_output_folder, f"lstm_loss_function_{country}.png")
        if not os.path.exists(output_file_path):
            plt.savefig(output_file_path)
    
        plt.close()
        
        lstm_fit_history_results = pd.concat([lstm_fit_history_results, temp_df], ignore_index=True)
        results_list.append(country_results)
    except Exception as e:
        print(f"An error occurred for country {country}: {e}")

lstm_results_without_me_mape = pd.DataFrame(results_list)
lstm_me_mape = lstm_fit_history_results[lstm_fit_history_results["Epoch"] == 499].drop(columns=["Epoch", "Loss"])
lstm_results = lstm_results_without_me_mape.merge(lstm_me_mape, on="Country")

lstm_fit_history_results.to_excel(os.path.join(tables_output_folder, "tab_10.xlsx"), index=False)

Evaluation for country: Austria
Epoch 1/500
250/250 - 7s - loss: 0.0930 - mape: 134.6035 - lstm_me: -9.4731e-03 - 7s/epoch - 28ms/step
Epoch 2/500
250/250 - 2s - loss: 0.0757 - mape: 128.1908 - lstm_me: -7.8753e-03 - 2s/epoch - 7ms/step
Epoch 3/500
250/250 - 2s - loss: 0.0624 - mape: 115.5752 - lstm_me: 0.0019 - 2s/epoch - 7ms/step
Epoch 4/500
250/250 - 2s - loss: 0.0536 - mape: 83.0504 - lstm_me: 0.0050 - 2s/epoch - 7ms/step
Epoch 5/500
250/250 - 2s - loss: 0.0547 - mape: 105.1104 - lstm_me: -1.9527e-03 - 2s/epoch - 8ms/step
Epoch 6/500
250/250 - 2s - loss: 0.0497 - mape: 94.2078 - lstm_me: -2.6703e-03 - 2s/epoch - 9ms/step
Epoch 7/500
250/250 - 2s - loss: 0.0487 - mape: 88.4294 - lstm_me: 0.0045 - 2s/epoch - 7ms/step
Epoch 8/500
250/250 - 2s - loss: 0.0448 - mape: 95.5846 - lstm_me: -3.7808e-03 - 2s/epoch - 7ms/step
Epoch 9/500
250/250 - 2s - loss: 0.0515 - mape: 77.7300 - lstm_me: 0.0109 - 2s/epoch - 6ms/step
Epoch 10/500
250/250 - 2s - loss: 0.0505 - mape: 121.7677 - lstm_me: -4.82

  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Epoch 1/500
250/250 - 5s - loss: 0.1072 - mape: 144.9124 - lstm_me: 0.0164 - 5s/epoch - 21ms/step
Epoch 2/500
250/250 - 2s - loss: 0.0735 - mape: 169.5686 - lstm_me: 0.0028 - 2s/epoch - 6ms/step
Epoch 3/500
250/250 - 2s - loss: 0.0666 - mape: 232.6066 - lstm_me: -4.4465e-04 - 2s/epoch - 6ms/step
Epoch 4/500
250/250 - 2s - loss: 0.0628 - mape: 197.4393 - lstm_me: 0.0090 - 2s/epoch - 6ms/step
Epoch 5/500
250/250 - 2s - loss: 0.0616 - mape: 195.0290 - lstm_me: 6.4643e-04 - 2s/epoch - 6ms/step
Epoch 6/500
250/250 - 2s - loss: 0.0609 - mape: 189.8163 - lstm_me: 0.0033 - 2s/epoch - 6ms/step
Epoch 7/500
250/250 - 2s - loss: 0.0593 - mape: 184.5673 - lstm_me: 0.0062 - 2s/epoch - 6ms/step
Epoch 8/500
250/250 - 2s - loss: 0.0591 - mape: 210.1508 - lstm_me: 0.0023 - 2s/epoch - 6ms/step
Epoch 9/500
250/250 - 2s - loss: 0.0606 - mape: 158.8611 - lstm_me: 0.0044 - 2s/epoch - 6ms/step
Epoch 10/500
250/250 - 2s - loss: 0.0558 - mape: 150.7329 - lstm_me: 5.5788e-04 - 2s/epoch - 6ms/step
Epoch 11/500
25

  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Epoch 1/500
250/250 - 6s - loss: 0.0894 - mape: 91.7236 - lstm_me: 0.0075 - 6s/epoch - 25ms/step
Epoch 2/500
250/250 - 2s - loss: 0.0493 - mape: 135.1427 - lstm_me: 0.0045 - 2s/epoch - 7ms/step
Epoch 3/500
250/250 - 2s - loss: 0.0419 - mape: 94.3395 - lstm_me: -1.5106e-03 - 2s/epoch - 8ms/step
Epoch 4/500
250/250 - 2s - loss: 0.0361 - mape: 147.8815 - lstm_me: 4.0898e-04 - 2s/epoch - 8ms/step
Epoch 5/500
250/250 - 2s - loss: 0.0366 - mape: 94.7054 - lstm_me: 0.0015 - 2s/epoch - 7ms/step
Epoch 6/500
250/250 - 2s - loss: 0.0348 - mape: 181.5758 - lstm_me: 0.0065 - 2s/epoch - 7ms/step
Epoch 7/500
250/250 - 2s - loss: 0.0338 - mape: 123.8513 - lstm_me: 8.5128e-05 - 2s/epoch - 6ms/step
Epoch 8/500
250/250 - 2s - loss: 0.0304 - mape: 92.3166 - lstm_me: 0.0037 - 2s/epoch - 6ms/step
Epoch 9/500
250/250 - 2s - loss: 0.0320 - mape: 136.0113 - lstm_me: -2.4305e-03 - 2s/epoch - 6ms/step
Epoch 10/500
250/250 - 2s - loss: 0.0322 - mape: 79.7001 - lstm_me: 0.0047 - 2s/epoch - 6ms/step
Epoch 11/500
25

  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Epoch 1/500
250/250 - 6s - loss: 0.1016 - mape: 122.6570 - lstm_me: 0.0115 - 6s/epoch - 25ms/step
Epoch 2/500
250/250 - 2s - loss: 0.0750 - mape: 95.1836 - lstm_me: -4.5187e-03 - 2s/epoch - 8ms/step
Epoch 3/500
250/250 - 2s - loss: 0.0688 - mape: 94.0208 - lstm_me: 0.0141 - 2s/epoch - 7ms/step
Epoch 4/500
250/250 - 2s - loss: 0.0636 - mape: 92.3668 - lstm_me: -3.1542e-03 - 2s/epoch - 7ms/step
Epoch 5/500
250/250 - 2s - loss: 0.0596 - mape: 74.8945 - lstm_me: 0.0023 - 2s/epoch - 7ms/step
Epoch 6/500
250/250 - 2s - loss: 0.0545 - mape: 91.3837 - lstm_me: 0.0030 - 2s/epoch - 7ms/step
Epoch 7/500
250/250 - 2s - loss: 0.0531 - mape: 89.3867 - lstm_me: 0.0055 - 2s/epoch - 8ms/step
Epoch 8/500
250/250 - 2s - loss: 0.0506 - mape: 93.4920 - lstm_me: 5.2168e-04 - 2s/epoch - 8ms/step
Epoch 9/500
250/250 - 2s - loss: 0.0492 - mape: 79.5094 - lstm_me: 8.9102e-04 - 2s/epoch - 7ms/step
Epoch 10/500
250/250 - 2s - loss: 0.0505 - mape: 87.6979 - lstm_me: 1.7225e-04 - 2s/epoch - 8ms/step
Epoch 11/500
25

  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Epoch 1/500
250/250 - 6s - loss: 0.0772 - mape: 235.6131 - lstm_me: -4.0839e-03 - 6s/epoch - 24ms/step
Epoch 2/500
250/250 - 2s - loss: 0.0590 - mape: 142.7695 - lstm_me: -7.8076e-03 - 2s/epoch - 7ms/step
Epoch 3/500
250/250 - 2s - loss: 0.0482 - mape: 170.0698 - lstm_me: 0.0023 - 2s/epoch - 7ms/step
Epoch 4/500
250/250 - 2s - loss: 0.0477 - mape: 171.9209 - lstm_me: -4.5126e-03 - 2s/epoch - 7ms/step
Epoch 5/500
250/250 - 2s - loss: 0.0450 - mape: 146.2346 - lstm_me: -4.1880e-04 - 2s/epoch - 7ms/step
Epoch 6/500
250/250 - 2s - loss: 0.0443 - mape: 140.7346 - lstm_me: -3.0842e-03 - 2s/epoch - 7ms/step
Epoch 7/500
250/250 - 2s - loss: 0.0390 - mape: 137.9636 - lstm_me: -5.0205e-03 - 2s/epoch - 7ms/step
Epoch 8/500
250/250 - 2s - loss: 0.0440 - mape: 158.1172 - lstm_me: 3.3184e-04 - 2s/epoch - 7ms/step
Epoch 9/500
250/250 - 2s - loss: 0.0413 - mape: 153.2386 - lstm_me: -3.5529e-03 - 2s/epoch - 7ms/step
Epoch 10/500
250/250 - 2s - loss: 0.0392 - mape: 155.8561 - lstm_me: 0.0016 - 2s/epoch 

  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Epoch 1/500
250/250 - 5s - loss: 0.0784 - mape: 111.6643 - lstm_me: 0.0075 - 5s/epoch - 22ms/step
Epoch 2/500
250/250 - 2s - loss: 0.0451 - mape: 88.9167 - lstm_me: 0.0073 - 2s/epoch - 7ms/step
Epoch 3/500
250/250 - 2s - loss: 0.0371 - mape: 68.2093 - lstm_me: -3.9905e-03 - 2s/epoch - 6ms/step
Epoch 4/500
250/250 - 2s - loss: 0.0353 - mape: 71.9180 - lstm_me: 0.0016 - 2s/epoch - 6ms/step
Epoch 5/500
250/250 - 2s - loss: 0.0398 - mape: 78.9335 - lstm_me: 0.0038 - 2s/epoch - 6ms/step
Epoch 6/500
250/250 - 2s - loss: 0.0357 - mape: 69.0768 - lstm_me: -1.6158e-04 - 2s/epoch - 6ms/step
Epoch 7/500
250/250 - 2s - loss: 0.0323 - mape: 72.6158 - lstm_me: 0.0067 - 2s/epoch - 6ms/step
Epoch 8/500
250/250 - 2s - loss: 0.0325 - mape: 69.5224 - lstm_me: -5.1814e-03 - 2s/epoch - 6ms/step
Epoch 9/500
250/250 - 2s - loss: 0.0319 - mape: 70.0498 - lstm_me: -7.0909e-04 - 2s/epoch - 6ms/step
Epoch 10/500
250/250 - 2s - loss: 0.0313 - mape: 69.1478 - lstm_me: 8.2531e-04 - 2s/epoch - 6ms/step
Epoch 11/500


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Epoch 1/500
250/250 - 10s - loss: 0.0667 - mape: 191.3009 - lstm_me: 0.0089 - 10s/epoch - 40ms/step
Epoch 2/500
250/250 - 2s - loss: 0.0376 - mape: 133.9694 - lstm_me: -6.5544e-03 - 2s/epoch - 7ms/step
Epoch 3/500
250/250 - 2s - loss: 0.0366 - mape: 192.5083 - lstm_me: 0.0023 - 2s/epoch - 7ms/step
Epoch 4/500
250/250 - 2s - loss: 0.0343 - mape: 137.7190 - lstm_me: 0.0052 - 2s/epoch - 7ms/step
Epoch 5/500
250/250 - 2s - loss: 0.0344 - mape: 159.1300 - lstm_me: 0.0021 - 2s/epoch - 7ms/step
Epoch 6/500
250/250 - 2s - loss: 0.0327 - mape: 184.3181 - lstm_me: 0.0037 - 2s/epoch - 7ms/step
Epoch 7/500
250/250 - 2s - loss: 0.0352 - mape: 134.5641 - lstm_me: 0.0014 - 2s/epoch - 7ms/step
Epoch 8/500
250/250 - 2s - loss: 0.0328 - mape: 149.2388 - lstm_me: 0.0061 - 2s/epoch - 7ms/step
Epoch 9/500
250/250 - 2s - loss: 0.0305 - mape: 134.0726 - lstm_me: -3.0021e-03 - 2s/epoch - 7ms/step
Epoch 10/500
250/250 - 2s - loss: 0.0318 - mape: 171.1377 - lstm_me: 0.0039 - 2s/epoch - 7ms/step
Epoch 11/500
250

  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Epoch 1/500
250/250 - 5s - loss: 0.0885 - mape: 157.7691 - lstm_me: 0.0099 - 5s/epoch - 21ms/step
Epoch 2/500
250/250 - 2s - loss: 0.0552 - mape: 156.9119 - lstm_me: 0.0075 - 2s/epoch - 7ms/step
Epoch 3/500
250/250 - 2s - loss: 0.0494 - mape: 111.7459 - lstm_me: 0.0081 - 2s/epoch - 7ms/step
Epoch 4/500
250/250 - 2s - loss: 0.0455 - mape: 122.4584 - lstm_me: -9.7379e-05 - 2s/epoch - 7ms/step
Epoch 5/500
250/250 - 2s - loss: 0.0431 - mape: 104.8775 - lstm_me: -3.5019e-06 - 2s/epoch - 7ms/step
Epoch 6/500
250/250 - 2s - loss: 0.0435 - mape: 109.5043 - lstm_me: 0.0055 - 2s/epoch - 7ms/step
Epoch 7/500
250/250 - 2s - loss: 0.0441 - mape: 116.6078 - lstm_me: -2.5220e-03 - 2s/epoch - 7ms/step
Epoch 8/500
250/250 - 2s - loss: 0.0410 - mape: 121.0113 - lstm_me: 0.0063 - 2s/epoch - 7ms/step
Epoch 9/500
250/250 - 2s - loss: 0.0409 - mape: 100.8032 - lstm_me: 0.0076 - 2s/epoch - 7ms/step
Epoch 10/500
250/250 - 2s - loss: 0.0388 - mape: 114.0184 - lstm_me: -5.2933e-03 - 2s/epoch - 7ms/step
Epoch 11

  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Epoch 1/500
250/250 - 6s - loss: 0.1019 - mape: 258.6298 - lstm_me: -6.7422e-03 - 6s/epoch - 25ms/step
Epoch 2/500
250/250 - 2s - loss: 0.0845 - mape: 141.9031 - lstm_me: 3.7104e-04 - 2s/epoch - 8ms/step
Epoch 3/500
250/250 - 2s - loss: 0.0724 - mape: 145.7146 - lstm_me: -1.4281e-02 - 2s/epoch - 8ms/step
Epoch 4/500
250/250 - 2s - loss: 0.0670 - mape: 147.9714 - lstm_me: 0.0138 - 2s/epoch - 7ms/step
Epoch 5/500
250/250 - 2s - loss: 0.0611 - mape: 121.2341 - lstm_me: -5.6167e-03 - 2s/epoch - 8ms/step
Epoch 6/500
250/250 - 2s - loss: 0.0586 - mape: 114.3760 - lstm_me: 0.0022 - 2s/epoch - 9ms/step
Epoch 7/500
250/250 - 2s - loss: 0.0543 - mape: 101.6135 - lstm_me: -6.2843e-03 - 2s/epoch - 8ms/step
Epoch 8/500
250/250 - 2s - loss: 0.0516 - mape: 106.0746 - lstm_me: 0.0021 - 2s/epoch - 9ms/step
Epoch 9/500
250/250 - 2s - loss: 0.0575 - mape: 125.8386 - lstm_me: -1.2455e-03 - 2s/epoch - 8ms/step
Epoch 10/500
250/250 - 2s - loss: 0.0511 - mape: 164.7742 - lstm_me: -4.8744e-03 - 2s/epoch - 9ms

  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Epoch 1/500
250/250 - 7s - loss: 0.1156 - mape: 182.9115 - lstm_me: 0.0192 - 7s/epoch - 29ms/step
Epoch 2/500
250/250 - 2s - loss: 0.0820 - mape: 178.9119 - lstm_me: 0.0031 - 2s/epoch - 10ms/step
Epoch 3/500
250/250 - 3s - loss: 0.0711 - mape: 148.8409 - lstm_me: 0.0038 - 3s/epoch - 10ms/step
Epoch 4/500
250/250 - 2s - loss: 0.0747 - mape: 139.9470 - lstm_me: 0.0090 - 2s/epoch - 7ms/step
Epoch 5/500
250/250 - 2s - loss: 0.0663 - mape: 152.3023 - lstm_me: 0.0015 - 2s/epoch - 8ms/step
Epoch 6/500
250/250 - 2s - loss: 0.0658 - mape: 162.4104 - lstm_me: 0.0074 - 2s/epoch - 8ms/step
Epoch 7/500
250/250 - 2s - loss: 0.0683 - mape: 148.3819 - lstm_me: -5.9271e-03 - 2s/epoch - 10ms/step
Epoch 8/500
250/250 - 2s - loss: 0.0700 - mape: 137.1369 - lstm_me: 0.0057 - 2s/epoch - 7ms/step
Epoch 9/500
250/250 - 2s - loss: 0.0685 - mape: 149.2596 - lstm_me: 0.0042 - 2s/epoch - 7ms/step
Epoch 10/500
250/250 - 2s - loss: 0.0617 - mape: 131.7418 - lstm_me: 0.0014 - 2s/epoch - 10ms/step
Epoch 11/500
250/25

  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Epoch 1/500
250/250 - 6s - loss: 0.1153 - mape: 165.0783 - lstm_me: 2.7882e-04 - 6s/epoch - 23ms/step
Epoch 2/500
250/250 - 2s - loss: 0.0975 - mape: 162.4099 - lstm_me: -3.5942e-03 - 2s/epoch - 7ms/step
Epoch 3/500
250/250 - 2s - loss: 0.0872 - mape: 132.2005 - lstm_me: 0.0062 - 2s/epoch - 7ms/step
Epoch 4/500
250/250 - 2s - loss: 0.0858 - mape: 149.9705 - lstm_me: -4.6210e-03 - 2s/epoch - 7ms/step
Epoch 5/500
250/250 - 2s - loss: 0.0810 - mape: 156.4481 - lstm_me: -5.5504e-03 - 2s/epoch - 7ms/step
Epoch 6/500
250/250 - 2s - loss: 0.0790 - mape: 141.3726 - lstm_me: 0.0084 - 2s/epoch - 7ms/step
Epoch 7/500
250/250 - 2s - loss: 0.0748 - mape: 140.4915 - lstm_me: -1.8835e-03 - 2s/epoch - 7ms/step
Epoch 8/500
250/250 - 2s - loss: 0.0732 - mape: 161.7519 - lstm_me: 0.0063 - 2s/epoch - 8ms/step
Epoch 9/500
250/250 - 2s - loss: 0.0716 - mape: 174.9671 - lstm_me: 0.0012 - 2s/epoch - 7ms/step
Epoch 10/500
250/250 - 2s - loss: 0.0690 - mape: 154.4999 - lstm_me: -4.4550e-03 - 2s/epoch - 7ms/step

  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Epoch 1/500
250/250 - 6s - loss: 0.0712 - mape: 494.5363 - lstm_me: -5.5754e-03 - 6s/epoch - 22ms/step
Epoch 2/500
250/250 - 2s - loss: 0.0434 - mape: 325.1387 - lstm_me: -1.2922e-03 - 2s/epoch - 7ms/step
Epoch 3/500
250/250 - 2s - loss: 0.0390 - mape: 379.5593 - lstm_me: -5.6888e-03 - 2s/epoch - 7ms/step
Epoch 4/500
250/250 - 2s - loss: 0.0412 - mape: 399.9437 - lstm_me: 0.0045 - 2s/epoch - 7ms/step
Epoch 5/500
250/250 - 2s - loss: 0.0353 - mape: 405.3294 - lstm_me: 0.0061 - 2s/epoch - 7ms/step
Epoch 6/500
250/250 - 2s - loss: 0.0416 - mape: 288.5026 - lstm_me: -1.6419e-02 - 2s/epoch - 7ms/step
Epoch 7/500
250/250 - 2s - loss: 0.0337 - mape: 354.2836 - lstm_me: 0.0076 - 2s/epoch - 7ms/step
Epoch 8/500
250/250 - 2s - loss: 0.0349 - mape: 260.3103 - lstm_me: -3.5182e-03 - 2s/epoch - 7ms/step
Epoch 9/500
250/250 - 2s - loss: 0.0403 - mape: 290.9546 - lstm_me: -8.1356e-03 - 2s/epoch - 7ms/step
Epoch 10/500
250/250 - 2s - loss: 0.0335 - mape: 293.6189 - lstm_me: 4.4827e-04 - 2s/epoch - 7ms

  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Epoch 1/500
250/250 - 7s - loss: 0.1183 - mape: 143.2108 - lstm_me: -1.5634e-03 - 7s/epoch - 26ms/step
Epoch 2/500
250/250 - 2s - loss: 0.0957 - mape: 130.6925 - lstm_me: -2.4593e-03 - 2s/epoch - 8ms/step
Epoch 3/500
250/250 - 3s - loss: 0.0829 - mape: 126.3935 - lstm_me: 0.0041 - 3s/epoch - 11ms/step
Epoch 4/500
250/250 - 2s - loss: 0.0689 - mape: 128.7600 - lstm_me: -3.9081e-03 - 2s/epoch - 8ms/step
Epoch 5/500
250/250 - 2s - loss: 0.0651 - mape: 111.9525 - lstm_me: -4.4327e-04 - 2s/epoch - 8ms/step
Epoch 6/500
250/250 - 2s - loss: 0.0651 - mape: 118.4604 - lstm_me: -7.1379e-03 - 2s/epoch - 8ms/step
Epoch 7/500
250/250 - 2s - loss: 0.0583 - mape: 144.4090 - lstm_me: 0.0051 - 2s/epoch - 8ms/step
Epoch 8/500
250/250 - 2s - loss: 0.0633 - mape: 134.1399 - lstm_me: -2.3713e-03 - 2s/epoch - 8ms/step
Epoch 9/500
250/250 - 2s - loss: 0.0553 - mape: 139.2492 - lstm_me: -4.1895e-03 - 2s/epoch - 8ms/step
Epoch 10/500
250/250 - 2s - loss: 0.0618 - mape: 122.7097 - lstm_me: 0.0014 - 2s/epoch - 8

  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Epoch 1/500
250/250 - 6s - loss: 0.0649 - mape: 334.4348 - lstm_me: 0.0066 - 6s/epoch - 23ms/step
Epoch 2/500
250/250 - 2s - loss: 0.0424 - mape: 221.1039 - lstm_me: 0.0010 - 2s/epoch - 7ms/step
Epoch 3/500
250/250 - 2s - loss: 0.0380 - mape: 180.8838 - lstm_me: -2.0279e-03 - 2s/epoch - 7ms/step
Epoch 4/500
250/250 - 2s - loss: 0.0397 - mape: 147.3639 - lstm_me: 0.0079 - 2s/epoch - 7ms/step
Epoch 5/500
250/250 - 2s - loss: 0.0371 - mape: 186.9222 - lstm_me: -7.0828e-03 - 2s/epoch - 7ms/step
Epoch 6/500
250/250 - 2s - loss: 0.0353 - mape: 144.7879 - lstm_me: -8.1946e-04 - 2s/epoch - 7ms/step
Epoch 7/500
250/250 - 2s - loss: 0.0323 - mape: 140.4308 - lstm_me: 0.0024 - 2s/epoch - 7ms/step
Epoch 8/500
250/250 - 2s - loss: 0.0330 - mape: 165.3162 - lstm_me: 0.0031 - 2s/epoch - 7ms/step
Epoch 9/500
250/250 - 2s - loss: 0.0325 - mape: 204.8295 - lstm_me: -2.0807e-03 - 2s/epoch - 7ms/step
Epoch 10/500
250/250 - 2s - loss: 0.0336 - mape: 164.4068 - lstm_me: 0.0010 - 2s/epoch - 7ms/step
Epoch 11

  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Epoch 1/500
250/250 - 15s - loss: 0.0802 - mape: 131.1888 - lstm_me: 0.0013 - 15s/epoch - 61ms/step
Epoch 2/500
250/250 - 2s - loss: 0.0576 - mape: 122.5276 - lstm_me: 0.0048 - 2s/epoch - 8ms/step
Epoch 3/500
250/250 - 2s - loss: 0.0505 - mape: 121.7448 - lstm_me: 0.0026 - 2s/epoch - 8ms/step
Epoch 4/500
250/250 - 2s - loss: 0.0534 - mape: 105.1913 - lstm_me: -3.8219e-03 - 2s/epoch - 9ms/step
Epoch 5/500
250/250 - 2s - loss: 0.0527 - mape: 110.8523 - lstm_me: 0.0038 - 2s/epoch - 8ms/step
Epoch 6/500
250/250 - 2s - loss: 0.0489 - mape: 120.5247 - lstm_me: 0.0034 - 2s/epoch - 9ms/step
Epoch 7/500
250/250 - 2s - loss: 0.0446 - mape: 101.4491 - lstm_me: -1.8600e-03 - 2s/epoch - 8ms/step
Epoch 8/500
250/250 - 2s - loss: 0.0458 - mape: 108.4858 - lstm_me: 0.0029 - 2s/epoch - 8ms/step
Epoch 9/500
250/250 - 2s - loss: 0.0437 - mape: 110.8656 - lstm_me: 0.0012 - 2s/epoch - 8ms/step
Epoch 10/500
250/250 - 2s - loss: 0.0444 - mape: 99.6313 - lstm_me: -3.7252e-03 - 2s/epoch - 8ms/step
Epoch 11/500

  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Epoch 1/500
250/250 - 7s - loss: 0.1004 - mape: 162.2124 - lstm_me: -1.3606e-02 - 7s/epoch - 29ms/step
Epoch 2/500
250/250 - 2s - loss: 0.0944 - mape: 164.3393 - lstm_me: -8.3276e-03 - 2s/epoch - 9ms/step
Epoch 3/500
250/250 - 2s - loss: 0.0884 - mape: 175.7727 - lstm_me: -6.0150e-03 - 2s/epoch - 8ms/step
Epoch 4/500
250/250 - 2s - loss: 0.0850 - mape: 171.4639 - lstm_me: -9.1041e-03 - 2s/epoch - 9ms/step
Epoch 5/500
250/250 - 2s - loss: 0.0805 - mape: 169.1544 - lstm_me: -2.1441e-03 - 2s/epoch - 10ms/step
Epoch 6/500
250/250 - 3s - loss: 0.0764 - mape: 130.0061 - lstm_me: -1.1435e-02 - 3s/epoch - 11ms/step
Epoch 7/500
250/250 - 2s - loss: 0.0722 - mape: 142.6123 - lstm_me: 5.4386e-04 - 2s/epoch - 8ms/step
Epoch 8/500
250/250 - 2s - loss: 0.0742 - mape: 159.9175 - lstm_me: -5.5217e-03 - 2s/epoch - 8ms/step
Epoch 9/500
250/250 - 2s - loss: 0.0739 - mape: 162.8576 - lstm_me: 0.0029 - 2s/epoch - 8ms/step
Epoch 10/500
250/250 - 2s - loss: 0.0745 - mape: 174.9517 - lstm_me: -4.8646e-03 - 2s

  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Epoch 1/500
250/250 - 8s - loss: 0.1101 - mape: 199.5543 - lstm_me: -3.4521e-03 - 8s/epoch - 30ms/step
Epoch 2/500
250/250 - 2s - loss: 0.0933 - mape: 176.5645 - lstm_me: -5.3115e-04 - 2s/epoch - 9ms/step
Epoch 3/500
250/250 - 3s - loss: 0.0873 - mape: 187.8081 - lstm_me: 0.0036 - 3s/epoch - 12ms/step
Epoch 4/500
250/250 - 3s - loss: 0.0790 - mape: 201.9400 - lstm_me: -1.6766e-03 - 3s/epoch - 10ms/step
Epoch 5/500
250/250 - 3s - loss: 0.0806 - mape: 164.9807 - lstm_me: 9.2099e-04 - 3s/epoch - 11ms/step
Epoch 6/500
250/250 - 3s - loss: 0.0736 - mape: 181.2321 - lstm_me: 0.0090 - 3s/epoch - 10ms/step
Epoch 7/500
250/250 - 3s - loss: 0.0781 - mape: 159.5162 - lstm_me: -7.9918e-03 - 3s/epoch - 12ms/step
Epoch 8/500
250/250 - 2s - loss: 0.0732 - mape: 146.9729 - lstm_me: 0.0025 - 2s/epoch - 9ms/step
Epoch 9/500
250/250 - 2s - loss: 0.0733 - mape: 162.7214 - lstm_me: -5.4616e-03 - 2s/epoch - 8ms/step
Epoch 10/500
250/250 - 2s - loss: 0.0703 - mape: 170.0128 - lstm_me: 0.0019 - 2s/epoch - 9ms

  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Epoch 1/500
250/250 - 8s - loss: 0.1407 - mape: 154.9366 - lstm_me: 0.0093 - 8s/epoch - 32ms/step
Epoch 2/500
250/250 - 3s - loss: 0.1047 - mape: 134.3982 - lstm_me: -2.1098e-03 - 3s/epoch - 10ms/step
Epoch 3/500
250/250 - 3s - loss: 0.0979 - mape: 136.9338 - lstm_me: 0.0021 - 3s/epoch - 11ms/step
Epoch 4/500
250/250 - 2s - loss: 0.0915 - mape: 127.4428 - lstm_me: -4.1425e-03 - 2s/epoch - 8ms/step
Epoch 5/500
250/250 - 2s - loss: 0.0793 - mape: 113.2637 - lstm_me: 0.0013 - 2s/epoch - 8ms/step
Epoch 6/500
250/250 - 2s - loss: 0.0779 - mape: 122.7854 - lstm_me: 0.0059 - 2s/epoch - 8ms/step
Epoch 7/500
250/250 - 3s - loss: 0.0790 - mape: 118.1572 - lstm_me: -5.6981e-03 - 3s/epoch - 11ms/step
Epoch 8/500
250/250 - 2s - loss: 0.0809 - mape: 147.6493 - lstm_me: -1.8372e-05 - 2s/epoch - 9ms/step
Epoch 9/500
250/250 - 2s - loss: 0.0808 - mape: 135.8127 - lstm_me: 0.0089 - 2s/epoch - 9ms/step
Epoch 10/500
250/250 - 2s - loss: 0.0743 - mape: 108.6618 - lstm_me: 0.0060 - 2s/epoch - 8ms/step
Epoch

  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Epoch 1/500
250/250 - 7s - loss: 0.0887 - mape: 185.2982 - lstm_me: -2.1925e-03 - 7s/epoch - 29ms/step
Epoch 2/500
250/250 - 4s - loss: 0.0670 - mape: 150.9523 - lstm_me: -3.5402e-03 - 4s/epoch - 15ms/step
Epoch 3/500
250/250 - 3s - loss: 0.0636 - mape: 135.3717 - lstm_me: 0.0041 - 3s/epoch - 13ms/step
Epoch 4/500
250/250 - 3s - loss: 0.0559 - mape: 146.8176 - lstm_me: -1.0480e-02 - 3s/epoch - 10ms/step
Epoch 5/500
250/250 - 3s - loss: 0.0564 - mape: 125.8524 - lstm_me: 0.0047 - 3s/epoch - 11ms/step
Epoch 6/500
250/250 - 2s - loss: 0.0535 - mape: 128.8870 - lstm_me: 0.0031 - 2s/epoch - 9ms/step
Epoch 7/500
250/250 - 2s - loss: 0.0531 - mape: 137.9195 - lstm_me: -3.4538e-03 - 2s/epoch - 9ms/step
Epoch 8/500
250/250 - 3s - loss: 0.0491 - mape: 128.3703 - lstm_me: 0.0076 - 3s/epoch - 10ms/step
Epoch 9/500
250/250 - 2s - loss: 0.0472 - mape: 139.8798 - lstm_me: -2.6528e-03 - 2s/epoch - 10ms/step
Epoch 10/500
250/250 - 2s - loss: 0.0489 - mape: 119.7161 - lstm_me: -5.7736e-03 - 2s/epoch - 9

  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Epoch 1/500
250/250 - 7s - loss: 0.1052 - mape: 85.0109 - lstm_me: 0.0026 - 7s/epoch - 27ms/step
Epoch 2/500
250/250 - 2s - loss: 0.0821 - mape: 88.7531 - lstm_me: 0.0052 - 2s/epoch - 10ms/step
Epoch 3/500
250/250 - 2s - loss: 0.0749 - mape: 68.7971 - lstm_me: 0.0055 - 2s/epoch - 9ms/step
Epoch 4/500
250/250 - 2s - loss: 0.0667 - mape: 67.5791 - lstm_me: -2.9135e-03 - 2s/epoch - 10ms/step
Epoch 5/500
250/250 - 3s - loss: 0.0601 - mape: 64.9201 - lstm_me: 9.4422e-04 - 3s/epoch - 13ms/step
Epoch 6/500
250/250 - 3s - loss: 0.0560 - mape: 73.6148 - lstm_me: 0.0049 - 3s/epoch - 10ms/step
Epoch 7/500
250/250 - 2s - loss: 0.0524 - mape: 69.2893 - lstm_me: 5.5078e-04 - 2s/epoch - 9ms/step
Epoch 8/500
250/250 - 2s - loss: 0.0507 - mape: 62.3060 - lstm_me: 0.0087 - 2s/epoch - 9ms/step
Epoch 9/500
250/250 - 2s - loss: 0.0518 - mape: 62.8281 - lstm_me: 0.0017 - 2s/epoch - 9ms/step
Epoch 10/500
250/250 - 2s - loss: 0.0495 - mape: 65.9771 - lstm_me: -3.4411e-03 - 2s/epoch - 8ms/step
Epoch 11/500
250

  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Epoch 1/500
250/250 - 7s - loss: 0.0738 - mape: 218.2061 - lstm_me: 2.7289e-04 - 7s/epoch - 27ms/step
Epoch 2/500
250/250 - 3s - loss: 0.0643 - mape: 298.7492 - lstm_me: 0.0085 - 3s/epoch - 11ms/step
Epoch 3/500
250/250 - 3s - loss: 0.0563 - mape: 259.7091 - lstm_me: 2.2019e-04 - 3s/epoch - 11ms/step
Epoch 4/500
250/250 - 2s - loss: 0.0535 - mape: 336.7195 - lstm_me: -1.6871e-03 - 2s/epoch - 10ms/step
Epoch 5/500
250/250 - 2s - loss: 0.0499 - mape: 154.3109 - lstm_me: 0.0041 - 2s/epoch - 9ms/step
Epoch 6/500
250/250 - 2s - loss: 0.0492 - mape: 299.5300 - lstm_me: -1.8983e-03 - 2s/epoch - 8ms/step
Epoch 7/500
250/250 - 2s - loss: 0.0459 - mape: 237.3095 - lstm_me: 0.0044 - 2s/epoch - 9ms/step
Epoch 8/500
250/250 - 2s - loss: 0.0459 - mape: 258.7832 - lstm_me: -6.6996e-03 - 2s/epoch - 9ms/step
Epoch 9/500
250/250 - 2s - loss: 0.0465 - mape: 293.0797 - lstm_me: 0.0097 - 2s/epoch - 9ms/step
Epoch 10/500
250/250 - 2s - loss: 0.0461 - mape: 294.3103 - lstm_me: -9.3702e-04 - 2s/epoch - 8ms/st

  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Epoch 1/500
250/250 - 9s - loss: 0.1022 - mape: 538.4626 - lstm_me: -1.0313e-02 - 9s/epoch - 35ms/step
Epoch 2/500
250/250 - 3s - loss: 0.0841 - mape: 546.6586 - lstm_me: -1.2093e-02 - 3s/epoch - 11ms/step
Epoch 3/500
250/250 - 3s - loss: 0.0771 - mape: 429.9577 - lstm_me: 0.0038 - 3s/epoch - 12ms/step
Epoch 4/500
250/250 - 3s - loss: 0.0737 - mape: 664.3301 - lstm_me: -3.4886e-03 - 3s/epoch - 10ms/step
Epoch 5/500
250/250 - 3s - loss: 0.0679 - mape: 197.0311 - lstm_me: 0.0085 - 3s/epoch - 10ms/step
Epoch 6/500
250/250 - 3s - loss: 0.0665 - mape: 514.2142 - lstm_me: -2.3447e-03 - 3s/epoch - 12ms/step
Epoch 7/500
250/250 - 2s - loss: 0.0650 - mape: 350.4937 - lstm_me: -6.5222e-03 - 2s/epoch - 10ms/step
Epoch 8/500
250/250 - 2s - loss: 0.0606 - mape: 262.1955 - lstm_me: 3.2393e-04 - 2s/epoch - 9ms/step
Epoch 9/500
250/250 - 2s - loss: 0.0647 - mape: 255.9704 - lstm_me: 8.5425e-04 - 2s/epoch - 8ms/step
Epoch 10/500
250/250 - 2s - loss: 0.0639 - mape: 376.1973 - lstm_me: -1.5524e-03 - 2s/e

  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


Epoch 1/500
250/250 - 17s - loss: 0.0855 - mape: 253.8174 - lstm_me: 0.0081 - 17s/epoch - 68ms/step
Epoch 2/500
250/250 - 2s - loss: 0.0479 - mape: 153.6324 - lstm_me: 0.0029 - 2s/epoch - 9ms/step
Epoch 3/500
250/250 - 2s - loss: 0.0446 - mape: 205.4041 - lstm_me: 0.0049 - 2s/epoch - 9ms/step
Epoch 4/500
250/250 - 2s - loss: 0.0437 - mape: 191.4022 - lstm_me: 0.0025 - 2s/epoch - 9ms/step
Epoch 5/500
250/250 - 2s - loss: 0.0377 - mape: 187.7901 - lstm_me: -1.1237e-03 - 2s/epoch - 9ms/step
Epoch 6/500
250/250 - 2s - loss: 0.0384 - mape: 247.1675 - lstm_me: 0.0054 - 2s/epoch - 9ms/step
Epoch 7/500
250/250 - 2s - loss: 0.0370 - mape: 224.5420 - lstm_me: 7.5614e-04 - 2s/epoch - 9ms/step
Epoch 8/500
250/250 - 2s - loss: 0.0356 - mape: 257.7078 - lstm_me: 0.0026 - 2s/epoch - 9ms/step
Epoch 9/500
250/250 - 2s - loss: 0.0367 - mape: 259.3771 - lstm_me: 6.8081e-05 - 2s/epoch - 9ms/step
Epoch 10/500
250/250 - 2s - loss: 0.0351 - mape: 227.3395 - lstm_me: 0.0031 - 2s/epoch - 9ms/step
Epoch 11/500


  plt.gca().set_yticklabels([f'{int(tick) / 1000000:.1f}' for tick in plt.gca().get_yticks()])


# WYNIKI

## Metryki

In [62]:
evaluation_results = pd.concat([sarima_results, tbats_results, lstm_results], ignore_index=True).sort_values(by=['Country', 'Model'], ascending=[True, True])

evaluation_results.to_excel(os.path.join(tables_output_folder, "tab_11.xlsx"), index=False)

In [74]:
mape_comparison = evaluation_results.pivot(index='Country', columns='Model', values='MAPE')

mape_comparison.reset_index(inplace=True)
mape_comparison = mape_comparison.rename(columns={"lstm": "LSTM", "sarima": "SARIMA", "tbats": "TBATS"})
mape_comparison = mape_comparison[["Country", "SARIMA", "TBATS", "LSTM"]]

sarima_avg_value = mape_comparison['SARIMA'].mean()
tbats_avg_value = mape_comparison['TBATS'].mean()
lstm_avg_value = mape_comparison['LSTM'].mean()

mape_comparison['LSTM'] = mape_comparison['LSTM'].apply(lambda x: '{:.2f}'.format(x))
mape_comparison['SARIMA'] = mape_comparison['SARIMA'].apply(lambda x: '{:.2f}'.format(x))
mape_comparison['TBATS'] = mape_comparison['TBATS'].apply(lambda x: '{:.2f}'.format(x))

(mape_comparison).style.hide(axis = 0).to_latex(os.path.join(tables_output_folder, "tab_12.tex"), hrules=True)

## Połączone wykresy porównujące modele

In [64]:
# List of the models
models = ['sarima', 'tbats', 'lstm']

for country in country_list:
    # List of paths to PNG files with plots for each model
    paths = [os.path.join(images_output_folder, f'{model}_predictions_{country}.png') for model in models]

    # Load PNG images
    images = [Image.open(path) for path in paths]

    # Get image dimensions
    widths, heights = zip(*(image.size for image in images))

    # Create a new image with a width equal to the widest image and a height equal to the sum of heights
    new_width = max(widths)
    new_height = sum(heights)

    new_image = Image.new('RGB', (new_width, new_height), (255, 255, 255))  # White background

    # Paste images onto the new image
    current_height = 0
    for image in images:
        new_image.paste(image, (0, current_height))
        current_height += image.size[1]

    # Save the new image
    output_file_path = os.path.join(images_output_folder, f'model_comparison_{country}.png')
    new_image.save(output_file_path)