In [1]:
import pandas as pd 
import numpy as np  
import matplotlib.pyplot as plt
from datetime import datetime
import seaborn as sns

import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf
from keras.models import Model, Sequential, load_model
from keras.callbacks import EarlyStopping
from keras.losses import MeanSquaredError, Huber,MeanAbsoluteError
from keras.layers import Dense, LSTM, Reshape,Dropout,Bidirectional
from keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

from tensorflow.keras.models import load_model


from vmdpy import VMD

from typing import List as List, Tuple as Tuple, Dict as Dict,Optional as Optional

2024-02-11 20:46:41.847217: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-11 20:46:41.847311: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-11 20:46:41.849204: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
df_load = pd.read_csv("/mnt/e/github/load-forecast/01.database/processed/load/2022_2023_load_processed.csv", sep = ",", encoding = 'latin-1', index_col=0)
df_temp = pd.read_csv("/mnt/e/github/load-forecast/01.database/processed/temperature/2022_2023_temperature_processed.csv", sep = ',',encoding = 'latin-1', index_col=0)

df_temp['datetime'] = pd.to_datetime(df_temp['datetime'])
df_load['datetime'] = pd.to_datetime(df_load['datetime']).dt.tz_localize('UTC')

df_raw = pd.merge(df_temp,df_load, on='datetime', how='inner')

del df_load
del df_temp

df_raw = df_raw.iloc[4344:]

df_raw.head()

Unnamed: 0,datetime,temperature,total_load
4344,2022-07-01 00:00:00+00:00,18.143443,61874.013017
4345,2022-07-01 01:00:00+00:00,17.446575,59042.531005
4346,2022-07-01 02:00:00+00:00,16.976438,57149.292992
4347,2022-07-01 03:00:00+00:00,16.481543,56519.531943
4348,2022-07-01 04:00:00+00:00,16.14011,56818.247011


In [3]:
def ciclical_time_encoding(df:pd.DataFrame)->pd.DataFrame:

    """
    Encodes datetime values in a DataFrame using cyclic encoding.

    Args:
        - df: DataFrame containing a 'datetime' column to be cyclically encoded.

    Returns:
        - DataFrame with additional columns representing the cyclically encoded datetime values.
    """

    #Ciclical encoding datetime object:
    day = 1
    weekly = day*7
    year = day*365

    #Convert into an number
    timestamp_s = df['datetime'].map(datetime.timestamp)

    df = df.assign(year_sin = (np.sin(timestamp_s * (2*np.pi/year))).values)
    df = df.assign(year_cos = (np.cos(timestamp_s * (2*np.pi/year))).values)

    df = df.assign(daily_sin = (np.sin(timestamp_s * (2*np.pi/day))).values)
    df = df.assign(daily_cos = (np.cos(timestamp_s * (2*np.pi/day))).values)

    df = df.assign(weekly_sin = (np.sin(timestamp_s * (2*np.pi/weekly))).values)
    df = df.assign(weekly_cos = (np.cos(timestamp_s * (2*np.pi/weekly))).values)
    
    df.reset_index(inplace = True)
    return(df)

In [4]:
def retrieve_and_remove_datetime(df:pd.DataFrame)-> List[datetime]:

    '''
    - Args:
        - df: Dataframe to remove datetime column
    - Returns:
        - datetime_rage: Removed column as a List
    '''
   
    datetime_range = df['datetime'].to_list()
    df.drop(columns = ['datetime'], inplace = True)
    
    return datetime_range

In [5]:
def decompose_series_vmd(df:pd.DataFrame,timeseries:str, k_nmodes:int)-> pd.DataFrame:
    '''

    Decomposes a time series using Variational Mode Decomposition (VMD).
    
    - Args:
        - df: Dataframe with the timeseries that will be decomposed
        - timeseres: Dataframe column that will go trought the vmd process
        - k_nmodes: n decomposed series
    - Returns:
        - df: dataframe with new decomposed columns
    '''

    print(f"Decomposing {timeseries} in {k_nmodes} nmodes")
    
    timeseries = df[timeseries].to_list()

    alpha = 2000     # moderate bandwidth constraint  
    tau = 0           # noise-tolerance (no strict fidelity enforcement)  
    k_nmodes = k_nmodes           # n modes  
    DC = 0             # no DC part imposed  
    init = 0           # initialize omegas uniformly  
    tol = 1e-6
    u, u_hat, omega = VMD(timeseries,alpha,tau,k_nmodes,DC,init,tol)

    label_columns = []

    for i in range(len(u)):
        col_name = 'series_dec_' + str(i+1)
        df[col_name] = u[i]  
        label_columns.append('series_dec_'+str(i+1))

    return(df)

In [6]:
def split_and_scal_df(df:pd.DataFrame)-> Tuple[Dict[str,pd.DataFrame],Tuple[pd.Series,pd.Series]]:

    """
    Splits the input DataFrame into training, validation, and test sets, scales the datasets using Min-Max scaling,
    and returns the scaled DataFrames along with the min-max values used for scaling.

    Parameters:
    - df: Input DataFrame to be split and scaled.

    Returns:
    - Tuple containing:
        - A dictionary with scaled DataFrams
        - A tuple containing min and max values for each feature used during scaling.
    """

    n = len(df)
    # Split 70:20:10 (train:validation:test)
    train_df = df[0:int(n*0.7)]
    val_df = df[int(n*0.7):int(n*0.9)]
    test_df = df[int(n*0.9):]

    #retrieve the max and min values of each feature. This will be used to bring those values back to the original
    #dimesion after scalling it to train the model
    min_values = train_df.min()
    max_values = train_df.max()

    #Scaling the dataframes to train
    scaler = MinMaxScaler()
    scaler.fit(train_df)

    scal_df = df.copy()
    scal_df[scal_df.columns] = scaler.transform(df[df.columns])

    scal_train_df = train_df.copy()
    scal_train_df[scal_train_df.columns] = scaler.transform(train_df[train_df.columns])

    scal_val_df = val_df.copy()
    scal_val_df[scal_val_df.columns] = scaler.transform(val_df[val_df.columns])

    scal_test_df = test_df.copy()
    scal_test_df[scal_test_df.columns] = scaler.transform(test_df[test_df.columns])

    dict_scal_df = {"train_df":scal_train_df,
                    "val_df":scal_val_df,
                    "test_df":scal_test_df}

    min_max_values = (min_values,max_values)

    return scal_df,min_max_values


In [None]:
def forecast_and_plot (df,min_values,max_values,model,input_width,forecast_days,steps_per_day,
                         label_columns=label_columns):

    #Convert varibles from days to timesteps.
    
    input_width = input_width*steps_per_day
    forecast_width = forecast_days*steps_per_day

    #Input Period
    start_input = 0
    end_input = input_width

    #Forecast/Label period
    start_forecast = end_input
    end_forecast = start_forecast + forecast_width
    
    now = datetime.datetime(2023,7,3,17,0,0)

    labels = [now]

    for time in range(input_width + forecast_width):
        
        now = now+timedelta(hours=1) 
        labels.append(now)

    #Registring dates as labels to the plot
    inputs_labels = labels[start_input:end_input]
    predictions_labels = labels[start_forecast:end_forecast]

    #Converting inputs to tensor
    inputs = tf.convert_to_tensor(df[start_input:end_input])
    inputs_to_model = tf.expand_dims(inputs,axis=0)

    #Converting real values to tensor
    real_values = tf.convert_to_tensor(df[start_forecast:end_forecast])
    

    #slicing inputs to retrieve only the decomposed windspeeds
    sliced_inputs = inputs[:,-len(label_columns):]
   

    #Scale back to the original value range
    original_scale_inputs = scale_back(sliced_inputs,min_values,max_values,label_columns)
    
    #Sum across all decomposed values
    summed_original_scale_inputs = tf.math.reduce_sum(original_scale_inputs,axis=1,keepdims=False,name=None)
    

    #Make predictions
    predictions = model(inputs_to_model)
    predictions = tf.squeeze(predictions,axis=0)

    #Scale predictions and sum all decomposed windspeeds
    original_scale_predictions = scale_back(predictions,min_values,max_values,label_columns)
    summed_original_scale_predictions = tf.math.reduce_sum(original_scale_predictions,axis=1,keepdims=False,name=None)

    #Avg calc

    predictions_avg = calc_ws_avg(summed_original_scale_predictions,steps_per_day)

    fig = go.Figure()

    fig.add_trace(go.Scatter(x=inputs_labels,
                            y=summed_original_scale_inputs,
                            mode='lines',
                            name='inputs',
                            line = dict(color='#2E294E',width=4)
                            ))

    fig.add_trace(go.Scatter(x=predictions_labels,
                            y=summed_original_scale_predictions,
                            mode='lines',
                            name='predictions',
                            line = dict(color='#D90368',width=4)
                            ))

    x_0 = inputs_labels[-1]
    text_position = x_0 
    
    fig.add_annotation(
        showarrow=False,
        text = 'Wind speed daily avg (m/s)',
        x = text_position - timedelta(days=1),
        y= 14,
        font = dict(size = 12, color = 'black')

    )
    
    for day in range(forecast_days):
        
        text_string = 'day_' + str(day+1) + ': pred = '+ str(predictions_avg[day])   
                 
        fig.add_annotation(
            showarrow=False,
            text = text_string,
            x = text_position + timedelta(hours=12),
            y = 14,
            font = dict(size = 11, color = 'black'),
        )
        fig.add_vline(
            x = text_position,
            line_width = 1.5,
            line_dash = 'dash',
            line_color = 'black'
        )

        text_position = text_position + timedelta(days=1)

    fig.update_layout(title='[TRS02] Wind speed forecast',
                    xaxis_title='',
                    yaxis_title='Windspeed (m/s)',
                    template = 'plotly_white',
                    yaxis = dict(range=[0,15]))

    fig.show()

In [7]:
path = '/mnt/e/github/load-forecast/03.results/'

model_list = [file for file in os.listdir(path) if os.path.isfile(os.path.join(path, file)) and file.endswith(".keras")]

model_dict = {}
temperature = False
for model in model_list:
    if "temperature" in model:
        temperature = True
    k_nmodes = model.split("_")[-1]
    k_nmodes = int(k_nmodes.split(".")[0])
    model_dict[model] = {"temperature":temperature,
                         "k_nmodes":k_nmodes}

In [8]:
model_dict.keys()

dict_keys(['load_forecast_knmodes_40.keras', 'load_forecast_with_temperature_knmodes_10.keras', 'load_forecast_with_temperature_knmodes_20.keras'])

In [18]:
custom_objects = {'MeanAbsoluteError': MeanAbsoluteError}
#lstm_model = load_model(model_path, custom_objects=custom_objects)

model = "/mnt/e/github/load-forecast/03.results/load_forecast_with_temperature_knmodes_10.keras"
#model_path = os.path.join(path,model)
lstm_model = load_model(model)

In [17]:

k_nmodes = int(model.split("_")[-1].split(".")[0])
df = df_raw.copy()

df = ciclical_time_encoding(df = df)
datetime_range = retrieve_and_remove_datetime(df = df)
df = decompose_series_vmd(df = df, timeseries = 'total_load', k_nmodes = k_nmodes)

if temperature == False:
    df.drop(columns = ['temperature'], inplace = True)

total_load = df['total_load'].to_list()
df.drop(columns = ['total_load'], inplace = True)
df.drop(columns = ['index'], inplace = True)

scal_df, min_max_values = split_and_scal_df(df = df)



Decomposing total_load in 10 nmodes


NameError: name 'lsmt_model' is not defined

In [21]:
outputs = lstm_model(scal_df[:72])
print(outputs)

AttributeError: 'tuple' object has no attribute 'as_list'

In [None]:
for model in model_dict.keys:
    print("---------------------------------------------------")
    print("Initializing model validation")
    print("---------------------------------------------------")

    model_path = os.path.join(path,model)
    lstm_model = load_model(model_path)

    print(f"Model {model} loaded")
    print("---------------------------------------------------")
    temperature = model['temperature']
    k_nmodes = model['k_nmodes']

    print(f"Initializing feature eng.")

    df = df_raw.copy()
    df = ciclical_time_encoding(df = df)
    datetime_range = retrieve_and_remove_datetime(df = df)
    df = decompose_series_vmd(df = df, timeseries = 'total_load', k_nmodes = k_nmodes)

    if temperature == False:
        df.drop(columns = ['temperature'], inplace = True)
    

    total_load = df['total_load'].to_list()
    df.drop(columns = ['total_load'], inplace = True)
    df.drop(columns = ['index'], inplace = True)

    scal_df, min_max_values = split_and_scal_df(df = df)

    validation_dict = {}

    for row in scal_df.index():

        input = scal_df[row:row+72]

        output = lstm_model(input)
        