In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from IPython.display import clear_output
# Clear this cell's output area (presumably to hide log lines printed while
# importing TensorFlow - TODO confirm that is the intent).
clear_output()

In [2]:
# Load the raw dataset from the working directory. No `parse_dates` is given,
# so the `datetime` column is read as plain strings.
# NOTE(review): the preview below shows an `Unnamed: 0` column, which looks like
# a saved row index being read back as data - confirm whether it should be a feature.
df = pd.read_csv('continuous_dataset.csv')

In [3]:
# Preview the first rows to check the column layout before preprocessing.
df.head()

Unnamed: 0,datetime,nat_demand,T2M_toc,QV2M_toc,TQL_toc,W2M_toc,T2M_san,QV2M_san,TQL_san,W2M_san,T2M_dav,QV2M_dav,TQL_dav,W2M_dav,Holiday_ID,holiday,school
0,2015-01-03 01:00:00,970.345,25.865259,0.018576,0.016174,21.850546,23.482446,0.017272,0.001855,10.328949,22.662134,0.016562,0.0961,5.364148,0,0,0
1,2015-01-03 02:00:00,912.1755,25.899255,0.018653,0.016418,22.166944,23.399255,0.017265,0.001327,10.681517,22.578943,0.016509,0.087646,5.572471,0,0,0
2,2015-01-03 03:00:00,900.2688,25.93728,0.018768,0.01548,22.454911,23.34353,0.017211,0.001428,10.874924,22.53103,0.016479,0.078735,5.871184,0,0,0
3,2015-01-03 04:00:00,889.9538,25.957544,0.01889,0.016273,22.110481,23.238794,0.017128,0.002599,10.51862,22.512231,0.016487,0.06839,5.883621,0,0,0
4,2015-01-03 05:00:00,893.6865,25.97384,0.018981,0.017281,21.186089,23.075403,0.017059,0.001729,9.733589,22.481653,0.016456,0.064362,5.611724,0,0,0


# Preprocess the data:

In [4]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.preprocessing.sequence import TimeseriesGenerator

class TabularLSTMDataPreprocessor:
    """Turn a time-indexed tabular DataFrame into Keras time-series generators.

    The pipeline adds lagged copies of every non-time, non-categorical column,
    scales the numerical features, one-hot encodes the categorical columns,
    splits the rows chronologically and wraps both halves in
    ``TimeseriesGenerator`` objects.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Raw input data. It is never modified in place by ``preprocess``.
    target_column : str
        Column to predict. It is not scaled, but its lags are used as features.
    time_column : str
        Column used for ordering and for the chronological split. It is
        compared against ``split_date`` with ``<`` / ``>=``.
    categorical_columns : list of str, optional
        Columns that are one-hot encoded (``drop_first=True``) and are neither
        lagged nor scaled.
    scaler : {'minmax', 'standard'}
        Which scikit-learn scaler to apply to the numerical features.
    sequence_length : int
        Number of consecutive rows in each generated input window.
    batch_size : int
        Batch size used by both generators.
    random_state : optional
        Stored for callers; not used by this class.
    lag_windows : iterable of int
        Row offsets for which ``<column>_lag_<window>`` features are created.
    split_date : same type as the values in ``time_column``
        Rows strictly before this value form the training set, rows at or
        after it form the test set.
    """

    def __init__(self, dataframe, target_column, time_column, categorical_columns=None,
                 scaler='minmax', sequence_length=24, batch_size=32, random_state=None,
                 lag_windows=(12, 24, 128), split_date='2019-01-01'):
        self.dataframe = dataframe
        self.target_column = target_column
        self.time_column = time_column
        self.categorical_columns = categorical_columns if categorical_columns else []
        self.scaler = self.get_scaler(scaler)
        self.sequence_length = sequence_length
        self.batch_size = batch_size
        self.random_state = random_state
        self.lag_windows = tuple(lag_windows)
        self.split_date = split_date

    def get_scaler(self, scaler_type):
        """Return a fresh scaler instance for ``scaler_type``.

        Raises
        ------
        ValueError
            If ``scaler_type`` is neither ``'minmax'`` nor ``'standard'``.
        """
        if scaler_type == 'minmax':
            return MinMaxScaler()
        elif scaler_type == 'standard':
            return StandardScaler()
        else:
            raise ValueError("Invalid scaler type. Use 'minmax' or 'standard'.")

    def preprocess(self):
        """Run the full pipeline and return ``(train_data_gen, test_data_gen)``.

        The caller's DataFrame is left untouched; the processed frame replaces
        ``self.dataframe`` once the pipeline has finished, so calling this
        method a second time on the same instance would process already
        processed data.

        Raises
        ------
        ValueError
            If no row falls before ``split_date`` (nothing to fit the scaler on).
        """
        # sort_values returns a new frame, so nothing below touches the caller's
        # data. Sorting comes first: shift(window) is positional and only means
        # "window steps back in time" on chronologically ordered rows.
        frame = self.dataframe.sort_values(by=[self.time_column])

        lag_excluded = [self.time_column, *self.categorical_columns]
        lag_sources = [column for column in frame.columns if column not in lag_excluded]
        for column in lag_sources:
            for window in self.lag_windows:
                frame[f"{column}_lag_{window}"] = frame[column].shift(window)

        # The first max(lag_windows) rows have no complete history; drop them
        # together with any other row containing a missing value.
        frame = frame.dropna().copy()

        # Chronological split masks, kept as plain arrays so they stay valid
        # after get_dummies rebuilds the frame.
        train_mask = (frame[self.time_column] < self.split_date).to_numpy()
        test_mask = (frame[self.time_column] >= self.split_date).to_numpy()
        if not train_mask.any():
            raise ValueError("No rows fall before split_date; cannot fit the scaler.")

        # Scale every feature except the target, the time column and the
        # categorical columns (the latter are one-hot encoded instead).
        non_numerical = [self.target_column, self.time_column, *self.categorical_columns]
        numerical_columns = [column for column in frame.columns if column not in non_numerical]

        # Fit on the training rows only so test-set statistics do not leak into
        # the features, then apply the fitted transform to every row.
        self.scaler.fit(frame.loc[train_mask, numerical_columns])
        frame[numerical_columns] = self.scaler.transform(frame[numerical_columns])

        # Apply one-hot encoding to categorical columns (if any). Encoding the
        # whole frame at once keeps train and test feature columns identical.
        if self.categorical_columns:
            frame = pd.get_dummies(frame, columns=self.categorical_columns, drop_first=True)

        features = frame.drop(columns=[self.target_column, self.time_column])
        targets = frame[self.target_column]

        X_train = features[train_mask].values.astype(np.float32)
        y_train = targets[train_mask].values.astype(np.float32)
        X_test = features[test_mask].values.astype(np.float32)
        y_test = targets[test_mask].values.astype(np.float32)

        # Create time series generators for training and testing
        train_data_gen = TimeseriesGenerator(X_train, y_train,
                                             length=self.sequence_length, batch_size=self.batch_size)
        test_data_gen = TimeseriesGenerator(X_test, y_test,
                                            length=self.sequence_length, batch_size=self.batch_size)

        # Expose the processed frame (including the generated feature columns).
        self.dataframe = frame

        return train_data_gen, test_data_gen


In [5]:
# Instantiate the TabularLSTMDataPreprocessor.
# It is handed its own copy of the data so the raw `df` loaded above stays
# untouched and this cell can be re-run safely.
data_preprocessor = TabularLSTMDataPreprocessor(df.copy(), target_column='nat_demand', time_column='datetime',
                                                categorical_columns=['holiday', 'school', 'Holiday_ID'],
                                                scaler='standard', sequence_length=24, batch_size=64)

# Preprocess the data and obtain data generators
train_data_gen, test_data_gen = data_preprocessor.preprocess()
clear_output()


In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Flatten
from tensorflow.keras.optimizers import Adam
import tensorflow.keras.backend as K

# Define custom RMSE loss function
def root_mean_squared_error(y_true, y_pred):
    """Keras loss: square root of the mean squared prediction error.

    The mean is taken without an axis argument, i.e. over every element of
    the tensors passed in.
    """
    residual = y_pred - y_true
    mean_squared = K.mean(K.square(residual))
    return K.sqrt(mean_squared)

def accuracy(y_true, y_pred):
    """Return the fraction of predictions that exactly equal their label.

    Both inputs are flattened to one dimension and compared element-wise.
    This is only meaningful for discrete labels; for continuous regression
    outputs exact matches are rare and the result is close to zero.

    Raises
    ------
    ValueError
        If the inputs are empty or do not contain the same number of elements.
    """
    labels = np.asarray(y_true).ravel()
    predictions = np.asarray(y_pred).ravel()
    if labels.shape != predictions.shape:
        raise ValueError("y_true and y_pred must contain the same number of elements.")
    if labels.size == 0:
        raise ValueError("y_true and y_pred must not be empty.")
    return float(np.mean(labels == predictions))



class TabularLSTMModel:
    """Thin wrapper around a Keras ``Sequential`` stack of LSTM layers.

    The network is: one LSTM layer per entry of ``lstm_units`` (each returning
    the full sequence, ReLU activation), a ``Flatten`` layer, and a final
    ``Dense`` layer with ``output_units`` outputs.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, passed to the first LSTM layer.
    lstm_units : sequence of int, optional
        Units of each stacked LSTM layer. Defaults to ``[64, 32]``.
    output_units : int
        Number of units in the output ``Dense`` layer.
    """

    def __init__(self, input_shape, lstm_units=None, output_units=1):
        self.input_shape = input_shape
        # Copy into a fresh list so instances never share (or alias) the
        # default or the caller's list.
        self.lstm_units = [64, 32] if lstm_units is None else list(lstm_units)
        self.output_units = output_units
        self.model = self.build_model()

    def build_model(self):
        """Build and return the (uncompiled) Keras model."""
        model = Sequential()
        for position, units in enumerate(self.lstm_units):
            # Only the first layer needs the input shape; later layers infer
            # theirs from the layer before them.
            shape_kwargs = {'input_shape': self.input_shape} if position == 0 else {}
            model.add(LSTM(units, return_sequences=True, activation='relu', **shape_kwargs))
        model.add(Flatten())
        model.add(Dense(self.output_units))
        return model

    def compile(self, learning_rate=0.001):
        """Compile the model with Adam and the custom RMSE loss."""
        optimizer = Adam(learning_rate=learning_rate)
        self.model.compile(loss=root_mean_squared_error, optimizer=optimizer)

    def fit(self, train_data_gen, epochs=10):
        """Train on ``train_data_gen`` and return what the underlying ``fit`` returns."""
        return self.model.fit(train_data_gen, epochs=epochs)

    def evaluate(self, test_data_gen):
        """Return the underlying model's evaluation result on ``test_data_gen``."""
        return self.model.evaluate(test_data_gen)

    def predict(self, data_gen):
        """Return the underlying model's predictions for ``data_gen``."""
        return self.model.predict(data_gen)

    def summary(self):
        """Delegate to the underlying model's ``summary``."""
        return self.model.summary()

    def save(self, filepath):
        """Save the underlying model to ``filepath``."""
        self.model.save(filepath)
    
    



In [9]:
from tensorflow.keras.models import load_model
from tensorflow.keras.models import save_model

# Instantiate the TabularLSTMModel with four stacked LSTM layers.
# The input shape (timesteps, features) is read from the first training batch
# rather than hard-coded, so it always matches what the generators produce.
input_shape = tuple(train_data_gen[0][0].shape[1:])
lstm_units = [64, 32, 16, 8]  # Define the units for each LSTM layer
lstm_model = TabularLSTMModel(input_shape, lstm_units)
num_epochs = 10
lstm_model.summary()


# Compile the model
lstm_model.compile(learning_rate=0.001)


# Save a snapshot of the compiled model.
# NOTE(review): this runs before training, so 'lstm_model.h5' and
# `loaded_model` hold untrained weights - confirm this is intended.
lstm_model.model.save('lstm_model.h5')

# Import the saved model, providing the custom loss function to custom_objects
loaded_model = load_model('lstm_model.h5', custom_objects={'root_mean_squared_error': root_mean_squared_error})


# Train and save the model
lstm_model.fit(train_data_gen, epochs=num_epochs)
lstm_model.save('trained_lstm_model.h5')

# Evaluate the model on the test data
loss = lstm_model.evaluate(test_data_gen)

print(f'Test Loss (RMSE): {loss}')



Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_8 (LSTM)               (None, 24, 64)            35840     
                                                                 
 lstm_9 (LSTM)               (None, 24, 32)            12416     
                                                                 
 lstm_10 (LSTM)              (None, 24, 16)            3136      
                                                                 
 lstm_11 (LSTM)              (None, 24, 8)             800       
                                                                 
 flatten_2 (Flatten)         (None, 192)               0         
                                                                 
 dense_2 (Dense)             (None, 1)                 193       
                                                                 
Total params: 52,385
Trainable params: 52,385
Non-trai



Epoch 1/10


2024-04-19 16:19:52.984015: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]
2024-04-19 16:19:53.015951: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


2024-04-19 16:21:46.792607: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Test Loss (RMSE): 482.0828552246094


In [10]:
#***************************** RESULT *********************************

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score, max_error, mean_poisson_deviance, mean_gamma_deviance, mean_tweedie_deviance, mean_absolute_percentage_error, accuracy_score

class Result:
    """Collect predictions over a batch generator and compute regression metrics.

    Parameters
    ----------
    model : object
        Anything exposing ``predict(x_batch)``.
    test_data_gen : sequence
        Supports ``len()`` and integer indexing; each item is an
        ``(x_batch, y_batch)`` pair.

    After ``evaluate`` has run, ``y_true`` and ``y_pred`` hold the flattened
    targets and predictions as 1-D arrays.
    """

    def __init__(self, model, test_data_gen):
        self.model = model
        self.test_data_gen = test_data_gen
        self.y_true = None
        self.y_pred = None

    def evaluate(self):
        """Predict every batch and return a dict of metric name -> value.

        "Accuracy" is reported as ``1 - MAPE``: classification accuracy is not
        defined for continuous targets. The Poisson and Gamma deviances are
        only defined for strictly positive predictions (and non-negative /
        strictly positive targets respectively); when that does not hold they
        are reported as NaN instead of aborting the whole evaluation.

        Raises
        ------
        ValueError
            If ``test_data_gen`` contains no batches.
        """
        true_batches = []
        pred_batches = []

        for i in range(len(self.test_data_gen)):
            x_batch, y_batch = self.test_data_gen[i]
            true_batches.append(np.ravel(y_batch))
            pred_batches.append(np.ravel(self.model.predict(x_batch)))

        if not true_batches:
            raise ValueError("test_data_gen contains no batches; nothing to evaluate.")

        self.y_true = np.concatenate(true_batches)
        self.y_pred = np.concatenate(pred_batches)

        mae = mean_absolute_error(self.y_true, self.y_pred)
        mse = mean_squared_error(self.y_true, self.y_pred)
        rmse = np.sqrt(mse)

        # Calculate MAPE (Mean Absolute Percentage Error)
        mape = mean_absolute_percentage_error(self.y_true, self.y_pred)

        r2 = r2_score(self.y_true, self.y_pred)
        explained_variance = explained_variance_score(self.y_true, self.y_pred)
        max_err = max_error(self.y_true, self.y_pred)

        # Guard the deviances whose domain the data may violate.
        positive_predictions = bool(np.all(self.y_pred > 0))
        poisson_defined = positive_predictions and bool(np.all(self.y_true >= 0))
        gamma_defined = positive_predictions and bool(np.all(self.y_true > 0))
        poisson_deviance = (mean_poisson_deviance(self.y_true, self.y_pred)
                            if poisson_defined else float('nan'))
        gamma_deviance = (mean_gamma_deviance(self.y_true, self.y_pred)
                          if gamma_defined else float('nan'))
        tweedie_deviance = mean_tweedie_deviance(self.y_true, self.y_pred)

        return {
            "MAE": mae,
            "MSE": mse,
            "RMSE": rmse,
            "MAPE": mape,
            "R2": r2,
            "Accuracy": float(1.0 - mape),
            "Explained Variance": explained_variance,
            "Max Error": max_err,
            "Mean Poisson Deviance": poisson_deviance,
            "Mean Gamma Deviance": gamma_deviance,
            "Mean Tweedie Deviance": tweedie_deviance
        }

In [11]:
# Usage: run the evaluation, then keep the flattened targets/predictions around
# for the cells below.
result = Result(lstm_model, test_data_gen)
evaluation = result.evaluate()
clear_output()  # discard whatever the evaluation printed to this cell
y_true_lstm, y_pred_lstm = result.y_true, result.y_pred

# Write the metrics to a text file: a header line, then one "name: value" line each.
output_filename = "lstm_evaluation.txt"
with open(output_filename, "w") as output_file:
    output_file.write("LSTM Model Evaluation Metrics --\n")
    output_file.writelines(
        f"{metric}: {value}\n" for metric, value in evaluation.items()
    )

print(f"Evaluation metrics saved to {output_filename}")

Evaluation metrics saved to lstm_evaluation.txt


In [12]:
data = {'y_true': y_true_lstm, 'y_pred': y_pred_lstm}

# Create a DataFrame from the dictionary. It gets its own name so that `df`,
# the raw dataset loaded at the top of the notebook, is not overwritten and
# earlier cells can still be re-run.
predictions_df = pd.DataFrame(data)

# Specify the filename for the CSV file
csv_filename = 'lstm_predictions.csv'

# Save the DataFrame to a CSV file (without the row index)
predictions_df.to_csv(csv_filename, index=False)

In [14]:
# Save the model once more under a `.keras` filename (the earlier saves in this
# notebook use `.h5`); TabularLSTMModel.save forwards the path to the wrapped model.
lstm_model.save('Model.keras')