In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from IPython.display import clear_output
clear_output()

In [2]:
# gpus = tf.config.list_physical_devices('GPU')
# if gpus:
#   try:
#     # Currently, memory growth needs to be the same across GPUs
#     for gpu in gpus:
#       tf.config.experimental.set_memory_growth(gpu, True)
#     logical_gpus = tf.config.list_logical_devices('GPU')
#     print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
#   except RuntimeError as e:
#     # Memory growth must be set before GPUs have been initialized
#     print(e)

In [3]:
# Load data from CSV
df = pd.read_csv('./data/continuous dataset.csv')

In [4]:
#************************************** Data Pre-Processing ****************************************

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

class TabularLSTMDataPreprocessor:
    def __init__(self, dataframe, target_column, time_column, categorical_columns=None,
                 scaler='minmax', sequence_length=24, batch_size=32, random_state=None):
        self.dataframe = dataframe
        self.target_column = target_column
        self.time_column = time_column
        self.categorical_columns = categorical_columns if categorical_columns else []
        self.scaler = self.get_scaler(scaler)
        self.sequence_length = sequence_length
        self.batch_size = batch_size
        self.random_state = random_state

    def get_scaler(self, scaler_type):
        if scaler_type == 'minmax':
            return MinMaxScaler()
        elif scaler_type == 'standard':
            return StandardScaler()
        else:
            raise ValueError("Invalid scaler type. Use 'minmax' or 'standard'.")

    def preprocess(self):
        #Creating lag features
        windows = [12, 24, 128]
        for column in self.dataframe.columns:
            if column != self.time_column and column not in self.categorical_columns:
                for window in windows:
                    self.dataframe[f"{column}_lag_{window}"] = self.dataframe[column].shift(window)
                    # Add other transformations as needed

        # Drop rows with missing values
        self.dataframe.dropna(inplace=True)

        # Sort by time
        self.dataframe.sort_values(by=[self.time_column], inplace=True)
        
        # Scale numerical features
        numerical_columns = [col for col in self.dataframe.columns if col not in [self.target_column, self.time_column, self.categorical_columns]]
        self.dataframe[numerical_columns] = self.scaler.fit_transform(self.dataframe[numerical_columns])

        # Apply one-hot encoding to categorical columns (if any)
        if self.categorical_columns:
            self.dataframe = pd.get_dummies(self.dataframe, columns=self.categorical_columns, drop_first=True)
        
        train_df = self.dataframe[self.dataframe[self.time_column] < '2019-01-01']
        test_df = self.dataframe[self.dataframe[self.time_column] >= '2019-01-01']
        # Split data into train and test sets
        X_train = train_df.drop(columns=[self.target_column, self.time_column]).values.astype(np.float32)
        y_train = train_df[self.target_column].values.astype(np.float32)
#         X = X.astype(np.float32)
#         y = y.astype(np.float32)
#         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=self.test_size, random_state=self.random_state, shuffle=False)
        X_test = test_df.drop(columns=[self.target_column, self.time_column]).values.astype(np.float32)
        y_test = test_df[self.target_column].values.astype(np.float32)
        
        # Create time series generators for training and testing
        train_data_gen = TimeseriesGenerator(X_train, y_train,
                                             length=self.sequence_length, batch_size=self.batch_size)
        test_data_gen = TimeseriesGenerator(X_test, y_test,
                                            length=self.sequence_length, batch_size=self.batch_size)
    

        return train_data_gen, test_data_gen

In [5]:
# Instantiate the TabularLSTMDataPreprocessor
data_preprocessor = TabularLSTMDataPreprocessor(df, target_column='nat_demand', time_column='datetime',
                                                categorical_columns=['holiday', 'school', 'Holiday_ID'],
                                                scaler='standard', sequence_length=24, batch_size=64)

# Preprocess the data and obtain data generators
train_data_gen, test_data_gen = data_preprocessor.preprocess()
clear_output()

In [6]:
#************************************** LSTM MODEL ****************************************

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Flatten
from tensorflow.keras.optimizers import Adam
import tensorflow.keras.backend as K

# Define custom RMSE loss function
def root_mean_squared_error(y_true, y_pred):
    return K.sqrt(K.mean(K.square(y_pred - y_true)))

class TabularLSTMModel:
    def __init__(self, input_shape, lstm_units=[64, 32], output_units=1):
        self.input_shape = input_shape
        self.lstm_units = lstm_units
        self.output_units = output_units
        self.model = self.build_model()

    def build_model(self):
        model = Sequential()
        for units in self.lstm_units:
            model.add(LSTM(units, return_sequences=True, input_shape=self.input_shape, activation='relu'))
        model.add(Flatten())
        model.add(Dense(self.output_units))
        return model

    def compile(self, learning_rate=0.001):
        optimizer = Adam(learning_rate=learning_rate)
        self.model.compile(loss=root_mean_squared_error, optimizer=optimizer)

    def fit(self, train_data_gen, epochs=10):
        self.model.fit(train_data_gen, epochs=epochs)

    def evaluate(self, test_data_gen):
        return self.model.evaluate(test_data_gen)
    
    def predict(self, data_gen):
        return self.model.predict(data_gen)
    
    def summary(self):
        return self.model.summary()

In [7]:
# Instantiate the TabularLSTMModel with two LSTM layers
input_shape = (24, 75)
lstm_units = [64, 32, 16, 8]  # Define the units for each LSTM layer
lstm_model = TabularLSTMModel(input_shape, lstm_units)
num_epochs = 20
lstm_model.summary()


# Compile the model
lstm_model.compile(learning_rate=0.001)

# Train the model
lstm_model.fit(train_data_gen, epochs=num_epochs)

# Evaluate the model on the test data
loss = lstm_model.evaluate(test_data_gen)
print(f'Test Loss (RMSE): {loss}')

  super().__init__(**kwargs)


Epoch 1/20


  self._warn_if_super_not_called()


[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1880s[0m 3s/step - loss: 716.0698
Epoch 2/20
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2089s[0m 4s/step - loss: 191.2664
Epoch 3/20
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3449s[0m 6s/step - loss: 161.6668
Epoch 4/20
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27342s[0m 50s/step - loss: 123.7503
Epoch 5/20
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m855s[0m 2s/step - loss: 111.9854
Epoch 6/20
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m839s[0m 2s/step - loss: 111.0639
Epoch 7/20
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1197s[0m 2s/step - loss: 114.4917
Epoch 8/20
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m835s[0m 2s/step - loss: 107.9323
Epoch 9/20
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6313s[0m 12s/step - loss: 102.4032
Epoch 10/20
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

In [8]:
#***************************** RESULT *********************************

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score, max_error, mean_poisson_deviance, mean_gamma_deviance, mean_tweedie_deviance, mean_absolute_percentage_error

class Result:
    def __init__(self, model, test_data_gen):
        self.model = model
        self.test_data_gen = test_data_gen
        self.y_true = None
        self.y_pred = None

    def evaluate(self):
        y_true = []
        y_pred = []

        for i in range(len(self.test_data_gen)):
            x_batch, y_batch = self.test_data_gen[i]
            y_true_batch = y_batch
            y_pred_batch = self.model.predict(x_batch)

            # Append values to the lists within the loop
            y_true.extend(y_true_batch)
            y_pred.extend(y_pred_batch)

        self.y_true = np.array(y_true).flatten()
        self.y_pred = np.array(y_pred).flatten()

        mae = mean_absolute_error(self.y_true, self.y_pred)
        mse = mean_squared_error(self.y_true, self.y_pred)
        rmse = np.sqrt(mse)

        # Calculate MAPE (Mean Absolute Percentage Error)
        mape = mean_absolute_percentage_error(self.y_true, self.y_pred)
        
        r2 = r2_score(self.y_true, self.y_pred)
        explained_variance = explained_variance_score(self.y_true, self.y_pred)
        max_err = max_error(self.y_true, self.y_pred)
        poisson_deviance = mean_poisson_deviance(self.y_true, self.y_pred)
        gamma_deviance = mean_gamma_deviance(self.y_true, self.y_pred)
        tweedie_deviance = mean_tweedie_deviance(self.y_true, self.y_pred)

        return {
            "MAE": mae,
            "MSE": mse,
            "RMSE": rmse,
            "MAPE": mape,
            "R2": r2,
            "Explained Variance": explained_variance,
            "Max Error": max_err,
            "Mean Poisson Deviance": poisson_deviance,
            "Mean Gamma Deviance": gamma_deviance,
            "Mean Tweedie Deviance": tweedie_deviance
        }

In [9]:
# Usage
result = Result(lstm_model, test_data_gen)
evaluation = result.evaluate()
clear_output()
y_true_lstm = result.y_true
y_pred_lstm = result.y_pred
# Save the output to a text file
output_filename = "lstm_evaluation.txt"
with open(output_filename, "w") as output_file:
    output_file.write("LSTM Model Evaluation Metrics --\n")
    for metric, value in evaluation.items():
        output_file.write(f"{metric}: {value}\n")

print(f"Evaluation metrics saved to {output_filename}")

Evaluation metrics saved to lstm_evaluation.txt


In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Flatten
from tensorflow.keras.optimizers import Adam
import tensorflow.keras.backend as K

# Define custom RMSE loss function
def root_mean_squared_error(y_true, y_pred):
    return K.sqrt(K.mean(K.square(y_pred - y_true)))

class TabularGRUModel:
    def __init__(self, input_shape, gru_units=[64, 32], output_units=1):
        self.input_shape = input_shape
        self.gru_units = gru_units
        self.output_units = output_units
        self.model = self.build_model()

    def build_model(self):
        model = Sequential()
        for units in self.gru_units:
            model.add(GRU(units, return_sequences=True, input_shape=self.input_shape, activation='relu'))
        model.add(Flatten())
        model.add(Dense(self.output_units))
        return model

    def compile(self, learning_rate=0.001):
        optimizer = Adam(learning_rate=learning_rate)
        self.model.compile(loss=root_mean_squared_error, optimizer=optimizer)

    def fit(self, train_data_gen, epochs=10):
        self.model.fit(train_data_gen, epochs=epochs)

    def evaluate(self, test_data_gen):
        return self.model.evaluate(test_data_gen)
    
    def predict(self, data_gen):
        return self.model.predict(data_gen)
    
    def summary(self):
        return self.model.summary()
    

    
# Instantiate the TabularGRUModel with two GRU layers
input_shape = (24, 75)
gru_units = [64, 32, 16, 8]  # Define the units for each GRU layer
num_epochs = 20
gru_model = TabularGRUModel(input_shape, gru_units)
gru_model.summary()

# Compile the model
gru_model.compile(learning_rate=0.001)

# Train the model using the train_data_gen
gru_model.fit(train_data_gen, epochs=num_epochs)

# Evaluate the model on the test data using the test_data_gen
loss = gru_model.evaluate(test_data_gen)
print(f'Test Loss (RMSE): {loss}')

  super().__init__(**kwargs)


Epoch 1/20
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4165s[0m 8s/step - loss: 473.7155
Epoch 2/20
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1040s[0m 2s/step - loss: 117.0319
Epoch 3/20
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2076s[0m 4s/step - loss: 112.7898
Epoch 4/20
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2258s[0m 4s/step - loss: 102.1562
Epoch 5/20
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2117s[0m 4s/step - loss: 98.9811
Epoch 6/20
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38778s[0m 71s/step - loss: 93.2469
Epoch 7/20
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2141s[0m 4s/step - loss: 87.9017
Epoch 8/20
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1158s[0m 2s/step - loss: 87.2100
Epoch 9/20
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1064s[0m 2s/step - loss: 90.0497
Epoch 10/20
[1m545/545[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [None]:
# Usage
result_gru = Result(gru_model, test_data_gen)
evaluation = result_gru.evaluate()
clear_output()
y_true_gru = result_gru.y_true
y_pred_gru = result_gru.y_pred
# Save the output to a text file
output_filename = "gru_evaluation.txt"
with open(output_filename, "w") as output_file:
    output_file.write("GRU Model Evaluation Metrics --\n")
    for metric, value in evaluation.items():
        output_file.write(f"{metric}: {value}\n")

print(f"Evaluation metrics saved to {output_filename}")

In [None]:
data = {'y_true': y_true_lstm, 'y_pred': y_pred_lstm}

# Create a DataFrame from the dictionary
df = pd.DataFrame(data)

# Specify the filename for the CSV file
csv_filename = 'lstm_predictions.csv'

# Save the DataFrame to a CSV file
df.to_csv(csv_filename, index=False)

In [None]:
data = {'y_true': y_true_gru, 'y_pred': y_pred_gru}

# Create a DataFrame from the dictionary
df = pd.DataFrame(data)

# Specify the filename for the CSV file
csv_filename = 'gru_predictions.csv'

# Save the DataFrame to a CSV file
df.to_csv(csv_filename, index=False)