In [None]:
# ========================================================================
#                     Time Series Analysis Project
# ========================================================================
#    Project: Surface Air Temperature Mean Daily Minimum
#    Model: Long Short-Term Memory (LSTM) Implementation
#    
#    Description:
#    Advanced time series analysis using LSTM neural networks for temperature
#    prediction. This implementation is part of a series exploring sequential
#    models from SimpleRNN to Transformers.
#    
#    Author: Dr. Saad Laouadi
#    Date: February 2025
#    Version: 1.0
#    
# ========================================================================
#  © Copyright Dr. Saad Laouadi, 2025. All rights reserved.
# ========================================================================

In [None]:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf

# Print an environment report, framed by two rules of 52 asterisks, so the
# author and the package versions appear in the notebook output.
print("*"*52)
# (Re)load the third-party `watermark` IPython extension (must be installed).
%reload_ext watermark

# -a: stamp the report with the author name.
%watermark -a "Dr. Saad Laouadi"
# -iv: versions of the imported packages; -u / -d: "last updated" date stamp.
%watermark -iv -ud
print("*"*52)

In [None]:
# =============================================================== #
#        User Defined Functions
# =============================================================== #

def load_data(filepath):
    """
    Read the temperature CSV and index it by month.

    Parameters
    ----------
    filepath : str, path-like or file-like
        Anything accepted by ``pd.read_csv``. The data must contain a
        ``month`` column whose values follow the ``%Y-%m`` pattern.

    Returns
    -------
    pandas.DataFrame
        The remaining columns, indexed by ``month`` parsed as datetimes.
    """
    raw = pd.read_csv(filepath)

    # Parse the 'YYYY-MM' strings, then promote that column to the index
    parsed_month = pd.to_datetime(raw['month'], format='%Y-%m')
    return raw.assign(month=parsed_month).set_index('month')


def create_sequences(data, lookback):
    """
    Slice a series into overlapping input windows and next-step targets.

    Parameters
    ----------
    data : sequence or numpy.ndarray
        Ordered observations; must support ``len`` and slicing.
    lookback : int
        Number of consecutive observations in each input window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y)`` where ``X[k]`` is ``data[k:k + lookback]`` and ``y[k]`` is
        the observation that immediately follows that window.
    """
    n_windows = len(data) - lookback
    windows = [data[start:start + lookback] for start in range(n_windows)]
    targets = [data[start + lookback] for start in range(n_windows)]
    return np.array(windows), np.array(targets)


def split_data(data, train_size=0.8):
    """
    Cut an ordered series into a leading training part and a trailing test part.

    Parameters
    ----------
    data : sequence
        Ordered observations; must support ``len`` and slicing.
    train_size : float, default 0.8
        Fraction of observations assigned to the training part. The cut index
        is ``int(len(data) * train_size)``, i.e. truncated toward zero.

    Returns
    -------
    tuple
        ``(train_data, test_data)``, both slices of ``data``. No shuffling is
        performed, so chronological order is kept.
    """
    cutoff = int(len(data) * train_size)
    return data[:cutoff], data[cutoff:]


def normalize_data(train_data, test_data):
    """
    Scale train and test values with a MinMaxScaler fitted on the training set.

    The scaler is fitted on ``train_data`` only and then applied to
    ``test_data``, so no information from the test set leaks into the scaling.
    Test values outside the training range therefore map outside the scaler's
    output range.

    Parameters
    ----------
    train_data, test_data : array-like
        One-dimensional series of values. NumPy arrays, pandas Series and
        plain lists are all accepted.

    Returns
    -------
    tuple
        ``(train_scaled, test_scaled, scaler)``: the two scaled arrays, each
        reshaped to a single column, and the fitted ``MinMaxScaler`` (needed
        later to invert the transform).
    """
    # np.asarray makes .reshape available for Series/list inputs as well;
    # an ndarray passes through unchanged.
    train_column = np.asarray(train_data).reshape(-1, 1)
    test_column = np.asarray(test_data).reshape(-1, 1)

    scaler = MinMaxScaler()
    train_scaled = scaler.fit_transform(train_column)
    test_scaled = scaler.transform(test_column)
    return train_scaled, test_scaled, scaler

def plot_training_history(history):
    """
    Plot loss and MAE curves from a Keras training run, side by side.

    Parameters
    ----------
    history : object
        Object exposing a ``history`` dict of per-epoch series, as returned
        by ``model.fit``. It must contain ``'loss'`` and an MAE series stored
        under either ``'mean_absolute_error'`` or ``'mae'``.

    Raises
    ------
    KeyError
        If no MAE series is present in ``history.history``.

    Notes
    -----
    Validation curves (``'val_<key>'``) are drawn only when present, so a
    model trained without validation data can still be plotted.
    """
    records = history.history

    # The MAE key depends on how the metric was passed to compile()
    # (e.g. 'mae' vs 'mean_absolute_error'), so accept either spelling.
    mae_key = next(
        (key for key in ('mean_absolute_error', 'mae') if key in records),
        None,
    )
    if mae_key is None:
        raise KeyError(
            "No MAE series in history; expected 'mean_absolute_error' or "
            f"'mae', found {sorted(records)}"
        )

    # (history key, short label, panel title) for each subplot
    panels = [
        ('loss', 'Loss', 'Model Loss'),
        (mae_key, 'MAE', 'Model MAE'),
    ]

    fig, axes = plt.subplots(1, 2, figsize=(12, 4))
    for ax, (key, label, panel_title) in zip(axes, panels):
        ax.plot(records[key], label=f'Training {label}')
        val_key = f'val_{key}'
        if val_key in records:
            ax.plot(records[val_key], label=f'Validation {label}')
        ax.set_title(panel_title)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(label)
        ax.legend()

    fig.tight_layout()
    plt.show()

    
def plot_predictions(actual, predictions, title):
    """
    Draw the observed series and the model output on one set of axes.

    Parameters
    ----------
    actual : array-like
        Observed values, plotted under the label 'Actual'.
    predictions : array-like
        Model output, plotted under the label 'Predicted'.
    title : str
        Title shown above the figure.
    """
    _, ax = plt.subplots(figsize=(12, 6))

    ax.plot(actual, label='Actual')
    ax.plot(predictions, label='Predicted')

    ax.set_title(title)
    ax.set_xlabel('Time Step')
    ax.set_ylabel('Temperature')
    ax.legend()
    ax.grid(True)

    plt.show()


def evaluate_model(model, X_test, y_test, scaler):
    """
    Score a fitted model on held-out sequences, in the original data units.

    Parameters
    ----------
    model : object
        Fitted model exposing ``predict``.
    X_test : array-like
        Input sequences passed to ``model.predict``.
    y_test : numpy.ndarray
        Scaled target values; reshaped to a single column before inversion.
    scaler : object
        Fitted scaler exposing ``inverse_transform``.

    Returns
    -------
    tuple
        ``(predictions, y_test_inv, metrics)``: the inverse-transformed
        predictions and targets, plus a dict with keys ``"mse"``, ``"rmse"``
        and ``"mae"``. The three metrics are also printed.
    """
    # Bring both predictions and targets back to the unscaled domain
    predictions = scaler.inverse_transform(model.predict(X_test))
    y_test_inv = scaler.inverse_transform(y_test.reshape(-1, 1))

    # One stateful Keras metric per reported score; dict order fixes the
    # order of the returned keys.
    trackers = {
        "mse": tf.keras.metrics.MeanSquaredError(),
        "rmse": tf.keras.metrics.RootMeanSquaredError(),
        "mae": tf.keras.metrics.MeanAbsoluteError(),
    }

    metrics = {}
    for score_name, tracker in trackers.items():
        tracker.update_state(y_test_inv, predictions)
        metrics[score_name] = tracker.result().numpy()

    mse, rmse, mae = metrics["mse"], metrics["rmse"], metrics["mae"]
    print(f'Test MSE:  {mse:.4f}')
    print(f'Test RMSE: {rmse:.4f}')
    print(f'Test MAE:  {mae:.4f}')

    return predictions, y_test_inv, metrics

In [None]:
# Create a function that defines a simple LSTM model



In [None]:
# Create a function that trains the model



In [None]:
# Main execution
# =============
# Location of the temperature CSV that the data-loading step reads.
# The path is relative, so it is resolved against the kernel's working directory.
data_path = "../../datasets/SurfaceAirTemperatureMonthlyMeanDailyMinimum.csv" # change this path if necessary


In [None]:
# print the data index


In [None]:
# get air-temperature data


In [None]:
# Split data


In [None]:
# Normalize data after splitting


In [None]:
# Create sequences

# check the shape


In [None]:
# Build the model (training happens in the next cell)


# check model summary



In [None]:
# train the model



In [None]:
# Plot training history



In [None]:
# Make predictions and evaluate on train data



In [None]:
# Make predictions and evaluate on test data



In [None]:
# Plot results



In [None]:
# convert the train metrics into a DataFrame and display them


In [None]:
# convert the test metrics into a DataFrame and display them
