In [None]:
# ONLY RUN THIS IF YOU'RE IN GOOGLE COLAB
from google.colab import drive
import os

drive.mount('/content/drive')
os.chdir('/content/drive/MyDrive/Thesis/Pintu-Air/notebooks')

# Verify you're in the right place
!pwd
!ls -la

Mounted at /content/drive
/content/drive/MyDrive/Thesis/Pintu-Air
total 27492
-rw------- 1 root root 5517847 Jun 11 14:58 '01 Result Data Cleaning Part 1.csv'
-rw------- 1 root root 2009870 Jun 13 22:15 '02 All Data.csv'
-rw------- 1 root root 1964974 Jun 11 14:58 '02 Data Preperation.ipynb'
-rw------- 1 root root  256098 Jun 11 14:58 '02 X_test.csv'
-rw------- 1 root root 4831129 Jun 11 14:58 '02 X_train.csv'
-rw------- 1 root root   40580 Jun 11 14:58 '02 y_test.csv'
-rw------- 1 root root  770651 Jun 11 14:58 '02 y_train.csv'
-rw------- 1 root root  322748 Jun 11 14:58 '03 ARIMA.ipynb'
-rw------- 1 root root    2277 Jun 11 14:58  03_Result_ARIMA.csv
-rw------- 1 root root 6247212 Jun 11 14:58  04c_ARIMA_Manggarai.ipynb
-rw------- 1 root root  631448 Jun 11 14:58  06_DataPreperation_ML.ipynb
-rw------- 1 root root 1611961 Jun 11 14:58  07a_best_model.keras
-rw------- 1 root root  365926 Jun 13 23:17  07a_LSTM_Manggarai.ipynb
-rw------- 1 root root 1685693 Jun 11 14:58  07b_best_model

In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from itertools import product
import warnings
warnings.filterwarnings('ignore')

In [2]:
# GPU Configuration
print("GPU Available:", tf.config.list_physical_devices('GPU'))

GPU Available: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [3]:
# Set random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

In [4]:
y_train = (pd.read_csv("02 y_train.csv", index_col='Tanggal')).values
y_test = (pd.read_csv("02 y_test.csv", index_col='Tanggal')).values

In [10]:
X = pd.read_csv("02 All Data.csv")
X['Tanggal'] = pd.to_datetime( X['Tanggal'])
X = X.set_index('Tanggal')
X = X.iloc[24:] # to keep the number of data the same as other model

# One-hot encode all cuaca columns and drop 'Terang' column
categorical_col = [col for col in X.columns if 'cuaca' in col]
numeric_col = [col for col in X.columns if 'air' in col]

X_cat = pd.concat([X[col] for col in categorical_col], axis=1)
X_num = pd.concat([X[col] for col in numeric_col], axis=1)

columns_to_drop = []

for cuaca_col in categorical_col:
    # Create a new binary column for 'Hujan' or 'Gerimis'
    X_cat[f'{cuaca_col}_hujan'] = X_cat[cuaca_col].isin(['Hujan', 'Gerimis'])
    # Add the original 'cuaca' column to the list of columns to be dropped
    columns_to_drop.append(cuaca_col)

# Drop the original string-based 'cuaca' columns
X_cat = X_cat.drop(columns=columns_to_drop)

In [11]:
# Seperate train and test data
split_index = int(len(X) * 0.95)

X_num_train = X_num[:split_index]
X_num_test = X_num[split_index:]

X_cat_train = X_cat[:split_index]
X_cat_test = X_cat[split_index:]

# Scale the data
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_num_train_scaled = scaler_X.fit_transform(X_num_train)
X_num_test_scaled = scaler_X.transform(X_num_test)
y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1)).flatten()

# Combine scaled continuous with unscaled binary features
X_train = np.concatenate([X_num_train_scaled, X_cat_train], axis=1)
X_test = np.concatenate([X_num_test_scaled, X_cat_test], axis=1)

print(f"\n{'='*60}\nData Train and Test Distribution:\n{'='*60}")

print(f'Jumlah data: {len(X)}')

print(f'Jumlah data X train: {len(X_num_train)}')
print(f'Jumlah data X test: {len(X_num_test)}')

print(f'Jumlah data y train: {len(y_train)}')
print(f'Jumlah data y test: {len(y_test)}')



Data Train and Test Distribution:
Jumlah data: 31200
Jumlah data X train: 29640
Jumlah data X test: 1560
Jumlah data y train: 29640
Jumlah data y test: 1560


In [14]:
def create_sequences(X, y, sequence_length):
    X_seq, y_seq = [], []

    for i in range(sequence_length, len(X)):
        # Take sequence_length previous time steps as features
        X_seq.append(X[i-sequence_length:i])
        # Current time step as target
        y_seq.append(y[i])

    return np.array(X_seq), np.array(y_seq)

In [17]:
def build_lstm_model(sequence_length, n_features, lstm_units, dropout_rate, learning_rate):
    """Build LSTM model with specified parameters"""
    model = Sequential()
    
    # First LSTM layer
    model.add(LSTM(lstm_units[0], return_sequences=True if len(lstm_units) > 1 else False, 
                   input_shape=(sequence_length, n_features)))
    model.add(Dropout(dropout_rate))
    
    # Additional LSTM layers
    for i in range(1, len(lstm_units)):
        return_seq = True if i < len(lstm_units) - 1 else False
        model.add(LSTM(lstm_units[i], return_sequences=return_seq))
        model.add(Dropout(dropout_rate))
    
    # Dense layers
    model.add(Dense(16, activation='relu'))
    model.add(Dense(1))
    
    # Compile model
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='mse',
        metrics=['mae']
    )
    
    return model

In [18]:
def evaluate_model(model, X_seq, y_actual, scaler_y, dataset_name=""):
    """Evaluate model and return metrics for any dataset (train/test)"""
    # Predict
    y_pred_scaled = model.predict(X_seq, verbose=0)
    y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()
    
    # Calculate metrics using sklearn functions
    mse = mean_squared_error(y_actual, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_actual, y_pred)
    r2 = r2_score(y_actual, y_pred)
    mape = mean_absolute_percentage_error(y_actual, y_pred) * 100  # Convert to percentage
    
    # Add prefix to metric names if dataset specified
    prefix = f"{dataset_name}_" if dataset_name else ""
    
    return {
        f'{prefix}MSE': mse,
        f'{prefix}RMSE': rmse,
        f'{prefix}MAE': mae,
        f'{prefix}R2': r2,
        f'{prefix}MAPE': mape,
        f'{prefix}predictions': y_pred
    }

In [19]:
def create_results_summary(results_df):
    """Create summary tables and visualizations for thesis"""
    
    # 1. Top 10 models summary
    top_models = results_df.nsmallest(10, 'Test_RMSE').copy()
    top_models['Architecture'] = top_models['lstm_units'].astype(str)
    top_models['Overfitting'] = top_models['Train_RMSE'] - top_models['Test_RMSE']
    
    summary_cols = ['Architecture', 'learning_rate', 'dropout_rate', 
                   'Train_RMSE', 'Val_RMSE', 'Test_RMSE', 
                   'Train_R2', 'Val_R2', 'Test_R2', 
                   'Train_MAE', 'Val_MAE', 'Test_MAE', 
                   'Train_MAPE', 'Val_MAPE', 'Test_MAPE', 'Overfitting']
    
    top_summary = top_models[summary_cols]
    
    # 2. Best model per architecture
    arch_summary = results_df.groupby('lstm_units').apply(
        lambda x: x.loc[x['Test_RMSE'].idxmin()]
    )[summary_cols].round(6)
    
    # 3. Learning rate analysis
    lr_summary = results_df.groupby('learning_rate').agg({
        'Test_RMSE': ['mean', 'std', 'min'],
        'Test_R2': ['mean', 'std', 'max'],
        'Train_RMSE': ['mean', 'std', 'min'],
        'Train_R2': ['mean', 'std', 'max'],
        'Val_RMSE': ['mean', 'std', 'min'],
        'Val_R2': ['mean', 'std', 'max']
    })
    
    return top_summary, arch_summary, lr_summary

In [20]:
def count_combinations(param_grid):
    """Count total parameter combinations"""
    total = 1
    for param, values in param_grid.items():
        total *= len(values)
    return total

In [None]:
def lstm_grid_search(X_train, y_train_scaled, X_test, y_test, scaler_y, param_grid):
    """
    Perform grid search for LSTM hyperparameters
    """
    results = []
    best_score = float('inf')
    best_params = None
    best_model = None
    
    # Generate all parameter combinations
    param_combinations = list(product(*param_grid.values()))
    param_names = list(param_grid.keys())
    
    total_combinations = len(param_combinations)
    estimated_time = total_combinations * 5 / 60  # 5 minutes per combination
    
    print(f"Starting Grid Search:")
    print(f"- Total combinations: {total_combinations}")
    print(f"- Estimated time: {estimated_time:.1f} hours")
    print(f"{'='*80}")
    
    for i, params in enumerate(param_combinations):
        param_dict = dict(zip(param_names, params))
        print(f"\nCombination {i+1}/{total_combinations}: {param_dict}")
        
        try:
            # Create sequences
            X_seq, y_seq = create_sequences(X_train, y_train_scaled, param_dict['sequence_length'])
            
            # Split for validation (80% train, 20% validation)
            val_size = int(len(X_seq) * 0.2)
            X_train_fold = X_seq[:-val_size]
            y_train_fold = y_seq[:-val_size]
            X_val_fold = X_seq[-val_size:]
            y_val_fold = y_seq[-val_size:]
            
            # Build model
            model = build_lstm_model(
                sequence_length=param_dict['sequence_length'],
                n_features=X_seq.shape[2],
                lstm_units=param_dict['lstm_units'],
                dropout_rate=param_dict['dropout_rate'],
                learning_rate=param_dict['learning_rate']
            )
            
            # Callbacks
            early_stopping = EarlyStopping(
                monitor='val_loss', patience=15, restore_best_weights=True, verbose=0
            )
            reduce_lr = ReduceLROnPlateau(
                monitor='val_loss', factor=0.5, patience=8, min_lr=0.00001, verbose=0
            )
            
            # Train model
            history = model.fit(
                X_train_fold, y_train_fold,
                epochs=param_dict['epochs'],
                batch_size=param_dict['batch_size'],
                validation_data=(X_val_fold, y_val_fold),
                callbacks=[early_stopping, reduce_lr],
                verbose=0
            )
            
            # Evaluate on both training and test sets
            # Training evaluation (on actual training data used for fitting)
            y_train_fold_actual = scaler_y.inverse_transform(y_train_fold.reshape(-1, 1)).flatten()
            train_metrics = evaluate_model(model, X_train_fold, y_train_fold_actual, scaler_y, "Train")
            
            # Validation evaluation (for monitoring during training)
            val_metrics = evaluate_model(model, X_val_fold, y_val_fold.flatten(), scaler_y, "Val")
            
            # Test evaluation (completely unseen data)
            X_test_seq, _ = create_sequences(X_test, y_test.flatten(), param_dict['sequence_length'])
            y_test_actual = y_test[param_dict['sequence_length']:]
            test_metrics = evaluate_model(model, X_test_seq, y_test_actual, scaler_y, "Test")
            
            # Store comprehensive results
            result = {
                'params': param_dict.copy(),
                'val_loss': min(history.history['val_loss']),
                'epochs_trained': len(history.history['loss']),
                **train_metrics,
                **val_metrics,
                **test_metrics
            }
            results.append(result)
            
            print(f"Train RMSE: {train_metrics['Train_RMSE']}, R²: {train_metrics['Train_R2']}, MAE: {train_metrics['Train_MAE']}, MAPE: {train_metrics['Train_MAPE']}")
            print(f"Val RMSE: {val_metrics['Val_RMSE']}, R²: {val_metrics['Val_R2']}, MAE: {val_metrics['Val_MAE']}, MAPE: {val_metrics['Val_MAPE']}")
            print(f"Test RMSE: {test_metrics['Test_RMSE']}, R²: {test_metrics['Test_R2']}, MAE: {test_metrics['Test_MAE']}, MAPE: {test_metrics['Test_MAPE']}")
            print(f"Overfitting (Train-Test): RMSE: {train_metrics['Train_RMSE'] - test_metrics['Test_RMSE']}, R²: {train_metrics['Train_R2'] - test_metrics['Test_R2']}")
            
            # Update best model based on test RMSE
            if test_metrics['Test_RMSE'] < best_score:
                best_score = test_metrics['Test_RMSE']
                best_params = param_dict.copy()
                best_model = model
                print(f"NEW BEST MODEL! Test RMSE: {best_score}")
            
        except Exception as e:
            print(f"Error: {str(e)}")
            result = {
                'params': param_dict.copy(),
                'error': str(e),
                'RMSE': float('inf')
            }
            results.append(result)
        
        print("-" * 80)
    
    return results, best_params, best_model

In [22]:
param_grid = {
    'sequence_length': [24],  # Fixed based on daily seasonality
    'lstm_units': [
        [64, 32, 16],      # Small model
        [128, 64, 32],     # Medium model
        [256, 128, 64]     # Large model
    ],
    'dropout_rate': [0.2, 0.3],  # Moderate regularization
    'learning_rate': [0.001, 0.005, 0.01],  # Key learning rates
    'batch_size': [32],  # Fixed at common value
    'epochs': [100]
}

param_grid_ultra_fast = {
    'sequence_length': [24],
    'lstm_units': [
        [64, 32, 16],      # Small baseline
        [128, 64, 32]      # Medium model
    ],
    'dropout_rate': [0.2],
    'learning_rate': [0.001, 0.005, 0.01],
    'batch_size': [32],
    'epochs': [50]
}

In [23]:
current_param_grid = param_grid_ultra_fast

print("Parameter Grid Selected:")
for param, values in current_param_grid.items():
    print(f"  {param}: {values}")

combinations = count_combinations(current_param_grid)
estimated_hours = combinations * 5 / 60
print(f"\nTotal combinations: {combinations}")
print(f"Estimated time: {estimated_hours:.1f} hours")

Parameter Grid Selected:
  sequence_length: [24]
  lstm_units: [[64, 32, 16], [128, 64, 32]]
  dropout_rate: [0.2]
  learning_rate: [0.001, 0.005, 0.01]
  batch_size: [32]
  epochs: [50]

Total combinations: 6
Estimated time: 0.5 hours


In [24]:
# RUN GRID SEARCH
results, best_params, best_model = lstm_grid_search(
    X_train, y_train_scaled, X_test, y_test, scaler_y, current_param_grid
)

Starting Grid Search:
- Total combinations: 6
- Estimated time: 0.5 hours

Combination 1/6: {'sequence_length': 24, 'lstm_units': [64, 32, 16], 'dropout_rate': 0.2, 'learning_rate': 0.001, 'batch_size': 32, 'epochs': 50}


I0000 00:00:1750224161.383071    1538 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1767 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3050 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6
I0000 00:00:1750224173.212590   15274 cuda_dnn.cc:529] Loaded cuDNN version 90300


Train RMSE: 613.74260664454, R²: -122129675.29045981, MAE: 612.7549124810622, MAPE: 2.2570800033074692e+16
Val RMSE: 615.7052136239573, R²: -168211357.37114686, MAE: 614.978567061606, MAPE: 4.4267748026430216e+16
Test RMSE: 31.957280779401998, R²: -0.29526971543191527, MAE: 21.79202163219452, MAPE: 4.5557810940377665
Overfitting (Train-Test): RMSE: 581.785325865138, R²: -122129674.9951901
NEW BEST MODEL! Test RMSE: 31.957280779401998
--------------------------------------------------------------------------------

Combination 2/6: {'sequence_length': 24, 'lstm_units': [64, 32, 16], 'dropout_rate': 0.2, 'learning_rate': 0.005, 'batch_size': 32, 'epochs': 50}
Train RMSE: 620.1667275657135, R²: -124699749.21644221, MAE: 619.3954946433628, MAPE: 2.2857314395540892e+16
Val RMSE: 620.0367228598114, R²: -170586428.85719588, MAE: 619.5415442652276, MAPE: 4.514490695662962e+16
Test RMSE: 29.681148622827898, R²: -0.11733134333630857, MAE: 18.729326566060383, MAPE: 4.081237439039976
Overfitting (

KeyboardInterrupt: 

In [None]:
# Convert results to DataFrame with all metrics
results_df = pd.DataFrame([
    {**r['params'], **{k: v for k, v in r.items() 
                      if k not in ['params', 'Train_predictions', 'Val_predictions', 'Test_predictions', 'error']}}
    for r in results if 'error' not in r
])

In [None]:
def create_results_summary(results_df):
    """Create summary tables and visualizations for thesis"""
    
    # 1. Top 10 models summary
    top_models = results_df.nsmallest(10, 'Test_RMSE').copy()
    top_models['Architecture'] = top_models['lstm_units'].astype(str)
    top_models['Overfitting'] = top_models['Train_RMSE'] - top_models['Test_RMSE']
    
    summary_cols = ['Architecture', 'learning_rate', 'dropout_rate', 
                   'Train_RMSE', 'Val_RMSE', 'Test_RMSE', 
                   'Train_R2', 'Val_R2', 'Test_R2', 
                   'Train_MAE', 'Val_MAE', 'Test_MAE', 
                   'Train_MAPE', 'Val_MAPE', 'Test_MAPE', 'Overfitting']
    
    top_summary = top_models[summary_cols].round(6)
    
    # 2. Best model per architecture
    arch_summary = results_df.groupby('lstm_units').apply(
        lambda x: x.loc[x['Test_RMSE'].idxmin()]
    )[summary_cols].round(6)
    
    # 3. Learning rate analysis
    lr_summary = results_df.groupby('learning_rate').agg({
        'Test_RMSE': ['mean', 'std', 'min'],
        'Test_R2': ['mean', 'std', 'max'],
        'Train_RMSE': ['mean', 'std', 'min'],
        'Train_R2': ['mean', 'std', 'max'],
        'Val_RMSE': ['mean', 'std', 'min'],
        'Val_R2': ['mean', 'std', 'max']
    }).round(6)
    
    return top_summary, arch_summary, lr_summary

In [None]:
def evaluate_model(model, X_seq, y_actual, scaler_y, dataset_name=""):
    """Evaluate model and return metrics for any dataset (train/test)"""
    # Predict
    y_pred_scaled = model.predict(X_seq, verbose=0)
    y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()
    
    # Calculate metrics using sklearn functions
    mse = mean_squared_error(y_actual, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_actual, y_pred)
    r2 = r2_score(y_actual, y_pred)
    mape = mean_absolute_percentage_error(y_actual, y_pred) * 100
    
    # Add prefix to metric names if dataset specified
    prefix = f"{dataset_name}_" if dataset_name else ""
    
    return {
        f'{prefix}MSE': mse,
        f'{prefix}RMSE': rmse,
        f'{prefix}MAE': mae,
        f'{prefix}R2': r2,
        f'{prefix}MAPE': mape,
        f'{prefix}predictions': y_pred
    }

In [None]:
def create_results_summary(results_df):
    """Create summary tables and visualizations for thesis"""
    
    # 1. Top 10 models summary
    top_models = results_df.nsmallest(10, 'Test_RMSE').copy()
    top_models['Architecture'] = top_models['lstm_units'].astype(str)
    top_models['Overfitting'] = top_models['Train_RMSE'] - top_models['Test_RMSE']
    
    summary_cols = ['Architecture', 'learning_rate', 'dropout_rate', 
                   'Train_RMSE', 'Test_RMSE', 'Train_R2', 'Test_R2', 
                   'Train_MAE', 'Test_MAE', 'Train_MAPE', 'Test_MAPE', 'Overfitting']
    
    top_summary = top_models[summary_cols].round(4)
    
    # 2. Best model per architecture
    arch_summary = results_df.groupby('lstm_units').apply(
        lambda x: x.loc[x['Test_RMSE'].idxmin()]
    )[summary_cols].round(4)
    
    # 3. Learning rate analysis
    lr_summary = results_df.groupby('learning_rate').agg({
        'Test_RMSE': ['mean', 'std', 'min'],
        'Test_R2': ['mean', 'std', 'max'],
        'Train_RMSE': ['mean', 'std', 'min'],
        'Train_R2': ['mean', 'std', 'max']
    }).round(4)
    
    return top_summary, arch_summary, lr_summary

In [None]:
def count_combinations(param_grid):
    """Count total parameter combinations"""
    total = 1
    for param, values in param_grid.items():
        total *= len(values)
    return total

In [None]:
def lstm_grid_search(X_train, y_train_scaled, X_test, y_test, scaler_y, param_grid):
    """
    Perform grid search for LSTM hyperparameters
    """
    results = []
    best_score = float('inf')
    best_params = None
    best_model = None
    
    # Generate all parameter combinations
    param_combinations = list(product(*param_grid.values()))
    param_names = list(param_grid.keys())
    
    total_combinations = len(param_combinations)
    estimated_time = total_combinations * 5 / 60  # 5 minutes per combination
    
    print(f"Starting Grid Search:")
    print(f"- Total combinations: {total_combinations}")
    print(f"- Estimated time: {estimated_time:.1f} hours")
    print(f"{'='*80}")
    
    for i, params in enumerate(param_combinations):
        param_dict = dict(zip(param_names, params))
        print(f"\nCombination {i+1}/{total_combinations}: {param_dict}")
        
        try:
            # Create sequences
            X_seq, y_seq = create_sequences(X_train, y_train_scaled, param_dict['sequence_length'])
            
            # Split for validation (80% train, 20% validation)
            val_size = int(len(X_seq) * 0.2)
            X_train_fold = X_seq[:-val_size]
            y_train_fold = y_seq[:-val_size]
            X_val_fold = X_seq[-val_size:]
            y_val_fold = y_seq[-val_size:]
            
            # Build model
            model = build_lstm_model(
                sequence_length=param_dict['sequence_length'],
                n_features=X_seq.shape[2],
                lstm_units=param_dict['lstm_units'],
                dropout_rate=param_dict['dropout_rate'],
                learning_rate=param_dict['learning_rate']
            )
            
            # Callbacks
            early_stopping = EarlyStopping(
                monitor='val_loss', patience=15, restore_best_weights=True, verbose=0
            )
            reduce_lr = ReduceLROnPlateau(
                monitor='val_loss', factor=0.5, patience=8, min_lr=0.00001, verbose=0
            )
            
            # Train model
            history = model.fit(
                X_train_fold, y_train_fold,
                epochs=param_dict['epochs'],
                batch_size=param_dict['batch_size'],
                validation_data=(X_val_fold, y_val_fold),
                callbacks=[early_stopping, reduce_lr],
                verbose=0
            )
            
            # Evaluate on both training and test sets
            # Training evaluation (use validation fold for fair comparison)
            y_train_fold_actual = scaler_y.inverse_transform(y_train_fold.reshape(-1, 1)).flatten()
            train_metrics = evaluate_model(model, X_train_fold, y_train_fold_actual, scaler_y, "Train")
            
            # Test evaluation
            X_test_seq, _ = create_sequences(X_test, y_test.flatten(), param_dict['sequence_length'])
            y_test_actual = y_test[param_dict['sequence_length']:]
            test_metrics = evaluate_model(model, X_test_seq, y_test_actual, scaler_y, "Test")
            
            # Store comprehensive results
            result = {
                'params': param_dict.copy(),
                'val_loss': min(history.history['val_loss']),
                'epochs_trained': len(history.history['loss']),
                **train_metrics,
                **test_metrics
            }
            results.append(result)
            
            print(f"Train RMSE: {train_metrics['Train_RMSE']:.4f}, R²: {train_metrics['Train_R2']:.4f}")
            print(f"Test RMSE: {test_metrics['Test_RMSE']:.4f}, R²: {test_metrics['Test_R2']:.4f}")
            print(f"Overfitting: {train_metrics['Train_RMSE'] - test_metrics['Test_RMSE']:.4f}")
            
            # Update best model based on test RMSE
            if test_metrics['Test_RMSE'] < best_score:
                best_score = test_metrics['Test_RMSE']
                best_params = param_dict.copy()
                best_model = model
                print(f"NEW BEST MODEL! Test RMSE: {best_score:.4f}")
            
        except Exception as e:
            print(f"Error: {str(e)}")
            result = {
                'params': param_dict.copy(),
                'error': str(e),
                'RMSE': float('inf')
            }
            results.append(result)
        
        print("-" * 80)
    
    return results, best_params, best_model

In [None]:
param_grid_5h = {
    'sequence_length': [24],
    'lstm_units': [
        [64, 32, 16],
        [128, 64, 32],
        [256, 128, 64]
    ], 
    'dropout_rate': [0.2, 0.3],
    'learning_rate': [0.001, 0.005, 0.01],
    'batch_size': [32],
    'epochs': [100]
}

# Alternative: ULTRA-FAST Grid for initial exploration (6 combinations ≈ 30 minutes)
param_grid_ultra_fast = {
    'sequence_length': [24],
    'lstm_units': [
        [64, 32, 16],
        [128, 64, 32]
    ],
    'dropout_rate': [0.2], 
    'learning_rate': [0.001, 0.005, 0.01],  
    'batch_size': [32],
    'epochs': [50]
}

In [None]:
current_param_grid = param_grid_ultra_fast

print("Parameter Grid Selected:")
for param, values in current_param_grid.items():
    print(f"  {param}: {values}")

combinations = count_combinations(current_param_grid)
estimated_hours = combinations * 5 / 60
print(f"\nTotal combinations: {combinations}")
print(f"Estimated time: {estimated_hours:.1f} hours")

# ================================
# RUN GRID SEARCH
# ================================

print(f"\n{'='*60}\nSTARTING GRID SEARCH\n{'='*60}")

results, best_params, best_model = lstm_grid_search(
    X_train, y_train_scaled, X_test, y_test, scaler_y, current_param_grid
)

In [None]:
results_df = pd.DataFrame([
    {**r['params'], **{k: v for k, v in r.items() 
                      if k not in ['params', 'Train_predictions', 'Test_predictions', 'error']}}
    for r in results if 'error' not in r
])

In [None]:
if len(results_df) > 0:
    # Generate summary tables for thesis
    top_summary, arch_summary, lr_summary = create_results_summary(results_df)
    
    print("TOP 10 MODELS (for main thesis table):")
    print(top_summary.to_string(index=False))
    
    print(f"\n🏗️ BEST MODEL PER ARCHITECTURE:")
    print(arch_summary.to_string())
    
    print(f"\n📈 LEARNING RATE ANALYSIS:")
    print(lr_summary.to_string())
    
    # Basic statistics
    print(f"\n📋 SUMMARY STATISTICS:")
    print(f"Best Test RMSE: {results_df['Test_RMSE'].min():.4f}")
    print(f"Best Test R²: {results_df['Test_R2'].max():.4f}")
    print(f"Average Overfitting: {(results_df['Train_RMSE'] - results_df['Test_RMSE']).mean():.4f}")
    
    # Save complete results to CSV for appendix
    results_df.to_csv('lstm_grid_search_complete_results.csv', index=False)
    print(f"\nComplete results saved to 'lstm_grid_search_complete_results.csv'")
    
    # Save summary tables for main thesis
    top_summary.to_csv('lstm_top_models_summary.csv', index=False)
    arch_summary.to_csv('lstm_architecture_comparison.csv')
    # print(f"\n Summary tables saved for thesis:")
    # print(" lstm_top_models_summary.csv (for main results section)")
    # print(" lstm_architecture_comparison.csv (for architecture analysis)")

print(f"\nBest Parameters: {best_params}")
print(f"Best Test RMSE: {best_score:.4f}")

In [None]:
def count_combinations(grid):
    total = 1
    for param, values in grid.items():
        total *= len(values)
    return total

In [None]:
combinations_5h = count_combinations(param_grid_5h)
combinations_ultra = count_combinations(param_grid_ultra_fast)

print("REVISED PARAMETER GRIDS:")
print(f"5-Hour Grid: {combinations_5h} combinations (~{combinations_5h * 5 / 60:.1f} hours)")
print(f"Ultra-Fast Grid: {combinations_ultra} combinations (~{combinations_ultra * 5 / 60:.1f} hours)")

In [15]:
# Create sequences
SEQUENCE_LENGTH = 24

X_seq, y_seq = create_sequences(X_train, y_train_scaled, SEQUENCE_LENGTH)
print(f"Sequence features shape: {X_seq.shape}")
print(f"Sequence target shape: {y_seq.shape}")

Sequence features shape: (29616, 24, 6)
Sequence target shape: (29616,)


In [9]:
# Build LSTM Model
model = Sequential([
    LSTM(128, return_sequences=True, input_shape=(SEQUENCE_LENGTH, X_seq.shape[2])),
    Dropout(0.2),

    LSTM(64, return_sequences=True),
    Dropout(0.2),

    LSTM(32, return_sequences=False),
    Dropout(0.2),

    Dense(16, activation='relu'),
    Dense(1)
])

In [10]:
# Compile model
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='mse',
    metrics=['mae']
)

# Print model summary
model.summary()

In [11]:
# Set up callbacks
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1
)

reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=0.00001,
    verbose=1
)

In [13]:
# Train the model
history = model.fit(
    X_seq, y_seq,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping, reduce_lr],
    verbose=1
)

ValueError: Invalid dtype: object