<a href="https://colab.research.google.com/github/amirmohammadkalateh/gool-/blob/main/gool!_predict.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:

import itertools

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout
from tensorflow.keras.regularizers import l1_l2
from tensorflow.keras.constraints import MaxNorm
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# Seed NumPy and Keras so that repeated runs are reproducible.
np.random.seed(42)
keras.utils.set_random_seed(42)

# Step 2: Load and prepare data.
print("Loading data from 'final_dataset.csv'...")
try:
    # Keep the try body minimal: only the read can raise FileNotFoundError.
    data = pd.read_csv('final_dataset.csv')
except FileNotFoundError:
    print("Error: 'final_dataset.csv' not found. Please ensure the file is in the same directory.")
    # `exit()` is an interactive helper injected by the `site` module and is
    # not meant for programs; raise SystemExit directly, with a non-zero
    # status so the failure is visible to whatever launched the script.
    raise SystemExit(1) from None
else:
    print("Data loaded successfully. Here's a quick look at the first 5 rows:")
    print(data.head())

# Drop the 'Date' column (when present) so it is not fed to the model as a feature.
if 'Date' in data.columns:
    data = data.drop('Date', axis=1)

# Define the features (X) and the target variable (y): every remaining column
# except 'AQI' is a feature, and 'AQI' is the regression target.
X = data.drop('AQI', axis=1)
y = data['AQI']

# Hold out 20% of the rows as a test set (fixed random_state for a repeatable split).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features. The scaler is fitted on the training split only and
# then applied to the test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

def create_model(input_shape, optimizer_name='adam', init_mode='he_uniform', dropout_rate=0.2,
                 l1_reg=0.01, l2_reg=0.01, clipvalue=1.0):
    """Build and compile a small fully connected regression network.

    The network is: Input -> [Dense(64) -> BatchNorm -> Dropout]
    -> [Dense(32) -> BatchNorm -> Dropout] -> Dense(1). Both hidden Dense
    layers use ReLU, an L1+L2 kernel regularizer and a MaxNorm(3) kernel
    constraint. The model is compiled with Adam, mean-squared-error loss and
    an MAE metric.

    Args:
        input_shape: Shape tuple of a single sample, e.g. ``(n_features,)``.
        optimizer_name: Accepted for backward compatibility but currently
            unused; the model is always compiled with Adam.
        init_mode: Kernel initializer for the hidden Dense layers.
        dropout_rate: Dropout rate applied after each hidden block.
        l1_reg: L1 coefficient of the hidden layers' kernel regularizer.
        l2_reg: L2 coefficient of the hidden layers' kernel regularizer.
        clipvalue: Gradient clip value passed to the Adam optimizer.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()

    # Declare the input with an explicit Input layer. Passing `input_shape=`
    # to the first Dense layer is deprecated for Sequential models and makes
    # Keras emit a UserWarning each time a model is built.
    model.add(keras.Input(shape=input_shape))

    # Two hidden blocks that differ only in width.
    for units in (64, 32):
        model.add(Dense(units, activation='relu', kernel_initializer=init_mode,
                        kernel_regularizer=l1_l2(l1=l1_reg, l2=l2_reg),
                        kernel_constraint=MaxNorm(3)))
        model.add(BatchNormalization())
        model.add(Dropout(dropout_rate))

    # Output layer (for regression): a single linear unit.
    model.add(Dense(1))

    # Adam with element-wise gradient clipping.
    optimizer = Adam(clipvalue=clipvalue)

    # Compile the model
    model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['mae'])

    return model

# Perform a manual grid search: train one model per hyperparameter combination
# and keep the one with the lowest validation loss.
# Hyperparameters to search over.
param_grid = {
    'dropout_rate': [0.1, 0.2],
    'l1_reg': [0.001, 0.01],
    'l2_reg': [0.001, 0.01],
    'clipvalue': [1.0, 0.5],
    'batch_size': [16, 32],
    'epochs': [50, 100]
}

best_score = float('inf')
best_params = {}
best_model = None

input_shape = (X_train_scaled.shape[1],)

print("\nStarting manual Grid Search for hyperparameter tuning. This may take some time...")

# itertools.product visits the combinations in the same order as nested loops
# would: the last-listed hyperparameter varies fastest.
for dropout_rate, l1_reg, l2_reg, clipvalue, batch_size, epochs in itertools.product(
        param_grid['dropout_rate'],
        param_grid['l1_reg'],
        param_grid['l2_reg'],
        param_grid['clipvalue'],
        param_grid['batch_size'],
        param_grid['epochs']):

    # Create the model with the current hyperparameters
    model = create_model(input_shape=input_shape,
                         dropout_rate=dropout_rate,
                         l1_reg=l1_reg,
                         l2_reg=l2_reg,
                         clipvalue=clipvalue)

    # Stop once val_loss has not improved for 10 epochs; a fresh callback is
    # created per run so no state carries over between combinations.
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    # Train with 10% of the training data held out for validation.
    history = model.fit(X_train_scaled, y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        validation_split=0.1,
                        callbacks=[early_stopping],
                        verbose=0)

    # Score this combination by the best validation loss seen in any epoch.
    val_loss = min(history.history['val_loss'])

    print(f"Tested params: Dropout={dropout_rate}, L1={l1_reg}, L2={l2_reg}, Clip={clipvalue}, Batch={batch_size}, Epochs={epochs}, Val Loss={val_loss:.4f}")

    # Check if this is the best model so far
    if val_loss < best_score:
        best_score = val_loss
        best_params = {
            'dropout_rate': dropout_rate,
            'l1_reg': l1_reg,
            'l2_reg': l2_reg,
            'clipvalue': clipvalue,
            'batch_size': batch_size,
            'epochs': epochs
        }
        best_model = model

print("\n--- Manual Grid Search Results ---")
print(f"Best validation loss: {best_score:.4f}")
print("Best parameters found: ", best_params)

# Evaluate the best model on the held-out test set. Compare against None
# explicitly rather than relying on the truthiness of a model object.
if best_model is not None:
    test_loss, test_mae = best_model.evaluate(X_test_scaled, y_test, verbose=0)
    print("Final best model evaluation on test data:")
    print(f"  Test Loss (MSE): {test_loss:.4f}")
    print(f"  Test MAE: {test_mae:.4f}")

    # Make predictions
    predictions = best_model.predict(X_test_scaled)
    print("\nExample predictions vs actual values:")
    # Show up to five examples; slicing avoids an IndexError when the test
    # set holds fewer than five rows.
    for predicted, actual in zip(predictions[:5], y_test.iloc[:5]):
        print(f"  Predicted: {predicted[0]:.2f}, Actual: {actual:.2f}")



Loading data from 'final_dataset.csv'...
Data loaded successfully. Here's a quick look at the first 5 rows:
   Date  Month  Year  Holidays_Count  Days   PM2.5    PM10     NO2    SO2  \
0     1      1  2021               0     5  408.80  442.42  160.61  12.95   
1     2      1  2021               0     6  404.04  561.95   52.85   5.18   
2     3      1  2021               1     7  225.07  239.04  170.95  10.93   
3     4      1  2021               0     1   89.55  132.08  153.98  10.42   
4     5      1  2021               0     2   54.06   55.54  122.66   9.70   

     CO  Ozone  AQI  
0  2.77  43.19  462  
1  2.60  16.43  482  
2  1.40  44.29  263  
3  1.01  49.19  207  
4  0.64  48.88  149  

Starting manual Grid Search for hyperparameter tuning. This may take some time...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Tested params: Dropout=0.1, L1=0.001, L2=0.001, Clip=1.0, Batch=16, Epochs=50, Val Loss=3395.6484
Tested params: Dropout=0.1, L1=0.001, L2=0.001, Clip=1.0, Batch=16, Epochs=100, Val Loss=3028.9111
Tested params: Dropout=0.1, L1=0.001, L2=0.001, Clip=1.0, Batch=32, Epochs=50, Val Loss=12521.9336
Tested params: Dropout=0.1, L1=0.001, L2=0.001, Clip=1.0, Batch=32, Epochs=100, Val Loss=2667.8389
Tested params: Dropout=0.1, L1=0.001, L2=0.001, Clip=0.5, Batch=16, Epochs=50, Val Loss=2622.7410
Tested params: Dropout=0.1, L1=0.001, L2=0.001, Clip=0.5, Batch=16, Epochs=100, Val Loss=2503.2568
Tested params: Dropout=0.1, L1=0.001, L2=0.001, Clip=0.5, Batch=32, Epochs=50, Val Loss=11730.9141
Tested params: Dropout=0.1, L1=0.001, L2=0.001, Clip=0.5, Batch=32, Epochs=100, Val Loss=2149.5513
Tested params: Dropout=0.1, L1=0.001, L2=0.01, Clip=1.0, Batch=16, Epochs=50, Val Loss=3316.8320
Tested params: Dropout=0.1, L1=0.001, L2=0.01, Clip=1.0, Batch=16, Epochs=100, Val Loss=2249.6790
Tested params: 