# Air Quality Prediction using LSTM


- ### **This notebook implements a Long Short-Term Memory (LSTM) neural network model to predict air quality pollutant levels based on historical data.**

## Import Libraries

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import json

## Configuration Parameters

In [None]:
# Configuration parameters
SEQUENCE_LENGTH = 7  # Number of days to use for prediction
POLLUTANTS = ['pm2_5', 'pm10', 'so2', 'no', 'no2', 'nh3', 'o3', 'co']
TEST_SPLIT = 0.2  # Proportion of data to use for testing
MODEL_PARAMS = {
    'lstm_units': 64,
    'dropout_rate': 0.2,
    'epochs': 50,
    'batch_size': 32,
    'patience': 10
}

## Data Loading and Preparation
- This function loads data from individual station files and groups them by state.

In [None]:
def load_and_prepare_data(data_dir, metadata_file):
    """
    Load data from individual station files and group by state
    
    Parameters:
    - data_dir: Directory containing station_<id>_aqi_data.csv files
    - metadata_file: Path to metadata.csv with station information
    
    Returns:
    - Dictionary with state names as keys and DataFrames as values
    """
    # Load metadata
    metadata = pd.read_csv(metadata_file)
    print(f"Loaded metadata with {len(metadata)} stations")
    
    # Initialize dictionary to store dataframes by state
    state_data = {}
    
    # Get list of all CSV files
    csv_files = [f for f in os.listdir(data_dir) if f.startswith('station_') and f.endswith('_aqi_data.csv')]
    print(f"Found {len(csv_files)} station data files")
    
    for file in csv_files:
        # Extract station_id from filename
        station_id = file.split('_')[1]
        
        # Find state for this station
        station_info = metadata[metadata['id'] == int(station_id)]
        
        if len(station_info) == 0:
            print(f"Warning: No metadata found for station {station_id}")
            continue
            
        state = station_info['state'].iloc[0]
        
        # Load station data
        df = pd.read_csv(os.path.join(data_dir, file))
        
        # Add station metadata
        df['id'] = station_id
        df['state'] = state
        
        # Convert date to datetime
        if 'date' in df.columns:
            df['date'] = pd.to_datetime(df['date'])
        
        # Add to state_data dictionary
        if state not in state_data:
            state_data[state] = []
        
        state_data[state].append(df)
    
    # Combine all dataframes for each state
    for state in state_data:
        state_data[state] = pd.concat(state_data[state], ignore_index=True)
        print(f"State {state}: {len(state_data[state])} records")
    
    return state_data

## Sequence Creation for LSTM
- This function creates sequences of data for LSTM input.

In [None]:
def create_sequences(data, sequence_length=SEQUENCE_LENGTH):
    """
    Create sequences for LSTM input
    
    Parameters:
    - data: DataFrame with pollutant data
    - sequence_length: Number of days to use for prediction
    
    Returns:
    - X: Input sequences
    - y: Target values
    """
    X, y = [], []
    
    for i in range(len(data) - sequence_length):
        X.append(data[i:i+sequence_length])
        y.append(data[i+sequence_length])
    
    return np.array(X), np.array(y)

## Data Preprocessing
- This function preprocesses data for a single state, including handling missing values, scaling, and creating sequences.

In [None]:
def preprocess_state_data(state_df):
    """
    Preprocess data for a single state
    
    Parameters:
    - state_df: DataFrame with pollutant data for a state
    
    Returns:
    - Dictionary with training and testing data
    """
    # Sort by date
    if 'date' in state_df.columns:
        state_df = state_df.sort_values('date')
    
    # Select only pollutant columns
    pollutant_data = state_df[POLLUTANTS].copy()
    
    # Handle missing values
    pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
    
    # Scale the data
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(pollutant_data)
    
    # Create sequences
    X, y = create_sequences(scaled_data)
    
    # Split into training and testing sets
    split_idx = int(len(X) * (1 - TEST_SPLIT))
    
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]
    
    return {
        'X_train': X_train,
        'y_train': y_train,
        'X_test': X_test,
        'y_test': y_test,
        'scaler': scaler
    }

## LSTM Model Architecture
- This function builds the LSTM model architecture for pollutant prediction.

In [None]:
def build_lstm_model(input_shape):
    """
    Build LSTM model for pollutant prediction
    
    Parameters:
    - input_shape: Shape of input sequences
    
    Returns:
    - Compiled LSTM model
    """
    model = Sequential([
        LSTM(MODEL_PARAMS['lstm_units'], return_sequences=True, input_shape=input_shape),
        Dropout(MODEL_PARAMS['dropout_rate']),
        LSTM(MODEL_PARAMS['lstm_units']),
        Dropout(MODEL_PARAMS['dropout_rate']),
        Dense(len(POLLUTANTS))  # Output layer with one neuron per pollutant
    ])
    
    model.compile(optimizer='adam', loss='mse')
    return model

## Model Training Function
This function trains an LSTM model for a single state and evaluates its performance.

In [None]:
def train_state_model(state_name, preprocessed_data):
    """
    Train LSTM model for a single state
    
    Parameters:
    - state_name: Name of the state
    - preprocessed_data: Dictionary with training and testing data
    
    Returns:
    - Dictionary with model, history, and evaluation metrics
    """
    print(f"\nTraining model for state: {state_name}")
    
    X_train = preprocessed_data['X_train']
    y_train = preprocessed_data['y_train']
    X_test = preprocessed_data['X_test']
    y_test = preprocessed_data['y_test']
    
    # Build model
    input_shape = (X_train.shape[1], X_train.shape[2])
    model = build_lstm_model(input_shape)
    
    # Define early stopping
    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=MODEL_PARAMS['patience'],
        restore_best_weights=True
    )
    
    # Train model
    history = model.fit(
        X_train, y_train,
        epochs=MODEL_PARAMS['epochs'],
        batch_size=MODEL_PARAMS['batch_size'],
        validation_split=0.2,
        callbacks=[early_stopping],
        verbose=1
    )
    
    # Make predictions
    y_pred = model.predict(X_test)
    
    # Inverse transform predictions and actual values
    scaler = preprocessed_data['scaler']
    y_test_orig = scaler.inverse_transform(y_test)
    y_pred_orig = scaler.inverse_transform(y_pred)
    
    # Calculate metrics
    metrics = {}
    for i, pollutant in enumerate(POLLUTANTS):
        metrics[pollutant] = {
            'mse': mean_squared_error(y_test_orig[:, i], y_pred_orig[:, i]),
            'rmse': np.sqrt(mean_squared_error(y_test_orig[:, i], y_pred_orig[:, i])),
            'mae': mean_absolute_error(y_test_orig[:, i], y_pred_orig[:, i]),
            'r2': r2_score(y_test_orig[:, i], y_pred_orig[:, i])
        }
    
    return {
        'model': model,
        'history': history,
        'y_test': y_test_orig,
        'y_pred': y_pred_orig,
        'metrics': metrics,
        'scaler': scaler
    }

## Data Loading Example
- Let's load the data using the functions defined above.

In [2]:


# Define paths
data_dir = "/kaggle/input/india-aqi-data-2020-25/final_data/india_aqi_data_withcityname"  # Update with your data directory
metadata_file = "/kaggle/input/india-aqi-data-2020-25/final_data/metadata.csv"  # Update with your metadata file path
    
# Load and group data by state
state_data = load_and_prepare_data(data_dir, metadata_file)
    


Loaded metadata with 662 stations
Found 662 station data files
State Delhi: 113040 records
State Karnataka: 80070 records
State Telangana: 29830 records
State Gujarat: 28260 records
State Rajasthan: 75360 records
State Haryana: 61179 records
State Tamil Nadu: 69080 records
State Karanataka: 1570 records
State Kerala: 14130 records
State Uttar Pradesh: 103620 records
State Maharashtra: 158570 records
State Madhya Pradesh: 51810 records
State Bihar: 61230 records
State West Bengal: 47100 records
State Punjab: 14130 records
State Chhattisgarh: 20410 records
State Meghalaya: 4710 records
State Odisha: 29830 records
State Tripura: 3140 records
State Assam: 15700 records
State Andhra Pradesh: 23550 records
State Jharkhand: 4710 records
State Manipur: 6280 records
State Puducherry: 1570 records
State Chandigarh: 4710 records
State Nagaland: 1570 records
State Sikkim: 3140 records
State Mizoram: 1570 records
State Uttarakhand: 4710 records
State Himachal Pradesh: 1570 records
State Jammu and K

## Model Training Example
- Training a model for each state.

In [16]:
import json
# Create models directory if it doesn't exist
models_dir = "/kaggle/working/trained_models"
os.makedirs(models_dir, exist_ok=True)

# Train models for each state
state_results = {}
    
for state, df in state_data.items():
        # Skip states with insufficient data
        if len(df) < SEQUENCE_LENGTH * 2:
            print(f"Skipping state {state} due to insufficient data")
            continue
        
        # Create state directory for model artifacts
        state_model_dir = os.path.join(models_dir, state)
        os.makedirs(state_model_dir, exist_ok=True)
        
        # Preprocess data
        preprocessed_data = preprocess_state_data(df)
        
        # Train model
        state_results[state] = train_state_model(state, preprocessed_data)
        
        # Save model
        model_path = os.path.join(state_model_dir, f"{state}_model.h5")
        state_results[state]['model'].save(model_path)
        print(f"Model saved to {model_path}")
        
        # Save scaler using pickle
        import pickle
        scaler_path = os.path.join(state_model_dir, f"{state}_scaler.pkl")
        with open(scaler_path, 'wb') as f:
            pickle.dump(state_results[state]['scaler'], f)
        print(f"Scaler saved to {scaler_path}")
        
        # Save model configuration as JSON
        model_config = {
            'pollutants': POLLUTANTS,
            'sequence_length': SEQUENCE_LENGTH,
            'model_params': MODEL_PARAMS,
            'training_date': str(pd.Timestamp.now())
        }
        
        config_path = os.path.join(state_model_dir, f"{state}_config.json")
        with open(config_path, 'w') as f:
            json.dump(model_config, f, indent=4)
        print(f"Model configuration saved to {config_path}")
    


  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')



Training model for state: Delhi
Epoch 1/50


  super().__init__(**kwargs)


[1m2261/2261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 6ms/step - loss: 0.0126 - val_loss: 0.0041
Epoch 2/50
[1m2261/2261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 5ms/step - loss: 0.0041 - val_loss: 0.0039
Epoch 3/50
[1m2261/2261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 5ms/step - loss: 0.0038 - val_loss: 0.0038
Epoch 4/50
[1m2261/2261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 5ms/step - loss: 0.0036 - val_loss: 0.0039
Epoch 5/50
[1m2261/2261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 5ms/step - loss: 0.0035 - val_loss: 0.0037
Epoch 6/50
[1m2261/2261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 5ms/step - loss: 0.0034 - val_loss: 0.0036
Epoch 7/50
[1m2261/2261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 6ms/step - loss: 0.0034 - val_loss: 0.0036
Epoch 8/50
[1m2261/2261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 5ms/step - loss: 0.0034 - val_loss: 0.0036
Epoch 9/50
[1m2261/2261[0

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


Epoch 1/50
[1m1602/1602[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - loss: 0.0123 - val_loss: 0.0027
Epoch 2/50
[1m1602/1602[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - loss: 0.0035 - val_loss: 0.0027
Epoch 3/50
[1m1602/1602[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - loss: 0.0031 - val_loss: 0.0027
Epoch 4/50
[1m1602/1602[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - loss: 0.0029 - val_loss: 0.0026
Epoch 5/50
[1m1602/1602[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - loss: 0.0027 - val_loss: 0.0028
Epoch 6/50
[1m1602/1602[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - loss: 0.0027 - val_loss: 0.0027
Epoch 7/50
[1m1602/1602[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - loss: 0.0027 - val_loss: 0.0027
Epoch 8/50
[1m1602/1602[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - loss: 0.0027 - val_loss: 0.0026
Epoch 9/50
[1m1602/160

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


[1m597/597[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - loss: 0.0196 - val_loss: 0.0035
Epoch 2/50
[1m597/597[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - loss: 0.0063 - val_loss: 0.0031
Epoch 3/50
[1m597/597[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - loss: 0.0054 - val_loss: 0.0031
Epoch 4/50
[1m597/597[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - loss: 0.0051 - val_loss: 0.0030
Epoch 5/50
[1m597/597[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - loss: 0.0048 - val_loss: 0.0031
Epoch 6/50
[1m597/597[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - loss: 0.0047 - val_loss: 0.0030
Epoch 7/50
[1m597/597[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - loss: 0.0045 - val_loss: 0.0031
Epoch 8/50
[1m597/597[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - loss: 0.0044 - val_loss: 0.0030
Epoch 9/50
[1m597/597[0m [32m━━━━━━━━━━━━━━━━━━━

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


[1m566/566[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - loss: 0.0242 - val_loss: 0.0058
Epoch 2/50
[1m566/566[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - loss: 0.0069 - val_loss: 0.0056
Epoch 3/50
[1m566/566[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - loss: 0.0063 - val_loss: 0.0054
Epoch 4/50
[1m566/566[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 0.0057 - val_loss: 0.0054
Epoch 5/50
[1m566/566[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - loss: 0.0055 - val_loss: 0.0054
Epoch 6/50
[1m566/566[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - loss: 0.0053 - val_loss: 0.0055
Epoch 7/50
[1m566/566[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - loss: 0.0052 - val_loss: 0.0055
Epoch 8/50
[1m566/566[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 0.0049 - val_loss: 0.0053
Epoch 9/50
[1m566/566[0m [32m━━━━━━━━━━━━━━━━━━━

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


Epoch 1/50
[1m1508/1508[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - loss: 0.0111 - val_loss: 0.0021
Epoch 2/50
[1m1508/1508[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - loss: 0.0028 - val_loss: 0.0020
Epoch 3/50
[1m1508/1508[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - loss: 0.0024 - val_loss: 0.0020
Epoch 4/50
[1m1508/1508[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - loss: 0.0022 - val_loss: 0.0021
Epoch 5/50
[1m1508/1508[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - loss: 0.0021 - val_loss: 0.0020
Epoch 6/50
[1m1508/1508[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - loss: 0.0021 - val_loss: 0.0022
Epoch 7/50
[1m1508/1508[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - loss: 0.0021 - val_loss: 0.0019
Epoch 8/50
[1m1508/1508[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - loss: 0.0021 - val_loss: 0.0020
Epoch 9/50
[1m1508/150

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


Epoch 1/50
[1m1224/1224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - loss: 0.0106 - val_loss: 0.0033
Epoch 2/50
[1m1224/1224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 0.0034 - val_loss: 0.0032
Epoch 3/50
[1m1224/1224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - loss: 0.0031 - val_loss: 0.0031
Epoch 4/50
[1m1224/1224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 0.0030 - val_loss: 0.0031
Epoch 5/50
[1m1224/1224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 0.0029 - val_loss: 0.0031
Epoch 6/50
[1m1224/1224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 0.0028 - val_loss: 0.0031
Epoch 7/50
[1m1224/1224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 0.0029 - val_loss: 0.0031
Epoch 8/50
[1m1224/1224[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - loss: 0.0029 - val_loss: 0.0031
Epoch 9/50
[1m1224/1224

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


Epoch 1/50
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - loss: 0.0149 - val_loss: 0.0037
Epoch 2/50
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 0.0054 - val_loss: 0.0036
Epoch 3/50
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - loss: 0.0049 - val_loss: 0.0036
Epoch 4/50
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 0.0047 - val_loss: 0.0036
Epoch 5/50
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 0.0047 - val_loss: 0.0036
Epoch 6/50
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 0.0045 - val_loss: 0.0036
Epoch 7/50
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 0.0044 - val_loss: 0.0036
Epoch 8/50
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - loss: 0.0044 - val_loss: 0.0036
Epoch 9/50
[1m1382/1382

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - loss: 0.1448 - val_loss: 0.0149
Epoch 2/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0227 - val_loss: 0.0099
Epoch 3/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0196 - val_loss: 0.0099
Epoch 4/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0171 - val_loss: 0.0091
Epoch 5/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0156 - val_loss: 0.0090
Epoch 6/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0152 - val_loss: 0.0090
Epoch 7/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0141 - val_loss: 0.0089
Epoch 8/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0129 - val_loss: 0.0085
Epoch 9/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


[1m283/283[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - loss: 0.0442 - val_loss: 0.0051
Epoch 2/50
[1m283/283[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0086 - val_loss: 0.0048
Epoch 3/50
[1m283/283[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0073 - val_loss: 0.0047
Epoch 4/50
[1m283/283[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0067 - val_loss: 0.0048
Epoch 5/50
[1m283/283[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0065 - val_loss: 0.0048
Epoch 6/50
[1m283/283[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 0.0063 - val_loss: 0.0048
Epoch 7/50
[1m283/283[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0059 - val_loss: 0.0048
Epoch 8/50
[1m283/283[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0058 - val_loss: 0.0046
Epoch 9/50
[1m283/283[0m [32m━━━━━━━━━━━━━━━━━━━

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')



Training model for state: Uttar Pradesh
Epoch 1/50


  super().__init__(**kwargs)


[1m2073/2073[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 5ms/step - loss: 0.0094 - val_loss: 0.0042
Epoch 2/50
[1m2073/2073[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - loss: 0.0041 - val_loss: 0.0038
Epoch 3/50
[1m2073/2073[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - loss: 0.0038 - val_loss: 0.0038
Epoch 4/50
[1m2073/2073[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - loss: 0.0037 - val_loss: 0.0038
Epoch 5/50
[1m2073/2073[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - loss: 0.0036 - val_loss: 0.0038
Epoch 6/50
[1m2073/2073[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - loss: 0.0036 - val_loss: 0.0040
Epoch 7/50
[1m2073/2073[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - loss: 0.0037 - val_loss: 0.0038
Epoch 8/50
[1m2073/2073[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - loss: 0.0037 - val_loss: 0.0038
Epoch 9/50
[1m2073/2073[0

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')



Training model for state: Maharashtra
Epoch 1/50


  super().__init__(**kwargs)


[1m3172/3172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 5ms/step - loss: 0.0104 - val_loss: 0.0064
Epoch 2/50
[1m3172/3172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 5ms/step - loss: 0.0059 - val_loss: 0.0065
Epoch 3/50
[1m3172/3172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 5ms/step - loss: 0.0057 - val_loss: 0.0062
Epoch 4/50
[1m3172/3172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 5ms/step - loss: 0.0056 - val_loss: 0.0062
Epoch 5/50
[1m3172/3172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 5ms/step - loss: 0.0057 - val_loss: 0.0062
Epoch 6/50
[1m3172/3172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 5ms/step - loss: 0.0056 - val_loss: 0.0062
Epoch 7/50
[1m3172/3172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 5ms/step - loss: 0.0057 - val_loss: 0.0062
Epoch 8/50
[1m3172/3172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 5ms/step - loss: 0.0056 - val_loss: 0.0062
Epoch 9/50
[1m3172/3172[0

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


Epoch 1/50
[1m1037/1037[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - loss: 0.0163 - val_loss: 0.0027
Epoch 2/50
[1m1037/1037[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - loss: 0.0042 - val_loss: 0.0026
Epoch 3/50
[1m1037/1037[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - loss: 0.0036 - val_loss: 0.0027
Epoch 4/50
[1m1037/1037[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - loss: 0.0032 - val_loss: 0.0026
Epoch 5/50
[1m1037/1037[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - loss: 0.0032 - val_loss: 0.0025
Epoch 6/50
[1m1037/1037[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - loss: 0.0031 - val_loss: 0.0025
Epoch 7/50
[1m1037/1037[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - loss: 0.0030 - val_loss: 0.0026
Epoch 8/50
[1m1037/1037[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - loss: 0.0030 - val_loss: 0.0025
Epoch 9/50
[1m1037/1037

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


Epoch 1/50
[1m1225/1225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - loss: 0.0154 - val_loss: 0.0034
Epoch 2/50
[1m1225/1225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - loss: 0.0050 - val_loss: 0.0033
Epoch 3/50
[1m1225/1225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 0.0045 - val_loss: 0.0033
Epoch 4/50
[1m1225/1225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 0.0042 - val_loss: 0.0034
Epoch 5/50
[1m1225/1225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 0.0040 - val_loss: 0.0033
Epoch 6/50
[1m1225/1225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 0.0040 - val_loss: 0.0033
Epoch 7/50
[1m1225/1225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 0.0039 - val_loss: 0.0034
Epoch 8/50
[1m1225/1225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 0.0039 - val_loss: 0.0033
Epoch 9/50
[1m1225/1225

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


[1m942/942[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - loss: 0.0210 - val_loss: 0.0100
Epoch 2/50
[1m942/942[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - loss: 0.0108 - val_loss: 0.0099
Epoch 3/50
[1m942/942[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - loss: 0.0102 - val_loss: 0.0098
Epoch 4/50
[1m942/942[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - loss: 0.0098 - val_loss: 0.0096
Epoch 5/50
[1m942/942[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - loss: 0.0096 - val_loss: 0.0095
Epoch 6/50
[1m942/942[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - loss: 0.0095 - val_loss: 0.0095
Epoch 7/50
[1m942/942[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - loss: 0.0094 - val_loss: 0.0097
Epoch 8/50
[1m942/942[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - loss: 0.0093 - val_loss: 0.0093
Epoch 9/50
[1m942/942[0m [32m━━━━━━━━━━━━━━━━━━━

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


[1m283/283[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - loss: 0.0359 - val_loss: 0.0035
Epoch 2/50
[1m283/283[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 0.0069 - val_loss: 0.0036
Epoch 3/50
[1m283/283[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 0.0056 - val_loss: 0.0035
Epoch 4/50
[1m283/283[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0053 - val_loss: 0.0036
Epoch 5/50
[1m283/283[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0049 - val_loss: 0.0036
Epoch 6/50
[1m283/283[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0048 - val_loss: 0.0034
Epoch 7/50
[1m283/283[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0045 - val_loss: 0.0034
Epoch 8/50
[1m283/283[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0043 - val_loss: 0.0034
Epoch 9/50
[1m283/283[0m [32m━━━━━━━━━━━━━━━━━━━

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


[1m409/409[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - loss: 0.0266 - val_loss: 0.0058
Epoch 2/50
[1m409/409[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0086 - val_loss: 0.0052
Epoch 3/50
[1m409/409[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0076 - val_loss: 0.0049
Epoch 4/50
[1m409/409[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0071 - val_loss: 0.0050
Epoch 5/50
[1m409/409[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0067 - val_loss: 0.0048
Epoch 6/50
[1m409/409[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0066 - val_loss: 0.0048
Epoch 7/50
[1m409/409[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0064 - val_loss: 0.0055
Epoch 8/50
[1m409/409[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0064 - val_loss: 0.0048
Epoch 9/50
[1m409/409[0m [32m━━━━━━━━━━━━━━━━━━━

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - loss: 0.0792 - val_loss: 0.0068
Epoch 2/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0142 - val_loss: 0.0059
Epoch 3/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0127 - val_loss: 0.0060
Epoch 4/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0114 - val_loss: 0.0057
Epoch 5/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0103 - val_loss: 0.0060
Epoch 6/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0103 - val_loss: 0.0053
Epoch 7/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0095 - val_loss: 0.0053
Epoch 8/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0098 - val_loss: 0.0058
Epoch 9/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


[1m597/597[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - loss: 0.0221 - val_loss: 0.0049
Epoch 2/50
[1m597/597[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 0.0066 - val_loss: 0.0049
Epoch 3/50
[1m597/597[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 0.0058 - val_loss: 0.0047
Epoch 4/50
[1m597/597[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 0.0054 - val_loss: 0.0047
Epoch 5/50
[1m597/597[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 0.0052 - val_loss: 0.0048
Epoch 6/50
[1m597/597[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 0.0050 - val_loss: 0.0048
Epoch 7/50
[1m597/597[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 0.0049 - val_loss: 0.0047
Epoch 8/50
[1m597/597[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - loss: 0.0049 - val_loss: 0.0047
Epoch 9/50
[1m597/597[0m [32m━━━━━━━━━━━━━━━━━━━

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - loss: 0.1105 - val_loss: 0.0101
Epoch 2/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0178 - val_loss: 0.0092
Epoch 3/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0162 - val_loss: 0.0098
Epoch 4/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0146 - val_loss: 0.0087
Epoch 5/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0124 - val_loss: 0.0080
Epoch 6/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0114 - val_loss: 0.0074
Epoch 7/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0113 - val_loss: 0.0069
Epoch 8/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0106 - val_loss: 0.0066
Epoch 9/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - loss: 0.0330 - val_loss: 0.0045
Epoch 2/50
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0082 - val_loss: 0.0041
Epoch 3/50
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0070 - val_loss: 0.0040
Epoch 4/50
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0063 - val_loss: 0.0044
Epoch 5/50
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 0.0060 - val_loss: 0.0038
Epoch 6/50
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 0.0059 - val_loss: 0.0038
Epoch 7/50
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0056 - val_loss: 0.0038
Epoch 8/50
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0054 - val_loss: 0.0037
Epoch 9/50
[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - loss: 0.0222 - val_loss: 0.0030
Epoch 2/50
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 0.0054 - val_loss: 0.0028
Epoch 3/50
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 0.0047 - val_loss: 0.0029
Epoch 4/50
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 0.0044 - val_loss: 0.0029
Epoch 5/50
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 0.0041 - val_loss: 0.0028
Epoch 6/50
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - loss: 0.0039 - val_loss: 0.0028
Epoch 7/50
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - loss: 0.0038 - val_loss: 0.0028
Epoch 8/50
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 0.0037 - val_loss: 0.0027
Epoch 9/50
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - loss: 0.0540 - val_loss: 0.0074
Epoch 2/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0144 - val_loss: 0.0067
Epoch 3/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0121 - val_loss: 0.0059
Epoch 4/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0110 - val_loss: 0.0060
Epoch 5/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0104 - val_loss: 0.0062
Epoch 6/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0097 - val_loss: 0.0054
Epoch 7/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0089 - val_loss: 0.0054
Epoch 8/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0089 - val_loss: 0.0052
Epoch 9/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - loss: 0.0870 - val_loss: 0.0069
Epoch 2/50
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0154 - val_loss: 0.0054
Epoch 3/50
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0118 - val_loss: 0.0047
Epoch 4/50
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0101 - val_loss: 0.0044
Epoch 5/50
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0088 - val_loss: 0.0038
Epoch 6/50
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0080 - val_loss: 0.0037
Epoch 7/50
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0071 - val_loss: 0.0031
Epoch 8/50
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0068 - val_loss: 0.0033
Epoch 9/50
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - loss: 0.1356 - val_loss: 0.0104
Epoch 2/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0224 - val_loss: 0.0093
Epoch 3/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0181 - val_loss: 0.0088
Epoch 4/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0171 - val_loss: 0.0085
Epoch 5/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0161 - val_loss: 0.0078
Epoch 6/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0140 - val_loss: 0.0072
Epoch 7/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0147 - val_loss: 0.0069
Epoch 8/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0134 - val_loss: 0.0074
Epoch 9/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - loss: 0.0805 - val_loss: 0.0112
Epoch 2/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0156 - val_loss: 0.0094
Epoch 3/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.0130 - val_loss: 0.0087
Epoch 4/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0117 - val_loss: 0.0080
Epoch 5/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0107 - val_loss: 0.0076
Epoch 6/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0101 - val_loss: 0.0071
Epoch 7/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0089 - val_loss: 0.0068
Epoch 8/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0083 - val_loss: 0.0062
Epoch 9/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - loss: 0.1945 - val_loss: 0.0169
Epoch 2/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0307 - val_loss: 0.0129
Epoch 3/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0246 - val_loss: 0.0110
Epoch 4/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0224 - val_loss: 0.0109
Epoch 5/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0201 - val_loss: 0.0116
Epoch 6/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0194 - val_loss: 0.0098
Epoch 7/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0185 - val_loss: 0.0094
Epoch 8/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0172 - val_loss: 0.0097
Epoch 9/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 0.1212 - val_loss: 0.0065
Epoch 2/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0160 - val_loss: 0.0063
Epoch 3/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0135 - val_loss: 0.0056
Epoch 4/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0125 - val_loss: 0.0056
Epoch 5/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0111 - val_loss: 0.0053
Epoch 6/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0100 - val_loss: 0.0054
Epoch 7/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0101 - val_loss: 0.0051
Epoch 8/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0088 - val_loss: 0.0048
Epoch 9/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - loss: 0.1494 - val_loss: 0.0098
Epoch 2/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0265 - val_loss: 0.0076
Epoch 3/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0201 - val_loss: 0.0074
Epoch 4/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0183 - val_loss: 0.0072
Epoch 5/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0166 - val_loss: 0.0067
Epoch 6/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0149 - val_loss: 0.0068
Epoch 7/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0144 - val_loss: 0.0064
Epoch 8/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0152 - val_loss: 0.0063
Epoch 9/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - loss: 0.0859 - val_loss: 0.0087
Epoch 2/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0142 - val_loss: 0.0072
Epoch 3/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0118 - val_loss: 0.0075
Epoch 4/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0120 - val_loss: 0.0064
Epoch 5/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0099 - val_loss: 0.0064
Epoch 6/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0098 - val_loss: 0.0061
Epoch 7/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0085 - val_loss: 0.0061
Epoch 8/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0090 - val_loss: 0.0068
Epoch 9/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - loss: 0.1626 - val_loss: 0.0144
Epoch 2/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0218 - val_loss: 0.0121
Epoch 3/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0180 - val_loss: 0.0117
Epoch 4/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0156 - val_loss: 0.0114
Epoch 5/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0157 - val_loss: 0.0115
Epoch 6/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0154 - val_loss: 0.0103
Epoch 7/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0139 - val_loss: 0.0100
Epoch 8/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0132 - val_loss: 0.0100
Epoch 9/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - loss: 0.1338 - val_loss: 0.0173
Epoch 2/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0243 - val_loss: 0.0125
Epoch 3/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0186 - val_loss: 0.0100
Epoch 4/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0172 - val_loss: 0.0095
Epoch 5/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0163 - val_loss: 0.0101
Epoch 6/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0154 - val_loss: 0.0103
Epoch 7/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0157 - val_loss: 0.0093
Epoch 8/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0143 - val_loss: 0.0092
Epoch 9/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

  pollutant_data = pollutant_data.fillna(method='ffill').fillna(method='bfill')
  super().__init__(**kwargs)


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - loss: 0.1854 - val_loss: 0.0130
Epoch 2/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0236 - val_loss: 0.0080
Epoch 3/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0204 - val_loss: 0.0075
Epoch 4/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0176 - val_loss: 0.0071
Epoch 5/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0161 - val_loss: 0.0065
Epoch 6/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0158 - val_loss: 0.0062
Epoch 7/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0142 - val_loss: 0.0066
Epoch 8/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0141 - val_loss: 0.0063
Epoch 9/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

## Model Evaluation

- Evaluation of each trained model for each state and save the evaluation results

In [17]:
def evaluate_and_visualize(state_results, output_dir="evaluation_results"):
    """
    Evaluate models and visualize results, saving all outputs to structured directories
    
    Parameters:
    - state_results: Dictionary with results for each state
    - output_dir: Directory to save evaluation results
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    
    # Create directory for plots
    plots_dir = os.path.join(output_dir, "plots")
    os.makedirs(plots_dir, exist_ok=True)
    
    # Create directory for metrics
    metrics_dir = os.path.join(output_dir, "metrics")
    os.makedirs(metrics_dir, exist_ok=True)
    
    # Create summary dataframe for metrics across states
    metrics_summary = []
    
    for state, result in state_results.items():
        # Create state-specific directories
        state_dir = os.path.join(output_dir, state)
        os.makedirs(state_dir, exist_ok=True)
        
        # Save state metrics to CSV
        state_metrics = []
        for pollutant, metrics in result['metrics'].items():
            metrics_summary.append({
                'state': state,
                'pollutant': pollutant,
                'mse': metrics['mse'],
                'rmse': metrics['rmse'],
                'mae': metrics['mae'],
                'r2': metrics['r2']
            })
            state_metrics.append({
                'pollutant': pollutant,
                'mse': metrics['mse'],
                'rmse': metrics['rmse'],
                'mae': metrics['mae'],
                'r2': metrics['r2']
            })
        
        # Save state-specific metrics
        pd.DataFrame(state_metrics).to_csv(os.path.join(state_dir, f"{state}_metrics.csv"), index=False)
        
        # Plot and save comparison of predicted vs actual for each pollutant in the state
        plt.figure(figsize=(20, 15))
        
        for i, pollutant in enumerate(POLLUTANTS):
            plt.subplot(4, 2, i+1)
            
            # Plot actual vs predicted
            plt.plot(result['y_test'][:, i], label='Actual', alpha=0.7)
            plt.plot(result['y_pred'][:, i], label='Predicted', alpha=0.7)
            
            plt.title(f'{state} - {pollutant}')
            plt.xlabel('Time')
            plt.ylabel('Value')
            plt.legend()
            
            # Save individual pollutant plot
            plt.figure(figsize=(10, 6))
            plt.plot(result['y_test'][:, i], label='Actual', alpha=0.7)
            plt.plot(result['y_pred'][:, i], label='Predicted', alpha=0.7)
            plt.title(f'{state} - {pollutant}')
            plt.xlabel('Time')
            plt.ylabel('Value')
            plt.legend()
            plt.tight_layout()
            plt.savefig(os.path.join(state_dir, f"{state}_{pollutant}_prediction.png"))
            plt.close()
        
        plt.tight_layout()
        plt.savefig(os.path.join(state_dir, f"{state}_all_predictions.png"))
        plt.savefig(os.path.join(plots_dir, f"{state}_all_predictions.png"))
        plt.close()
        
        # Plot and save training history
        plt.figure(figsize=(10, 6))
        plt.plot(result['history'].history['loss'], label='Training Loss')
        plt.plot(result['history'].history['val_loss'], label='Validation Loss')
        plt.title(f'{state} - Training History')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        plt.savefig(os.path.join(state_dir, f"{state}_training_history.png"))
        plt.savefig(os.path.join(plots_dir, f"{state}_training_history.png"))
        plt.close()
        
        # Save actual vs predicted values as CSV
        pred_df = pd.DataFrame()
        for i, pollutant in enumerate(POLLUTANTS):
            pred_df[f"{pollutant}_actual"] = result['y_test'][:, i]
            pred_df[f"{pollutant}_predicted"] = result['y_pred'][:, i]
        
        pred_df.to_csv(os.path.join(state_dir, f"{state}_predictions.csv"), index=False)
    
    # Create and save overall metrics dataframe
    metrics_df = pd.DataFrame(metrics_summary)
    metrics_df.to_csv(os.path.join(metrics_dir, "all_metrics.csv"), index=False)
    
    # Save average metrics by pollutant - FIX: explicitly select numeric columns only
    numeric_columns = ['mse', 'rmse', 'mae', 'r2']
    avg_metrics = metrics_df.groupby('pollutant')[numeric_columns].mean()
    avg_metrics.to_csv(os.path.join(metrics_dir, "avg_metrics_by_pollutant.csv"))
    print("\nAverage metrics across all states:")
    print(avg_metrics)
    
    # Save average metrics by state - FIX: explicitly select numeric columns only
    state_avg_metrics = metrics_df.groupby('state')[numeric_columns].mean()
    state_avg_metrics.to_csv(os.path.join(metrics_dir, "avg_metrics_by_state.csv"))
    
    # Create and save heatmap of R² scores by pollutant and state
    r2_matrix = metrics_df.pivot(index='state', columns='pollutant', values='r2')
    r2_matrix.to_csv(os.path.join(metrics_dir, "r2_matrix.csv"))
    
    plt.figure(figsize=(12, 10))
    sns.heatmap(r2_matrix, annot=True, cmap='YlGnBu', fmt='.2f')
    plt.title('R² Score by State and Pollutant')
    plt.tight_layout()
    plt.savefig(os.path.join(plots_dir, 'r2_heatmap.png'))
    plt.close()
    
    # Create and save heatmaps for other metrics
    for metric in ['mse', 'rmse', 'mae']:
        metric_matrix = metrics_df.pivot(index='state', columns='pollutant', values=metric)
        metric_matrix.to_csv(os.path.join(metrics_dir, f"{metric}_matrix.csv"))
        
        plt.figure(figsize=(12, 10))
        sns.heatmap(metric_matrix, annot=True, cmap='YlGnBu', fmt='.2f')
        plt.title(f'{metric.upper()} by State and Pollutant')
        plt.tight_layout()
        plt.savefig(os.path.join(plots_dir, f'{metric}_heatmap.png'))
        plt.close()
    
    # Generate a summary report in HTML format
    html_report = f"""
    <html>
    <head>
        <title>Air Pollutant Prediction Results</title>
        <style>
            body {{ font-family: Arial, sans-serif; margin: 20px; }}
            h1, h2, h3 {{ color: #2c3e50; }}
            table {{ border-collapse: collapse; width: 100%; margin-bottom: 20px; }}
            th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
            th {{ background-color: #f2f2f2; }}
            tr:nth-child(even) {{ background-color: #f9f9f9; }}
            .metric-summary {{ margin-bottom: 30px; }}
            img {{ max-width: 100%; height: auto; margin: 10px 0; }}
        </style>
    </head>
    <body>
        <h1>Air Pollutant Prediction - Model Evaluation Report</h1>
        <p>Generated on {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
        
        <div class="metric-summary">
            <h2>Average Metrics by Pollutant</h2>
            {avg_metrics.to_html()}
        </div>
        
        <div class="metric-summary">
            <h2>Average Metrics by State</h2>
            {state_avg_metrics.to_html()}
        </div>
        
        <h2>Performance Heatmaps</h2>
        <p>R² Score by State and Pollutant:</p>
        <img src="plots/r2_heatmap.png" alt="R² Heatmap">
        
        <p>RMSE by State and Pollutant:</p>
        <img src="plots/rmse_heatmap.png" alt="RMSE Heatmap">
        
        <h2>State-wise Results</h2>
    """
    
    # Add links to individual state results
    for state in state_results.keys():
        html_report += f"""
        <h3>{state}</h3>
        <p><a href="{state}/{state}_metrics.csv">Download Metrics CSV</a></p>
        <p><a href="{state}/{state}_predictions.csv">Download Predictions CSV</a></p>
        <p>All Pollutants Prediction:</p>
        <img src="{state}/{state}_all_predictions.png" alt="{state} Predictions">
        <p>Training History:</p>
        <img src="{state}/{state}_training_history.png" alt="{state} Training History">
        """
    
    html_report += """
    </body>
    </html>
    """
    
    with open(os.path.join(output_dir, "evaluation_report.html"), "w") as f:
        f.write(html_report)
    
    print(f"\nEvaluation results saved to {output_dir}/")
    print(f"Summary report generated at {output_dir}/evaluation_report.html")

In [18]:

evaluate_and_visualize(state_results, output_dir="/kaggle/working/all_aqi_prediction_results")


Average metrics across all states:
                     mse        rmse         mae        r2
pollutant                                                 
co         105177.476211  238.499360  166.616218  0.556043
nh3            23.303343    4.011266    2.729032  0.502956
no             23.908908    3.185542    1.818769  0.195052
no2          1186.300616   34.377017    9.665001  0.087585
o3            394.847555   19.354378   12.513625  0.490102
pm10         1415.324934   30.462693   19.899038  0.696800
pm2_5         933.988980   25.003578   16.635395  0.747214
so2           140.841294    8.459181    6.302401  0.316944

Evaluation results saved to /kaggle/working/all_aqi_prediction_results/
Summary report generated at /kaggle/working/all_aqi_prediction_results/evaluation_report.html


In [19]:
import shutil

# Define the folder path and the output zip file path (without extension)
folder_path = "/kaggle/working/all_aqi_prediction_results"
zip_basename = folder_path  # This will create "aqi_prediction_results.zip"

# Zip the folder
shutil.make_archive(zip_basename, 'zip', folder_path)

print("Folder zipped successfully!")


Folder zipped successfully!


In [20]:
import shutil

# Define the folder path and the output zip file path (without extension)
folder_path = "/kaggle/working/trained_models"
zip_basename = folder_path  # This will create "aqi_prediction_results.zip"

# Zip the folder
shutil.make_archive(zip_basename, 'zip', folder_path)

print("Folder zipped successfully!")


Folder zipped successfully!


## Function to load the saved model and predict for any sequence of new data

In [None]:
def load_model_and_predict(state, new_data, models_dir="trained_models"):
    """
    Load a trained model and its scaler for a specific state and make predictions
    
    Parameters:
    - state: State name for which to load the model
    - new_data: DataFrame with new data for prediction
    - models_dir: Directory where models are saved
    
    Returns:
    - DataFrame with predictions
    """
    import pickle
    import json
    import numpy as np
    import pandas as pd
    import tensorflow as tf
    
    # Paths
    state_model_dir = os.path.join(models_dir, state)
    model_path = os.path.join(state_model_dir, f"{state}_model.h5")
    scaler_path = os.path.join(state_model_dir, f"{state}_scaler.pkl")
    config_path = os.path.join(state_model_dir, f"{state}_config.json")
    
    # Load model
    model = tf.keras.models.load_model(model_path)
    print(f"Loaded model from {model_path}")
    
    # Load scaler
    with open(scaler_path, 'rb') as f:
        scaler = pickle.load(f)
    print(f"Loaded scaler from {scaler_path}")
    
    # Load configuration
    with open(config_path, 'r') as f:
        config = json.load(f)
    
    pollutants = config['pollutants']
    sequence_length = config['sequence_length']
    
    # Prepare new data
    # Extract only pollutant columns
    input_data = new_data[pollutants].copy()
    
    # Handle missing values
    input_data = input_data.fillna(method='ffill').fillna(method='bfill')
    
    # Scale the data
    scaled_input = scaler.transform(input_data)
    
    # Create sequences
    X = []
    for i in range(len(scaled_input) - sequence_length + 1):
        X.append(scaled_input[i:i+sequence_length])
    
    X = np.array(X)
    
    # Make predictions
    scaled_predictions = model.predict(X)
    
    # Inverse transform predictions
    predictions = scaler.inverse_transform(scaled_predictions)
    
    # Create result DataFrame
    result_df = pd.DataFrame()
    
    if 'date' in new_data.columns:
        # Shift dates forward by sequence_length to align with predictions
        result_df['date'] = new_data['date'].iloc[sequence_length:].reset_index(drop=True)
    
    # Add predictions for each pollutant
    for i, pollutant in enumerate(pollutants):
        result_df[f'{pollutant}_predicted'] = predictions[:, i]
    
    return result_df

# Example usage:
"""
# Load new data for a specific state
new_data = pd.read_csv('path/to/new_data.csv')

# Make predictions
predictions = load_model_and_predict('Maharashtra', new_data)
print(predictions.head())

# Plot predictions
import matplotlib.pyplot as plt

plt.figure(figsize=(15, 10))
for i, pollutant in enumerate(['pm2_5', 'pm10', 'so2', 'no', 'no2', 'nh3', 'o3', 'co']):
    plt.subplot(4, 2, i+1)
    plt.plot(predictions['date'], predictions[f'{pollutant}_predicted'])
    plt.title(f'Predicted {pollutant}')
    plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
"""