In [None]:
# Dataset preparation: generate a synthetic traffic dataset and split it into train/validation/test CSV files.
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

def generate_timestamps():
    """Build one (timestamp, day-name) pair per second of a daily 30-minute window.

    The window starts at 15:00:00 and lasts 1800 seconds. It is repeated for
    seven consecutive days beginning on 2025-04-07, which is labelled
    "Monday"; each following day gets the next weekday name.

    Returns:
        list[tuple[datetime, str]]: 7 * 1800 pairs ordered by day, then second.
    """
    weekday_names = ("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday")
    first_window_start = datetime(2025, 4, 7, 15, 0, 0)
    return [
        (first_window_start + timedelta(days=day_offset) + timedelta(seconds=second), name)
        for day_offset, name in enumerate(weekday_names)
        for second in range(1800)
    ]


def generate_synthetic_dataset(seed=None, timestamps=None):
    """Generate the synthetic traffic dataset as a DataFrame.

    For every (timestamp, day) pair two rows are produced, one with
    ``red_light_in_less_than_60_sec`` set to "Yes" and one set to "No".

    Rules used to draw the counts (upper bounds of ``randint`` are exclusive):
      * Weekday and minute 0-5 ("rush" rows): current counts are 4 or 5.
      * Other weekday rows: current counts are 1 or 2.
      * Weekend rows: current counts are 0, 1 or 2.
      * Red light "Yes": future = current + an increase of 3..6.
      * Red light "No" on a rush row: future is 7 or 8, independent of current.
      * Red light "No" otherwise: future = current + 0/1/2 with
        probabilities 0.7/0.2/0.1.

    Args:
        seed: Optional integer seed. When given, a private
            ``np.random.RandomState(seed)`` is used so the output is
            reproducible and NumPy's global RNG state is left untouched.
            When None (default) the global ``np.random`` generator is used,
            exactly as before.
        timestamps: Optional iterable of ``(datetime, day_name)`` pairs.
            Defaults to ``generate_timestamps()``.

    Returns:
        pandas.DataFrame with columns ``timestamp``, ``day``,
        ``red_light_in_less_than_60_sec``, ``current_cctv_left``,
        ``current_cctv_right``, ``vehicles_in_60s_left`` and
        ``vehicles_in_60s_right``.
    """
    if timestamps is None:
        timestamps = generate_timestamps()

    # The legacy RandomState exposes the same randint/choice API as the
    # module-level functions, so both paths draw values the same way.
    rng = np.random if seed is None else np.random.RandomState(seed)

    weekdays = {"Monday", "Tuesday", "Wednesday", "Thursday", "Friday"}
    data = {
        'timestamp': [],
        'day': [],
        'red_light_in_less_than_60_sec': [],
        'current_cctv_left': [],
        'current_cctv_right': [],
        'vehicles_in_60s_left': [],
        'vehicles_in_60s_right': []
    }

    for ts, day in timestamps:
        is_weekday = day in weekdays
        # Busy period: first six minutes (0-5) of the window on weekdays.
        is_rush = is_weekday and 0 <= ts.minute <= 5

        for red_light in ["Yes", "No"]:
            if is_rush:
                current_left = rng.randint(4, 6)
                current_right = rng.randint(4, 6)
            else:
                # Weekdays never show an empty lane; weekends may.
                low = 1 if is_weekday else 0
                current_left = rng.randint(low, 3)
                current_right = rng.randint(low, 3)

            if red_light == "Yes":
                increase_left = rng.randint(3, 7)
                increase_right = rng.randint(3, 7)
                future_left = current_left + increase_left
                future_right = current_right + increase_right
            elif is_rush:
                future_left = rng.randint(7, 9)
                future_right = rng.randint(7, 9)
            else:
                change_left = rng.choice([0, 1, 2], p=[0.7, 0.2, 0.1])
                change_right = rng.choice([0, 1, 2], p=[0.7, 0.2, 0.1])
                future_left = current_left + change_left
                future_right = current_right + change_right

            # Defensive clamp; counts can never be negative.
            future_left = max(0, future_left)
            future_right = max(0, future_right)

            data['timestamp'].append(ts)
            data['day'].append(day)
            data['red_light_in_less_than_60_sec'].append(red_light)
            data['current_cctv_left'].append(current_left)
            data['current_cctv_right'].append(current_right)
            data['vehicles_in_60s_left'].append(future_left)
            data['vehicles_in_60s_right'].append(future_right)

    return pd.DataFrame(data)

def split_dataset(df, train_frac=0.7, valid_frac=0.15, random_state=42):
    """Shuffle ``df`` and split it into train, validation and test frames.

    The rows are shuffled with ``df.sample(frac=1, random_state=...)`` and the
    index is reset before slicing, so the original row order is not kept in
    any split.

    Args:
        df: DataFrame to split.
        train_frac: Fraction of rows for the training split (default 0.7).
        valid_frac: Fraction of rows for the validation split (default 0.15).
        random_state: Seed forwarded to ``DataFrame.sample`` (default 42).

    Returns:
        tuple: ``(train_df, valid_df, test_df)``; the test split receives all
        rows left after the (truncated) train and validation sizes.

    Raises:
        ValueError: if a fraction is outside [0, 1] or the two sum to more
            than 1.
    """
    if not (0 <= train_frac <= 1 and 0 <= valid_frac <= 1):
        raise ValueError("train_frac and valid_frac must each be within [0, 1]")
    # Small tolerance so float sums such as 0.85 + 0.15 are not rejected.
    if train_frac + valid_frac > 1 + 1e-9:
        raise ValueError("train_frac + valid_frac must not exceed 1")

    total_size = len(df)
    train_end = int(total_size * train_frac)
    valid_end = train_end + int(total_size * valid_frac)

    df_shuffled = df.sample(frac=1, random_state=random_state).reset_index(drop=True)

    train_df = df_shuffled.iloc[:train_end]
    valid_df = df_shuffled.iloc[train_end:valid_end]
    test_df = df_shuffled.iloc[valid_end:]

    return train_df, valid_df, test_df

# Build the full synthetic dataset, then carve it into the three splits.
dataset = generate_synthetic_dataset()
train_df, valid_df, test_df = split_dataset(dataset)

# Persist each split as a CSV file without the row index.
for _split_frame, _csv_path in (
    (train_df, "/content/train_dataset_7days_3pm_330pm_25200.csv"),
    (valid_df, "/content/valid_dataset_7days_3pm_330pm_25200.csv"),
    (test_df, "/content/test_dataset_7days_3pm_330pm_25200.csv"),
):
    _split_frame.to_csv(_csv_path, index=False)

# Show the first rows of every split.
for _heading, _split_frame in (
    ("Training Data Sample:\n", train_df),
    ("\nValidation Data Sample:\n", valid_df),
    ("\nTest Data Sample:\n", test_df),
):
    print(_heading, _split_frame.head())

print("\nDataset Sizes:")
print(f"Train: {len(train_df)}, Validation: {len(valid_df)}, Test: {len(test_df)}")

# Sanity checks on the full (unsplit) dataset: category balance and time span.
for _heading, _column in (
    ("\nRed Light Distribution in Full Dataset:", 'red_light_in_less_than_60_sec'),
    ("\nDay Distribution in Full Dataset:", 'day'),
):
    print(_heading)
    print(dataset[_column].value_counts())

print("\nTimestamp Range Check:")
print(f"Min: {dataset['timestamp'].min()}, Max: {dataset['timestamp'].max()}")

In [None]:
import os
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from tensorflow.keras.models import Sequential, load_model, Model
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input, Concatenate, Lambda, Multiply
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.model_selection import KFold
import joblib
from datetime import datetime

# Detect GPUs; when present, ask TensorFlow to allocate GPU memory on demand
# (memory growth) instead of configuring nothing.
physical_devices = tf.config.list_physical_devices('GPU')
if not physical_devices:
    print("No GPU available, using CPU")
else:
    print(f"GPU devices available: {len(physical_devices)}")
    try:
        for device in physical_devices:
            tf.config.experimental.set_memory_growth(device, True)
    except RuntimeError as growth_error:
        print(f"Memory growth setting error: {growth_error}")
    else:
        # Reached only when every device accepted the setting.
        print("Using GPU for training with memory growth enabled")

# Load the splits written by the dataset-preparation cell.
train_df = pd.read_csv("/content/train_dataset_7days_3pm_330pm_25200.csv")
valid_df = pd.read_csv("/content/valid_dataset_7days_3pm_330pm_25200.csv")

# Day names -> integer codes; the encoder is fitted on the training split only.
le_day = LabelEncoder()
train_df['day'] = le_day.fit_transform(train_df['day'])
valid_df['day'] = le_day.transform(valid_df['day'])

# Red-light flag -> 0/1, plus a copy of the flag multiplied by 3.
_yes_no_codes = {'Yes': 1, 'No': 0}
for _frame in (train_df, valid_df):
    _frame['red_light_in_less_than_60_sec'] = _frame['red_light_in_less_than_60_sec'].map(_yes_no_codes)
for _frame in (train_df, valid_df):
    _frame['red_light_importance'] = _frame['red_light_in_less_than_60_sec'] * 3

# Column layout of the scaled matrices: the feature columns first, then the targets.
features = ['current_cctv_left', 'current_cctv_right', 'day', 'red_light_in_less_than_60_sec', 'red_light_importance']
target = ['vehicles_in_60s_left', 'vehicles_in_60s_right']
_model_columns = features + target

# Min-max scale features and targets together; fitted on the training split only.
scaler = MinMaxScaler()
train_scaled = scaler.fit_transform(train_df[_model_columns])
valid_scaled = scaler.transform(valid_df[_model_columns])

# Keep the fitted preprocessors so inference can reuse them.
joblib.dump(scaler, 'scaler.pkl')
joblib.dump(le_day, 'label_encoder_day.pkl')

def create_sequences(data, lookback=10, n_features=None):
    """Slice a scaled 2-D array into LSTM windows and their targets.

    Each sample is a window of ``lookback`` consecutive rows whose LAST row is
    the row the target belongs to. The previous implementation used the rows
    strictly before the target row, so the features of the row being predicted
    (e.g. its red-light flag) were never part of the input, while the rest of
    the notebook reads ``X[:, -1, 3]`` as that sample's red-light flag and
    builds inference windows from the current observation.

    Args:
        data: 2-D array whose first ``n_features`` columns are inputs and whose
            remaining columns are targets.
        lookback: Number of rows per window (must be >= 1).
        n_features: Number of leading feature columns. Defaults to
            ``len(features)`` from the notebook's global ``features`` list.

    Returns:
        tuple: ``(X, y)`` where ``X`` has shape
        ``(len(data) - lookback + 1, lookback, n_features)`` and ``y`` has
        shape ``(len(data) - lookback + 1, n_targets)``. Both are empty arrays
        when ``data`` has fewer than ``lookback`` rows.

    Raises:
        ValueError: if ``lookback`` is smaller than 1.
    """
    if lookback < 1:
        raise ValueError("lookback must be at least 1")
    if n_features is None:
        n_features = len(features)

    X, y = [], []
    # `end` is the index of the row whose target is predicted; the window
    # covers rows end-lookback+1 .. end inclusive.
    for end in range(lookback - 1, len(data)):
        X.append(data[end - lookback + 1:end + 1, :n_features])
        y.append(data[end, n_features:])
    return np.array(X), np.array(y)

# Number of time steps in every LSTM input window.
lookback = 10

# Turn the scaled matrices into (samples, lookback, features) inputs and targets.
X_train, y_train = create_sequences(train_scaled, lookback)
X_valid, y_valid = create_sequences(valid_scaled, lookback)

print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_valid shape: {X_valid.shape}, y_valid shape: {y_valid.shape}")

# Keras callbacks driven by validation loss: stop after 5 epochs without
# improvement (restoring the best weights) and checkpoint the best model.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
model_checkpoint = ModelCheckpoint(
    'best_model.h5',
    monitor='val_loss',
    save_best_only=True,
)
callbacks = [early_stopping, model_checkpoint]

def create_lstm_model_with_red_light_attention(input_shape):
    """Build and compile the two-layer LSTM regressor with a red-light gate.

    Architecture:
      * LSTM(64, return_sequences=True) -> Dropout(0.2) -> LSTM(32) ->
        Dropout(0.2) encodes the input window into a 32-d vector.
      * Channel 3 of the input (the red-light flag in the notebook's
        ``features`` list) is taken at the last time step and passed through
        Dense(32, sigmoid) to form a gate.
      * The encoding is multiplied element-wise by the gate and concatenated
        with the ungated encoding, then fed to Dense(32, relu) ->
        Dropout(0.2) -> Dense(16, relu) -> Dense(2).

    Args:
        input_shape: ``(time_steps, n_features)`` of one input window; the
            model indexes feature channel 3, so at least 4 features are needed.

    Returns:
        A Keras ``Model`` compiled with the Adam optimizer and MSE loss.
    """
    # Options shared by both recurrent layers.
    lstm_options = dict(
        activation='tanh',
        recurrent_activation='sigmoid',
        recurrent_dropout=0.0,
        unroll=False,
        use_bias=True,
    )

    sequence_in = Input(shape=input_shape)
    # Keep only the red-light channel, preserving the feature axis (3:4).
    red_light_channel = Lambda(lambda x: x[:, :, 3:4])(sequence_in)

    # Recurrent encoder.
    hidden = LSTM(64, return_sequences=True, **lstm_options)(sequence_in)
    hidden = Dropout(0.2)(hidden)
    hidden = LSTM(32, **lstm_options)(hidden)
    encoded = Dropout(0.2)(hidden)

    # Gate derived from the red-light flag at the final time step.
    last_red_light = Lambda(lambda x: x[:, -1:, 0])(red_light_channel)
    gate = Dense(32, activation='sigmoid')(last_red_light)
    gated = Multiply()([encoded, gate])

    # Regression head over the plain and gated encodings.
    merged = Concatenate()([encoded, gated])
    head = Dense(32, activation='relu')(merged)
    head = Dropout(0.2)(head)
    head = Dense(16, activation='relu')(head)
    prediction = Dense(2)(head)

    model = Model(inputs=sequence_in, outputs=prediction)
    model.compile(optimizer='adam', loss='mse')
    return model

def calculate_rmse(y_true, y_pred):
    """Return the root-mean-squared error over all elements of the two arrays."""
    residual = y_true - y_pred
    mean_squared_error = np.mean(np.square(residual))
    return np.sqrt(mean_squared_error)

def analyze_red_light_impact(model, X, y):
    """Compare prediction error for windows with and without a red light.

    A window counts as "red light" when feature channel 3 at its last time
    step is greater than 0.5, and as "no red light" when it is <= 0.5.

    Args:
        model: Object exposing ``predict(X, verbose=0)``.
        X: Input windows, indexed as ``X[sample][-1, 3]``.
        y: Targets aligned with ``X``.

    Returns:
        dict with ``red_light_count``, ``no_red_light_count``,
        ``red_light_rmse`` and ``no_red_light_rmse``; an RMSE is None when its
        group has no samples.
    """
    with_red, without_red = [], []
    for sample_idx, window in enumerate(X):
        flag = window[-1, 3]
        if flag > 0.5:
            with_red.append(sample_idx)
        elif flag <= 0.5:
            without_red.append(sample_idx)

    all_preds = model.predict(X, verbose=0)

    def group_rmse(indices):
        # RMSE restricted to the given sample indices; None for an empty group.
        if len(indices) > 0:
            return calculate_rmse(y[indices], all_preds[indices])
        return None

    red_light_rmse = group_rmse(with_red)
    no_red_light_rmse = group_rmse(without_red)
    return {
        'red_light_count': len(with_red),
        'no_red_light_count': len(without_red),
        'red_light_rmse': red_light_rmse,
        'no_red_light_rmse': no_red_light_rmse
    }

num_models = 3
ensemble_models = []
fold_histories = []
all_train_rmse = []
all_val_rmse = []


def _format_rmse(value):
    """Render an RMSE for log lines; 'n/a' when the subgroup was empty (None)."""
    return "n/a" if value is None else f"{value:.4f}"


class RMSECallback(tf.keras.callbacks.Callback):
    """Record train/validation RMSE after every epoch and log red-light impact.

    Args:
        X_fit, y_fit: The arrays the model is being fitted on.
        train_log, val_log: Lists that receive one RMSE value per epoch.
    """

    def __init__(self, X_fit, y_fit, train_log, val_log):
        super().__init__()
        self.X_fit = X_fit
        self.y_fit = y_fit
        self.train_log = train_log
        self.val_log = val_log

    def on_epoch_end(self, epoch, logs=None):
        train_rmse_val = calculate_rmse(self.y_fit, self.model.predict(self.X_fit, verbose=0))
        self.train_log.append(train_rmse_val)
        val_rmse_val = calculate_rmse(y_valid, self.model.predict(X_valid, verbose=0))
        self.val_log.append(val_rmse_val)
        # Detailed red-light breakdown on the first epoch and every fifth one.
        if (epoch + 1) % 5 == 0 or epoch == 0:
            impact = analyze_red_light_impact(self.model, X_valid, y_valid)
            print(f"Red light analysis - Cases with red light: {impact['red_light_count']}, RMSE: {_format_rmse(impact['red_light_rmse'])}, Cases without: {impact['no_red_light_count']}, RMSE: {_format_rmse(impact['no_red_light_rmse'])}")
        print(f"Epoch {epoch+1}: Train RMSE = {train_rmse_val:.4f}, Val RMSE = {val_rmse_val:.4f}")


print("Training ensemble of models with enhanced red light feature importance...")
for model_idx in range(num_models):
    print(f"\nTraining model {model_idx+1}/{num_models}")
    model = create_lstm_model_with_red_light_attention((lookback, len(features)))

    # Each ensemble member sees a different view of the training data:
    #   model 0 - the data as is,
    #   model 1 - Gaussian noise added and the red-light channel scaled by 1.5,
    #   others  - a resample balanced between red-light and non-red-light windows.
    X_train_model, y_train_model = X_train, y_train
    if model_idx == 1:
        # The addition creates a new array, so X_train itself is not modified.
        X_train_model = X_train + np.random.normal(0, 0.01, X_train.shape)
        # NOTE(review): this scaling is applied to training inputs only, not to
        # validation or inference inputs - confirm that is intended.
        X_train_model[:, :, 3] *= 1.5
    elif model_idx >= 2:
        last_step_flag = X_train[:, -1, 3]
        red_light_samples = np.flatnonzero(last_step_flag > 0.5)
        non_red_light_samples = np.flatnonzero(last_step_flag <= 0.5)
        if len(red_light_samples) > 0 and len(non_red_light_samples) > 0:
            n_draw = min(len(red_light_samples) * 2, len(non_red_light_samples))
            # Sample with replacement only when more rows are requested than
            # exist (the old condition `2*n > n` was always true).
            red_pick = np.random.choice(red_light_samples, size=n_draw, replace=n_draw > len(red_light_samples))
            # n_draw never exceeds the non-red pool, so no replacement is needed.
            non_red_pick = np.random.choice(non_red_light_samples, size=n_draw, replace=False)
            balanced_indices = np.concatenate([red_pick, non_red_pick])
            np.random.shuffle(balanced_indices)
            X_train_model = X_train[balanced_indices]
            y_train_model = y_train[balanced_indices]

    train_rmse = []
    val_rmse = []
    # NOTE(review): the shared `callbacks` list (early stopping + checkpoint to
    # 'best_model.h5') is reused for every member - confirm sharing the
    # checkpoint file across models is intended.
    callbacks_with_rmse = callbacks + [RMSECallback(X_train_model, y_train_model, train_rmse, val_rmse)]
    history = model.fit(
        X_train_model, y_train_model,
        epochs=15,
        batch_size=64,
        validation_data=(X_valid, y_valid),
        callbacks=callbacks_with_rmse,
        verbose=1
    )
    model.save(f"lstm_traffic_model_{model_idx}.h5")
    ensemble_models.append(model)

    history_dict = history.history
    history_dict['train_rmse'] = train_rmse
    history_dict['val_rmse'] = val_rmse
    fold_histories.append(history_dict)
    all_train_rmse.append(train_rmse)
    all_val_rmse.append(val_rmse)

    impact = analyze_red_light_impact(model, X_valid, y_valid)
    print(f"\nFinal Red Light Impact Analysis for Model {model_idx+1}:")
    print(f"- Cases with red light: {impact['red_light_count']}, RMSE: {_format_rmse(impact['red_light_rmse'])}")
    print(f"- Cases without red light: {impact['no_red_light_count']}, RMSE: {_format_rmse(impact['no_red_light_rmse'])}")
    # val_rmse[-1] is the value recorded after the last epoch that ran.
    print(f"Model {model_idx+1} final validation RMSE: {val_rmse[-1]:.4f}")

# ---- Ensemble diagnostics figure (3 x 2 panels) ----

def _finish_axes(ax, title, xlabel, ylabel):
    """Apply title, axis labels, legend and grid to one panel."""
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.legend()
    ax.grid(True)


# Predict once on the whole validation set; the ensemble output is the mean
# of the member predictions. The preview panels reuse a slice of it.
all_preds = np.array([model.predict(X_valid, verbose=0) for model in ensemble_models])
ensemble_preds_all = np.mean(all_preds, axis=0)

# Show at most 100 samples, but never more than are available.
n_preview = min(100, len(X_valid))
preview_x = np.arange(n_preview)
all_model_preds = [member_preds[:n_preview] for member_preds in all_preds]
ensemble_preds = ensemble_preds_all[:n_preview]

# Split validation windows by the red-light flag at their last time step.
red_light_indices = np.flatnonzero(X_valid[:, -1, 3] > 0.5)
no_red_light_indices = np.flatnonzero(X_valid[:, -1, 3] <= 0.5)

fig, axes = plt.subplots(3, 2, figsize=(18, 15))
ax_loss, ax_rmse, ax_left, ax_right, ax_hist, ax_scatter = axes.ravel()

# Panel 1: Keras loss curves per model.
for i, history in enumerate(fold_histories):
    ax_loss.plot(history['loss'], linestyle='--', label=f'Model {i+1} train loss')
    ax_loss.plot(history['val_loss'], label=f'Model {i+1} val loss')
_finish_axes(ax_loss, 'Mean Squared Error Loss for Each Model', 'Epoch', 'MSE Loss')

# Panel 2: RMSE curves recorded by the RMSE callback.
for i in range(len(ensemble_models)):
    ax_rmse.plot(all_train_rmse[i], linestyle='--', label=f'Model {i+1} train RMSE')
    ax_rmse.plot(all_val_rmse[i], label=f'Model {i+1} val RMSE')
_finish_axes(ax_rmse, 'Root Mean Squared Error for Each Model', 'Epoch', 'RMSE')

# Panel 3: actual vs predicted, first target column (left).
ax_left.scatter(preview_x, y_valid[:n_preview, 0], color='blue', label='Actual Left', alpha=0.7)
ax_left.scatter(preview_x, ensemble_preds[:, 0], color='red', label='Predicted Left', alpha=0.7)
_finish_axes(ax_left, 'Ensemble Predictions vs Actual (Left Vehicles)', 'Sample Index', 'Normalized Vehicle Count')

# Panel 4: actual vs predicted, second target column (right).
ax_right.scatter(preview_x, y_valid[:n_preview, 1], color='green', label='Actual Right', alpha=0.7)
ax_right.scatter(preview_x, ensemble_preds[:, 1], color='orange', label='Predicted Right', alpha=0.7)
_finish_axes(ax_right, 'Ensemble Predictions vs Actual (Right Vehicles)', 'Sample Index', 'Normalized Vehicle Count')

# Panel 5: per-sample RMSE (over the two targets) by red-light status.
if len(red_light_indices) > 0:
    red_light_errors = np.sqrt(np.mean(np.square(y_valid[red_light_indices] - ensemble_preds_all[red_light_indices]), axis=1))
    ax_hist.hist(red_light_errors, alpha=0.7, bins=20, color='red', label='Red Light Errors')
if len(no_red_light_indices) > 0:
    no_red_light_errors = np.sqrt(np.mean(np.square(y_valid[no_red_light_indices] - ensemble_preds_all[no_red_light_indices]), axis=1))
    ax_hist.hist(no_red_light_errors, alpha=0.7, bins=20, color='green', label='No Red Light Errors')
_finish_axes(ax_hist, 'Error Distribution by Red Light Status', 'Prediction Error (RMSE)', 'Frequency')

# Panel 6: actual vs predicted for both targets, coloured by red-light status.
if len(red_light_indices) > 0:
    red_light_y = y_valid[red_light_indices].flatten()
    red_light_pred = ensemble_preds_all[red_light_indices].flatten()
    ax_scatter.scatter(red_light_y, red_light_pred, color='red', alpha=0.5, label='Red Light')
if len(no_red_light_indices) > 0:
    no_red_light_y = y_valid[no_red_light_indices].flatten()
    no_red_light_pred = ensemble_preds_all[no_red_light_indices].flatten()
    ax_scatter.scatter(no_red_light_y, no_red_light_pred, color='green', alpha=0.5, label='No Red Light')
# Dashed diagonal marks perfect predictions.
max_val = max(np.max(y_valid), np.max(ensemble_preds_all))
min_val = min(np.min(y_valid), np.min(ensemble_preds_all))
ax_scatter.plot([min_val, max_val], [min_val, max_val], 'k--')
_finish_axes(ax_scatter, 'Actual vs Predicted by Red Light Status', 'Actual Value', 'Predicted Value')

fig.tight_layout()
fig.savefig('enhanced_red_light_metrics.png', dpi=300)
plt.show()

def test_ensemble_model():
    """Load the saved ensemble and compare predictions with and without a red light.

    Reads ``scaler.pkl``, ``label_encoder_day.pkl`` and
    ``lstm_traffic_model_{i}.h5`` for ``i in range(num_models)`` from the
    working directory (``num_models`` is the notebook-level global), then
    predicts the same Monday 15:03:22 observation (left=5, right=4) twice:
    once with red light "Yes" and once with "No".

    Errors while loading, preprocessing or predicting are printed and turned
    into a ``None`` result instead of being raised.

    Returns:
        The red-light "Yes" prediction in original units as a length-2 array
        ``[left, right]``, or None when loading or that prediction failed.
    """
    try:
        # joblib.load unpickles arbitrary objects: only load artifacts that
        # were produced by this notebook.
        scaler = joblib.load("scaler.pkl")
        le_day = joblib.load("label_encoder_day.pkl")
        model_paths = [f"lstm_traffic_model_{i}.h5" for i in range(num_models)]
        # The models are used for predict() only, so the training
        # configuration (optimizer/loss) does not need to be restored.
        models = [load_model(path, compile=False) for path in model_paths]
        print(f"✅ {len(models)} models, scaler, and label encoder loaded successfully.")
    except Exception as e:
        print(f"❌ Error loading models or preprocessors: {e}")
        return None

    def preprocess_input(timestamp_str, day, red_light, current_left, current_right, lookback=10):
        """Build a scaled (1, lookback, n_features) window from one observation."""
        try:
            # Parsed only to validate the format; the timestamp is not a model feature.
            datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S")
            day_encoded = le_day.transform([day])[0]
            red_light_encoded = 1 if red_light.lower() == "yes" else 0
            red_light_importance = red_light_encoded * 3
            input_data = [current_left, current_right, day_encoded, red_light_encoded, red_light_importance]
            print(f"📝 Raw input data: {input_data}")
            print(f"🚦 Red light status: {'YES (Important)' if red_light_encoded == 1 else 'NO'}")
            # The single observation is repeated to fill the whole window.
            sequence = np.array([input_data] * lookback)
            # The scaler was fitted on features + 2 target columns, so pad
            # with zero targets before transforming and drop them afterwards.
            dummy_target = [0, 0]
            sequence_with_dummy = np.hstack((sequence, np.array([dummy_target] * lookback)))
            scaled_sequence = scaler.transform(sequence_with_dummy)
            scaled_sequence = scaled_sequence[:, :len(input_data)]
            print(f"📊 Scaled sequence shape: {scaled_sequence.shape}")
            return np.expand_dims(scaled_sequence, axis=0)
        except Exception as e:
            print(f"❌ Error in preprocessing: {e}")
            return None

    def predict_traffic(timestamp_str, day, red_light, current_left, current_right):
        """Return the ensemble prediction in original units, or None on failure."""
        X_test = preprocess_input(timestamp_str, day, red_light, current_left, current_right)
        if X_test is None:
            return None
        try:
            predictions = [model.predict(X_test, verbose=0) for model in models]
            for i, pred in enumerate(predictions):
                print(f"Model {i+1} prediction: {pred[0]}")
            pred_scaled = np.mean(predictions, axis=0)
            print(f"🔢 Ensemble scaled prediction: {pred_scaled}")
            # Feature count taken from the input window rather than hard-coded,
            # so it always matches what preprocess_input produced.
            n_features = X_test.shape[-1]
            dummy_features = np.zeros((pred_scaled.shape[0], n_features))
            pred_with_dummy = np.hstack((dummy_features, pred_scaled))
            # Undo the scaling, then keep only the target columns.
            pred = scaler.inverse_transform(pred_with_dummy)[:, n_features:]
            return pred[0]
        except Exception as e:
            print(f"❌ Error in prediction: {e}")
            return None

    print("\n----- TESTING WITH RED LIGHT = YES -----")
    prediction_red = predict_traffic("2025-04-07 15:03:22", "Monday", "Yes", 5, 4)
    print("\n----- TESTING WITH RED LIGHT = NO -----")
    prediction_no_red = predict_traffic("2025-04-07 15:03:22", "Monday", "No", 5, 4)
    if prediction_red is not None and prediction_no_red is not None:
        print("\n----- IMPACT OF RED LIGHT ON PREDICTIONS -----")
        print(f"🚦 WITH Red Light: Left={prediction_red[0]:.1f}, Right={prediction_red[1]:.1f}")
        print(f"🚦 WITHOUT Red Light: Left={prediction_no_red[0]:.1f}, Right={prediction_no_red[1]:.1f}")
        print(f"🔄 Difference: Left={prediction_red[0]-prediction_no_red[0]:.1f}, Right={prediction_red[1]-prediction_no_red[1]:.1f}")
    return prediction_red

# Smoke-test the saved ensemble. As the last expression of the cell, the value
# returned by test_ensemble_model() (the red-light prediction, or None) is also
# shown as the cell output.
print("\n----- TESTING ENSEMBLE MODEL WITH ENHANCED RED LIGHT FEATURE -----")
test_ensemble_model()