<a href="https://colab.research.google.com/github/gowrisankar393/vaylen-transitlk/blob/Multi-Sensor-Fusion-Crash-Detection/TransitLK_MSFCD_CVP_TF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Setup & Environment

In [6]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, callbacks
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import os
import json
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from imblearn.over_sampling import RandomOverSampler
from google.colab import files, drive
from google.colab.patches import cv2_imshow

print("TensorFlow Version:", tf.__version__)

TensorFlow Version: 2.19.0


Mount Drive & Load Roboflow Dataset

In [7]:
#mount drive
drive.mount('/content/drive')

#dataset
DATASET_PATH = '/content/drive/MyDrive/TransitLK_MSFCD.v1i.multiclass'

print("Dataset Path:", DATASET_PATH)

#verify structure
for split in ['train', 'valid', 'test']:
    split_path = os.path.join(DATASET_PATH, split)
    if os.path.exists(split_path):
        print(f"{split} folder found: {len(os.listdir(split_path))} files")
    else:
        print(f"{split} folder missing")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Dataset Path: /content/drive/MyDrive/TransitLK_MSFCD.v1i.multiclass
train folder missing
valid folder missing
test folder missing


Load & Parse Class Labels

In [8]:
print("Loading Class Definitions")

#_classes.csv from train folder (Roboflow format)
classes_csv = os.path.join(DATASET_PATH, 'train', '_classes.csv')

if os.path.exists(classes_csv):
    #_classes.csv is usually: class_name,class_id
    class_df = pd.read_csv(classes_csv)
    print("Class mapping:")
    print(class_df)

    #create label dictionary
    class_names = class_df['class_name'].tolist()
    NUM_CLASSES = len(class_names)
    CLASS_INDICES = range(NUM_CLASSES)

    print(f"\nâœ… Found {NUM_CLASSES} classes:")
    for idx, name in enumerate(class_names):
        print(f"   {idx}: {name}")
else:
    #if no _classes.csv
    print("No _classes.csv found. Assuming binary classification (crash/normal)")
    class_names = ['normal', 'crash']
    NUM_CLASSES = 2

Loading Class Definitions
No _classes.csv found. Assuming binary classification (crash/normal)


Load & EDA - Image Data

In [9]:
print("Exploratory Data Analysis\n")

def load_images_from_folder(folder_path, label):
    #load images and labels from folder
    images = []
    labels = []
    for filename in os.listdir(folder_path):
        if filename.endswith(('.jpg', '.jpeg', '.png')):
            img_path = os.path.join(folder_path, filename)
            img = cv2.imread(img_path)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            images.append(img)
            labels.append(label)
    return images, labels

#load sample of data for EDA
SAMPLE_SIZE = 50

all_images = []
all_labels = []

for idx, class_name in enumerate(class_names):
    class_folder = os.path.join(DATASET_PATH, 'train', class_name)

    if os.path.exists(class_folder):
        #subfolders of classes
        images, labels = load_images_from_folder(class_folder, idx)
        all_images.extend(images[:SAMPLE_SIZE//NUM_CLASSES])
        all_labels.extend(labels[:SAMPLE_SIZE//NUM_CLASSES])
        print(f"Loaded {len(images)} images for class '{class_name}'")
    else:
        print(f"Folder not found: {class_folder}")

print(f"\nTotal loaded: {len(all_images)} images")

#display sample images
plt.figure(figsize=(15, 5))
for i in range(min(15, len(all_images))):
    plt.subplot(3, 5, i+1)
    plt.imshow(all_images[i])
    plt.title(f"Class: {class_names[all_labels[i]]}")
    plt.axis('off')
plt.suptitle('Sample Training Images', fontsize=16)
plt.show()

#image statistics
heights = [img.shape[0] for img in all_images]
widths = [img.shape[1] for img in all_images]

print(f"\nImage Statistics:")
print(f"   Average height: {np.mean(heights):.0f}px")
print(f"   Average width: {np.mean(widths):.0f}px")
print(f"   Height range: {min(heights)}-{max(heights)}px")
print(f"   Width range: {min(widths)}-{max(widths)}px")

Exploratory Data Analysis

Folder not found: /content/drive/MyDrive/TransitLK_MSFCD.v1i.multiclass/train/normal
Folder not found: /content/drive/MyDrive/TransitLK_MSFCD.v1i.multiclass/train/crash

Total loaded: 0 images


<Figure size 1500x500 with 0 Axes>


Image Statistics:
   Average height: nanpx
   Average width: nanpx


ValueError: min() iterable argument is empty

Data Preprocessing & Augmentation

In [None]:
print("Data Preprocessing & Augmentation\n")

#target image size for model
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

#data augmentation for training to prevent overfitting
data_augmentation = keras.Sequential([
    layers.RandomFlip("horizontal", input_shape=(224, 224, 3)),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
    layers.RandomContrast(0.1),
])

#preprocessing for validation/test
preprocess_input = keras.applications.mobilenet_v2.preprocess_input

print(f"Image size set to: {IMG_SIZE}")
print(f"Batch size: {BATCH_SIZE}")

#use keras for efficient loading
from tensorflow.keras.preprocessing.image import ImageDataGenerator

#training generator with augmentation
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    horizontal_flip=True,
    rotation_range=10,
    zoom_range=0.1,
    shear_range=0.1
)

#validation/test generator without augmentation
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

train_generator = train_datagen.flow_from_directory(
    os.path.join(DATASET_PATH, 'train'),
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=True
)

val_generator = val_datagen.flow_from_directory(
    os.path.join(DATASET_PATH, 'valid'),
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False
)

test_generator = val_datagen.flow_from_directory(
    os.path.join(DATASET_PATH, 'test'),
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False
)

print(f"\nData generators created:")
print(f"   Train batches: {len(train_generator)}")
print(f"   Val batches: {len(val_generator)}")
print(f"   Test batches: {len(test_generator)}")

Handle Class Imbalance

In [None]:
print("Handling Class Imbalance\n")

#check class distribution
print("\nTraining set class distribution:")
class_counts = {i: 0 for i in range(NUM_CLASSES)}
for i in range(len(train_generator)):
    _, labels = train_generator[i]
    for label in labels:
        class_counts[np.argmax(label)] += 1

for idx, count in class_counts.items():
    print(f"   {class_names[idx]}: {count} images")

#calculate class weights
total = sum(class_counts.values())
class_weights = {idx: total/(NUM_CLASSES * count) for idx, count in class_counts.items()}

print(f"\nðŸ“Š Class Weights for training:")
for idx, weight in class_weights.items():
    print(f"   {class_names[idx]}: {weight:.2f}")

#save weights for later use
with open('class_weights.json', 'w') as f:
    json.dump(class_weights, f, indent=2)

Build CNN Architecture

In [None]:
print("Building CNN Architecture\n")


#Transfer Learning: MobileNetV2 (lightweight for mobile devices)*
base_model = keras.applications.MobileNetV2(
    input_shape=(224, 224, 3),
    include_top=False,  #don't include classification head
    weights='imagenet'  #use pre-trained weights
)

#freeze base model initially
base_model.trainable = False

#build the full model
model = keras.Sequential([
    data_augmentation,
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dropout(0.3),  # Prevent overfitting
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(NUM_CLASSES, activation='softmax')
])

#compile with adam optimizer and categorical crossentropy
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy', keras.metrics.Precision(name='precision'),
             keras.metrics.Recall(name='recall')]
)

print("Model architecture built:")
print(f"   Base: MobileNetV2 (frozen)")
print(f"   Trainable params: {model.trainable_variables}")
print(f"   Classes: {NUM_CLASSES}")
print(f"   Optimizer: Adam")

Train the Model

In [None]:
print("Training CNN Model\n")

#callbacks for better training
reduce_lr = callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=5, min_lr=1e-7
)

early_stop = callbacks.EarlyStopping(
    monitor='val_loss', patience=10, restore_best_weights=True
)

#train model
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=50,
    validation_data=val_generator,
    validation_steps=len(val_generator),
    class_weight=class_weights,
    callbacks=[reduce_lr, early_stop],
    verbose=1
)

print("Training complete!")

#plot training history
def plot_training_history(history):
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))

    #accuracy
    axes[0,0].plot(history.history['accuracy'], label='Train')
    axes[0,0].plot(history.history['val_accuracy'], label='Val')
    axes[0,0].set_title('Model Accuracy', fontweight='bold')
    axes[0,0].set_xlabel('Epoch')
    axes[0,0].set_ylabel('Accuracy')
    axes[0,0].legend()
    axes[0,0].grid(True)

    #loss
    axes[0,1].plot(history.history['loss'], label='Train')
    axes[0,1].plot(history.history['val_loss'], label='Val')
    axes[0,1].set_title('Model Loss', fontweight='bold')
    axes[0,1].set_xlabel('Epoch')
    axes[0,1].set_ylabel('Loss')
    axes[0,1].legend()
    axes[0,1].grid(True)

    #precision
    axes[1,0].plot(history.history['precision'], label='Train')
    axes[1,0].plot(history.history['val_precision'], label='Val')
    axes[1,0].set_title('Precision', fontweight='bold')
    axes[1,0].legend()
    axes[1,0].grid(True)

    #recall
    axes[1,1].plot(history.history['recall'], label='Train')
    axes[1,1].plot(history.history['val_recall'], label='Val')
    axes[1,1].set_title('Recall', fontweight='bold')
    axes[1,1].legend()
    axes[1,1].grid(True)

    plt.tight_layout()
    plt.show()

plot_training_history(history)

Model Evaluation & Confusion Matrix

In [None]:
print("Evaluating Model on Test Set\n")

#evaluate on test set
test_loss, test_acc, test_precision, test_recall = model.evaluate(
    test_generator, steps=len(test_generator)
)

print(f"\nTest Accuracy: {test_acc:.2%}")
print(f"   Precision: {test_precision:.2%}")
print(f"   Recall: {test_recall:.2%}")

#predict on test set
y_test_true = []
y_test_pred = []

for i in range(len(test_generator)):
    X_batch, y_batch = test_generator[i]
    preds = model.predict(X_batch)

    y_test_true.extend(np.argmax(y_batch, axis=1))
    y_test_pred.extend(np.argmax(preds, axis=1))

#confusion Matrix
cm = confusion_matrix(y_test_true, y_test_pred)
print(f"\nConfusion Matrix:")
print(cm)

#visualize
plt.figure(figsize=(8,6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names,
            yticklabels=class_names)
plt.title('Confusion Matrix - Test Set', fontsize=14, fontweight='bold')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

#classification Report
print("\nClassification Report:")
print(classification_report(y_test_true, y_test_pred,
                            target_names=class_names))

Feature Importance & Model Interpretation

In [None]:
print("Feature Importance & Model Interpretation\n")

#visualize what the model sees using GradCAM
from tensorflow.keras.models import Model

#extract the last convolutional layer
last_conv_layer = base_model.get_layer('Conv_1')
grad_model = Model([base_model.input], [last_conv_layer.output, model.output])

def generate_gradcam(image_array, class_index):
    """Generate Grad-CAM heatmap for a specific class"""
    with tf.GradientTape() as tape:
        conv_output, predictions = grad_model(image_array)
        loss = predictions[:, class_index]

    grads = tape.gradient(loss, conv_output)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    conv_output = conv_output[0]
    heatmap = tf.reduce_sum(tf.multiply(pooled_grads, conv_output), axis=-1)
    heatmap = np.maximum(heatmap, 0)  #relu
    heatmap /= np.max(heatmap)  #normalize

    return heatmap

#test on a sample crash image
sample_image_path = os.path.join(DATASET_PATH, 'test', 'crash', os.listdir(os.path.join(DATASET_PATH, 'test', 'crash'))[0])
sample_img = cv2.imread(sample_image_path)
sample_img = cv2.cvtColor(sample_img, cv2.COLOR_BGR2RGB)
sample_img_resized = cv2.resize(sample_img, IMG_SIZE)
sample_array = np.expand_dims(sample_img_resized, axis=0).astype(np.float32)

#get GradCAM
crash_class_idx = class_names.index('crash') if 'crash' in class_names else 0
heatmap = generate_gradcam(sample_array, crash_class_idx)

#overlay heatmap on original image
heatmap_resized = cv2.resize(heatmap, (sample_img.shape[1], sample_img.shape[0]))
heatmap_resized = np.uint8(255 * heatmap_resized)
heatmap_color = cv2.applyColorMap(heatmap_resized, cv2.COLORMAP_JET)
superimposed_img = cv2.addWeighted(sample_img, 0.6, heatmap_color, 0.4, 0)

plt.figure(figsize=(12, 4))
plt.subplot(1, 3, 1)
plt.imshow(sample_img)
plt.title('Original Image', fontweight='bold')
plt.axis('off')

plt.subplot(1, 3, 2)
plt.imshow(heatmap, cmap='jet')
plt.title('Grad-CAM Heatmap', fontweight='bold')
plt.axis('off')

plt.subplot(1, 3, 3)
plt.imshow(superimposed_img)
plt.title('Crash Focus Areas', fontweight='bold')
plt.axis('off')

plt.show()

print("Grad-CAM visualization complete!")
print("   Red areas = high activation for crash detection")

Export Model

In [None]:
print("Exporting Model\n")

#save the complete model
MODEL_FILENAME = "TransitLK-MSFCD-CV-XGB-1.h5"
model.save(MODEL_FILENAME)

#convert to TensorFlow Lite for mobile deployment
print("\nConverting to TensorFlow Lite (for mobile)")

converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]  #optimize for size/speed
converter.target_spec.supported_types = [tf.float16]  #use float16 for faster inference

tflite_model = converter.convert()

TFLITE_FILENAME = "TransitLK-MSFCD-CV-XGB-1.tflite"
with open(TFLITE_FILENAME, 'wb') as f:
    f.write(tflite_model)

print(f"Keras model saved: {MODEL_FILENAME}")
print(f"TensorFlow Lite model saved: {TFLITE_FILENAME}")
print(f"   Model size: {len(tflite_model) / 1024:.2f} KB")

#save class names for inference
with open('cv_class_names.json', 'w') as f:
    json.dump({'classes': class_names}, f)

print("\nFiles to download:")
print(f"   1. {MODEL_FILENAME} (Keras format)")
print(f"   2. {TFLITE_FILENAME} (Mobile-optimized)")
print(f"   3. cv_class_names.json (Class mapping)")