# Gravitational Lensing Detection with MobileNetV3

This notebook demonstrates a complete workflow for detecting gravitational lenses using deep learning and transfer learning with MobileNetV3. We will load and preprocess image data, train a classifier, evaluate its performance, and visualize results.

## 1. Import Required Libraries
Import essential libraries for data handling, visualization, and deep learning.

In [None]:
# Import Required Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV3Small
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import tensorflow.keras.backend as K

## 2. Load and Explore the Dataset
Load the CSV file and display the first few rows to understand the data structure.

In [None]:
# Read the labelled-image index into a DataFrame and preview its first rows
csv_path = 'labelled_images_clean.csv'
df = pd.read_csv(filepath_or_buffer=csv_path)
df.head(n=5)

In [None]:
# Count the missing entries in every column
missing_per_column = df.isna().sum()
print('Missing values per column:')
print(missing_per_column)

# Count how many rows belong to each category
category_counts = df['category'].value_counts()
print('Class distribution:')
print(category_counts)

## 3. Preprocess the Data
Map categories to integer labels, split the dataset 70/15/15 into stratified train/validation/test sets, and prepare data generators (with augmentation applied to the training set only).

In [None]:
# Map categories to integers
label_map = {'A': 0, 'B': 1, 'C': 2}
mapped_labels = df['category'].map(label_map)

# Fail fast on categories that label_map does not cover: Series.map turns them
# into NaN, which would otherwise flow silently into the split below.
unmapped_mask = mapped_labels.isna()
if unmapped_mask.any():
    unknown_categories = sorted(df.loc[unmapped_mask, 'category'].astype(str).unique())
    raise ValueError(
        f"{int(unmapped_mask.sum())} row(s) have a category outside "
        f"{sorted(label_map)}: {unknown_categories}"
    )
df['label'] = mapped_labels.astype(int)

# Split dataset: 70% train, then the remaining 30% is halved into val / test.
# Both splits are stratified on the label and use a fixed random_state.
train_df, temp_df = train_test_split(df, test_size=0.30, stratify=df['label'], random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, stratify=temp_df['label'], random_state=42)

print(f"Train: {len(train_df)}, Val: {len(val_df)}, Test: {len(test_df)}")

# Data generators
IMG_SIZE = (128, 128)
BATCH_SIZE = 32

# No `rescale=1./255` on purpose: the Keras MobileNetV3 applications embed
# their own input rescaling and expect raw pixel values in [0, 255]. Rescaling
# here as well would normalise the images twice and squash the inputs into a
# tiny range the pretrained weights were never trained on.
train_datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)
# Validation / test images are passed through without augmentation.
val_test_datagen = ImageDataGenerator()

def make_generator(datagen, df, shuffle=True, directory='new_pngs'):
    """Build a batched image iterator from a split DataFrame.

    Args:
        datagen: ImageDataGenerator that supplies the (optional) augmentation.
        df: DataFrame with an 'img_src' column (image file names relative to
            `directory`) and an integer 'label' column. It is copied, so the
            caller's frame is left untouched.
        shuffle: Whether batches are drawn in shuffled order. Pass False for
            validation / test so predictions line up with the iterator's
            `.classes` and `.filenames`.
        directory: Folder containing the image files.

    Returns:
        The iterator produced by `datagen.flow_from_dataframe`, yielding
        (images, one-hot labels) batches of size BATCH_SIZE at IMG_SIZE.
    """
    frame = df.copy()
    # class_mode='categorical' needs string class ids in the label column.
    frame['label'] = frame['label'].astype(str)
    # Pin the class order to label_map instead of letting each split infer it,
    # so output column i always means label i even if a split lacks a class.
    class_ids = [str(class_id) for class_id in sorted(label_map.values())]
    return datagen.flow_from_dataframe(
        dataframe=frame,
        directory=directory,
        x_col='img_src',
        y_col='label',
        target_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='categorical',
        classes=class_ids,
        shuffle=shuffle
    )

# Training batches are shuffled (the default); validation and test batches keep
# the DataFrame order.
train_gen = make_generator(datagen=train_datagen, df=train_df)
val_gen = make_generator(datagen=val_test_datagen, df=val_df, shuffle=False)
test_gen = make_generator(datagen=val_test_datagen, df=test_df, shuffle=False)

## 4. Build and Train a Machine Learning Model
Build a MobileNetV3-based classifier, compile it, and train with early stopping and model checkpointing.

In [None]:
# Build MobileNetV3 model: ImageNet-pretrained backbone without its classifier top
# NOTE(review): Keras documents MobileNetV3 as embedding its own input rescaling
# and expecting pixel values in [0, 255] - confirm the generators feeding this
# model do not rescale the images as well.
backbone_input_shape = IMG_SIZE + (3,)
base_model = MobileNetV3Small(
    weights='imagenet',
    include_top=False,
    input_shape=backbone_input_shape
)

# Freeze initial layers: the first 80 backbone layers are excluded from training
frozen_layers = base_model.layers[:80]
for frozen_layer in frozen_layers:
    frozen_layer.trainable = False

# Classification head: global average pooling -> dropout -> 64-unit ReLU layer,
# followed by a 3-way softmax output
head_layers = [
    GlobalAveragePooling2D(),
    Dropout(0.3),
    Dense(64, activation='relu'),
]
x = base_model.output
for head_layer in head_layers:
    x = head_layer(x)
output = Dense(3, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=output)

def f1_score(y_true, y_pred):
    """Macro-averaged F1 over the classes, computed on a single batch.

    The predicted class of each sample is the arg-max of `y_pred`, matching how
    predictions are decoded for the classification report. Thresholding the
    softmax output at 0.5 instead would count a sample whose top probability is
    below 0.5 as predicting no class at all.

    Args:
        y_true: One-hot ground-truth labels, shape (batch, n_classes).
        y_pred: Predicted class probabilities, shape (batch, n_classes).

    Returns:
        Scalar tensor: the mean of the per-class F1 scores for this batch.
        Because it is computed per batch, the value reported during training
        is an average of batch-level scores rather than the exact F1 of the
        whole dataset.
    """
    n_classes = K.shape(y_pred)[-1]
    y_true = K.cast(y_true, K.floatx())
    # Hard one-hot prediction: exactly one class per sample.
    y_pred = K.cast(K.one_hot(K.argmax(y_pred, axis=-1), n_classes), K.floatx())

    # Per-class counts, reduced over the batch axis.
    tp = K.sum(y_true * y_pred, axis=0)
    fp = K.sum((1 - y_true) * y_pred, axis=0)
    fn = K.sum(y_true * (1 - y_pred), axis=0)

    # K.epsilon() keeps the ratios finite when a class is absent from the batch.
    precision = tp / (tp + fp + K.epsilon())
    recall = tp / (tp + fn + K.epsilon())
    f1 = 2 * precision * recall / (precision + recall + K.epsilon())
    return K.mean(f1)

# Compile with Adam at a small learning rate; track accuracy and the custom
# f1_score metric alongside the cross-entropy loss
optimizer = Adam(learning_rate=1e-4)
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy', f1_score]
)

# Stop once val_loss has not improved for 8 epochs (restoring the best weights)
# and keep the best-val_loss model on disk as best_model.h5
early_stopping = EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True)
checkpoint = ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True)
callbacks = [early_stopping, checkpoint]

# Train for at most 50 epochs, one full pass over each generator per epoch
history = model.fit(
    train_gen,
    epochs=50,
    steps_per_epoch=len(train_gen),
    validation_data=val_gen,
    validation_steps=len(val_gen),
    callbacks=callbacks
)

## 5. Evaluate Model Performance
Assess the model's performance on the test set and visualize training history.

In [None]:
# Evaluate on test set; the values come back as loss followed by the compiled metrics
test_metrics = model.evaluate(test_gen)
test_loss, test_acc, test_f1 = test_metrics
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.4f}, Test F1: {test_f1:.4f}")

# Plot training history: one curve per recorded series, in a fixed order
curve_labels = {
    'accuracy': 'Train Acc',
    'val_accuracy': 'Val Acc',
    'loss': 'Train Loss',
    'val_loss': 'Val Loss',
}
for history_key, curve_label in curve_labels.items():
    plt.plot(history.history[history_key], label=curve_label)
plt.title('Training History')
plt.xlabel('Epoch')
plt.ylabel('Value')
plt.legend()
plt.show()

## 6. Make Predictions with the Trained Model
Use the trained model to predict on the held-out test set, print a per-class classification report, and display a few sample predictions.

In [None]:
# Make predictions on test data
import random
class_names = ['A', 'B', 'C']

# Rewind the iterator so predictions are produced in the same order as
# test_gen.classes / test_gen.filenames.
test_gen.reset()
preds = model.predict(test_gen, verbose=1)
pred_labels = np.argmax(preds, axis=1)
true_labels = test_gen.classes

# Show classification report
# `labels` pins the report to every class index; without it, a class that is
# absent from both the truth and the predictions makes the number of classes
# disagree with `target_names` and the call raises.
print(classification_report(
    true_labels,
    pred_labels,
    labels=list(range(len(class_names))),
    target_names=class_names
))

# Show a few sample predictions with images
# Cap the sample size so a test set with fewer than 5 images does not raise.
n_samples = min(5, len(test_gen.filenames))
indices = random.sample(range(len(test_gen.filenames)), n_samples)
if indices:
    plt.figure(figsize=(15, 5))
    for i, idx in enumerate(indices):
        img_path = os.path.join('new_pngs', test_gen.filenames[idx])
        img = plt.imread(img_path)
        plt.subplot(1, n_samples, i+1)
        plt.imshow(img)
        plt.axis('off')
        plt.title(f"True: {class_names[true_labels[idx]]}\nPred: {class_names[pred_labels[idx]]}")
    plt.show()