In [None]:
import os

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from google.colab import drive

drive.mount('/content/drive')


# --- Dataset location and loader parameters ---------------------------------
# `data_dir` must contain one sub-directory per class
# (e.g. /content/drive/MyDrive/data/data/class1, .../class2);
# flow_from_directory derives the labels from those folder names.
data_dir = "/content/drive/MyDrive/data/data"  # path to the dataset root directory
IMG_HEIGHT, IMG_WIDTH = 64, 64  # target image size for the CNN
BATCH_SIZE = 32
VALIDATION_SPLIT = 0.2  # fraction of each class held out for validation
SEED = 42               # seed for reproducible shuffling / augmentation

# Fail early with an actionable message instead of a bare errno from deep
# inside Keras when the Drive path is wrong or Drive is not mounted.
if not os.path.isdir(data_dir):
    raise FileNotFoundError(
        f"Dataset directory not found: {data_dir!r}. "
        "Update 'data_dir' to point to the folder that contains one "
        "sub-directory per class."
    )

# Training generator: rescale to [0, 1] and apply light random augmentation.
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=VALIDATION_SPLIT,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True
)

# Validation generator: rescale only. Validation images must not be randomly
# augmented, otherwise the reported validation metrics are noisy and not
# comparable between epochs. It uses the same validation_split so that the
# 'training' and 'validation' subsets remain complementary.
val_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=VALIDATION_SPLIT
)


# Load training data (80% of images) from directory, with real-time preprocessing
train_generator = datagen.flow_from_directory(
    data_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),  # resize images to 64x64
    batch_size=BATCH_SIZE,
    class_mode='categorical',   # for multi-class classification (one-hot labels)
    subset='training',
    shuffle=True,
    seed=SEED
)

# Load validation data (20% of images), without augmentation
val_generator = val_datagen.flow_from_directory(
    data_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation',
    shuffle=True,
    seed=SEED
)

# Print the detected class indices to see label encoding
print("Class label mapping:", train_generator.class_indices)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/data/data'

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# One output unit per class folder found by the training generator
# (each class corresponds to a student ID).
num_classes = len(train_generator.class_indices)

# Assemble the CNN layer by layer.
model = Sequential()

# Convolution block 1: 32 filters of 3x3 on the RGB input, then 2x2 max-pooling.
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Convolution block 2: 64 filters of 3x3, then 2x2 max-pooling.
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Collapse the feature maps into a single vector for the dense head.
model.add(Flatten())

# Dense head: 128 hidden units, with 50% dropout as regularization.
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))

# Softmax output producing one probability per class.
model.add(Dense(num_classes, activation='softmax'))

# Show the resulting architecture.
model.summary()


In [None]:
# Configure the model for multi-class training: categorical cross-entropy on
# the one-hot labels, Adam with its default settings, accuracy as the metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit on the training generator, validating on the held-out subset each epoch.
epochs = 20
history = model.fit(
    x=train_generator,
    validation_data=val_generator,
    epochs=epochs,
)

# Score the trained model once more on the validation generator and report it.
final_loss, final_accuracy = model.evaluate(val_generator)
print(f"Validation accuracy: {final_accuracy * 100:.2f}%")



In [None]:
import json

# Write the trained network to an HDF5 file in the working directory.
model.save("face_recognition_model.h5")
print("Model saved as face_recognition_model.h5")

# Store the index -> label mapping next to the model (to use later in the
# OpenCV script). class_indices maps label -> index, so it is flipped here.
label_map = train_generator.class_indices
id_to_label = dict((index, label) for label, index in label_map.items())

# NOTE: JSON object keys are always strings, so the integer indices are
# written as "0", "1", ...; a reader must convert them back with int().
with open("label_map.json", "w") as label_file:
    label_file.write(json.dumps(id_to_label))
print("Label mapping saved to label_map.json:", id_to_label)


In [None]:
# Continue training the already-fitted model for a further block of epochs.
#
# `history` comes from the previous model.fit call; its `epoch` list holds the
# zero-based indices of the epochs that call ran. Resuming from the next index
# (instead of restarting at epoch 1) keeps the training log and `history.epoch`
# aligned with the true number of epochs the model has seen, and stays correct
# if this cell is executed more than once.
EXTRA_EPOCHS = 20
start_epoch = history.epoch[-1] + 1 if history.epoch else 0

history = model.fit(
    train_generator,
    initial_epoch=start_epoch,
    epochs=start_epoch + EXTRA_EPOCHS,  # `epochs` is the final epoch index, not a count
    validation_data=val_generator
)


In [None]:
import matplotlib.pyplot as plt

# Draw the learning curves of the most recent fit side by side:
# accuracy on the left, loss on the right.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: training vs validation accuracy per epoch.
acc_ax.plot(history.history['accuracy'], label='Training Accuracy')
acc_ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_title('Training vs Validation Accuracy')
acc_ax.legend()

# Right panel: training vs validation loss per epoch.
loss_ax.plot(history.history['loss'], label='Training Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Training vs Validation Loss')
loss_ax.legend()

plt.show()


In [None]:
# Reverse lookup: class index -> class name (student ID), built by flipping
# the generator's name -> index mapping.
idx_to_class = dict(
    (class_index, class_name)
    for class_name, class_index in val_generator.class_indices.items()
)


In [None]:
# Rewind the validation iterator so that the next pass over it starts from
# its first batch rather than wherever fit/evaluate left off.
val_generator.reset()



In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Walk once over the validation generator, predict every batch and show the
# images in pages, titled with predicted vs true label (green = correct,
# red = wrong). Misclassified samples are also collected for later inspection.

IMAGES_PER_PAGE = 12  # change if needed
PAGE_COLS = 4
# Keep the 3x4 layout for the default page size, but add rows if a larger
# IMAGES_PER_PAGE is configured so every image has a subplot slot.
PAGE_ROWS = max(3, -(-IMAGES_PER_PAGE // PAGE_COLS))

# Buffers for the page currently being filled. Whatever remains in them after
# the loop is an incomplete last page.
page_images = []
page_preds = []
page_trues = []

# Every misclassified validation sample seen during this pass.
wrong_images = []
wrong_preds = []
wrong_trues = []

for _ in range(len(val_generator)):
    images, labels = next(val_generator)

    # verbose=0: avoid printing one progress bar per batch.
    predictions = model.predict(images, verbose=0)

    # Labels are one-hot, predictions are class probabilities: reduce both to indices.
    pred_classes = np.argmax(predictions, axis=1)
    true_classes = np.argmax(labels, axis=1)

    for image, pred_idx, true_idx in zip(images, pred_classes, true_classes):
        page_images.append(image)
        page_preds.append(pred_idx)
        page_trues.append(true_idx)

        if pred_idx != true_idx:
            wrong_images.append(image)
            wrong_preds.append(pred_idx)
            wrong_trues.append(true_idx)

        # Show images in pages
        if len(page_images) == IMAGES_PER_PAGE:
            plt.figure(figsize=(15, 8))

            for k in range(IMAGES_PER_PAGE):
                plt.subplot(PAGE_ROWS, PAGE_COLS, k + 1)
                plt.imshow(page_images[k])
                plt.axis('off')

                pred_id = idx_to_class[page_preds[k]]
                true_id = idx_to_class[page_trues[k]]

                color = "green" if pred_id == true_id else "red"
                plt.title(f"Pred: {pred_id}\nTrue: {true_id}", color=color)

            plt.tight_layout()
            plt.show()

            # Start a fresh page.
            page_images, page_preds, page_trues = [], [], []


In [None]:
# Show the final, partially filled page: whatever is still buffered in
# page_images / page_preds / page_trues after the paging loop.
if len(page_images) > 0:
    leftover_cols = 4
    # Keep the 3x4 layout for up to 12 images, but add rows when more are
    # left over (possible when the page size is configured above 12), so
    # plt.subplot never receives an out-of-range index.
    leftover_rows = max(3, -(-len(page_images) // leftover_cols))

    plt.figure(figsize=(15, 8))

    for k in range(len(page_images)):
        plt.subplot(leftover_rows, leftover_cols, k + 1)
        plt.imshow(page_images[k])
        plt.axis('off')

        pred_id = idx_to_class[page_preds[k]]
        true_id = idx_to_class[page_trues[k]]

        # Green title for a correct prediction, red for a wrong one.
        color = "green" if pred_id == true_id else "red"
        plt.title(f"Pred: {pred_id}\nTrue: {true_id}", color=color)

    plt.tight_layout()
    plt.show()


In [None]:
import matplotlib.pyplot as plt

# Display up to MAX_WRONG_SHOWN misclassified validation images.
# Reads wrong_images, wrong_preds and wrong_trues (parallel lists of image
# arrays, predicted class indices and true class indices), which must have
# been populated before this cell runs.
WRONG_GRID_ROWS, WRONG_GRID_COLS = 3, 4
MAX_WRONG_SHOWN = WRONG_GRID_ROWS * WRONG_GRID_COLS

if len(wrong_images) == 0:
    print("🎉 No wrong predictions! Model is performing perfectly.")
else:
    plt.figure(figsize=(15, 10))

    for i in range(min(MAX_WRONG_SHOWN, len(wrong_images))):
        plt.subplot(WRONG_GRID_ROWS, WRONG_GRID_COLS, i + 1)
        plt.imshow(wrong_images[i])
        plt.axis('off')

        # Translate class indices back to class names for the title.
        pred_id = idx_to_class[wrong_preds[i]]
        true_id = idx_to_class[wrong_trues[i]]

        plt.title(f"Pred: {pred_id}\nTrue: {true_id}", color="red")

    plt.tight_layout()
    plt.show()


In [None]:
# Persist the model in its current state to Google Drive so that it survives
# the end of the Colab session.
drive_model_path = "/content/drive/MyDrive/face_recognition_model.h5"
model.save(drive_model_path)

print("Model saved successfully.")
