# Import Packages

In [1]:
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
import pandas as pd
import os
import tensorflow as tf
from sklearn.model_selection import train_test_split

2024-10-26 05:45:44.233043: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-10-26 05:45:44.313643: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-10-26 05:45:44.604824: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-10-26 05:45:44.604883: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-10-26 05:45:44.606340: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to regi

### Load Data, Train and Evaluate the Model

In [1]:
def load_images_from_folder(folder_name, img_size=(112, 112),
                            dataset_path='datasets/casia-webface/casia-webface/'):
    """Load every readable image of one dataset sub-folder as RGB arrays.

    Parameters
    ----------
    folder_name : str
        Name of the sub-folder inside ``dataset_path``. It is also used as the
        label attached to every image loaded from it.
    img_size : tuple of int, optional
        Target size passed to ``cv.resize`` as its ``dsize`` argument.
    dataset_path : str, optional
        Root directory that contains ``folder_name``. Defaults to the location
        that used to be hard-coded in this function.

    Returns
    -------
    images : np.ndarray
        The resized RGB images, in sorted file-name order.
    labels : np.ndarray
        ``folder_name`` repeated once per loaded image.

    Both arrays are empty when the folder is missing or holds no readable
    image; a message is printed in either situation instead of raising.
    """
    images, labels = [], []
    folder_path = os.path.join(dataset_path, folder_name)

    # Guard clause: a missing folder is reported and yields empty arrays.
    # (Previously this branch referenced an unbound ``img_name`` and crashed.)
    if not os.path.isdir(folder_path):
        print(f'Folder {folder_name} not found at {folder_path}')
        return np.array(images), np.array(labels)

    # Sort the listing so the sample order does not depend on the filesystem.
    for img_name in sorted(os.listdir(folder_path)):
        img = cv.imread(os.path.join(folder_path, img_name))
        if img is None:
            # cv.imread signals an unreadable file by returning None.
            print(f'Folder {folder_name} contains a corrupted image: {img_name}')
            continue
        img = cv.cvtColor(img, cv.COLOR_BGR2RGB)  # OpenCV decodes to BGR
        img = cv.resize(img, img_size)
        images.append(img)
        labels.append(folder_name)

    return np.array(images), np.array(labels)

def load_dataset(dataset_path, num_labels=50, img_size=(112, 112)):  # Reduced num_labels to 50
    """Collect images and labels from the first ``num_labels`` sub-folders.

    The sub-directories of ``dataset_path`` are listed and sorted by name, and
    the first ``num_labels`` of them are loaded one by one through
    ``load_images_from_folder``.

    NOTE: only the folder *name* and ``img_size`` are forwarded to
    ``load_images_from_folder``; ``dataset_path`` itself is used here solely
    to discover which folders exist.

    Parameters
    ----------
    dataset_path : str
        Directory whose immediate sub-directories are the label folders.
    num_labels : int, optional
        How many (name-sorted) folders to load.
    img_size : tuple of int, optional
        Passed through unchanged to ``load_images_from_folder``.

    Returns
    -------
    tuple of np.ndarray
        ``(images, labels)`` accumulated over all selected folders.
    """
    # Keep only directory entries; plain files at the root are ignored.
    subdirs = []
    for entry in os.listdir(dataset_path):
        if os.path.isdir(os.path.join(dataset_path, entry)):
            subdirs.append(entry)
    subdirs.sort()

    all_images = []
    all_labels = []
    for name in subdirs[:num_labels]:
        folder_images, folder_labels = load_images_from_folder(name, img_size)
        all_images += list(folder_images)
        all_labels += list(folder_labels)

    return np.array(all_images), np.array(all_labels)

# Path to the dataset
DATASET_PATH = 'datasets/casia-webface/casia-webface/'

# One seed for the train/test split and for Keras (weight init, shuffling),
# so that a Restart & Run All gives comparable numbers.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Load the dataset
X, y = load_dataset(DATASET_PATH, num_labels=50)  # Reduced num_labels to 50

# Fail early with a clear message instead of an obscure error further down.
if len(X) == 0:
    raise ValueError(f'No images could be loaded from {DATASET_PATH}')

# Encode the labels: folder name -> integer index (np.unique returns sorted names)
unique_labels = np.unique(y)
labels_map = {label: i for i, label in enumerate(unique_labels)}
y_encoded = np.array([labels_map[label] for label in y])

# One-hot encode; the width is pinned to the number of classes so it always
# matches the size of the softmax layer below.
y_categorical = tf.keras.utils.to_categorical(y_encoded, num_classes=len(unique_labels))

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(
    X, y_categorical, test_size=0.2, random_state=SEED
)

# Normalize the images to [0, 1]
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

# Create the model. The input shape is taken from the data so it cannot drift
# away from the img_size used while loading.
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=X_train.shape[1:]),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(len(unique_labels), activation='softmax'),
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model. Errors are deliberately NOT caught: swallowing a failed fit
# would let the evaluation below report numbers for an untrained model.
history = model.fit(X_train, y_train, epochs=10, batch_size=32)

# Evaluate the model on the held-out split
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Accuracy: {accuracy * 100:.2f}%')

2024-10-26 05:47:16.277922: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-10-26 05:47:16.339196: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-10-26 05:47:16.651754: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-10-26 05:47:16.651818: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-10-26 05:47:16.654566: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to regi

Shape of X: (9416, 112, 112, 3)
Shape of y: (9416,)
Type of X: <class 'numpy.ndarray'>
Type of y: <class 'numpy.ndarray'>
Labels: {'000000': 0, '000001': 1, '000002': 2, '000003': 3, '000004': 4, '000005': 5, '000006': 6, '000007': 7, '000008': 8, '000009': 9, '000010': 10, '000011': 11, '000012': 12, '000013': 13, '000014': 14, '000015': 15, '000016': 16, '000017': 17, '000018': 18, '000019': 19, '000020': 20, '000021': 21, '000022': 22, '000023': 23, '000024': 24, '000025': 25, '000026': 26, '000027': 27, '000028': 28, '000029': 29, '000030': 30, '000031': 31, '000032': 32, '000033': 33, '000034': 34, '000035': 35, '000036': 36, '000037': 37, '000038': 38, '000039': 39, '000040': 40, '000041': 41, '000042': 42, '000043': 43, '000044': 44, '000045': 45, '000046': 46, '000047': 47, '000048': 48, '000049': 49}
Shape of y_encoded: (9416,)
Type of y_encoded: <class 'numpy.ndarray'>
Shape of y_categorical: (9416, 50)
Type of y_categorical: <class 'numpy.ndarray'>
Shape of X_train: (7532, 1