# Import Packages

In [1]:
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
import pandas as pd
import os
import tensorflow as tf
from sklearn.model_selection import train_test_split

2024-10-26 05:07:19.597559: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-10-26 05:07:19.653980: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-10-26 05:07:19.887282: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-10-26 05:07:19.887362: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-10-26 05:07:19.889026: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to regi

### Load Data

In [2]:
def load_images_from_folder(folder_name, img_size = (112, 112),
                            dataset_path = 'datasets/casia-webface/casia-webface/'):
    """Load every readable image stored in one folder of the dataset.

    Parameters
    ----------
    folder_name : str
        Name of the sub-folder inside ``dataset_path``. It is also used as
        the label attached to every image loaded from that folder.
    img_size : tuple of int
        Target size handed to ``cv.resize`` for every image.
    dataset_path : str
        Directory that contains ``folder_name``. Defaults to the location
        that used to be hard-coded, so existing two-argument calls behave
        as before.

    Returns
    -------
    tuple of np.ndarray
        ``(images, labels)``: the images converted from BGR to RGB and
        resized, and one label per image. Both arrays are empty when the
        folder is missing or holds no readable image.
    """
    images, labels = [], []
    folder_path = os.path.join(dataset_path, folder_name)

    # A missing folder is reported and yields an empty result. The previous
    # code reached a message that referenced an undefined `img_name` here
    # and crashed with a NameError.
    if not os.path.isdir(folder_path):
        print(f'Folder {folder_name} not found in {dataset_path}')
        return np.array(images), np.array(labels)

    # os.listdir returns entries in arbitrary order; sort for repeatable output.
    for img_name in sorted(os.listdir(folder_path)):
        img = cv.imread(os.path.join(folder_path, img_name))
        if img is None:
            # cv.imread signals an unreadable file by returning None
            # instead of raising, so report it and move on.
            print(f'Folder {folder_name} contains a corrupted image: {img_name}')
            continue
        img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
        img = cv.resize(img, img_size)
        images.append(img)
        labels.append(folder_name)

    return np.array(images), np.array(labels)

def load_dataset(dataset_path, num_labels = 100, img_size = (112, 112)):
    """Load the images of the first ``num_labels`` folders of a dataset.

    Parameters
    ----------
    dataset_path : str
        Directory whose sub-directories each hold the images of one label.
    num_labels : int
        Maximum number of sub-directories (labels) to load.
    img_size : tuple of int
        Target size forwarded to ``load_images_from_folder``.

    Returns
    -------
    tuple of np.ndarray
        ``(images, labels)`` concatenated over all selected folders.
    """
    images, labels = [], []

    # Sorted so that the `[:num_labels]` slice always selects the same
    # folders; os.listdir gives no ordering guarantee.
    folder_names = sorted(
        entry for entry in os.listdir(dataset_path)
        if os.path.isdir(os.path.join(dataset_path, entry))
    )

    for folder_name in folder_names[:num_labels]:
        # Forward dataset_path so images are read from the directory that
        # was actually listed, not from a hard-coded location.
        imgs, lbls = load_images_from_folder(folder_name, img_size, dataset_path)
        images.extend(imgs)
        labels.extend(lbls)

    return np.array(images), np.array(labels)

# Path to the dataset
DATASET_PATH = 'datasets/casia-webface/casia-webface/'

# One seed for both the train/test split and TensorFlow, so that weight
# initialisation and batch shuffling are repeatable across
# Restart & Run All. (Bit-exact results may additionally require
# deterministic ops to be enabled in TensorFlow.)
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Load the dataset
X, y = load_dataset(DATASET_PATH, num_labels = 200)

# Fail early with a clear message instead of an opaque error further down.
if len(X) == 0:
    raise ValueError(f'No images could be loaded from {DATASET_PATH}')

# Encode the labels: folder name -> integer id -> one-hot vector
unique_labels = np.unique(y)
label_map = {label: i for i, label in enumerate(unique_labels)}
y_encoded = np.array([label_map[label] for label in y])
# num_classes is passed explicitly so the one-hot width always matches the
# size of the softmax layer below.
y_categorical = tf.keras.utils.to_categorical(y_encoded, num_classes = len(unique_labels))

# Split the dataset (80% train / 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y_categorical, test_size = 0.2, random_state = SEED)

# Normalize the images from [0, 255] to [0, 1]
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

# Create the model
# input_shape must match the img_size used by load_dataset (default 112x112)
# plus the 3 RGB channels.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Conv2D(32, (3, 3), activation = 'relu', input_shape = (112, 112, 3)))
model.add(tf.keras.layers.MaxPooling2D((2, 2)))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(128, activation = 'relu'))
# One output unit per label that was actually loaded.
model.add(tf.keras.layers.Dense(len(unique_labels), activation = 'softmax'))

# Compile the model
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs = 10, batch_size = 32)

# Evaluate the model on the held-out test split
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Accuracy: {accuracy * 100:.2f}%')

Labels: {'000021': 0, '000093': 1, '000099': 2, '000171': 3, '000278': 4, '000329': 5, '000375': 6, '000384': 7, '000397': 8, '000411': 9, '000454': 10, '000465': 11, '000536': 12, '000582': 13, '000650': 14, '000665': 15, '000672': 16, '000797': 17, '001005': 18, '001123': 19, '001149': 20, '001181': 21, '001268': 22, '001308': 23, '001333': 24, '001372': 25, '001521': 26, '001633': 27, '001652': 28, '001678': 29, '001696': 30, '002034': 31, '002035': 32, '002224': 33, '002250': 34, '002304': 35, '002350': 36, '002497': 37, '002761': 38, '002799': 39, '002825': 40, '002878': 41, '002949': 42, '002995': 43, '003084': 44, '003148': 45, '003189': 46, '003207': 47, '003249': 48, '003300': 49, '003356': 50, '003372': 51, '003446': 52, '003538': 53, '003563': 54, '003692': 55, '003719': 56, '003733': 57, '003861': 58, '003966': 59, '004039': 60, '004233': 61, '004284': 62, '004418': 63, '004540': 64, '004692': 65, '004891': 66, '004933': 67, '004961': 68, '005184': 69, '005233': 70, '005270