<a href="https://colab.research.google.com/github/aramirezfr/Facial-Recognition-with-Deep-Learning-Neural-Networks/blob/master/Facial_Recognition_using_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Preparation:

In [3]:
#importing libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

import matplotlib.image as mpimg
import seaborn as sns
import math

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, regularizers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Activation
from tensorflow.keras.utils import load_img, img_to_array
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.applications.resnet50 import preprocess_input
from keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_curve, auc
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau

import warnings
# Ignore every warning raised through the `warnings` module from this point on.
# NOTE(review): this blanket filter also hides deprecation notices from the
# libraries imported above; consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

-----------

## Face Dataset:

I will train the model on two datasets. The face dataset provides the positive class (label 1).
The object dataset provides the negative class (label 0).

In [1]:
#Downloading the data file from Kaggle
#!kaggle datasets download -d jessicali9530/lfw-dataset

Dataset URL: https://www.kaggle.com/datasets/jessicali9530/lfw-dataset
License(s): other
lfw-dataset.zip: Skipping, found more recently modified local copy (use --force to force download)


In [11]:
#Unzip the data folder
#!unzip lfw-dataset.zip -d data

In [12]:
def collect_image_paths(root_dir, suffix):
    """Return the paths of all files under ``root_dir`` whose name ends with ``suffix``.

    The directory tree is walked recursively with ``os.walk``. The result is
    sorted because ``os.walk`` lists entries in an arbitrary, filesystem-dependent
    order; a stable order keeps any later seeded split of these paths
    reproducible from one machine to the next.

    Args:
        root_dir: Directory to start walking from. A directory that does not
            exist simply produces an empty list (``os.walk`` ignores listing
            errors by default).
        suffix: Case-sensitive file-name ending to keep, e.g. ``'.jpg'``.

    Returns:
        A sorted list of path strings, each built with ``os.path.join``.
    """
    return sorted(
        os.path.join(dirpath, file_name)
        for dirpath, _dirnames, file_names in os.walk(root_dir)
        for file_name in file_names
        if file_name.endswith(suffix)
    )


#define the root directory you want to start from
face_root_dir = 'data/lfw-deepfunneled/lfw-deepfunneled/'

#sorted list of every .jpg file path found under the face root directory
face_jpg_files = collect_image_paths(face_root_dir, '.jpg')

#preview a few of the collected paths with: face_jpg_files[:5]

In [5]:
# Sanity check: how many face image paths the directory walk collected
len(face_jpg_files)

13233

-------------

## Objects Dataset:

In [6]:
# Downloading the data file from Kaggle of the objects
#!kaggle datasets download -d akash2sharma/tiny-imagenet

Dataset URL: https://www.kaggle.com/datasets/akash2sharma/tiny-imagenet
License(s): unknown
tiny-imagenet.zip: Skipping, found more recently modified local copy (use --force to force download)


In [13]:
#unzip the data folder
#!unzip tiny-imagenet.zip -d data

In [29]:
object_root_dir = 'data/tiny-imagenet-200/'

#walk the tiny objects directory tree and keep every file whose name ends in
#.JPEG (case-sensitive). os.walk lists entries in an arbitrary,
#filesystem-dependent order, so the paths are sorted to keep any later seeded
#split of this list reproducible across machines.
object_jpg_files = sorted(
    os.path.join(dirpath, file)
    for dirpath, dirnames, files in os.walk(object_root_dir)
    for file in files
    if file.endswith('.JPEG')
)

#preview a few of the collected paths with: object_jpg_files[:5]

In [28]:
# Sanity check: how many object image paths the directory walk collected
len(object_jpg_files)

120000

----------------------------

In [24]:
# Inputs: face_jpg_files and object_jpg_files are the path lists built by the
# directory walks in the earlier cells.

# Target size (in pixels) every image is resized to during preprocessing
img_height, img_width = 128, 128

# Label convention: 1 = face (positive class), 0 = object (negative class)
face_labels = [1 for _ in face_jpg_files]
object_labels = [0 for _ in object_jpg_files]

# Concatenate paths and labels in the same order: all faces first, then all objects
all_files = [*face_jpg_files, *object_jpg_files]
all_labels = [*face_labels, *object_labels]

# Pair every path with its label as a tf.data.Dataset of (path, label) elements
files_ds = tf.data.Dataset.from_tensor_slices(all_files)
labels_ds = tf.data.Dataset.from_tensor_slices(all_labels)
dataset = tf.data.Dataset.zip((files_ds, labels_ds))

# Per-element loader used with Dataset.map
def load_and_preprocess_image(path, label):
    """Read one image file and turn it into a fixed-size, rescaled tensor.

    Args:
        path: Path of the image file to read.
        label: Label paired with the image; returned unchanged.

    Returns:
        A tuple ``(image, label)`` where ``image`` has been decoded as a
        3-channel JPEG, resized to ``[img_height, img_width]`` (module-level
        settings) and divided by 255.0.
    """
    raw_bytes = tf.io.read_file(path)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(pixels, [img_height, img_width])
    # Scale 0-255 pixel values down to the [0, 1] range
    return resized / 255.0, label
# Define the batch size
batch_size = 32

# Shuffle the (path, label) pairs BEFORE decoding. all_files holds every face
# path first and every object path second, so a small shuffle buffer would emit
# long single-class runs. A buffer that covers the whole dataset gives a uniform
# shuffle, and buffering path strings is far cheaper than buffering decoded
# images. max(..., 1) keeps the buffer size valid when no files were found.
dataset = dataset.shuffle(buffer_size=max(len(all_files), 1))

# Load, resize and rescale each image in parallel
dataset = dataset.map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)

# Batch and prefetch so input preparation overlaps with model execution
dataset = dataset.batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)

In [None]:
from sklearn.model_selection import train_test_split
import numpy as np

# Convert the path and label lists to NumPy arrays before splitting
all_files, all_labels = np.array(all_files), np.array(all_labels)

# Stage 1: hold out 30% of the data, stratified by label
X_train, X_temp, y_train, y_temp = train_test_split(
    all_files,
    all_labels,
    test_size=0.3,
    stratify=all_labels,
    random_state=42,
)

# Stage 2: cut the 30% hold-out in half, again stratified by label
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    stratify=y_temp,
    random_state=42,
)

# Resulting proportions of the total data:
#   X_train: 70%
#   X_val:   15%
#   X_test:  15%

In [None]:
# Report how many image paths ended up in each split
print(
    f"Training set: {len(X_train)} images",
    f"Validation set: {len(X_val)} images",
    f"Test set: {len(X_test)} images",
    sep="\n",
)

In [None]:
def build_image_dataset(paths, labels, shuffle=False):
    """Build a batched, prefetched tf.data pipeline of (image, label) pairs.

    Args:
        paths: Sequence of image file paths.
        labels: Sequence of labels aligned with ``paths``.
        shuffle: When True, shuffle the (path, label) pairs before the images
            are decoded, using a buffer that covers the whole split. Shuffling
            paths instead of decoded images keeps the buffer small in memory
            while still giving a uniform shuffle.

    Returns:
        A ``tf.data.Dataset`` that maps ``load_and_preprocess_image`` over the
        pairs, groups them into batches of ``batch_size`` and prefetches.
    """
    ds = tf.data.Dataset.from_tensor_slices((paths, labels))
    if shuffle:
        # max(..., 1) keeps the buffer size valid for an empty split
        ds = ds.shuffle(buffer_size=max(len(paths), 1))
    ds = ds.map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
    return ds.batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)


# Only the training split is shuffled; validation and test keep a fixed order
train_dataset = build_image_dataset(X_train, y_train, shuffle=True)
val_dataset = build_image_dataset(X_val, y_val)
test_dataset = build_image_dataset(X_test, y_test)


In [26]:
import shutil
# Recursively delete the 'tiny-imagenet-200' folder nested inside the object
# root directory. ignore_errors=True means failures (for example, the folder
# already being gone) are silently ignored.
# NOTE(review): presumably this folder is a duplicate copy left by unzipping -- confirm.
# NOTE(review): this cell sits after the cells that walk the object directory,
# so under Restart & Run All the deletion happens only after the file list has
# been built; the recorded execution counts suggest it was originally run
# earlier -- confirm the intended order.
shutil.rmtree('data/tiny-imagenet-200/tiny-imagenet-200', ignore_errors=True)

In [None]:
import os
import glob
import numpy as np
from sklearn.model_selection import train_test_split

# NOTE(review): generic template with placeholder paths. If executed, it
# rebinds face_labels, all_labels and the X_*/y_* split variables that earlier
# cells in this notebook also assign.

# Placeholder dataset locations
face_dir = 'path/to/face/dataset/'
object_dirs = ['path/to/object/dataset1/', 'path/to/object/dataset2/']

# Face images (*.jpg directly inside face_dir) form the positive class: label 1
face_images = glob.glob(os.path.join(face_dir, '*.jpg'))
face_labels = [1 for _ in face_images]

# Object images (*.jpg directly inside each object dir) form the negative class: label 0
object_images = []
for obj_dir in object_dirs:
    object_images += glob.glob(os.path.join(obj_dir, '*.jpg'))
object_labels = [0 for _ in object_images]

# Merge both classes and convert to NumPy arrays
all_images = np.array(face_images + object_images)
all_labels = np.array(face_labels + object_labels)

# Stratified split: 70% train, then the remaining 30% halved into validation and test
X_train, X_temp, y_train, y_temp = train_test_split(
    all_images, all_labels, test_size=0.3, random_state=42, stratify=all_labels
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

print(
    f"Training set: {len(X_train)} images",
    f"Validation set: {len(X_val)} images",
    f"Test set: {len(X_test)} images",
    sep="\n",
)
Key Points:
Labels: 1 for face images and 0 for object images.
Stratification: Using stratify=all_labels ensures that each split keeps the same proportion of face and object images as the full dataset, which matters most when the classes are imbalanced, as they are here (13,233 face images vs. 120,000 object images).
Data Loading: Ensure you have a function to load the images when feeding them to the model (e.g., using TensorFlow's tf.data API or PyTorch's DataLoader).
Training the Model:
When training your model, ensure it is set up for binary classification. Here's a quick example using Keras:
import tensorflow as tf

# Define a simple CNN for binary classification (face = 1, object = 0).
# The input shape is declared with an explicit Input object rather than the
# `input_shape=` layer argument, which newer Keras releases warn against for
# Sequential models. img_height and img_width come from the data preparation cell.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(img_height, img_width, 3)),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')  # Single sigmoid unit for binary classification
])

# Compile the model: binary cross-entropy matches the single sigmoid output
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train the model
# Ensure you have a data pipeline to load X_train and y_train
# model.fit(train_dataset, validation_data=val_dataset, epochs=10)

# Evaluate the model
# model.evaluate(test_dataset)
Ensure your data pipeline loads each image from its path and feeds the decoded image, together with its label, to the model during training.