In [None]:
%pip install facenet-pytorch pandas tqdm scikit-learn opencv-python opencv-contrib-python

In [None]:
from facenet_pytorch import MTCNN
import torch
from torch.utils.data import DataLoader
from torchvision import datasets

import numpy as np
from tqdm import tqdm
import random
from torchvision import transforms
import os
import pandas as pd
import time

# Root folder for the images; label folders and the train/test splits are
# addressed relative to it throughout the notebook.
DATA_DIR = "data"
# Single seed shared by every random number generator seeded below.
RNG_SEED = 42
# Batch size handed to every DataLoader.
BATCH_SIZE = 8
# Vocabulary size for the bag-of-visual-words model: number of K-Means
# clusters and therefore the length of each BoVW histogram.
BOVW_CLUSTERS = 500

# Seed Python, PyTorch and NumPy so shuffling and model fitting are repeatable.
random.seed(RNG_SEED)
torch.manual_seed(RNG_SEED)
np.random.seed(RNG_SEED)

from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA


# Choose the compute backend in order of preference:
# Apple-silicon MPS, then the first CUDA GPU, then plain CPU.
if torch.backends.mps.is_available():
    _device_name = 'mps'
elif torch.cuda.is_available():
    _device_name = 'cuda:0'
else:
    _device_name = 'cpu'

device = torch.device(_device_name)
print('Running on device: {}'.format(device))

# Define Dataset

## Process raw images zip into a usable dataset

In [None]:
# Create the data root and one sub-folder per label.
#
# exist_ok=True makes this cell safe to re-run. The earlier try/except
# stopped at the first makedirs call when DATA_DIR already existed, so the
# label folders were never created in that case, and its bare `except`
# reported every failure (including permission errors) as "already exist".
for folder in (DATA_DIR, f"{DATA_DIR}/face", f"{DATA_DIR}/no_face"):
    os.makedirs(folder, exist_ok=True)

In [3]:
from IPython.display import Image

labels = pd.read_csv("labels.csv")
labeled_files = set(labels["filename"].values)

# File names sitting directly inside DATA_DIR; only the first os.walk entry is
# used, so sub-folders are not descended into. A missing folder yields no names.
_, _, top_level_names = next(os.walk(DATA_DIR), (DATA_DIR, [], []))

# Labeled rows whose image is still in the top level of DATA_DIR.
unmoved = labels[labels["filename"].isin(top_level_names)]
# Queue of images that have no label yet.
files = [name for name in top_level_names if name not in labeled_files]

### Labeling UI

In [15]:
import ipywidgets as widgets
from IPython.display import Image, display, clear_output

# One button per label, plus an Output widget that is rendered under the image.
face_bttn = widgets.Button(description="Face")
no_face_bttn = widgets.Button(description="No Face")
out = widgets.Output()

# NOTE(review): `count` and `curr_file` are not read anywhere in the visible
# notebook; they look like leftovers, confirm before removing.
count = [0]

curr_file = ''

def face_bttn_clicked(_):
    """Label the image at the front of the queue as 'face' and show the next one.

    The click event argument is ignored. When the queue of unlabeled files is
    empty the handler returns immediately, so a click after the last image
    cannot raise IndexError inside the widget callback.
    """
    if not files:
        return

    # Take the file off the queue and append its label as a new row.
    labels.loc[len(labels)] = {'filename': files.pop(0), 'label': 'face'}

    show_widgets()

face_bttn.on_click(face_bttn_clicked)

def no_face_clicked(_):
    """Label the image at the front of the queue as 'no face' and show the next one.

    The click event argument is ignored. When the queue of unlabeled files is
    empty the handler returns immediately, so a click after the last image
    cannot raise IndexError inside the widget callback.
    """
    if not files:
        return

    # Take the file off the queue and append its label as a new row.
    labels.loc[len(labels)] = {'filename': files.pop(0), 'label': 'no face'}

    show_widgets()

no_face_bttn.on_click(no_face_clicked)

def show_widgets():
    """Render the labeling UI for the image at the front of the queue.

    Clears the previous output, then shows the two label buttons, a running
    count of labeled rows and the next unlabeled image. When no unlabeled
    images remain, a short notice is shown instead of the buttons and image.
    """
    clear_output(wait=True)
    text = widgets.Text(f"Total labeled: {len(labels)}")

    if not files:
        # Queue exhausted: indexing files[0] here would raise IndexError.
        done = widgets.Label("No unlabeled images left.")
        display(widgets.VBox([text, done, out]))
        return

    buttons = widgets.HBox([face_bttn, no_face_bttn])

    # Resolve the image relative to DATA_DIR, like every other file access in
    # the notebook; a leading "/" would point at the filesystem root instead.
    image_path = os.path.join(DATA_DIR, files[0])
    image = widgets.Image(
        value=Image(filename=image_path).data,
        format="webp",
        width=300,
        height=300
    )

    display(widgets.VBox([buttons, text, image, out]))
    
    
# show_widgets()

In [None]:
def move_files(row):
    """Move one labeled image from DATA_DIR into its label sub-folder.

    Args:
        row: mapping with a "filename" entry and a "label" entry. Spaces in
            the label are replaced by underscores to form the folder name
            (e.g. "no face" -> "no_face").

    Raises:
        OSError: propagated from os.rename, e.g. when the source file is
            missing or the destination folder does not exist.
    """
    filename = row["filename"]
    label_dir = row["label"].replace(" ", "_")

    # Both ends of the move live under DATA_DIR; the source used to be a
    # hard-coded "data/" prefix, which broke as soon as DATA_DIR changed.
    source = os.path.join(DATA_DIR, filename)
    destination = os.path.join(DATA_DIR, label_dir, filename)
    os.rename(source, destination)

faces = unmoved[unmoved["label"] == 'face']
no_faces = unmoved[unmoved["label"] == 'no face']

# `unmoved` only lists rows whose file is still in the top level of DATA_DIR,
# so files moved on an earlier run are not attempted again. Each move is
# handled on its own: one failing file no longer prevents the rest from being
# moved, and the real OS error is reported instead of a blanket
# "already moved" message.
failed_moves = []
for subset in (faces, no_faces):
    for _, labeled_row in subset.iterrows():
        try:
            move_files(labeled_row)
        except OSError as err:
            failed_moves.append((labeled_row["filename"], err))

if failed_moves:
    for failed_name, failure in failed_moves:
        print(f"Could not move {failed_name}: {failure}")
else:
    print("Moved files to relevant folders")

## Define Loaders

In [4]:
# Every image is resized to the same square before use.
_TARGET_SIZE = (160, 160)

# Resize only: samples stay images.
transform = transforms.Compose([transforms.Resize(_TARGET_SIZE)])

# Same resize, followed by conversion to a tensor.
tensor_transform = transforms.Compose([
    transforms.Resize(_TARGET_SIZE),
    transforms.ToTensor(),
])


def collate_fn(batch):
    """Split a batch of (image, label) pairs into two parallel lists.

    Returns:
        (images, labels): a list of the images and a list of the labels, in
        batch order. Nothing is stacked, so each image keeps its own type.
    """
    batch_images, batch_labels = zip(*batch)
    return [*batch_images], [*batch_labels]

def _image_folder(split, image_transform):
    """ImageFolder dataset for one split ("train" or "test") under DATA_DIR."""
    return datasets.ImageFolder(f"{DATA_DIR}/{split}", transform=image_transform)


def _shuffled_loader(dataset):
    """Shuffling DataLoader that yields (list of images, list of labels) batches."""
    return DataLoader(dataset, collate_fn=collate_fn, batch_size=BATCH_SIZE, shuffle=True)


# "base" datasets apply the resize-only transform, "tensor" datasets also convert to tensors.
base_train = _image_folder("train", transform)
base_test = _image_folder("test", transform)
tensor_train = _image_folder("train", tensor_transform)
tensor_test = _image_folder("test", tensor_transform)

# NOTE(review): the test loaders shuffle too; labels travel with their images,
# so metrics are unaffected, but confirm shuffling is wanted for evaluation.
base_loader_train = _shuffled_loader(base_train)
base_loader_test = _shuffled_loader(base_test)
tensor_loader_train = _shuffled_loader(tensor_train)
tensor_loader_test = _shuffled_loader(tensor_test)

# Define MTCNN baseline
We use the default parameters for now.

In [6]:
# Detector settings gathered in one place so they are easy to tweak.
mtcnn_settings = dict(
    image_size=160,
    margin=0,
    min_face_size=20,
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    post_process=True,
    keep_all=True,
    device=device,
)
mtcnn = MTCNN(**mtcnn_settings)

## Testing MTCNN accuracy with manually labelled data

In [None]:
y_pred = []
y_true = []

# An image counts as class 0 when MTCNN returns at least one detection and as
# class 1 otherwise.
# NOTE(review): assumes class index 0 is the face class; confirm against
# base_test.class_to_idx.
for X, Y in tqdm(base_loader_test):
    for x in X:
        x_aligned, probs = mtcnn(x, return_prob=True)
        y_pred.append(1 if x_aligned is None else 0)
    y_true.extend(Y)

accuracy_score(y_true, y_pred)

## Training SVM Model


In [36]:
def preprocess_data(loader):
    """Flatten every image a loader yields into one feature row.

    Args:
        loader: iterable of (images, labels) batches; each image must be
            convertible with np.array.

    Returns:
        (X, y): NumPy array of the flattened images and NumPy array of the
        labels, both in iteration order.
    """
    flat_rows, targets = [], []
    for batch_images, batch_labels in tqdm(loader, desc="Flattening data"):
        for picture in batch_images:
            # Any image layout collapses to a single 1-D pixel vector.
            flat_rows.append(np.array(picture).ravel())
        targets += list(batch_labels)
    return np.array(flat_rows), np.array(targets)

In [None]:
X_train, y_train = preprocess_data(base_loader_train)
# The test matrix must come from the held-out split. It used to be built from
# base_loader_train as well, so every accuracy computed from X_test / y_test
# was measured on training images.
X_test, y_test = preprocess_data(base_loader_test)

In [None]:
# Standardise each pixel feature with statistics learned from the training
# split only, then apply the same scaling to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Reduce to 50 principal components; the projection is also fitted on the
# training split and reused unchanged for the test split.
pca = PCA(n_components=50)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

In [None]:
# Running total of the variance explained by the first k components.
explained_variance_ratio = np.array(pca.explained_variance_ratio_)
cumulative_variance = explained_variance_ratio.cumsum()

cumulative_variance

In [None]:
# Train SVM Model 
svm_model = svm.SVC(kernel="rbf",verbose=True)
svm_model.fit(X_train_pca, y_train)

# Make predictions
y_pred = svm_model.predict(X_test_pca)

# Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

## Training Logistic Regression

## Preprocess data

### Get SIFT features

In [8]:
import cv2

# Single SIFT extractor, created with OpenCV's default settings and shared by
# the descriptor-extraction helper defined below.
sift = cv2.SIFT_create()

In [9]:
def tensor_to_opencv_img(tensor_img):
    """Convert one channels-first RGB image tensor to a grayscale uint8 array.

    Args:
        tensor_img: image tensor laid out as (3, H, W). Values are multiplied
            by 255 and clipped to 0..255, so inputs are presumably in [0, 1].

    Returns:
        The (H, W) grayscale image produced by cv2.cvtColor, ready for SIFT.
    """
    channels_first = tensor_img.cpu().numpy()

    # OpenCV expects channels last: (3, H, W) -> (H, W, 3).
    channels_last = channels_first.transpose(1, 2, 0)

    # Scale to the 8-bit range; astype truncates any fractional part.
    as_bytes = np.clip(channels_last * 255.0, 0, 255).astype(np.uint8)

    return cv2.cvtColor(as_bytes, cv2.COLOR_RGB2GRAY)

def extract_descriptors_from_dataloader(dataloader):
    """Run SIFT on every image a DataLoader yields.

    Args:
        dataloader: iterable of (images, labels) batches, where each image is
            accepted by tensor_to_opencv_img.

    Returns:
        (descriptors_per_image, labels_list): one descriptor array per image,
        with an empty (0, 128) float32 array standing in when SIFT finds
        nothing, and the matching labels in the same order.
    """
    descriptors_per_image, labels_list = [], []

    for images, labels in tqdm(dataloader):
        for position, image in enumerate(images):
            gray_img = tensor_to_opencv_img(image)
            _, found = sift.detectAndCompute(gray_img, None)

            # Keep one entry per image so descriptors and labels stay aligned.
            if found is None:
                found = np.zeros((0, 128), dtype=np.float32)
            descriptors_per_image.append(found)

            labels_list.append(labels[position])

    return descriptors_per_image, labels_list

def build_bovw_histogram(descriptors, kmeans_model):
    """Count how many of one image's descriptors fall into each visual word.

    Args:
        descriptors: descriptor array for ONE image, or None.
        kmeans_model: fitted model whose predict() maps each descriptor to a
            cluster index.

    Returns:
        float32 histogram of length BOVW_CLUSTERS; all zeros when the image
        has no descriptors.
    """
    histogram = np.zeros((BOVW_CLUSTERS), dtype=np.float32)

    if descriptors is not None and len(descriptors) != 0:
        # np.add.at applies the increment once per occurrence, so repeated
        # cluster indices accumulate just like a per-descriptor loop would.
        np.add.at(histogram, kmeans_model.predict(descriptors), 1)

    return histogram

In [None]:
print("Collecting SIFT descriptors from train_loader...")
all_descriptors, all_labels = extract_descriptors_from_dataloader(tensor_loader_train)
print(f"Collected descriptors from {len(all_descriptors)} training images.")

# K-Means needs a single matrix with one descriptor per row; images that
# produced no descriptors are left out of the stack.
desc_nonempty = [image_desc for image_desc in all_descriptors if image_desc.shape[0] > 0]
all_train_desc = (
    np.vstack(desc_nonempty)
    if len(desc_nonempty) > 0
    else np.zeros((0, 128), dtype=np.float32)
)

In [None]:
from sklearn.cluster import KMeans

if all_train_desc.shape[0] == 0:
    # Raise instead of calling exit(): in a notebook exit() acts on the
    # kernel session rather than just stopping this cell, whereas an exception
    # halts the run here and leaves the state available for inspection.
    raise ValueError("No descriptors found in training set! Can't build K-Means.")

# Learn the visual vocabulary: each cluster centre is one visual word.
print(f"Running K-Means on {all_train_desc.shape[0]} descriptors with {BOVW_CLUSTERS} clusters...")
kmeans = KMeans(n_clusters=BOVW_CLUSTERS, random_state=RNG_SEED, verbose=1)
kmeans.fit(all_train_desc)
print("K-Means done.")

In [None]:
from sklearn.preprocessing import normalize

print("Building BoVW histograms for training set...")
# One histogram per training image, in the same order as all_labels.
train_histograms = np.array(
    [build_bovw_histogram(image_desc, kmeans) for image_desc in tqdm(all_descriptors)],
    dtype=np.float32,
)
train_labels = np.array(list(all_labels), dtype=np.int64)

# L2-normalise each row so images with many keypoints do not dominate.
train_histograms = normalize(train_histograms, norm='l2', axis=1)

print("Train BoVW shape:", train_histograms.shape)  # (num_train_images, BOVW_CLUSTERS)


### Define the Logistic Regression Model

In [None]:
from sklearn.linear_model import LogisticRegression

print("Training Logistic Regression on BoVW histograms...")
# The iteration cap is set explicitly to 1000.
clf = LogisticRegression(max_iter=1000).fit(train_histograms, train_labels)
print("Logistic Regression training complete.")


In [None]:
print("Extracting SIFT descriptors from test_loader...")
test_descriptors_list, test_labels_list = extract_descriptors_from_dataloader(tensor_loader_test)

print("Building BoVW histograms for the test set...")
# Reuse the vocabulary learned on the training set; nothing is refitted here.
raw_test_histograms = np.array(
    [build_bovw_histogram(image_desc, kmeans) for image_desc in tqdm(test_descriptors_list)],
    dtype=np.float32,
)
test_histograms = normalize(raw_test_histograms, norm='l2', axis=1)
test_labels = np.array(test_labels_list, dtype=np.int64)

print("Predicting on test histograms...")
test_preds = clf.predict(test_histograms)

In [None]:
# Accuracy of the BoVW + logistic-regression classifier on the test
# histograms, printed as a percentage.
accuracy = accuracy_score(test_labels, test_preds)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

## Training Gaussian Mixture model


In [None]:
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.datasets import load_digits

In [None]:
# Flatten every training image into one pixel-feature row for the GMMs.
#
# The loop variables are deliberately not named `labels`: at notebook top
# level that would rebind the labels DataFrame loaded from labels.csv, and
# the labeling cells would break if run afterwards.
X, y = [], []
for batch_images, batch_labels in tqdm(tensor_loader_train):
    # (C, H, W) tensor -> (H, W, C) array -> 1-D row.
    X.extend(img.numpy().transpose(1, 2, 0).flatten() for img in batch_images)
    y.extend(batch_labels)

In [64]:
# Standardise the flattened pixel features; this rebinds `scaler` to a model
# fitted on the GMM training matrix X.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)
y = np.asarray(y, dtype=int)

# One mixture model is trained per class; the lists are filled in label order.
n_classes = np.unique(y).size
gmm_models, gmm_models_sift = [], []

In [None]:
# Train one Gaussian mixture per class on each feature set. Models are
# appended in label order, so list position doubles as the class index.
# Re-running this cell appends a second set of models to the same lists.
print("Training with SIFT")
for label in tqdm(range(n_classes)):
    in_class = train_labels == label
    gmm = GaussianMixture(
        n_components=n_classes, covariance_type='full', random_state=42
    ).fit(train_histograms[in_class])
    gmm_models_sift.append(gmm)

# NOTE(review): a 'full' covariance grows quadratically with the number of
# features, and X_scaled rows are whole flattened images; confirm this fits
# in memory.
print("Training without SIFT")
for label in tqdm(range(n_classes)):
    in_class = y == label
    gmm = GaussianMixture(
        n_components=n_classes, covariance_type='full', random_state=42
    ).fit(X_scaled[in_class])
    gmm_models.append(gmm)



# gmm = GaussianMixture(n_components=len(idx_to_class), random_state=42)
# gmm.fit(X_train)

# # Step 5: Predict Labels
# y_pred = gmm.predict(X_test)

In [None]:
# Evaluate the per-class pixel GMMs on the held-out test split.
#
# Each sample is scored under every class model and assigned to the class
# whose model gives the highest log-likelihood. Scoring a single model and
# taking argmax over its one-element result always yields 0, so all class
# models have to take part. The test features are also built and scaled the
# same way as X_scaled, the matrix the models were trained on.
gmm_test_rows, gmm_test_labels = [], []
for batch_images, batch_labels in tqdm(tensor_loader_test):
    gmm_test_rows.extend(img.numpy().transpose(1, 2, 0).flatten() for img in batch_images)
    gmm_test_labels.extend(batch_labels)

X_gmm_test = scaler.transform(gmm_test_rows)
y_gmm_test = np.asarray(gmm_test_labels, dtype=int)

# Column c holds every sample's log-likelihood under the class-c mixture.
# Only the most recent n_classes models are used, in case the training cell
# was run more than once and appended duplicates.
class_log_likelihoods = np.column_stack(
    [class_model.score_samples(X_gmm_test) for class_model in gmm_models[-n_classes:]]
)
y_pred = class_log_likelihoods.argmax(axis=1)

accuracy = accuracy_score(y_gmm_test, y_pred)

print(accuracy)