In [1]:
%pip install facenet-pytorch pandas tqdm scikit-learn opencv-python opencv-contrib-python

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting opencv-python
  Downloading opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting opencv-contrib-python
  Downloading opencv_contrib_python-4.11.0.86-cp37-abi3-win_amd64.whl.metadata (20 kB)
Downloading opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl (39.5 MB)
   ---------------------------------------- 0.0/39.5 MB ? eta -:--:--
   -- ------------------------------------- 2.1/39.5 MB 11.8 MB/s eta 0:00:04
   ---- ----------------------------------- 4.5/39.5 MB 11.2 MB/s eta 0:00:04
   ------ --------------------------------- 6.8/39.5 MB 11.3 MB/s eta 0:00:03
   -------- ------------------------------- 8.4/39.5 MB 10.4 MB/s eta 0:00:03
   ---------- ----------------------------- 10.7/39.5 MB 10.7 MB/s eta 0:00:03
   ------------- -------------------------- 13.1/39.5 MB 10.8 MB/s eta 0:00:03
   --------------- ------------------------ 15.2/39.5 MB 10.7 MB/s eta 0:00:03
   -----------

In [None]:
from facenet_pytorch import MTCNN, InceptionResnetV1
import torch
from torch.utils.data import DataLoader
from torchvision import datasets

import numpy as np
from tqdm import tqdm
import random
from torchvision import transforms
import os
import pandas as pd
import time

DATA_DIR = "data"
RNG_SEED = 42
BATCH_SIZE = 8

random.seed(RNG_SEED)
torch.manual_seed(RNG_SEED)
np.random.seed(RNG_SEED)

from sklearn import svm
from sklearn.metrics import classification_report, accuracy_score


# Check M1 support
if torch.backends.mps.is_available():
    device = torch.device('mps')
elif torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print('Running on device: {}'.format(device))

Running on device: cpu


# Define Dataset

## Proccess raw images zip into a usable dataset

In [25]:
try:
    # create testing folder
    os.makedirs(DATA_DIR)

    # create label folders
    os.makedirs(f"{DATA_DIR}/face")
    os.makedirs(f"{DATA_DIR}/no_face")
except:
    print("Folders already exist.")

Folders already exist.


In [14]:
from IPython.display import Image

labels = pd.read_csv("labels.csv")

# Get all image names

files = []
labeled_files = set(labels["filename"].values)
for (dirpath, dirnames, filenames) in os.walk(DATA_DIR):
    files.extend(filenames)
    break

unmoved = labels[labels["filename"].isin(files)]
files = [file for file in files if file not in labeled_files]

### Labeling UI

In [15]:
import ipywidgets as widgets
from IPython.display import Image, display, clear_output

face_bttn = widgets.Button(description="Face")
no_face_bttn = widgets.Button(description="No Face")
out = widgets.Output()

count = [0]

curr_file = ''

def face_bttn_clicked(_):
    d = {'filename': files[0],
                   'label': 'face'}
    files.pop(0)
    labels.loc[len(labels)] = d

    show_widgets()
        
face_bttn.on_click(face_bttn_clicked)

def no_face_clicked(_):
    d = {'filename': files[0],
                   'label': 'no face'}
    files.pop(0)
    labels.loc[len(labels)] = d

    show_widgets()

no_face_bttn.on_click(no_face_clicked)

def show_widgets():
    clear_output(wait=True)
    buttons = widgets.HBox([face_bttn, no_face_bttn])
    
    image = widgets.Image(
        value=Image(filename=f"/{DATA_DIR}/{files[0]}").data,
        format="webp",
        width=300,
        height=300
    )
    
    text = widgets.Text(f"Total labeled: {len(labels)}")
    
    display(widgets.VBox([buttons, text, image, out]))
    
    
# show_widgets()

In [16]:
def move_files(row):
    filename = row["filename"]
    label = row["label"].replace(" ", "_")
    
    os.rename(f"data/{filename}", f"{data_dir}/{label}/{filename}")

faces = unmoved[unmoved["label"] == 'face']
no_faces = unmoved[unmoved["label"] == 'no face']

try:
    faces.apply(move_files, axis=1)
    no_faces.apply(move_files, axis=1)
    
    print("Moved files to relevant folders")
except:
    print("Images are already moved")

Moved files to relevant folders


## Define Loaders

In [36]:
transform = transforms.Compose([
    transforms.Resize((128, 128)),
])

tensor_transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor()
])


def collate_fn(batch):
    images, labels = zip(*batch)
    return list(images), list(labels)

base_train = datasets.ImageFolder(f"{DATA_DIR}/train", transform=transform)
base_test = datasets.ImageFolder(f"{DATA_DIR}/test", transform=transform)
tensor_train = datasets.ImageFolder(f"{DATA_DIR}/train", transform=tensor_transform)
tensor_test = datasets.ImageFolder(f"{DATA_DIR}/test", transform=tensor_transform)

base_loader_train = DataLoader(base_train, collate_fn=collate_fn, batch_size=BATCH_SIZE, shuffle=True)
base_loader_test = DataLoader(base_test, collate_fn=collate_fn, batch_size=BATCH_SIZE, shuffle=True)
tensor_loader_train = DataLoader(tensor_train, collate_fn=collate_fn, batch_size=BATCH_SIZE, shuffle=True)
tensor_loader_test = DataLoader(tensor_test, collate_fn=collate_fn, batch_size=BATCH_SIZE, shuffle=True,)

# Define MTCNN baseline
We use the default params for now

In [37]:
mtcnn = MTCNN(
    image_size=160, margin=0, min_face_size=20,
    thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True,
    keep_all=True, device=device
)

## Testing MTCNN accuracy with manually labelled data

In [38]:
y_pred = []
y_true = []

for X, Y in tqdm(base_loader_test):
    for i in range(0, len(X)):
        x = X[i]
        y = Y[i]

        x_aligned, probs = mtcnn(x, return_prob=True)

        y_p = 0 if x_aligned is not None else 1
        
        y_pred.append(y_p)
    y_true.extend(Y)               

accuracy_score(y_true, y_pred)

100%|██████████| 1251/1251 [10:07<00:00,  2.06it/s] 


0.8097190280971903

## Training SVM Model


In [36]:
def preprocess_data(loader):
    X = []
    y = []
    for images, labels in tqdm(loader, desc="Flattening data"):
        # Convert images to numpy arrays and flatten
        images_flat = [np.array(img).flatten() for img in images]
        X.extend(images_flat)
        y.extend(labels)
    return np.array(X), np.array(y)

In [37]:
X_train, y_train = preprocess_data(base_loader_train)
X_test, y_test = preprocess_data(base_loader_train)

Flattening data: 100%|██████████| 8223/8223 [06:20<00:00, 21.60it/s]
Flattening data: 100%|██████████| 2056/2056 [01:37<00:00, 21.16it/s]


In [None]:
from sklearn.preprocessing import StandardScaler

# Initialize the scaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train SVM Model 
svm_model = svm.SVC(kernel="linear")
svm_model.fit(X_train_scaled, y_train)

# Make predictions
y_pred = svm_model.predict(X_test_scaled)

# Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")


## Training Logistic Regression

### Preprocess data

In [41]:
# BATCH_SIZE = 8
# IMG_SIZE = 256
# NUM_CLASSES = 2
BOVW_CLUSTERS = 500

# # We'll do basic transforms: resize + tensor + normalization.
# img_transforms = transforms.Compose([
#     transforms.Resize((IMG_SIZE, IMG_SIZE)),
#     transforms.ToTensor(),
# ])

# # Get datasets & loaders
# base_train = datasets.ImageFolder(f"{DATA_DIR}", transform=img_transforms)
# train_dataset, test_dataset = torch.utils.data.random_split(base_train, [TRAIN_TEST_SPLIT, 1 - TRAIN_TEST_SPLIT])
# train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,  num_workers=2)
# test_loader  = DataLoader(test_dataset,  batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

### Get SIFT features

In [39]:
import cv2

# Create SIFT extractor
sift = cv2.SIFT_create()

In [51]:
def tensor_to_opencv_img(tensor_img):
    """
    Convert a single image from a PyTorch tensor (C,H,W) to a NumPy array (H,W) or (H,W,3).
    We'll convert to grayscale for SIFT.
    """
    # tensor_img shape: (3, H, W) if color
    # Move to CPU, convert to numpy
    img_np = tensor_img.cpu().numpy()

    # img_np shape is (3, H, W). We can convert to (H, W, 3) by transposing
    img_np = np.transpose(img_np, (1, 2, 0))  # (H, W, 3)

    # Convert to uint8 [0..255] if necessary
    img_np = (img_np * 255.0).clip(0, 255).astype(np.uint8)

    # Convert to grayscale
    gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
    return gray

def extract_descriptors_from_dataloader(dataloader):
    """
    Loop through an entire DataLoader, extract SIFT descriptors for each image.
    """
    descriptors_per_image = []
    labels_list = []

    # Assume we already have train_loader that yields (images, labels)
    for images, labels in tqdm(dataloader):
        # images shape: (batch_size, 3, H, W)
        # labels shape: (batch_size,)
        
        batch_size = len(images)
        for i in range(batch_size):
            # Convert one image to grayscale OpenCV format
            gray_img = tensor_to_opencv_img(images[i])
            # Extract SIFT descriptors
            kp, desc = sift.detectAndCompute(gray_img, None)
            if desc is not None:
                descriptors_per_image.append(desc)
            else:
                # Some images might have no descriptors
                descriptors_per_image.append(np.zeros((0,128), dtype=np.float32))

            # We also keep the label so we can match it up later
            labels_list.append(labels[i])

    return descriptors_per_image, labels_list

def build_bovw_histogram(descriptors, kmeans_model):
    """
    Given SIFT descriptors (num_keypoints,128) for ONE image,
    assign each descriptor to the nearest cluster and build a histogram of size BOVW_CLUSTERS.
    """
    hist = np.zeros((BOVW_CLUSTERS), dtype=np.float32)
    if descriptors is None or len(descriptors) == 0:
        return hist  # no keypoints => zero histogram

    words = kmeans_model.predict(descriptors)
    for w in words:
        hist[w] += 1

    return hist

In [52]:
print("Collecting SIFT descriptors from train_loader...")
all_descriptors, all_labels = extract_descriptors_from_dataloader(tensor_loader_train)
print(f"Collected descriptors from {len(all_descriptors)} training images.")

# Stack all descriptors into one large array for K-Means (excluding empty ones)
desc_nonempty = [d for d in all_descriptors if d.shape[0] > 0]
if len(desc_nonempty) > 0:
    all_train_desc = np.vstack(desc_nonempty)
else:
    all_train_desc = np.zeros((0, 128), dtype=np.float32)

Collecting SIFT descriptors from train_loader...


100%|██████████| 9028/9028 [26:24<00:00,  5.70it/s]


Collected descriptors from 72219 training images.


In [53]:
from sklearn.cluster import KMeans

if all_train_desc.shape[0] == 0:
    print("No descriptors found in training set! Can't build K-Means.")
    exit()

print(f"Running K-Means on {all_train_desc.shape[0]} descriptors with {BOVW_CLUSTERS} clusters...")
kmeans = KMeans(n_clusters=BOVW_CLUSTERS, random_state=RNG_SEED, verbose=1)
kmeans.fit(all_train_desc)
print("K-Means done.")

Running K-Means on 2736701 descriptors with 500 clusters...
Initialization complete
Iteration 0, inertia 303670099968.0.
Iteration 1, inertia 220359655424.0.
Iteration 2, inertia 216534941696.0.
Iteration 3, inertia 214825074688.0.
Iteration 4, inertia 213802450944.0.
Iteration 5, inertia 213118320640.0.
Iteration 6, inertia 212629569536.0.
Iteration 7, inertia 212261535744.0.
Iteration 8, inertia 211970097152.0.
Iteration 9, inertia 211737919488.0.
Iteration 10, inertia 211544276992.0.
Iteration 11, inertia 211383533568.0.
Iteration 12, inertia 211243696128.0.
Iteration 13, inertia 211125272576.0.
Iteration 14, inertia 211022462976.0.
Iteration 15, inertia 210932908032.0.
Iteration 16, inertia 210853560320.0.
Iteration 17, inertia 210782568448.0.
Iteration 18, inertia 210719309824.0.
Iteration 19, inertia 210663849984.0.
Iteration 20, inertia 210610913280.0.
Iteration 21, inertia 210561892352.0.
Iteration 22, inertia 210514886656.0.
Iteration 23, inertia 210471206912.0.
Iteration 24, 

In [54]:
from sklearn.preprocessing import normalize

train_histograms = []
train_labels = []

idx = 0
print("Building BoVW histograms for training set...")
for desc in tqdm(all_descriptors):
    hist = build_bovw_histogram(desc, kmeans)
    train_histograms.append(hist)
    train_labels.append(all_labels[idx])
    idx += 1

train_histograms = np.array(train_histograms, dtype=np.float32)
train_labels = np.array(train_labels, dtype=np.int64)

# (Optional) Normalize histograms
train_histograms = normalize(train_histograms, norm='l2', axis=1)

print("Train BoVW shape:", train_histograms.shape)  # (num_train_images, NUM_CLUSTERS)


Building BoVW histograms for training set...


100%|██████████| 72219/72219 [00:25<00:00, 2885.69it/s]


Train BoVW shape: (72219, 500)


### Define the Logistic Regression Model

In [55]:
from sklearn.linear_model import LogisticRegression

print("Training Logistic Regression on BoVW histograms...")
clf = LogisticRegression(max_iter=1000)
clf.fit(train_histograms, train_labels)
print("Logistic Regression training complete.")


Training Logistic Regression on BoVW histograms...
Logistic Regression training complete.


In [56]:
print("Extracting SIFT descriptors from test_loader...")
test_descriptors_list, test_labels_list = extract_descriptors_from_dataloader(tensor_loader_test)

print("Building BoVW histograms for the test set...")
test_histograms = []
for desc in tqdm(test_descriptors_list):
    hist = build_bovw_histogram(desc, kmeans)
    test_histograms.append(hist)

test_histograms = np.array(test_histograms, dtype=np.float32)
test_histograms = normalize(test_histograms, norm='l2', axis=1)
test_labels = np.array(test_labels_list, dtype=np.int64)

print("Predicting on test histograms...")
test_preds = clf.predict(test_histograms)

Extracting SIFT descriptors from test_loader...


100%|██████████| 1251/1251 [03:23<00:00,  6.16it/s]


Building BoVW histograms for the test set...


100%|██████████| 10001/10001 [00:03<00:00, 3058.42it/s]

Predicting on test histograms...





In [57]:
accuracy = accuracy_score(test_labels, test_preds)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

Test Accuracy: 60.41%


## Training Gaussian Mixture model


In [58]:
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.datasets import load_digits
from sklearn.preprocessing import StandardScaler

In [62]:
# digits = load_digits()
X, y = [], []
for images, labels in tqdm(tensor_loader_train):
    # Flatten images to shape
    images_flat = [img.numpy().transpose(1, 2, 0).flatten() for img in images]
    X.extend(images_flat)
    y.extend(labels)

100%|██████████| 9028/9028 [25:24<00:00,  5.92it/s]


In [64]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
y = np.asarray(y, dtype=int)

# X_train, X_test, y_train, y_test = train_test_split(train_histograms, train_labels, test_size=0.3, random_state=42)

n_classes = len(np.unique(y))
gmm_models = []
gmm_models_sift = []

In [67]:
print("Training with SIFT")
for label in tqdm(range(n_classes)):
    X_class = train_histograms[train_labels == label]
    
    gmm = GaussianMixture(n_components=n_classes, covariance_type='full', random_state=42)
    gmm.fit(X_class)
    gmm_models_sift.append(gmm)

print("Training without SIFT")
for label in tqdm(range(n_classes)):
    X_class = X_scaled[y == label]
    
    gmm = GaussianMixture(n_components=n_classes, covariance_type='full', random_state=42)
    gmm.fit(X_class)
    gmm_models.append(gmm)



# gmm = GaussianMixture(n_components=len(idx_to_class), random_state=42)
# gmm.fit(X_train)

# # Step 5: Predict Labels
# y_pred = gmm.predict(X_test)

  0%|          | 0/2 [01:17<?, ?it/s]


KeyboardInterrupt: 

In [None]:
y_pred = []

for sample in tqdm(X_test):
    likelihoods = gmm.score_samples(sample.reshape(1, -1))
    
    y_pred.append(np.argmax(likelihoods))
    
    
accuracy = accuracy_score(y_test, y_pred)

accuracy

print(accuracy)

# Labeling all images using MTCNN



In [None]:
from PIL import Image

result = []

def chunks(lst, batch_size):
    for i in range(0, len(lst), batch_size):
        yield lst[i:i + batch_size]
        
data_dir = "data/"
batch_size = 10
all_files = [
    os.path.join(root, f)
    for root, _, files in os.walk(data_dir)
    for f in files if f.endswith("webp")
]
batches = list(chunks(all_files, batch_size))

for batch in tqdm(batches):
    for i in range(len(batch)):
        if batch[i].endswith("webp"):
            image_path = os.path.join(data_dir, batch[i])
            try:
                img = Image.open(image_path).convert('RGB')
                boxes, probs = mtcnn.detect(img)
                
                label = "face" if boxes is not None else "no face"
                
                # Append result
                result.append({
                    "filename": batch[i],
                    "label": label
                })

            except Exception as e:
                print("Error processing {batch[i]}: {e}")

  0%|          | 21/7219 [00:28<2:43:06,  1.36s/it]


KeyboardInterrupt: 

In [None]:
import csv 

output_csv = 'mtcnn_labels.csv'

with open(output_csv, mode='w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=["filename", "label"])
    writer.writeheader()
    for result in result:
        writer.writerow(result)

In [None]:

# Fit the scaler on smaller chunks of the training data
batch_size = 1000  # Adjust based on your available memory
for i in tqdm(range(0, len(X_train), batch_size)):
    batch = X_train[i:i + batch_size]
    scaler.partial_fit(batch)  # Fit incrementally using partial batches

# Transform training and test data in chunks
def transform_in_batches(data, batch_size, scaler):
    transformed_data = []
    for i in tqdm(range(0, len(data), batch_size)):
        batch = data[i:i + batch_size]
        transformed_batch = scaler.transform(batch)
        transformed_data.append(transformed_batch)
    return np.vstack(transformed_data)

X_train_scaled = transform_in_batches(X_train, batch_size, scaler)
X_test_scaled = transform_in_batches(X_test, batch_size, scaler)