In [46]:
%pip install facenet-pytorch pandas tqdm scikit-learn


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [47]:
from facenet_pytorch import MTCNN, InceptionResnetV1
import torch
from torch.utils.data import DataLoader
from torchvision import datasets

import numpy as np
import pandas as pd
from tqdm import tqdm
import os
import random

# Single seed shared by every random number generator used in this notebook.
RNG_SEED = 42

# Seed Python's `random`, PyTorch and NumPy (in that order) with the same value.
for _seed_fn in (random.seed, torch.manual_seed, np.random.seed):
    _seed_fn(RNG_SEED)


In [48]:
# Use the first CUDA GPU when PyTorch reports one as available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print('Running on device: {}'.format(device))

Running on device: cpu


# Define MTCNN baseline
We use fixed, default-style parameters for now; they have not been tuned.

In [49]:
# Baseline MTCNN face detector, placed on the device selected above.
# Every setting is spelled out explicitly so the baseline is easy to tweak later.
mtcnn = MTCNN(
    image_size=160,
    margin=0,
    min_face_size=20,
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    post_process=True,
    keep_all=True,
    device=device,
)

In [51]:
from torchvision import transforms

# Pipeline used by the train/test ImageFolder datasets: only resizes to 128x128,
# so samples are not converted to tensors.
transform = transforms.Compose([transforms.Resize((128, 128))])

# Pipeline used by the tensor dataset: resize to 64x64, then convert to a tensor.
tensor_transform = transforms.Compose(
    [
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
    ]
)


def collate_fn(batch):
    """Split a batch of ``(image, label)`` pairs into two parallel lists.

    Args:
        batch: Iterable of ``(image, label)`` pairs.

    Returns:
        A tuple ``(images, labels)`` of two lists in the same order as ``batch``.
        The images are passed through untouched (no stacking into a single tensor).
    """
    images, labels = map(list, zip(*batch))
    return images, labels

# Root folder that contains the `train/` and `test/` image trees.
# Set the FACE_DATA_DIR environment variable to point at your own copy of the
# data; the fallback is the original author's local path.
DATA_DIR = os.environ.get(
    "FACE_DATA_DIR", "/Users/kantatanahashi/Downloads/archive/data/data"
)

# Dataset for training: a fixed-size random subset of the training folder.
dataset_train = datasets.ImageFolder(f"{DATA_DIR}/train", transform=transform)
training_size = 5000
# Shuffle with a dedicated generator seeded from RNG_SEED so the chosen subset
# is identical every time this cell runs, regardless of how much of the global
# `random` state earlier cells (or earlier runs of this cell) have consumed.
random.Random(RNG_SEED).shuffle(dataset_train.samples)
dataset_train.samples = dataset_train.samples[:training_size]
# ImageFolder exposes `imgs` as an alias of `samples`; re-point it so it does
# not keep referring to the full, un-truncated list.
dataset_train.imgs = dataset_train.samples
dataset_train.targets = [class_idx for _, class_idx in dataset_train.samples]
loader_train = DataLoader(dataset_train, collate_fn=collate_fn, batch_size=8, shuffle=True)

idx_to_class_train = {i: c for c, i in dataset_train.class_to_idx.items()}

# Datasets for testing. Evaluation data is read in a fixed order (shuffle=False):
# accuracy does not depend on sample order, and a stable order keeps the later
# `train_test_split(..., random_state=42)` reproducible.
dataset_test = datasets.ImageFolder(f"{DATA_DIR}/test", transform=transform)
loader_test = DataLoader(dataset_test, collate_fn=collate_fn, batch_size=8, shuffle=False)

# Same test images, but resized to 64x64 and converted to tensors.
tensor_dataset = datasets.ImageFolder(f"{DATA_DIR}/test", transform=tensor_transform)
tensor_loader = DataLoader(tensor_dataset, collate_fn=collate_fn, batch_size=8, shuffle=False)

idx_to_class_test = {i: c for c, i in dataset_test.class_to_idx.items()}



In [52]:
# Number of mini-batches in the training loader: 5000 sampled images / batch size 8 = 625.
len(loader_train)

625

## Training SVM Model


In [53]:
import os
import cv2
import numpy as np
from sklearn import svm
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from PIL import Image


In [56]:
def preprocess_data(loader):
    """Turn every image yielded by ``loader`` into a flat feature vector.

    Args:
        loader: Iterable yielding ``(images, labels)`` batches, where ``images``
            is a sequence of objects convertible with ``np.array``.

    Returns:
        ``(X, y)``: NumPy arrays built from the flattened images and from the
        labels, in the order the loader produced them.
    """
    features, targets = [], []
    for batch_images, batch_labels in tqdm(loader, desc="Flattening data"):
        for image in batch_images:
            # np.array() converts the image to an ndarray; flatten() makes it 1-D.
            features.append(np.array(image).flatten())
        targets.extend(batch_labels)
    return np.array(features), np.array(targets)

In [None]:
# Flatten the training loader first, then the test loader, into (features, labels) pairs.
(X_train, y_train), (X_test, y_test) = (
    preprocess_data(loader) for loader in (loader_train, loader_test)
)

Flattening data: 100%|██████████| 625/625 [01:31<00:00,  6.83it/s]
Flattening data:  26%|██▌       | 322/1251 [00:49<02:04,  7.47it/s]

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the features using statistics learned from the training split only,
# then apply the same transformation to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Linear-kernel SVM on the standardised pixels; fit() returns the fitted estimator.
svm_model = svm.SVC(kernel="linear", verbose=True).fit(X_train_scaled, y_train)

# Predict the held-out split and report the fraction of correct labels.
y_pred = svm_model.predict(X_test_scaled)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.2f}")


training done
Accuracy: 0.57


## Training Gaussian Mixture model


In [None]:
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.datasets import load_digits
from sklearn.preprocessing import StandardScaler

In [None]:
# Collect one flat feature vector per image from the tensor loader, with its label.
X, y = [], []
for images, labels in tqdm(tensor_loader):
    for img in images:
        # Move the first axis last (presumably channels, as produced by ToTensor),
        # then flatten to a 1-D vector.
        X.append(img.numpy().transpose(1, 2, 0).flatten())
    y.extend(labels)

In [None]:
# Labels as an integer array so boolean masks such as `y_train == label` work.
y = np.asarray(y, dtype=int)

# Split BEFORE scaling. Fitting the scaler on the full data set would let the
# held-out rows influence the mean/variance used for training (data leakage).
# With the same data size and random_state the same rows land in each split as
# before; only the scaling statistics change.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    np.asarray(X), y, test_size=0.3, random_state=42
)

# Learn the scaling statistics from the training rows only, then reuse them
# for the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Number of distinct labels seen in training; one mixture model is fitted per label.
n_classes = len(np.unique(y_train))
gmm_models = []

In [None]:
# Fit one Gaussian mixture per class label; gmm_models[k] models class k.
# The list is rebuilt from scratch so re-running this cell does not append a
# second set of models after the first.
gmm_models = []
for label in tqdm(range(n_classes)):
    X_class = X_train[y_train == label]

    # A mixture cannot have more components than training samples, so cap the
    # component count for small classes instead of letting fit() raise.
    # NOTE(review): tying the component count to n_classes and using 'full'
    # covariances on raw pixel vectors may be very expensive -- confirm this is
    # intended, or consider fewer components / covariance_type='diag'.
    n_components = min(n_classes, len(X_class))
    gmm = GaussianMixture(n_components=n_components, covariance_type='full', random_state=42)
    gmm.fit(X_class)
    gmm_models.append(gmm)

# gmm = GaussianMixture(n_components=len(idx_to_class), random_state=42)
# gmm.fit(X_train)

# # Step 5: Predict Labels
# y_pred = gmm.predict(X_test)

In [None]:
# Score every test sample under EACH class-specific mixture and predict the
# class whose model gives the highest log-likelihood. Scoring only the leftover
# `gmm` loop variable would evaluate a single model, and an argmax over that
# one value is always 0.
# Column k holds the per-sample log-likelihoods under gmm_models[k].
log_likelihoods = np.column_stack(
    [model.score_samples(X_test) for model in tqdm(gmm_models)]
)
y_pred = np.argmax(log_likelihoods, axis=1)

accuracy = accuracy_score(y_test, y_pred)

print(accuracy)

# Labeling all images using MTCNN



In [None]:
from PIL import Image

# Accumulates one {"filename", "label"} record per processed image.
result = list()

def chunks(lst, batch_size):
    """Lazily yield consecutive slices of ``lst`` holding up to ``batch_size`` items.

    The final slice is shorter when ``len(lst)`` is not a multiple of ``batch_size``.
    """
    yield from (
        lst[start:start + batch_size]
        for start in range(0, len(lst), batch_size)
    )
        
data_dir = "data/"
batch_size = 10
# Every path here already starts with `data_dir`, because os.walk() yields
# `root` values rooted at the directory it was given.
all_files = [
    os.path.join(root, f)
    for root, _, files in os.walk(data_dir)
    for f in files if f.endswith("webp")
]
batches = list(chunks(all_files, batch_size))

for batch in tqdm(batches):
    for image_path in batch:
        # `image_path` is used as-is: joining it onto `data_dir` a second time
        # would produce a non-existent "data/data/..." path.
        try:
            # Close the file handle promptly; convert() returns a loaded copy.
            with Image.open(image_path) as raw_image:
                img = raw_image.convert('RGB')
            boxes, _ = mtcnn.detect(img)
        except Exception as e:
            # Best effort: report the failing file and continue with the rest.
            print(f"Error processing {image_path}: {e}")
            continue

        # The image is labelled "no face" when detect() returns None for the boxes.
        label = "face" if boxes is not None else "no face"

        # Append result
        result.append({
            "filename": image_path,
            "label": label
        })

  0%|          | 21/7219 [00:28<2:43:06,  1.36s/it]


KeyboardInterrupt: 

In [None]:
import csv 

# Write the collected MTCNN labels to a CSV file with one row per image.
output_csv = 'mtcnn_labels.csv'

with open(output_csv, mode='w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=["filename", "label"])
    writer.writeheader()
    # writerows() avoids a `for result in result` loop, which would rebind
    # `result` to its last record and break any re-run of this cell.
    writer.writerows(result)

In [None]:

# Fit the scaler on smaller chunks of the training data.
# Start from a fresh StandardScaler: partial_fit() adds each batch to whatever
# statistics the scaler already holds, so reusing a scaler fitted in an earlier
# cell (or in an earlier run of this cell) would mix old data into the result.
scaler = StandardScaler()
batch_size = 1000  # Adjust based on your available memory
for i in tqdm(range(0, len(X_train), batch_size)):
    batch = X_train[i:i + batch_size]
    scaler.partial_fit(batch)  # Fit incrementally using partial batches

# Transform training and test data in chunks
def transform_in_batches(data, batch_size, scaler):
    """Apply ``scaler.transform`` to ``data`` one slice at a time.

    Args:
        data: Sliceable collection of rows (supports ``len`` and ``data[a:b]``).
        batch_size: Maximum number of rows passed to the scaler per call.
        scaler: Object exposing ``transform(batch)``.

    Returns:
        The transformed slices stacked vertically with ``np.vstack``, in order.
    """
    starts = range(0, len(data), batch_size)
    return np.vstack(
        [scaler.transform(data[start:start + batch_size]) for start in tqdm(starts)]
    )

# Scale the training split first, then the test split, chunk by chunk with the same scaler.
X_train_scaled, X_test_scaled = (
    transform_in_batches(split, batch_size, scaler) for split in (X_train, X_test)
)