In [2]:
import os
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix, accuracy_score
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from PIL import Image
import matplotlib.pyplot as plt

In [3]:
data_dir = "Celebrity Faces Dataset/Celebrity Faces Dataset"  # clean folder
classes = sorted([f for f in os.listdir(data_dir) if not f.startswith('.')])

In [4]:
image_paths = []
labels = []

for idx, cls in enumerate(classes):
    cls_folder = os.path.join(data_dir, cls)
    for file in os.listdir(cls_folder):
        if file.lower().endswith(('.jpg', '.png', '.jpeg')):
            image_paths.append(os.path.join(cls_folder, file))
            labels.append(idx)

image_paths = np.array(image_paths)
labels = np.array(labels)

print(f"Detected {len(classes)} classes and {len(image_paths)} images.")


Detected 17 classes and 1700 images.


In [5]:
num_classes = len(np.unique(labels))
images_per_class = 100
folds = 10
images_per_fold = images_per_class // folds

In [6]:
images_by_class = []
for class_i in range(num_classes):
    images_by_class.append([])

In [7]:
for path, label in zip(image_paths, labels):
    images_by_class.append(path)

In [8]:
for class_i in range(num_classes):
    images_by_class[class_i].sort()

In [10]:
folds_by_class = []
for class_i in range(num_classes):
    class_images = images_by_class[class_i]
    class_folds = []
    for fold in range(folds):
        start = fold * images_per_fold
        end = start + images_per_fold
        fold_images = class_images[start:end]
        class_folds.append(fold_images)
    folds_by_class.append(class_folds)

In [16]:
device = torch.device("cpu")
alexnet = models.alexnet(pretrained=True).to(device)

In [17]:
alexnet.eval()

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [19]:
feature_extractor = torch.nn.Sequential(*list(alexnet.children())[:-1]).to(device)
feature_extractor

Sequential(
  (0): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (1): AdaptiveAvgPool2d(output_size=(6, 6))
)

In [22]:
transform_image = transforms.Compose([transforms.Resize((100,100)), transforms.ToTensor(), transforms.Normalize(mean=[0.495, 0.456, 0.406], std= [0.229, 0.224, 0.225])] )
transform_image

Compose(
    Resize(size=(100, 100), interpolation=bilinear, max_size=None, antialias=True)
    ToTensor()
    Normalize(mean=[0.495, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)