In [None]:
!pip install opencv-python-headless
!pip install torch torchvision
!pip install pandas
!pip install matplotlib
!pip install facenet-pytorch

import os
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision.models import alexnet
from torchvision import datasets, transforms
from PIL import Image
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import pandas as pd
from google.colab import drive
from facenet_pytorch import MTCNN

# Mount Google Drive at /content/drive; every dataset, checkpoint and
# submission path used in this notebook lives under /content/drive/MyDrive.
drive.mount('/content/drive')

Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m36.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m823.6/823.6 kB[0m [31m28.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.1/14.1 MB[0m [31m29.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Downloading nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:

# --- Label vocabulary --------------------------------------------------------
# category.csv lists the class names; a name's position in that list is the
# integer class index used for training and for decoding predictions.
categories_df = pd.read_csv('/content/drive/MyDrive/ECE 50024/category.csv')
categories = categories_df['Category'].tolist()
category_to_index = {category: idx for idx, category in enumerate(categories)}

# --- Ground truth ------------------------------------------------------------
ground_truth_df = pd.read_csv('/content/drive/MyDrive/ECE 50024/train.csv')

# Keep only the part of the file name before the first '.', so the keys can be
# matched against the stems of the preprocessed files regardless of extension.
ground_truth_df['File Name'] = ground_truth_df['File Name'].str.split('.').str[0]
ground_truth_map = dict(zip(ground_truth_df['File Name'], ground_truth_df['Category']))

# Show the size and a small sample instead of dumping the entire mapping into
# the notebook output.
print(f'{len(ground_truth_map)} labelled files; sample: {list(ground_truth_map.items())[:5]}')

# Fail early, with the offending names, if train.csv mentions a category that
# category.csv does not define (the lookup below would raise KeyError anyway).
unknown_categories = set(ground_truth_df['Category']) - set(category_to_index)
if unknown_categories:
    raise KeyError(
        f'Categories in train.csv missing from category.csv: {sorted(map(str, unknown_categories))[:10]}'
    )

# Class index for every row of train.csv, in row order.
label_idx = [category_to_index[ground_truth_map[file_name]] for file_name in ground_truth_df['File Name']]

# Shared face detector; the dataset classes pass this global to the
# preprocessing function.
mtcnn = MTCNN(keep_all=True)


def preprocess_image_facecascade(image_path, face_cascade=None):
    """Crop the first Haar-cascade face from an image and normalize it.

    Args:
        image_path: Path of the image file to read.
        face_cascade: Optional ``cv2.CascadeClassifier`` to detect faces with.
            When omitted, OpenCV's bundled frontal-face cascade is loaded once
            and reused on later calls. (Previously the function read an
            undefined global ``face_cascade`` and raised ``NameError``.)

    Returns:
        ``(face, True)`` where ``face`` is a 3x224x224 float tensor normalized
        with the ImageNet mean/std, or ``(None, False)`` when no face is found.

    Raises:
        RuntimeError: If the default cascade file cannot be loaded.
    """
    if face_cascade is None:
        # Cache the default detector on the function object so the XML file is
        # parsed only once per kernel session.
        face_cascade = getattr(preprocess_image_facecascade, '_default_cascade', None)
        if face_cascade is None:
            cascade_file = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
            face_cascade = cv2.CascadeClassifier(cascade_file)
            if face_cascade.empty():
                raise RuntimeError(f'Could not load Haar cascade from {cascade_file}')
            preprocess_image_facecascade._default_cascade = face_cascade

    # Close the underlying file as soon as the pixels are decoded.
    with Image.open(image_path) as raw:
        img = raw.convert('RGB')

    # The cascade operates on a single-channel image.
    gray = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.03, 5)

    if len(faces) == 0:
        return None, False

    # Only the first detection is used; convert NumPy integers to plain ints
    # before handing the box to PIL.
    x, y, w, h = (int(v) for v in faces[0])
    face = img.crop((x, y, x + w, y + h))
    face = transforms.Resize((224, 224))(face)
    face = transforms.ToTensor()(face)
    face = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])(face)
    return face, True

def calculate_mean_std(dataset_path):
    """Compute per-channel mean and standard deviation of an image folder.

    Every regular file directly inside ``dataset_path`` is opened as an RGB
    image, resized to 224x224 and converted to a tensor before its statistics
    are accumulated.

    The standard deviation is the true pixel-level deviation over the whole
    dataset, ``sqrt(E[x^2] - E[x]^2)``. The earlier implementation averaged the
    per-image standard deviations, which ignores brightness differences between
    images and therefore underestimates the dataset spread.

    Args:
        dataset_path: Directory containing the images.

    Returns:
        ``(mean, std)``: two float32 NumPy arrays of shape ``(3,)``.

    Raises:
        ValueError: If the directory contains no files.
    """
    image_paths = sorted(
        os.path.join(dataset_path, f)
        for f in os.listdir(dataset_path)
        if os.path.isfile(os.path.join(dataset_path, f))
    )
    if not image_paths:
        raise ValueError(f'No files found in {dataset_path!r}')

    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor()
    ])

    # Running sums in float64: constant memory regardless of dataset size and
    # no precision loss when subtracting the squared mean below.
    channel_sum = torch.zeros(3, dtype=torch.float64)
    channel_sq_sum = torch.zeros(3, dtype=torch.float64)

    for image_path in image_paths:
        with Image.open(image_path) as raw:
            img = transform(raw.convert('RGB')).double()
        # All images share the same resolution after Resize, so averaging the
        # per-image moments equals averaging over every pixel in the dataset.
        channel_sum += img.mean(dim=(1, 2))
        channel_sq_sum += (img ** 2).mean(dim=(1, 2))

    count = len(image_paths)
    all_channel_means = channel_sum / count
    # Clamp guards against a tiny negative variance caused by rounding.
    all_channel_vars = (channel_sq_sum / count - all_channel_means ** 2).clamp(min=0)
    all_channel_stds = all_channel_vars.sqrt()

    return all_channel_means.float().numpy(), all_channel_stds.float().numpy()

def preprocess_image_facenet(image_path, mtcnn, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
    """Load an image and return it as a normalized 224x224 tensor.

    Args:
        image_path: Path of the image file to read.
        mtcnn: Accepted for call-site compatibility; this implementation does
            not use it (no face detection or cropping is performed here).
        mean: Per-channel mean passed to ``transforms.Normalize``.
        std: Per-channel standard deviation passed to ``transforms.Normalize``.

    Returns:
        The tensor produced by resizing the RGB image to 224x224, applying
        ``ToTensor`` and then ``Normalize(mean, std)``.
    """
    rgb_image = Image.open(image_path).convert('RGB')
    to_normalized_tensor = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ])
    return to_normalized_tensor(rgb_image)


{'0': 'Audrey Tautou', '1': 'Adam Sandler', '2': 'Anna Paquin', '3': 'Ava Gardner', '4': 'Amy Adams', '5': 'Angelina Jolie', '6': 'Bill Paxton', '7': 'Alan Alda', '8': 'Abigail Breslin', '9': 'Alicia Vikander', '10': 'Albert Finney', '11': 'Aaron Judge', '12': 'Beyonce Knowles', '13': 'Beyonce Knowles', '14': 'Andrew Lincoln', '15': 'Armin Mueller-Stahl', '16': 'Andy Murray', '17': 'Amy Ryan', '18': 'Alan Arkin', '19': 'Adrien Brody', '20': 'Analeigh Tipton', '21': 'Amanda Bynes', '22': 'Anne Bancroft', '23': 'Amy Ryan', '24': 'Albert Brooks', '25': 'Barbra Streisand', '26': 'Amanda Peet', '27': 'bella thorne', '28': 'Amanda Peet', '29': 'Andy Samberg', '30': 'Annette Bening', '31': 'Anthony Perkins', '32': 'Barbra Streisand', '33': 'Anthony Perkins', '34': 'Adrien Brody', '35': 'Alice Eve', '36': 'Berenice Bejo', '37': 'Amanda Crew', '38': 'AnnaSophia Robb', '39': 'Anne Bancroft', '40': 'Ava Gardner', '41': 'Billy Bob Thornton', '42': 'Aaron Paul', '43': 'AnnaSophia Robb', '44': 'Alan

"\ndef preprocess_image_facenet(image_path, mtcnn):\n    img = Image.open(image_path).convert('RGB')\n    # Detect faces using MTCNN\n    boxes, probs = mtcnn.detect(img)\n    # Select the first detected face if any\n    if boxes is not None:\n        # Convert box coordinates to integers\n        boxes = boxes[0].astype(int)\n        # Extract face region\n        face = img.crop((boxes[0], boxes[1], boxes[2], boxes[3]))\n        # Resize face to (224, 224) for FaceNet\n        face = transforms.Resize((224, 224))(face)\n        # Convert to tensor and normalize\n        face = transforms.ToTensor()(face)\n        face = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])(face)\n        # Generate face embedding using FaceNet\n        return face, True\n    else:\n        # Resize original image to (224, 224) for FaceNet\n        img = transforms.Resize((224, 224))(img)\n        # Convert to tensor and normalize\n        img = transforms.ToTensor()(img)\n      

In [None]:
class CelebrityDataset(Dataset):
    """Labelled image dataset that preprocesses each image when it is accessed.

    Items are ``(image, label)`` pairs. The image comes from
    ``preprocess_image_facenet`` (called with the module-level ``mtcnn`` and
    this dataset's ``mean``/``std``) and is then passed through ``transform``
    when one was supplied.
    """

    def __init__(self, image_paths, labels, transform=None, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
        # image_paths[i] and labels[i] describe the same sample.
        self.image_paths, self.labels = image_paths, labels
        self.transform = transform
        self.mean, self.std = mean, std

    def __len__(self):
        """Number of samples, i.e. number of image paths."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return the preprocessed image and its label for position ``idx``."""
        sample = preprocess_image_facenet(self.image_paths[idx], mtcnn, self.mean, self.std)
        target = self.labels[idx]
        if not self.transform:
            return sample, target
        return self.transform(sample), target

class CelebrityTensorDataset(Dataset):
    """Labelled dataset backed by tensors that were saved to disk beforehand.

    Item ``i`` is ``(torch.load(tensor_paths[i]), labels[i])``; nothing is kept
    in memory between accesses.
    """

    def __init__(self, tensor_paths, labels):
        # tensor_paths[i] and labels[i] describe the same sample.
        self.tensor_paths, self.labels = tensor_paths, labels

    def __len__(self):
        """Number of samples, i.e. number of tensor files."""
        return len(self.tensor_paths)

    def __getitem__(self, idx):
        """Deserialize the tensor stored at position ``idx`` and pair it with its label."""
        return torch.load(self.tensor_paths[idx]), self.labels[idx]

class CelebrityTestDataset(Dataset):
    """Unlabelled image dataset used for inference.

    Items are ``(image, file_name)`` pairs: the image is preprocessed by
    ``preprocess_image_facenet`` (with the module-level ``mtcnn`` and this
    dataset's ``mean``/``std``), optionally passed through ``transform``, and
    returned together with the base name of its path.
    """

    def __init__(self, image_paths, transform=None, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
        self.image_paths = image_paths
        self.transform = transform
        self.mean, self.std = mean, std

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return the preprocessed image at ``idx`` and the base name of its file."""
        source = self.image_paths[idx]
        sample = preprocess_image_facenet(source, mtcnn, self.mean, self.std)
        if self.transform:
            sample = self.transform(sample)
        # The caller only needs the file name (not the directory) to build ids.
        return sample, os.path.basename(source)

In [None]:
# Per-channel normalization statistics used for the test images later on.
# NOTE(review): presumably produced by calculate_mean_std on the training
# crops - confirm before changing the preprocessing.
mean = [0.5957162,  0.4629442,  0.40489063]
std = [0.22739226, 0.20087235, 0.18877847]
image_paths = []
labels = []

celebrities_dir = '/content/drive/MyDrive/ECE 50024/cropped_large_tensors'
from pathlib import Path
import glob

celebrities_dir_path = Path(celebrities_dir)

# glob returns files in arbitrary, filesystem-dependent order. Sorting makes
# the list, and therefore the seeded train/validation split built from it,
# identical on every run.
image_files = sorted(glob.glob(f"{celebrities_dir}/*.pt"))  # Adjust the pattern as needed
print(len(image_files))

for image_file in image_files:
    # Same key derivation as ground_truth_map: everything before the first '.'.
    image_name = Path(image_file).name.split('.')[0]
    if image_name in ground_truth_map:
        # A distinct name is used here so the list `label_idx` built when the
        # ground truth was loaded is not overwritten by a single integer.
        class_index = category_to_index[ground_truth_map[image_name]]
        image_paths.append(image_file)
        labels.append(class_index)

# Tensors without a ground-truth entry are skipped, so this can be smaller
# than the file count printed above.
print(len(image_paths))

69579
69540


In [None]:
import torchvision.utils as vutils
import torchvision.models as models

# 80/20 split with a fixed seed.
train_paths, val_paths, train_labels, val_labels = train_test_split(image_paths, labels, test_size=0.2, random_state=42)
print(max(train_labels))
print(min(train_labels))

train_dataset = CelebrityTensorDataset(train_paths, train_labels)
val_dataset = CelebrityTensorDataset(val_paths, val_labels)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=128, num_workers=4)

num_classes = len(categories)
print(num_classes)

# Earlier run with this architecture:
# Validation Loss: 1.0238178512094895, Validation Accuracy: 80.74489502444636% Epoch 3
#
# weights=None: every parameter is overwritten by the strict load_state_dict
# call below, so downloading the ImageNet weights (the deprecated
# `pretrained=True` path) would be wasted work.
model = models.convnext_small(weights=None)

# Replace the final classifier layer with one sized for our classes.
num_ftrs = model.classifier[-1].in_features
model.classifier[-1] = nn.Linear(num_ftrs, num_classes)

# map_location='cpu' lets the checkpoint load even when no GPU is attached;
# the model is moved to the target device later, when it is used.
checkpoint = torch.load(
    "/content/drive/MyDrive/ECE 50024/checkpoints/convnext_small_outliers_checkpoint_epoch_1.pth.tar",
    map_location='cpu',
)
model.load_state_dict(checkpoint)

criterion = nn.CrossEntropyLoss()

99
0
100


Downloading: "https://download.pytorch.org/models/convnext_small-0c510722.pth" to /root/.cache/torch/hub/checkpoints/convnext_small-0c510722.pth
100%|██████████| 192M/192M [00:02<00:00, 95.4MB/s]


In [None]:
from tqdm import tqdm
def validate_model(model, val_loader, criterion, device=None):
    """Evaluate ``model`` on ``val_loader`` and print loss and accuracy.

    Args:
        model: Network to evaluate; it is moved to ``device`` and switched to
            eval mode.
        val_loader: Iterable of ``(images, labels)`` batches that supports
            ``len()``.
        criterion: Loss function called as ``criterion(outputs, labels)``. The
            per-batch value is weighted by the batch size, which assumes the
            criterion returns a batch mean.
        device: Device to run on. Defaults to CUDA when available, otherwise
            CPU.

    Returns:
        ``(train_losses, val_losses)``. ``train_losses`` is always an empty
        list (kept so existing call sites can still unpack two values);
        ``val_losses`` holds the single sample-averaged validation loss, or
        NaN when the loader yields no samples.
    """
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    train_losses = []
    val_losses = []
    model.to(device)
    model.eval()

    loss_sum = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        val_progress_bar = tqdm(val_loader, total=len(val_loader), desc='validation', leave=False)
        for images, labels in val_progress_bar:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            # Undo the batch averaging so the final figure is a per-sample mean.
            loss_sum += loss.item() * images.size(0)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        # Nothing was evaluated: report NaN instead of dividing by zero.
        val_loss = float('nan')
        accuracy = float('nan')
    else:
        # Divide by the samples actually seen, which stays correct even if the
        # loader drops or skips samples.
        val_loss = loss_sum / total
        accuracy = 100 * correct / total

    val_losses.append(val_loss)
    print(f'Validation Loss: {val_loss}, Validation Accuracy: {accuracy}%')

    return train_losses, val_losses


In [None]:
# Evaluate the checkpoint-restored model on the validation loader.
# validate_model returns (train_losses, val_losses); it never appends to the
# first list, so train_losses is always empty here.
train_losses, val_losses = validate_model(model, val_loader, criterion)

                                                             

Validation Loss: 0.9955680465814811, Validation Accuracy: 81.478285878631%




In [None]:

test_dir = '/content/drive/MyDrive/ECE 50024/cropped_test'

# Order the test files by their numeric id (the part of the file name before
# the first '.') rather than lexicographically, so '10.jpg' follows '9.jpg'.
test_images = sorted(
    (img for img in os.listdir(test_dir) if img.endswith('.jpg')),
    key=lambda x: int(x.split('.')[0]),
)
print(len(test_images))

# NOTE: a loop copied from the training-index cell used to run here. It looked
# each test file's stem up in ground_truth_map (whose keys are the training
# file stems) and appended matching test JPEGs to the training lists
# `image_paths` / `labels`, so re-running the split afterwards would have
# mixed test images carrying unrelated labels into the training data. Test
# images have no ground truth, so nothing is added to those lists here.

4977


In [None]:
test_dataset = CelebrityTestDataset(image_paths=[os.path.join(test_dir, img) for img in test_images], mean = mean, std=std)
# batch_size=1 is kept from the original run; shuffle=False keeps predictions
# in the same order as test_images.
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

# Prefer the GPU but fall back to CPU instead of failing when none is attached.
inference_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.eval()
model.to(inference_device)

predictions = []
image_ids = []
# no_grad: inference needs no autograd graph, which saves memory and time.
with torch.no_grad():
    for inputs, file_names in test_loader:
        outputs = model(inputs.to(inference_device))
        predictions.extend(outputs.argmax(dim=1).cpu().tolist())
        # Record the ids while iterating. Re-walking test_loader.dataset
        # afterwards would load and preprocess every image a second time just
        # to read its file name.
        image_ids.extend(int(name.split('.')[0]) for name in file_names)

# Decode class indices back to category names.
category_names = [categories[pred] for pred in predictions]

df_predictions = pd.DataFrame({
    'Id': image_ids,
    'Category': category_names
})

df_predictions.to_csv('/content/drive/MyDrive/ECE 50024/Submissions/convnext_small_augmentation_outliers.csv', index=False)

print("Predictions saved to CSV.")

Predictions saved to CSV.
