In [23]:
# import kagglehub

# # Download latest version
# path = kagglehub.dataset_download("sbaghbidi/human-faces-object-detection")

# print("Path to dataset files:", path)

In [36]:
# Import necessary libraries
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torchvision.transforms as T
import torchvision.models as models
from albumentations.pytorch import ToTensorV2
import torchvision.transforms.functional as F

# Check device
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(f"Using device: {device}")


Using device: mps


  check_for_updates()


In [30]:
class FaceDataset(Dataset):
    def __init__(self, annotations, images_dir, transform=None):
        self.annotations = annotations  # No need to reset index here
        self.images_dir = images_dir
        self.transform = transform

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        # Use the original indices to access the DataFrame rows
        img_name = self.annotations.iloc[idx]['image_name']
        img_path = os.path.join(self.images_dir, img_name)
        image = Image.open(img_path).convert("RGB")
        
        # Get bounding box coordinates
        bbox = self.annotations.iloc[idx][['x0', 'y0', 'x1', 'y1']].values.astype(np.float32)
        bbox = torch.tensor(bbox)
        
        if self.transform:
            image, bbox = self.transform(image, bbox)
        
        return image, bbox


In [35]:
# Path configurations
data_dir = '/Users/yd211/Documents/GitHub/ECE 685 Fall 2024/Homework 3/data'
images_dir = os.path.join(data_dir, 'images')
csv_path = os.path.join(data_dir, 'faces.csv')

# Load annotations
annotations = pd.read_csv(csv_path)
print(f"Total samples: {len(annotations)}")

# use train test split from sklearn

train_annotations, val_annotations = train_test_split(annotations, test_size=0.2, random_state=12)
print(f"Train samples: {len(train_annotations)}")
print(f"Validation samples: {len(val_annotations)}")



Total samples: 3350
Train samples: 2680
Validation samples: 670


In [28]:
# Define image size (resize for consistency and GPU memory constraints)
IMAGE_SIZE = 224

# Define transformation pipeline
transform = T.Compose([
    RandomAffineTransform(degrees=15, translate=(0.1, 0.1), scale=(0.9, 1.1), shear=10),
    T.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    T.ToTensor(),
])

# Instantiate datasets
train_dataset = FaceDataset(train_annotations, images_dir, transform=transform)
test_dataset = FaceDataset(test_annotations, images_dir, transform=transform)


In [29]:
def plot_image_with_bbox(image, bbox, title=""):
    # image: tensor
    # bbox: tensor [x0, y0, x1, y1]
    image = image.permute(1, 2, 0).cpu().numpy()
    plt.imshow(image)
    # Scale bbox to image size
    x0, y0, x1, y1 = bbox
    plt.plot([x0, x1, x1, x0, x0], [y0, y0, y1, y1, y0], 'r-', linewidth=2)
    plt.title(title)
    plt.axis('off')
    plt.show()

# Sample and plot twice to check randomness and consistency
for i in range(2):
    idx = random.randint(0, len(train_dataset)-1)
    image, bbox = train_dataset[idx]
    plot_image_with_bbox(image, bbox, title=f"Sample {i+1}")


TypeError: Compose.__call__() takes 2 positional arguments but 3 were given