# Assignment 4: Where's Waldo?
### Name: Bryan Christ
In this assignment, you will develop an object detection algorithm to locate Waldo in a set of images. You will develop a model to detect the bounding box around Waldo in 20 provided images. Your final task is to submit your predictions on Kaggle for evaluation.

### Imports

In [1]:
import os
import pandas as pd
from PIL import Image
import torch
from torchvision.io import read_image
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.transforms import functional as F
from tqdm import tqdm
import csv

# Preprocessing

In [5]:
class WaldoDataset(torch.utils.data.Dataset):
    """Map-style dataset yielding ``(image, target)`` pairs with one box each.

    Every row of the annotations CSV describes one image: column 0 holds the
    image file name (joined onto ``img_dir``) and columns 4-7 hold the box
    coordinates. The area computation treats those four values as
    ``(xmin, ymin, xmax, ymax)``.

    Parameters:
    annotations_file: Path of the CSV file, read with ``pandas.read_csv``.
    img_dir: Directory that the file names from column 0 are joined onto.
    transforms: Optional callable invoked as ``transforms(image, target)``;
        it must return the ``(image, target)`` pair to hand back.
    """

    def __init__(self, annotations_file, img_dir, transforms=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transforms = transforms

    def __len__(self):
        """Return the number of annotation rows."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return the image tensor and target dict for annotation row ``idx``.

        Returns:
        tuple: ``(image, target)``. ``image`` is the RGB image converted with
            ``to_tensor``; ``target`` is a dict with the keys ``boxes``
            (single-row float32 tensor), ``labels`` (int64 ones),
            ``image_id``, ``area`` and ``iscrowd`` (int64 zeros).

        Raises:
        ValueError: If a box coordinate cannot be converted to ``float``.
        """
        # Parse the box first so a malformed annotation row fails fast,
        # before the image file is opened and decoded.
        box_data = self.img_labels.iloc[idx, 4:8].values
        try:
            box = [float(item) for item in box_data]
        except ValueError as e:
            # Chain explicitly so the traceback keeps the root cause.
            raise ValueError(
                f"Error converting bounding box data to float: {e}"
            ) from e

        # Load image; the context manager releases the file handle promptly.
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        with Image.open(img_path) as img:
            image = F.to_tensor(img.convert("RGB"))

        # Create tensors (one box per image, always labelled as class 1).
        boxes = torch.as_tensor([box], dtype=torch.float32)
        target = {
            "boxes": boxes,
            "labels": torch.ones((1,), dtype=torch.int64),
            "image_id": torch.tensor([idx]),
            "area": (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]),
            "iscrowd": torch.zeros((1,), dtype=torch.int64),
        }

        if self.transforms is not None:
            image, target = self.transforms(image, target)

        return image, target


# Example usage:
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

# Create the dataset
full_dataset = WaldoDataset(annotations_file='wheres-waldo/annotations.csv', img_dir='wheres-waldo/train')

# Materialise every (image, target) pair exactly once. Pulling images and
# annotations out in two separate passes would read and decode each image
# file from disk twice.
samples = [full_dataset[i] for i in range(len(full_dataset))]

# Extract images and annotations
images = [image for image, _ in samples]
annotations = [target for _, target in samples]

# Set random seed for reproducibility
torch.manual_seed(42)

# Perform random train/test split (80% train, 20% validation)
images_train, images_test, annotations_train, annotations_test = train_test_split(
    images, annotations, test_size=0.2, random_state=42
)

# Plain lists of (image, target) pairs; DataLoader accepts any indexable
# sequence with a length.
train_dataset = list(zip(images_train, annotations_train))
val_dataset = list(zip(images_test, annotations_test))


def collate_batch(batch):
    """Transpose a list of (image, target) pairs into (images, targets) tuples.

    The batch is left as tuples of per-sample items rather than being
    stacked into a single tensor.
    """
    return tuple(zip(*batch))


# Now, you can use this dataset with a DataLoader to train your model
train_data_loader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    collate_fn=collate_batch,
)

val_data_loader = DataLoader(
    val_dataset,
    batch_size=4,
    shuffle=False,
    collate_fn=collate_batch,
)

In [7]:
# Display the validation sample at index 1; each entry is an (image, target) pair.
val_dataset[1]

(tensor([[[0.3725, 0.4824, 0.5961,  ..., 0.9569, 0.9686, 0.9490],
          [0.3882, 0.4941, 0.5843,  ..., 0.9294, 0.9373, 0.9294],
          [0.4275, 0.5333, 0.5804,  ..., 0.0000, 0.0000, 0.0000],
          ...,
          [0.3647, 0.6392, 0.4549,  ..., 0.4275, 0.4745, 0.4392],
          [0.3725, 0.5490, 0.5373,  ..., 0.2392, 0.4039, 0.2078],
          [0.3529, 0.4549, 0.6039,  ..., 0.7686, 0.9961, 0.2078]],
 
         [[0.3569, 0.4667, 0.5804,  ..., 0.9882, 1.0000, 0.9843],
          [0.3725, 0.4784, 0.5686,  ..., 0.9843, 1.0000, 0.9922],
          [0.4196, 0.5255, 0.5725,  ..., 0.0275, 0.0667, 0.0353],
          ...,
          [0.3843, 0.6627, 0.4706,  ..., 0.1059, 0.1843, 0.1647],
          [0.3922, 0.5725, 0.5529,  ..., 0.0039, 0.1922, 0.0157],
          [0.3725, 0.4824, 0.6196,  ..., 0.5765, 0.8118, 0.0431]],
 
         [[0.3608, 0.4706, 0.5843,  ..., 1.0000, 1.0000, 1.0000],
          [0.3765, 0.4824, 0.5725,  ..., 1.0000, 1.0000, 1.0000],
          [0.4235, 0.5294, 0.5765,  ...,

# Create your model here 

# Submission File 

In [None]:
def write_predictions_to_csv(predictions, csv_file_path):
    """
    Write predictions to a CSV file.

    The file starts with the header row ``filename,xmin,ymin,xmax,ymax``
    followed by one row per prediction. An existing file at the same path
    is overwritten.

    Parameters:
    predictions (iterable of tuples): Predictions, where each prediction is a
                                      tuple containing
                                      (filename, xmin, ymin, xmax, ymax).
    csv_file_path (str): The path to the CSV file where the predictions will be saved.
    """

    # CSV header
    header = ['filename', 'xmin', 'ymin', 'xmax', 'ymax']

    # newline='' leaves line endings to the csv module; the explicit UTF-8
    # encoding keeps the output independent of the platform's default locale.
    with open(csv_file_path, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(header)  # Write the header
        writer.writerows(predictions)  # Write the prediction rows

# Example usage:
# Each row follows the CSV header order: (filename, xmin, ymin, xmax, ymax).
# These look like placeholder values — presumably to be replaced with the
# model's actual predictions before submitting.
# NOTE(review): the "1.jpg" row has ymin (235) greater than ymax (231), which
# is not a valid box under that ordering — confirm the intended values.
predictions = [
    ("1.jpg", 380, 235, 394, 231),
    ("2.jpg", 972, 628, 1590, 655),
    ("3.jpg", 623, 955, 641, 976),
    # ... add other predictions as needed ...
]

# Write to 'predictions.csv' in the current working directory
write_predictions_to_csv(predictions, 'predictions.csv')
