# Assignment 4: Where's Waldo?
### Name: Your Name
In this assignment, you will develop an object detection model that locates Waldo in a set of images by predicting the bounding box around him. Your final task is to submit your predictions to Kaggle for evaluation.

### Imports

In [None]:
import os
import pandas as pd
from PIL import Image
import torch
from torchvision.io import read_image
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.transforms import functional as F
from tqdm import tqdm
import csv

# Preprocessing

In [2]:
class WaldoDataset(torch.utils.data.Dataset):
    """Detection dataset that yields one image and one box per annotation row.

    The annotation CSV is loaded with pandas. For each row, column 0 is joined
    onto ``img_dir`` to locate the image file, and columns 4-7 supply the four
    box coordinates (presumably xmin, ymin, xmax, ymax -- confirm against the
    annotation file's header).
    """

    def __init__(self, annotations_file, img_dir, transforms=None):
        """Load the annotation table and remember where the images live.

        Parameters:
        annotations_file: Path to the CSV file holding the annotations.
        img_dir: Directory that the image file names are relative to.
        transforms: Optional callable invoked as ``transforms(image, target)``
                    and expected to return an ``(image, target)`` pair.
        """
        self.transforms = transforms
        self.img_dir = img_dir
        self.img_labels = pd.read_csv(annotations_file)

    def __len__(self):
        """Return the number of annotation rows."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return the ``(image, target)`` pair for annotation row ``idx``.

        ``target`` is a dict with the keys ``boxes``, ``labels``, ``image_id``,
        ``area`` and ``iscrowd``; every entry describes exactly one box.

        Raises:
        ValueError: If any of the four box values cannot be converted to float.
        """
        # Locate the file, force three colour channels, then convert to a tensor.
        file_name = self.img_labels.iloc[idx, 0]
        pil_image = Image.open(os.path.join(self.img_dir, file_name)).convert("RGB")
        image = F.to_tensor(pil_image)

        # Columns 4..7 carry the box; the first value that fails to convert
        # aborts with a descriptive error.
        raw_coords = self.img_labels.iloc[idx, 4:8].values
        try:
            coords = [float(value) for value in raw_coords]
        except ValueError as err:
            raise ValueError(f"Error converting bounding box data to float: {err}")

        # The extra list level gives ``boxes`` a leading dimension of 1.
        boxes = torch.as_tensor([coords], dtype=torch.float32)
        heights = boxes[:, 3] - boxes[:, 1]
        widths = boxes[:, 2] - boxes[:, 0]

        target = {
            "boxes": boxes,
            "labels": torch.ones((1,), dtype=torch.int64),
            "image_id": torch.tensor([idx]),
            "area": heights * widths,
            "iscrowd": torch.zeros((1,), dtype=torch.int64),
        }

        if self.transforms is not None:
            image, target = self.transforms(image, target)

        return image, target


# Example usage:
# Build the training and validation datasets (replace the placeholder paths).
train_dataset = WaldoDataset(
    annotations_file='path/to/train_annotation.csv',
    img_dir='path/to/train',
)
val_dataset = WaldoDataset(
    annotations_file='path/to/val_annotation.csv',
    img_dir='path/to/val',
)

# Wrap the datasets in DataLoaders so they can be iterated in batches.
from torch.utils.data import DataLoader


def _collate_detection_batch(batch):
    """Regroup a list of (image, target) pairs into (images, targets) tuples."""
    return tuple(zip(*batch))


# Training batches are reshuffled; validation keeps the dataset order.
train_data_loader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    collate_fn=_collate_detection_batch,
)
val_data_loader = DataLoader(
    val_dataset,
    batch_size=4,
    shuffle=False,
    collate_fn=_collate_detection_batch,
)

# Create your model here 

# Submission File 

In [None]:
def write_predictions_to_csv(predictions, csv_file_path):
    """
    Write predictions to a CSV file.

    The file is created (or overwritten) with a header row followed by one
    row per prediction, in the order given.

    Parameters:
    predictions (iterable of tuples): The predictions, where each prediction is a tuple
                                      containing (filename, xmin, ymin, xmax, ymax).
    csv_file_path (str): The path to the CSV file where the predictions will be saved.
    """

    # CSV header
    header = ['filename', 'xmin', 'ymin', 'xmax', 'ymax']

    # Pin the encoding so the output does not depend on the platform's locale
    # default; newline='' lets the csv module control line endings itself.
    with open(csv_file_path, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(header)  # Write the header
        writer.writerows(predictions)  # Write the prediction rows

# Example usage:
# Each entry is a (filename, xmin, ymin, xmax, ymax) tuple, i.e. one CSV row.
predictions = [
    # NOTE(review): in this row ymax (231) is smaller than ymin (235) --
    # confirm the intended sample values.
    ("1.jpg", 380, 235, 394, 231),
    ("2.jpg", 972, 628, 1590, 655),
    ("3.jpg", 623, 955, 641, 976),
    # ... add other predictions as needed ...
]

# Write to 'predictions.csv'
write_predictions_to_csv(predictions, 'predictions.csv')
