In [None]:
# Colab-only setup: requires the `google.colab` package.
from google.colab import drive

# Mount Google Drive at /content/drive; the data paths built later in this
# notebook live under /content/drive/MyDrive.
drive.mount("/content/drive")

In [None]:
import os
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import Dataset
from torchvision import transforms
from torch.utils.data import DataLoader

In [None]:
PROJECT_DIR = "nexcar-challenge"
DATA_DIR = "data"

# The label CSV and the image folder share one directory on the mounted Drive.
data_root = "/".join(["/content/drive/MyDrive", PROJECT_DIR, DATA_DIR])
train_csv_path = data_root + "/train.csv"
train_videos_folder = data_root + "/train/"  # trailing slash is part of the value

# Load the labels table and preview its first rows as the cell output.
df = pd.read_csv(train_csv_path)
df.head()

In [None]:
class ImageDataset(Dataset):
    """Map-style dataset pairing each CSV row with a JPEG image on disk.

    The CSV must provide an ``id`` column (used to build the image file name)
    and a ``label`` column (returned as the target).

    Args:
        csv_path: Path of the CSV file; it is read once with ``pd.read_csv``.
        videos_folder: Directory holding the images. Files are looked up as
            ``<id zero-padded to 5 digits>.jpg`` inside this directory.
        transform: Optional callable applied to the RGB ``PIL.Image`` before
            it is returned (e.g. resizing, tensor conversion, augmentations).
    """

    def __init__(self, csv_path, videos_folder, transform=None):
        self.data = pd.read_csv(csv_path)
        self.videos_folder = videos_folder
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV, i.e. the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load the sample stored at positional index ``idx``.

        Returns:
            An ``(image, label)`` tuple. ``image`` is the output of
            ``transform`` when one was given, otherwise the RGB ``PIL.Image``;
            ``label`` is the row's ``label`` value wrapped by ``torch.tensor``.

        Raises:
            IndexError: If ``idx`` is outside the range of CSV rows.
            FileNotFoundError: If the expected image file does not exist.
        """
        # NOTE(review): `.iloc[idx]` returns the whole row as one Series, so
        # with mixed numeric columns pandas may upcast the values (presumably
        # why `id` is cast back with int() below). The label's dtype can thus
        # depend on the other columns -- confirm that is acceptable.
        row = self.data.iloc[idx]

        file_name = f"{str(int(row['id'])).zfill(5)}.jpg"
        img_path = os.path.join(self.videos_folder, file_name)

        # Close the file handle deterministically; convert() returns a new,
        # fully loaded image that stays valid after the file is closed.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")

        label = row["label"]

        # Explicit None check: do not rely on the truthiness of the callable.
        if self.transform is not None:
            image = self.transform(image)

        return image, torch.tensor(label)


# Preprocessing shared by every sample: fixed spatial size, then PIL -> tensor.
IMAGE_SIZE = (224, 224)
transform = transforms.Compose(
    [transforms.Resize(IMAGE_SIZE), transforms.ToTensor()]
)

dataset = ImageDataset(
    csv_path=train_csv_path,
    videos_folder=train_videos_folder,
    transform=transform,
)

# Smoke test: fetch the first sample and show the image shape and its label.
image, label = dataset[0]
print(image.shape, label)

In [None]:
# shuffle=True reshuffles the samples at every epoch. batch_size=1 is the
# DataLoader default, written out so the one-sample-per-batch behaviour is
# visible. NOTE(review): confirm a batch size of 1 is intended for training.
dataloader = DataLoader(dataset=dataset, batch_size=1, shuffle=True)