## Adel Movahedian 400102074

In [1]:
!pip install torch



In [None]:
import os
import cv2
import torch
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

DATA_ROOT = "SportsMOT_example/dataset"
TRAIN_DIR = os.path.join(DATA_ROOT, "train")
# VAL_DIR = os.path.join(DATA_ROOT, "val")
# TEST_DIR = os.path.join(DATA_ROOT, "test") not in the dataset

sample_seq = os.path.join(TRAIN_DIR, "football")
print("Sample sequence contents:")
print(os.listdir(sample_seq))


class SportsMOTDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.sequences = [d for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))]

        self.samples = []
        for seq in self.sequences:
            seq_path = os.path.join(root_dir, seq)
            img_dir = os.path.join(seq_path, "img1")
            gt_file = os.path.join(seq_path, "gt", "gt.txt")

            try:
                annotations = pd.read_csv(
                    gt_file, header=None,
                    names=['frame', 'id', 'x', 'y', 'w', 'h', 'conf', 'cls', 'vis']
                )
                annotations = annotations[annotations['cls'] == 1]
            except FileNotFoundError:
                print(f"Annotation file not found for sequence: {seq}")
                continue

            frame_files = sorted([
                f for f in os.listdir(img_dir)
                if f.endswith('.jpg') and f.split('.')[0].isdigit()
            ])

            for frame_file in frame_files:
                frame_num = int(frame_file.split('.')[0])
                frame_anns = annotations[annotations['frame'] == frame_num]
                self.samples.append({
                    'image_path': os.path.join(img_dir, frame_file),
                    'annotations': frame_anns
                })

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        sample = self.samples[idx]
        image = cv2.imread(sample['image_path'])
        if image is None:
            raise ValueError(f"Failed to load image: {sample['image_path']}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        anns = sample['annotations']
        boxes = anns[['x', 'y', 'w', 'h']].values.astype(float)
        boxes[:, 2:] += boxes[:, :2]
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.ones((len(anns),), dtype=torch.int64)
        if self.transform:
            image = self.transform(image)
        return image, {
            'boxes': boxes,
            'labels': labels,
            'image_id': torch.tensor([idx]),
            'area': (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]),
            'iscrowd': torch.zeros((len(anns),), dtype=torch.int64)
        }
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((640, 640)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_dataset = SportsMOTDataset(TRAIN_DIR, transform=transform)

def collate_fn(batch):
    return tuple(zip(*batch))

BATCH_SIZE = 4
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=0,
    collate_fn=collate_fn
)

print(f"Train samples: {len(train_dataset)}")


Sample sequence contents:
['gt', 'img1', 'seqinfo.ini']
Train samples: 1727


In [42]:
# Test the dataloader
for images, targets in train_loader:
    print(f"Batch size: {len(images)}")
    print(f"First image shape: {images[0].shape}")
    print(f"First target boxes: {targets[0]['boxes'][:2]}")
    break


Batch size: 4
First image shape: torch.Size([3, 640, 640])
First target boxes: tensor([[ 23., 308.,  59., 393.],
        [274., 382., 317., 476.]])


In [43]:
import cv2
from IPython.display import Video

def create_video_from_sequence(dataset, output_path="sample_clip2.mp4", idx=0, fps=10):
    sequence_dir = dataset.sequences[idx]
    seq_path = os.path.join(dataset.root_dir, sequence_dir, "img1")
    images = sorted([os.path.join(seq_path, img) for img in os.listdir(seq_path) if img.endswith(".jpg")])
    gt_file = os.path.join(dataset.root_dir, sequence_dir, "gt", "gt.txt")
    annotations = pd.read_csv(gt_file, header=None, names=['frame', 'id', 'x', 'y', 'w', 'h', 'conf', 'cls', 'vis'])
    annotations = annotations[annotations['cls']==1]
    sample_image = cv2.imread(images[0])
    height, width, _ = sample_image.shape
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    video_writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    for img_path in images:
        frame_num = int(os.path.basename(img_path).split(".")[0])
        frame = cv2.imread(img_path)
        frame_anns = annotations[annotations['frame'] == frame_num]
        for _, row in frame_anns.iterrows():
            x, y, w, h = int(row['x']), int(row['y']), int(row['w']), int(row['h'])
            color = (0, 255, 0)  # Green box
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            label = f"ID: {int(row['id'])}"
            cv2.putText(frame, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
        video_writer.write(frame)

    video_writer.release()
    print(f"Video saved to {output_path}")

create_video_from_sequence(train_dataset, output_path="train_clip2.mp4", idx=0, fps=20)


Video saved to train_clip2.mp4
