In [2]:
from torch.utils.data import Dataset
import torch
import numpy as np
import cv2

class TrackNetDataset(Dataset):
    """Dataset yielding three consecutive frames and their target heatmap.

    Sample ``i`` is built from frames ``indices[i] - 2``, ``indices[i] - 1``
    and ``indices[i]``, concatenated along the channel axis, and paired with
    ``heatmaps[i]``.

    Args:
        frame_paths: Sequence of image paths, indexed by frame number.
        indices: Frame number of the *last* frame of each sample. Every entry
            must be >= 2 so that both preceding frames exist.
        heatmaps: Per-sample NumPy arrays, aligned with ``indices`` by
            position (``heatmaps[i]`` belongs to ``indices[i]``).
        img_size: ``(width, height)`` every frame is resized to.
    """

    def __init__(self, frame_paths, indices, heatmaps, img_size):
        self.frame_paths = frame_paths
        self.indices = indices
        self.heatmaps = heatmaps
        self.W, self.H = img_size

    def _load_frame(self, idx):
        """Read frame ``idx``, resize it to (W, H) and scale it to [-1, 1].

        Raises:
            IndexError: If ``idx`` is not a valid position in ``frame_paths``.
            RuntimeError: If OpenCV cannot read the image file.
        """
        # A negative idx would otherwise wrap around and silently return a
        # frame from the END of the sequence (e.g. idx - 2 for idx in {0, 1}).
        if not 0 <= idx < len(self.frame_paths):
            raise IndexError(
                f"Frame index {idx} out of range for "
                f"{len(self.frame_paths)} frames"
            )

        path = self.frame_paths[idx]
        frame = cv2.imread(path)
        if frame is None:
            # cv2.imread signals an unreadable/missing file by returning None.
            raise RuntimeError(f"Missing frame: {path}")

        # cv2.resize takes the target size as (width, height).
        frame = cv2.resize(frame, (self.W, self.H))
        frame = frame.astype(np.float32) / 255.0  # [0, 255] -> [0, 1]
        return (frame - 0.5) / 0.5                 # [0, 1]   -> [-1, 1]

    def __len__(self):
        """Number of samples (one per entry in ``indices``)."""
        return len(self.indices)

    def __getitem__(self, i):
        """Return ``(x, y)`` for sample ``i``.

        ``x`` is the channel-first stack of the three frames; ``y`` is
        ``heatmaps[i]`` with a leading channel dimension added.
        """
        idx = self.indices[i]

        # Oldest frame first, annotated frame last.
        window = [self._load_frame(idx + offset) for offset in (-2, -1, 0)]

        # Frames are (H, W, C); stack on the channel axis, then go to (C, H, W).
        x = np.concatenate(window, axis=2)
        x = torch.from_numpy(x).permute(2, 0, 1)

        y = torch.from_numpy(self.heatmaps[i]).unsqueeze(0)

        return x, y


In [3]:
import numpy as np
import cv2

def generate_heatmap(x, y, H, W, sigma=3):
    """Build a peak-normalised Gaussian heatmap for a single point.

    Args:
        x: Horizontal (column) coordinate of the point, in heatmap pixels.
            May be an int or a float.
        y: Vertical (row) coordinate of the point, in heatmap pixels.
        H: Heatmap height (number of rows).
        W: Heatmap width (number of columns).
        sigma: Standard deviation handed to ``cv2.GaussianBlur``.

    Returns:
        A float32 array of shape ``(H, W)``. The peak is ~1.0 at the point;
        the array is all zeros when the point lies outside the grid.
    """
    heatmap = np.zeros((H, W), dtype=np.float32)

    # The bounds test runs on the raw values so that NaN/inf coordinates fall
    # through to the empty heatmap. Blurring an all-zero image yields zeros,
    # so returning here is equivalent to running the blur and skips the work.
    if not (0 <= x < W and 0 <= y < H):
        return heatmap

    # NumPy rejects float indices, and annotations loaded from JSON are often
    # floats: snap to the nearest pixel and clamp so that e.g. x = W - 0.3
    # does not round to the out-of-range column W.
    col = min(int(round(x)), W - 1)
    row = min(int(round(y)), H - 1)
    heatmap[row, col] = 1.0

    # The fixed 7x7 kernel limits the blob to +/-3 pixels whatever sigma is.
    heatmap = cv2.GaussianBlur(heatmap, (7, 7), sigma)

    # Rescale so the peak is ~1; the epsilon guards against division by zero.
    return heatmap / (heatmap.max() + 1e-6)


In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class TrackNet(nn.Module):
    """Five-layer fully convolutional network producing a one-channel map.

    Four 3x3 convolutions (each followed by ReLU) widen the 9 input channels
    to 256; a final 1x1 convolution followed by a sigmoid collapses them to a
    single channel with values in (0, 1). Padding of 1 on the 3x3 layers keeps
    the spatial size unchanged.
    """

    def __init__(self):
        super().__init__()

        # 3x3 feature layers: 9 -> 64 -> 128 -> 256 -> 256 channels.
        self.conv1 = nn.Conv2d(in_channels=9, out_channels=64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)
        # 1x1 projection down to the single output channel.
        self.conv5 = nn.Conv2d(in_channels=256, out_channels=1, kernel_size=1)

        # Shared, parameter-free activations.
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """
        Input:  (B, 9, H, W)
        Output: (B, 1, H, W), already passed through a sigmoid
        """
        features = x
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            features = self.relu(conv(features))
        return self.sigmoid(self.conv5(features))


In [5]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

def train_tracknet(
    frame_paths,
    labels,
    dataset_class,
    batch_size=4,
    epochs=50,
    lr=1e-4,
    save_path="tracknet.pth",
    device=None
):
    """Train a fresh ``TrackNet`` and save its weights.

    Args:
        frame_paths: Forwarded as the first argument to ``dataset_class``.
        labels: Forwarded as the second argument to ``dataset_class``.
        dataset_class: Callable invoked as ``dataset_class(frame_paths, labels)``
            that returns a map-style dataset of ``(input, target)`` pairs.
            NOTE(review): ``TrackNetDataset`` in this notebook takes four
            constructor arguments, so it cannot be passed here directly.
        batch_size: Mini-batch size.
        epochs: Number of passes over the dataset.
        lr: Adam learning rate.
        save_path: Destination of the final ``state_dict``.
        device: ``None`` (auto-select), a device string such as ``"cuda:0"``,
            or a ``torch.device``.

    Returns:
        The trained model.

    Raises:
        ValueError: If the dataset yields no batches.
    """
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    # Normalise to torch.device: comparing against the literal "cuda" misses
    # "cuda:0" and torch.device objects, which left pin_memory disabled.
    device = torch.device(device)

    # -------------------------
    # MODEL
    # -------------------------
    model = TrackNet().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # TrackNet.forward already ends in a sigmoid, so plain BCE is the match.
    criterion = nn.BCELoss()

    # -------------------------
    # DATA
    # -------------------------
    dataset = dataset_class(frame_paths, labels)
    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=0,
        pin_memory=(device.type == "cuda")
    )

    num_batches = len(loader)
    if num_batches == 0:
        # Fail early instead of hitting ZeroDivisionError in the epoch average.
        raise ValueError("Dataset is empty: nothing to train on")

    # -------------------------
    # TRAINING LOOP
    # -------------------------
    for epoch in range(epochs):
        model.train()
        total_loss = 0.0

        for x, y in loader:
            x = x.to(device)
            y = y.to(device)

            pred = model(x)
            loss = criterion(pred, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        avg_loss = total_loss / num_batches
        print(f"[Epoch {epoch+1}/{epochs}] Loss: {avg_loss:.6f}")

    # -------------------------
    # SAVE MODEL
    # -------------------------
    torch.save(model.state_dict(), save_path)
    print(f"✅ TrackNet model saved to: {save_path}")

    return model


In [6]:
import numpy as np
import torch
import os
import json

from torch import nn
from torch.utils.data import DataLoader, random_split



# -------------------------
# CONFIG
# -------------------------

def main_train():
    """Train TrackNet on ``frames/*.jpg`` and ``annotations.json``.

    Steps: list the frames, build one heatmap per annotated frame with index
    >= 2, split into train/validation sets, train for ``EPOCHS`` epochs and
    save the weights with the lowest monitored loss to ``MODEL_PATH``.

    ``annotations.json`` is read as a mapping from frame index (string key
    parseable as int) to an ``[x, y]`` pair.

    Raises:
        FileNotFoundError: If ``frames`` or ``annotations.json`` is missing.
        ValueError: If no annotation has a frame index >= 2.
    """
    FRAME_DIR = "frames"
    ANNOTATION_FILE = "annotations.json"
    INPUT_W, INPUT_H = 960, 540
    HEATMAP_SIGMA = 3
    BATCH_SIZE = 8
    EPOCHS = 50
    LR = 1e-4
    VAL_SPLIT = 0.2
    MODEL_PATH = "tracknet.pth"
    SPLIT_SEED = 42  # fixed so the train/val partition is the same every run

    # NOTE(review): annotation keys are used as positions in this sorted list,
    # which presumably relies on zero-padded file names - confirm.
    frame_paths = sorted(
        os.path.join(FRAME_DIR, f)
        for f in os.listdir(FRAME_DIR)
        if f.endswith(".jpg")
    )

    print("Frame_paths loaded")

    # Load annotations
    with open(ANNOTATION_FILE) as f:
        ann = json.load(f)

    print("Annotations loaded")
    indices, heatmaps = [], []
    for k in sorted(ann.keys(), key=int):
        idx = int(k)
        # The dataset needs frames idx-2 and idx-1, so the first two are skipped.
        if idx >= 2:
            # NOTE(review): coordinates are used as-is on the INPUT_W x INPUT_H
            # grid - confirm the annotations are in that resolution.
            x, y = ann[k]
            indices.append(idx)
            # generate_heatmap takes (x, y, H, W, sigma): height comes first.
            # Passing width first produced (960, 540) targets that cannot be
            # compared with the model's (540, 960) output.
            heatmaps.append(
                generate_heatmap(x, y, INPUT_H, INPUT_W, HEATMAP_SIGMA)
            )

    if not indices:
        raise ValueError(
            f"No usable annotations (frame index >= 2) in {ANNOTATION_FILE}"
        )

    heatmaps = np.array(heatmaps, dtype=np.float32)
    print("Heatmaps generated")

    dataset = TrackNetDataset(
        frame_paths, indices, heatmaps, (INPUT_W, INPUT_H)
    )

    print("Dataset created")

    # Train/val split
    val_len = int(len(dataset) * VAL_SPLIT)
    train_len = len(dataset) - val_len
    train_ds, val_ds = random_split(
        dataset,
        [train_len, val_len],
        generator=torch.Generator().manual_seed(SPLIT_SEED),
    )

    train_loader = DataLoader(train_ds, BATCH_SIZE, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_ds, BATCH_SIZE, shuffle=False, num_workers=4)
    print("Data loaders created")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print("Device: ", device)

    # Model
    model = TrackNet().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=LR)
    # TrackNet.forward already applies a sigmoid, so the loss must take
    # probabilities; BCEWithLogitsLoss would apply a second sigmoid.
    criterion = nn.BCELoss()
    print("Model created")

    best_val = float("inf")
    print("Best val: ", best_val)

    # Very small datasets give val_len == 0 and therefore no validation batches.
    has_val = len(val_loader) > 0

    # ---------------- TRAIN LOOP ----------------
    for epoch in range(EPOCHS):
        model.train()
        train_loss = 0

        for x, y in train_loader:
            x, y = x.to(device), y.to(device)

            pred = model(x)
            loss = criterion(pred, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        train_loss /= len(train_loader)

        # Validation
        if has_val:
            model.eval()
            val_loss = 0
            with torch.no_grad():
                for x, y in val_loader:
                    x, y = x.to(device), y.to(device)
                    val_loss += criterion(model(x), y).item()

            val_loss /= len(val_loader)
            monitored = val_loss
            print(f"[{epoch+1:02d}] Train: {train_loss:.4f} | Val: {val_loss:.4f}")
        else:
            # Without a validation set, select the checkpoint on training loss
            # rather than dividing by zero.
            monitored = train_loss
            print(f"[{epoch+1:02d}] Train: {train_loss:.4f} | Val: n/a")

        if monitored < best_val:
            best_val = monitored
            torch.save(model.state_dict(), MODEL_PATH)
            print("✅ Saved best model")

    print("🎯 Training complete")

In [7]:
# Run the full training pipeline. It reads the "frames" directory and
# "annotations.json" relative to the current working directory, so both must
# exist there before this cell is executed.
main_train()

FileNotFoundError: [Errno 2] No such file or directory: 'frames'