# Run in Google Colab

In [None]:
# Dataset archive on Google Drive (sharing link): https://drive.google.com/file/d/1lhAaeQCmk2y440PmagA0KmIVBIysVMwu/view?usp=drive_link

In [None]:
!gdown https://drive.google.com/uc?id=1lhAaeQCmk2y440PmagA0KmIVBIysVMwu

In [None]:
!unzip tennis_court_det_dataset.zip

# Start code

In [6]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms

import json
import cv2
import numpy as np

In [9]:
# Use the GPU when PyTorch can see one; otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda


# Create Torch Dataset

In [7]:

class KeypointsDataset(Dataset):
    """Image / keypoint pairs for keypoint regression.

    Every annotation entry must provide an ``id`` (the image is read from
    ``{img_dir}/{id}.png``) and ``kps`` (keypoints that flatten to
    ``[x0, y0, x1, y1, ...]`` in the pixel coordinates of the source image).

    Args:
        img_dir: Directory that holds the ``.png`` images.
        data_file: Path to the JSON annotation file (expected to be an array
            of entries as described above).
        img_size: Side length in pixels of the square image returned by the
            dataset. Keypoints are rescaled into the same frame. Defaults to
            224.
    """

    def __init__(self, img_dir, data_file, img_size=224):
        self.img_dir = img_dir
        self.img_size = img_size
        with open(data_file, "r") as f:
            self.data = json.load(f)

        # Resize to the network input size, convert to a tensor, then
        # normalize each channel. The mean/std values are the ImageNet
        # statistics that torchvision's pretrained models expect.
        self.transforms = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((img_size, img_size)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

    def __len__(self):
        """Return the number of annotated samples."""
        return len(self.data)

    @staticmethod
    def _scale_keypoints(kps, w, h, size=224):
        """Flatten ``kps`` and rescale from a ``w`` x ``h`` image to ``size`` x ``size``.

        Returns a 1-D ``float32`` array laid out as ``[x0, y0, x1, y1, ...]``.
        """
        flat = np.array(kps).flatten().astype(np.float32)
        # Even positions hold x coordinates, odd positions hold y coordinates.
        flat[::2] *= float(size) / w
        flat[1::2] *= float(size) / h
        return flat

    def __getitem__(self, idx):
        """Return ``(image_tensor, keypoints)`` for the sample at ``idx``.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be
                decoded.
        """
        item = self.data[idx]
        img_path = f"{self.img_dir}/{item['id']}.png"
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports failure by returning None rather than raising,
            # which would otherwise surface as an opaque AttributeError below.
            raise FileNotFoundError(f"Could not read image: {img_path}")

        # Original size is needed to map keypoints into the resized frame.
        h, w = img.shape[:2]

        # OpenCV loads images as BGR; the transforms expect RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = self.transforms(img)

        kps = self._scale_keypoints(item['kps'], w, h, self.img_size)

        return img, kps




In [None]:
# Build the training and validation datasets: both read from the same image
# folder, each with its own annotation file.
train_dataset = KeypointsDataset("data/images", "data/data_train.json")
val_dataset = KeypointsDataset("data/images", "data/data_val.json")

# Shuffle only the training batches. Validation is read in a fixed order so
# evaluation is deterministic from run to run.
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)


# Create model

In [None]:
# `pretrained=True` is deprecated in torchvision >= 0.13. The `weights` enum
# below selects the same ImageNet checkpoint that flag used to load.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
# ResNet was not trained for keypoint extraction, so its final classification
# layer is replaced by a linear head with 14*2 outputs
# (presumably 14 keypoints with an x and a y value each — confirm against the data).
model.fc = torch.nn.Linear(model.fc.in_features, 14*2)



In [None]:
# Move the model onto the device chosen earlier (`device`), so it matches the
# batches that the training loop sends to that same device.
model = model.to(device)

# Train model

In [None]:
# Loss: mean squared error between predicted and target keypoint values.
criterion = torch.nn.MSELoss()
# Optimizer: Adam over every parameter of the model, learning rate 1e-4.
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.0001)


In [None]:
epochs = 20
# Put the model in training mode explicitly, so re-running this cell still
# trains correctly even if another cell switched the model to eval().
model.train()
for epoch in range(epochs):
    for i, (imgs, kps) in enumerate(train_loader):
        # Move the batch to the same device as the model.
        imgs = imgs.to(device)
        kps = kps.to(device)

        # Clear gradients left over from the previous step before the new backward pass.
        optimizer.zero_grad()
        outputs = model(imgs)

        loss = criterion(outputs, kps)
        loss.backward()
        optimizer.step()

        # Report the loss of the current batch every 10 iterations.
        if i % 10 == 0:
            print(f"Epoch: {epoch+1}/{epochs}, Iter: {i+1}/{len(train_loader)}, Loss: {loss.item()}")

In [None]:
# Save only the model's state_dict (its weights), not the whole model object.
torch.save(model.state_dict(), "keypoints_model.pth")

In [None]:
# Download the saved weights file using Colab's `files` helper.
# `google.colab` is only importable inside Google Colab, so the import stays in this cell.
from google.colab import files
files.download("keypoints_model.pth")