In [None]:
# Mount Google Drive inside the Colab VM so files stored on Drive become
# reachable through the local filesystem.
from google.colab import drive

DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
!cp /content/drive/MyDrive/data.zip .
!unzip data.zip

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms

import json
import cv2
import numpy as np

In [None]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Create PyTorch Dataset

In [None]:
class KeypointsDataset(Dataset):
  """Images paired with flattened keypoint coordinates scaled by image size.

  The annotation file is a JSON sequence of records. Each record is read via
  two keys: ``"id"`` (the image is loaded from ``<img_dir>/<id>.png``) and
  ``"kps"`` (keypoint coordinates; presumably a list of ``[x, y]`` pairs --
  after flattening, even positions are divided by the image width and odd
  positions by the image height).

  Args:
    img_dir: Directory that contains the ``<id>.png`` image files.
    data_file: Path to the JSON annotation file.
  """

  def __init__(self, img_dir, data_file):
    self.img_dir = img_dir

    with open(data_file, 'r') as f:
      self.data = json.load(f)

    # Resize every image to 224x224 and normalize channels with the standard
    # ImageNet mean/std expected by torchvision's pretrained backbones.
    self.transforms = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

  def __len__(self):
    """Return the number of annotation records."""
    return len(self.data)

  @staticmethod
  def _normalize_keypoints(kps, width, height):
    """Flatten ``kps`` to a float32 vector and scale it by the image size.

    Even positions (x) are divided by ``width`` and odd positions (y) by
    ``height``, so in-image coordinates land in the [0, 1] range.
    """
    flat = np.array(kps).flatten().astype(np.float32)
    flat[::2] /= width
    flat[1::2] /= height
    return flat

  def __getitem__(self, idx):
    """Return ``(image, keypoints)`` for the record at position ``idx``.

    Returns:
      A tuple of the transformed image tensor and a 1-D ``float32`` NumPy
      array of keypoints normalized by the ORIGINAL image width/height
      (measured before resizing).

    Raises:
      FileNotFoundError: If the image file is missing or cannot be decoded.
    """
    item = self.data[idx]

    # Single quotes inside the replacement field keep this f-string valid on
    # Python versions older than 3.12, which reject reusing the outer quote.
    img_path = f"{self.img_dir}/{item['id']}.png"
    img = cv2.imread(img_path)
    if img is None:
      # cv2.imread signals failure by returning None instead of raising.
      raise FileNotFoundError(f"Could not read image: {img_path}")
    h, w = img.shape[:2]

    # OpenCV loads images as BGR; the transform pipeline expects RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = self.transforms(img)

    kps = self._normalize_keypoints(item["kps"], w, h)

    return img, kps

In [None]:
# Both splits read images from the same directory; only the annotation file differs.
train_dataset = KeypointsDataset("data/images", "data/data_train.json")
val_dataset = KeypointsDataset("data/images", "data/data_val.json")

# Shuffle only the training batches. Validation order is kept fixed so that
# evaluation is repeatable from run to run.
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)

# Create Model

In [None]:
# Start from an ImageNet-pretrained ResNet-50. The `weights=` enum replaces the
# deprecated `pretrained=True` flag; IMAGENET1K_V1 is the checkpoint that flag loaded.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

# Replace last layer (fully connected layer) to match number of (x, y) coordinates we expect for outputs
NUM_KEYPOINTS = 14
model.fc = torch.nn.Linear(model.fc.in_features, NUM_KEYPOINTS * 2)
model = model.to(device)

# Train Model

In [None]:
# Smooth L1 regression loss: squared penalty while the absolute error is below
# `beta`, absolute (linear) penalty once it reaches `beta` or more.
SMOOTH_L1_BETA = 0.01
LEARNING_RATE = 1e-4

criterion = torch.nn.SmoothL1Loss(beta=SMOOTH_L1_BETA)
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

In [None]:
epochs=20
for epoch in range(epochs):
    # Explicitly (re-)enter training mode every epoch: the validation pass below
    # switches to eval mode, and re-running this cell must not depend on whatever
    # mode a previous cell left the model in.
    model.train()
    for i, (imgs,kps) in enumerate(train_loader):
        imgs = imgs.to(device)
        kps = kps.to(device)

        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, kps)
        loss.backward()
        optimizer.step()

        if i % 10 == 0:
            print(f"Epoch {epoch}, iter {i}, loss: {loss.item()}")

    # Per-epoch validation: eval mode freezes BatchNorm statistics, and no_grad
    # skips building the autograd graph since no backward pass is needed.
    model.eval()
    val_loss_sum = 0.0
    val_count = 0
    with torch.no_grad():
        for imgs, kps in val_loader:
            imgs = imgs.to(device)
            kps = kps.to(device)
            batch_loss = criterion(model(imgs), kps)
            # Weight each batch by its size so a smaller final batch does not skew the mean.
            val_loss_sum += batch_loss.item() * imgs.size(0)
            val_count += imgs.size(0)

    # Guard against an empty validation set to avoid dividing by zero.
    if val_count > 0:
        print(f"Epoch {epoch}, val loss: {val_loss_sum / val_count}")

In [None]:
# Persist only the learned parameters (the state dict) rather than the whole module object.
MODEL_PATH = "keypoints_model.pth"
torch.save(model.state_dict(), MODEL_PATH)

### All code was written and all training was run in Google Colab. The trained model was saved to "keypoints_model.pth".