In [1]:
# One-time setup: uncomment the next line to extract the dataset archive.
# !unzip tennis_court_det_dataset.zip

In [1]:
import torch 
import cv2
import json
import numpy as np

from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms

In [2]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print(device)

cuda


In [16]:
class keypointDataset(Dataset):
    """Image / keypoint pairs for keypoint regression.

    Each sample is an image loaded from ``img_dir`` together with the
    flattened keypoint coordinates from the annotation file, rescaled so
    they line up with the resized image.

    Args:
        img_dir: Directory that holds the ``<id>.png`` images.
        datafile: Path to a JSON file containing a sequence of records, each
            with an ``'id'`` entry and a ``'kps'`` entry.
        img_size: Side length, in pixels, of the square every image is
            resized to. Keypoints are rescaled to the same grid.
            Defaults to 224.
    """

    def __init__(self, img_dir, datafile, img_size=224):
        self.dir = img_dir
        self.img_size = img_size

        with open(datafile, 'r') as f:
            self.data = json.load(f)

        # ndarray -> PIL -> resized -> float tensor in [0, 1] -> normalized.
        self.transforms = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((img_size, img_size)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[.4, .4, .4], std=[.2, .2, .2]),
        ])

    def __len__(self):
        """Return the number of annotated samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, kps)`` for the sample at position ``idx``.

        Returns:
            image: The transformed image produced by ``self.transforms``.
            kps: 1-D ``float32`` array of flattened keypoints; even indices
                are scaled by ``img_size / width`` and odd indices by
                ``img_size / height`` of the original image.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be
                decoded by OpenCV.
        """
        item = self.data[idx]  # one annotation record from the JSON file
        img_path = f"{self.dir}/{item['id']}.png"

        image = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than on `.shape` below.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")

        # Original size is needed to map the labels onto the resized image.
        height, width = image.shape[:2]

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = self.transforms(image)

        kps = np.array(item['kps'], dtype=np.float32).flatten()

        # Rescale labels from original pixel coordinates to the resized grid.
        kps[::2] *= self.img_size / width
        kps[1::2] *= self.img_size / height

        return image, kps
         

In [17]:
# Dataset locations and batch size, kept together so they are easy to tune.
IMG_DIR = "data/images"
TRAIN_ANNOTATIONS = "data/data_train.json"
VAL_ANNOTATIONS = "data/data_val.json"
BATCH_SIZE = 16

train_dataset = keypointDataset(IMG_DIR, TRAIN_ANNOTATIONS)
valid_dataset = keypointDataset(IMG_DIR, VAL_ANNOTATIONS)

# Shuffle only the training batches; validation keeps a fixed order so
# evaluation runs are repeatable.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [18]:
# Number of keypoints the network regresses; each one has an x and a y value.
NUM_KEYPOINTS = 14

# Start from ImageNet-pretrained ResNet-50 weights. torchvision >= 0.13
# replaced the deprecated `pretrained=True` flag with the `weights=` enum;
# fall back to the old flag on older installations.
if hasattr(models, "ResNet50_Weights"):
    model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
else:
    model = models.resnet50(pretrained=True)

# The pretrained classification head does not predict keypoints, so swap it
# for a fully connected layer with NUM_KEYPOINTS * 2 outputs (x, y per keypoint).
model.fc = torch.nn.Linear(model.fc.in_features, NUM_KEYPOINTS * 2)


In [19]:
# Place the network's parameters and buffers on the selected device.
model = model.to(device=device)

In [20]:
# Adam over all of the model's parameters.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
# Mean-squared error between predicted and target keypoint coordinates.
criterion = torch.nn.MSELoss()


In [21]:
# Put the model in training mode explicitly: if any earlier cell switched it
# to eval(), layers such as batch norm would otherwise stay frozen here.
model.train()

for epochs in range(10):
    for i, (image, kps) in enumerate(train_dataloader):
        # Batch and targets must live on the same device as the model.
        image = image.to(device)
        kps = kps.to(device)

        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()
        output_kps = model(image)

        loss = criterion(output_kps, kps)
        loss.backward()
        optimizer.step()

        # Report the current batch loss every 100 batches.
        if i % 100 == 0:
            print(f' epoch : {epochs + 1} loss = {loss.item()}')
        
        
    

 epoch : 1 loss = 15116.189453125
 epoch : 1 loss = 59.07713317871094
 epoch : 1 loss = 41.92870330810547
 epoch : 1 loss = 37.23984909057617
 epoch : 1 loss = 38.38298797607422
 epoch : 2 loss = 24.739696502685547
 epoch : 2 loss = 20.082767486572266
 epoch : 2 loss = 9.61832332611084
 epoch : 2 loss = 10.943277359008789
 epoch : 2 loss = 12.807669639587402
 epoch : 3 loss = 12.490171432495117
 epoch : 3 loss = 49.93429946899414
 epoch : 3 loss = 11.056880950927734
 epoch : 3 loss = 87.69979858398438
 epoch : 3 loss = 8.341574668884277
 epoch : 4 loss = 5.892822742462158
 epoch : 4 loss = 57.11442947387695
 epoch : 4 loss = 7.442975044250488
 epoch : 4 loss = 6.252378940582275
 epoch : 4 loss = 12.140094757080078
 epoch : 5 loss = 16.12542724609375
 epoch : 5 loss = 10.175070762634277
 epoch : 5 loss = 12.760015487670898
 epoch : 5 loss = 12.888014793395996
 epoch : 5 loss = 3.2263920307159424
 epoch : 6 loss = 6.822922706604004
 epoch : 6 loss = 14.233360290527344
 epoch : 6 loss = 2

In [23]:
# Persist only the learned weights (the state_dict), not the module object.
torch.save(obj=model.state_dict(), f="keypoints_model.pth")