In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
import json
import cv2
import numpy as np
from torch.utils.data import Subset

## Create torch dataset


In [None]:
class KeypointsDataset(Dataset):
    """Image / keypoint dataset backed by a JSON annotation file.

    The JSON file is loaded as a sequence of items. Each item provides an
    ``'id'`` (the image is read from ``<img_dir>/<id>.png``) and ``'kps'``
    (keypoint coordinates, flattened here to ``[x0, y0, x1, y1, ...]``).

    Args:
        img_dir: Directory containing the ``.png`` images.
        data_file: Path to the JSON annotation file.
        img_size: Side length, in pixels, of the square every image is resized
            to. Keypoints are rescaled into the same coordinate frame.
    """

    def __init__(self, img_dir, data_file, img_size=224):
        self.img_dir = img_dir
        self.img_size = img_size
        with open(data_file, "r") as f:
            self.data = json.load(f)

        # ndarray -> PIL -> fixed-size tensor, normalised with the standard
        # ImageNet mean/std used by torchvision's pretrained backbones.
        self.transforms = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((img_size, img_size)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

    def __len__(self):
        """Return the number of annotated items."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, keypoints)`` for the item at ``idx``.

        Returns:
            A tuple of the transformed image and a flat ``float32`` NumPy array
            of keypoints expressed in the resized image's pixel coordinates.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be decoded.
        """
        item = self.data[idx]
        img_path = f"{self.img_dir}/{item['id']}.png"
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        h, w = img.shape[:2]

        # OpenCV decodes to BGR; the transforms pipeline expects RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = self.transforms(img)

        kps = np.array(item['kps'], dtype=np.float32).flatten()
        # Map keypoints from original pixel coordinates to the resized image.
        kps[::2] *= self.img_size / w   # x coordinates
        kps[1::2] *= self.img_size / h  # y coordinates

        return img, kps

In [None]:
# Dataset location on Google Drive. Drive must already be mounted at
# /content/drive before this cell runs, otherwise the JSON files are not found.
DATA_ROOT = "/content/drive/MyDrive/data"
IMG_DIR = f"{DATA_ROOT}/images"

# Train and validation splits share one image directory and differ only in
# their annotation file.
train_dataset = KeypointsDataset(IMG_DIR, f"{DATA_ROOT}/data_train.json")
val_dataset = KeypointsDataset(IMG_DIR, f"{DATA_ROOT}/data_val.json")




In [None]:
# Train on a random subset of at most 3500 images to keep each epoch short.
SEED = 42
torch.manual_seed(SEED)  # makes DataLoader shuffling repeatable
rng = np.random.default_rng(SEED)

# Clamp to the dataset size: sampling without replacement raises ValueError
# when more samples are requested than exist.
num_train_samples = min(3500, len(train_dataset))
train_indices = rng.choice(len(train_dataset), num_train_samples, replace=False)
train_subset = Subset(train_dataset, train_indices.tolist())

# DataLoader for the subset of training data
train_loader = DataLoader(train_subset, batch_size=10, shuffle=True)

# DataLoader for the validation data (a fixed order keeps evaluation comparable)
val_loader = DataLoader(val_dataset, batch_size=10, shuffle=False)

## Create model

In [None]:
NUM_KEYPOINTS = 14  # the network predicts one (x, y) pair per keypoint

# `pretrained=True` is deprecated in torchvision; IMAGENET1K_V1 is the
# checkpoint that flag used to select (resnet50-0676ba61.pth).
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

# Replace the last (classification) layer with a keypoint-regression head.
model.fc = torch.nn.Linear(model.fc.in_features, NUM_KEYPOINTS * 2)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 119MB/s]


In [None]:
# Mean-squared error between predicted and target keypoint coordinates,
# minimised with Adam over every parameter of the network.
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)

In [None]:
# Shell out to nvidia-smi to show which GPU (if any) is attached to this runtime.
!nvidia-smi

Sat Jun 15 06:51:09 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   46C    P8              10W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [None]:
epochs = 30
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the model to the device once, before the loop, instead of on every
# batch, and make sure batch-norm layers are in training mode.
model.to(device)
model.train()

try:
    for epoch in range(epochs):
        for i, (imgs, kps) in enumerate(train_loader):
            imgs = imgs.to(device)
            kps = kps.to(device)

            # Standard optimisation step: reset grads, forward, loss, backward, update.
            optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, kps)
            loss.backward()
            optimizer.step()

            # Log every 10th batch to keep the output readable.
            if i % 10 == 0:
                print(f"epoch {epoch}, iter {i}, loss {loss.item()}")

except KeyboardInterrupt:
    # Save the model if the program gets interrupted
    torch.save(model.state_dict(), 'interrupted_model.pth')
    print("Model saved successfully.")

epoch 0, iter 0, loss 15006.146484375
epoch 0, iter 10, loss 15009.3359375
epoch 0, iter 20, loss 14618.376953125
epoch 0, iter 30, loss 13294.185546875
epoch 0, iter 40, loss 13578.8515625
epoch 0, iter 50, loss 13313.326171875
epoch 0, iter 60, loss 12738.8271484375
epoch 0, iter 70, loss 12499.541015625
epoch 0, iter 80, loss 12431.49609375
epoch 0, iter 90, loss 10849.7490234375
epoch 0, iter 100, loss 11141.986328125
epoch 0, iter 110, loss 11423.27734375
epoch 0, iter 120, loss 10364.384765625
epoch 0, iter 130, loss 9735.759765625
epoch 0, iter 140, loss 9658.318359375
epoch 0, iter 150, loss 9522.8955078125
epoch 0, iter 160, loss 8881.044921875
epoch 0, iter 170, loss 7976.72705078125
epoch 0, iter 180, loss 8547.12109375
epoch 0, iter 190, loss 7885.3759765625
epoch 0, iter 200, loss 7717.91357421875
epoch 0, iter 210, loss 7657.322265625
epoch 0, iter 220, loss 8265.4482421875
epoch 0, iter 230, loss 7017.1962890625
epoch 0, iter 240, loss 7050.701171875
epoch 0, iter 250, l

In [None]:
# Mount Google Drive at /content/drive.
# NOTE(review): the dataset cell reads from /content/drive/MyDrive/data, so on a
# fresh runtime this mount has to run before it. Consider moving this cell to
# the top of the notebook so Restart & Run All works.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Checkpoint the network weights and the optimizer state to separate files so
# that training can be resumed from them.
for stateful, ckpt_path in ((model, "keypoints_model.pth"), (optimizer, 'optimizer.pth')):
    torch.save(stateful.state_dict(), ckpt_path)

In [None]:
# Define the number of additional epochs
additional_epochs = 20
total_epochs = epochs + additional_epochs  # Total epochs including previously trained epochs

# Resolve the device first so the checkpoints can be mapped onto it. Without
# map_location, a checkpoint written on a GPU runtime fails to load on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model.load_state_dict(torch.load('keypoints_model.pth', map_location=device))
model.to(device)
optimizer.load_state_dict(torch.load('optimizer.pth', map_location=device))

# Make sure batch-norm layers are in training mode before resuming.
model.train()

for epoch in range(epochs, total_epochs):
    for i, (imgs, kps) in enumerate(train_loader):
        imgs = imgs.to(device)
        kps = kps.to(device)

        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, kps)
        loss.backward()
        optimizer.step()

        # Log every 10th batch to keep the output readable.
        if i % 10 == 0:
            print(f"epoch {epoch}, iter {i}, loss {loss.item()}")

# Save the model again after training
torch.save(model.state_dict(), 'final_model.pth')
print("Training completed and model saved successfully.")

epoch 30, iter 0, loss 1.03144371509552
epoch 30, iter 10, loss 1.8771153688430786
epoch 30, iter 20, loss 2.3323233127593994
epoch 30, iter 30, loss 1.1387273073196411
epoch 30, iter 40, loss 0.64808589220047
epoch 30, iter 50, loss 0.9844887852668762
epoch 30, iter 60, loss 1.197916030883789
epoch 30, iter 70, loss 0.8513360023498535
epoch 30, iter 80, loss 0.6063991189002991
epoch 30, iter 90, loss 2.442007541656494
epoch 30, iter 100, loss 1.3507256507873535
epoch 30, iter 110, loss 1.092253565788269
epoch 30, iter 120, loss 1.5133079290390015
epoch 30, iter 130, loss 1.1405905485153198
epoch 30, iter 140, loss 3.0844662189483643
epoch 30, iter 150, loss 10.970961570739746
epoch 30, iter 160, loss 3.5040664672851562
epoch 30, iter 170, loss 0.8491945266723633
epoch 30, iter 180, loss 1.4908809661865234
epoch 30, iter 190, loss 1.735029697418213
epoch 30, iter 200, loss 0.6444419622421265
epoch 30, iter 210, loss 0.8996357321739197
epoch 30, iter 220, loss 1.1706624031066895
epoch 3