# Translation model from COCO -> MPII


In [2]:
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import json

  from .autonotebook import tqdm as notebook_tqdm


In [3]:


ROOT_FOLDER = "/datagrid/personal/baljibil"

# Ground-truth MPII validation annotations.
truth_path = f"{ROOT_FOLDER}/data/MPII_COCO/annotations/mpii_val.json"
with open(truth_path) as truth_file:
    annot_truth = json.load(truth_file)

# Predictions of the COCO model on the same MPII images.
coco_pred_path = f"{ROOT_FOLDER}/repos/scripts/coco_pred_on_mpii/pred.json"
with open(coco_pred_path) as pred_file:
    annot_pred_coco = json.load(pred_file)

# Predictions of the MPII model are not loaded in this notebook; kept for reference:
# with open(ROOT_FOLDER + '/repos/scripts/mpii_pred_on_mpii/pred.json', 'r') as f:
#     annot_pred_mpii = json.load(f)



In [4]:
# Pair every ground-truth entry with the prediction stored at the same index.
sample_ids = range(len(annot_truth))
pred_entries = annot_pred_coco['annotations']

# Inputs: predicted joints; targets: ground-truth joints.
X = np.array([pred_entries[k]['joints'] for k in sample_ids])
y = np.array([annot_truth[k]['joints'] for k in sample_ids])

# Per-sample bounding-box scale and center, taken from the ground truth.
box_scale = np.array([annot_truth[k]['scale'] for k in sample_ids])
box_center = np.array([annot_truth[k]['center'] for k in sample_ids])

for label, arr in (
    ("X shape: ", X),
    ("y shape:", y),
    ("box scale:", box_scale),
    ("box center:", box_center),
):
    print(label, arr.shape)

X shape:  (2958, 17, 2)
y shape: (2958, 16, 2)
box scale: (2958,)
box center: (2958, 2)


## Normalize the dataset
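- First, shift each joint by the bounding-box center and divide by the bounding-box scale value
- Then, optionally, apply a 0-1 normalization (the cell below performs only the first step)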

In [5]:

# Express every joint relative to its bounding box: subtract the box center,
# then divide by the box scale. Singleton axes are inserted so that NumPy
# broadcasting applies the per-sample center/scale to every joint, whatever
# the number of joints (no hard-coded 17 / 16 and no repeated copies).
center_per_sample = box_center[:, np.newaxis, :]
scale_per_sample = box_scale[:, np.newaxis, np.newaxis]

X_normalized = (X - center_per_sample) / scale_per_sample
y_normalized = (y - center_per_sample) / scale_per_sample

In [6]:
from sklearn.model_selection import train_test_split

## Split train, val, test

In [7]:
# Hold out 30% of the samples for testing, then 25% of the remainder for
# validation. Both splits use the same fixed random_state.
X_train, X_test, y_train, y_test = train_test_split(
    X_normalized, y_normalized, test_size=0.3, random_state=42
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.25, random_state=42
)

for label, split in (
    ("Train instance: ", X_train),
    ("Val instance: ", X_val),
    ("Test instance: ", X_test),
):
    print(label, split.shape[0])

# Only the train and validation splits are converted to float32 tensors here;
# X_test / y_test stay as NumPy arrays.
X_train, y_train, X_val, y_val = (
    torch.tensor(arr, dtype=torch.float32)
    for arr in (X_train, y_train, X_val, y_val)
)

for tensor in (X_train, y_train, X_val, y_val):
    print(tensor.shape)

Train instance:  1552
Val instance:  518
Test instance:  888
torch.Size([1552, 17, 2])
torch.Size([1552, 16, 2])
torch.Size([518, 17, 2])
torch.Size([518, 16, 2])


In [8]:
# Wrap the training and validation tensors in datasets and DataLoaders.
train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)

# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
# DataLoader defaults to batch_size=1, which means one forward pass per
# validation sample. Batch the validation set as well; order is irrelevant
# for evaluation, so shuffling stays off.
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

In [13]:

class TranslatorNN(nn.Module):
    """Fully connected network mapping one set of joints to another.

    The input of shape ``(batch, in_joints, coord_dim)`` is flattened, passed
    through two hidden ReLU layers, and projected to
    ``out_joints * coord_dim`` values, which are reshaped to
    ``(batch, out_joints, coord_dim)``. No activation is applied to the
    output layer.

    Args:
        in_joints: Number of input joints per sample (default 17).
        out_joints: Number of output joints per sample (default 16).
        coord_dim: Number of coordinates per joint (default 2).
        hidden_sizes: Widths of the two hidden layers (default ``(64, 32)``).
    """

    def __init__(self, in_joints=17, out_joints=16, coord_dim=2, hidden_sizes=(64, 32)):
        super().__init__()
        first_width, second_width = hidden_sizes
        # Remembered so forward() can restore the (joints, coords) layout.
        self.out_joints = out_joints
        self.coord_dim = coord_dim

        self.flatten = nn.Flatten()
        self.hidden1 = nn.Linear(in_joints * coord_dim, first_width)
        self.act1 = nn.ReLU()
        self.hidden2 = nn.Linear(first_width, second_width)
        self.act2 = nn.ReLU()
        self.output = nn.Linear(second_width, out_joints * coord_dim)

    def forward(self, x):
        """Return predicted joints of shape ``(batch, out_joints, coord_dim)``."""
        x = self.flatten(x)
        x = self.act1(self.hidden1(x))
        x = self.act2(self.hidden2(x))
        x = self.output(x)
        # Use the explicit batch size instead of -1: with an empty batch the
        # inferred dimension is ambiguous and view() raises.
        return x.view(x.size(0), self.out_joints, self.coord_dim)

    
# Seed torch's global RNG so that weight initialisation and the shuffling
# order of the training DataLoader are repeatable across kernel restarts
# (same seed value as the train/val/test split).
torch.manual_seed(42)

# Instantiate the model, loss function, and optimizer
model = TranslatorNN()

# Sanity check before training: compare the untrained prediction with the
# ground truth for the first training sample only (no need to run the whole
# training set through the model just to print one row).
with torch.no_grad():
    print("Pred: ", model(X_train[:1]))
    print("GT:", y_train[:1])

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
num_epochs = 200

# Training loop
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        # criterion returns the batch mean; weight it by the batch size so the
        # epoch figure is a per-sample average even if the last batch is short.
        running_loss += loss.item() * inputs.size(0)
    epoch_loss = running_loss / len(train_dataset)

    # Validation loop (no gradient tracking, no parameter updates)
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, targets in val_loader:
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            val_loss += loss.item() * inputs.size(0)
    val_epoch_loss = val_loss / len(val_dataset)

    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {epoch_loss:.4f}, Val Loss: {val_epoch_loss:.4f}")
print(model)



Pred:  tensor([[[ 2.4119, -0.3988],
         [ 0.1322,  3.8371],
         [-5.0854, -1.9829],
         [-0.5996, -4.2318],
         [ 2.5422,  3.2279],
         [-0.5547, -0.2550],
         [-1.4770,  1.6041],
         [-3.2899,  2.5912],
         [-4.0852,  0.2372],
         [-2.3274,  0.3135],
         [ 3.7015, -1.5642],
         [-0.9739,  2.6098],
         [-2.3990,  3.3872],
         [ 2.6193, -0.8078],
         [-2.6423, -0.9959],
         [-2.0022,  0.6103]]])
GT: tensor([[[-23.0007,  99.1904],
         [-21.5631,  46.0013],
         [-20.1256,  -0.4792],
         [  4.3126,  -0.4792],
         [  5.2710,  46.9597],
         [  9.5836, 100.6279],
         [ -7.6669,  -0.4792],
         [ -7.1877, -46.4805],
         [ -7.3666, -56.4111],
         [ -7.9671, -89.7390],
         [-17.7297,   1.4375],
         [-30.1884, -23.4798],
         [-28.7508, -46.4805],
         [ 14.3754, -46.9597],
         [ 20.1256, -20.6048],
         [  3.8334,  -0.4792]]])
Epoch [1/200], Train Loss