In [1]:
from LSPDataset import LSPDataset
from LSPTransforms import LSPTransforms
from scipy.io import loadmat
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from utils import display_image_with_pose
import torch
from tqdm.notebook import tqdm

np.set_printoptions(threshold=np.inf)


mats = loadmat("lsp/joints.mat")
mats = np.array(mats["joints"])
joints = mats.transpose(2, 0, 1)

train_size = 0.8
val_size = 0.1
test_size = 0.1


# create training, validation and test sets
indices = range(len(joints))
indices = np.random.permutation(indices)

train_indices = indices[: int(train_size * len(joints))]
val_indices = indices[
    int(train_size * len(joints)) : int((train_size + val_size) * len(joints))
]
test_indices = indices[int((train_size + val_size) * len(joints)) :]

print(f"Training set size: {len(train_indices)}")
print(f"Validation set size: {len(val_indices)}")
print(f"Test set size: {len(test_indices)}")

# filter the joints matrix to only include train_indices
train_joints = joints[train_indices]
print(f"Shape of training set: {train_joints.shape}")

# create dataset and calculate mean and std for image and joints

train_dummy = LSPDataset(
    image_path="lsp/images",
    image_indexes=train_indices,
    joints_path="lsp/joints.mat",
    joints_indexes=train_indices,
    transforms=LSPTransforms(220),
)
train_dummy_loader = DataLoader(
    train_dummy, batch_size=64, shuffle=False, num_workers=0
)

image_mean = torch.zeros(3)
image_std = torch.zeros(3)
joint_mean = torch.zeros(2)
joint_std = torch.zeros(2)
n = 0

for image, joints, _ in tqdm(train_dummy_loader):
    # calculate image mean and std
    batch_size = image.size(0)
    image = image.view(batch_size, 3, -1)  # Reshape to (batch_size, 3, height*width)
    image_mean += image.mean(2).sum(0)  # Sum of means for each channel
    image_std += image.std(2).sum(0)  # Sum of std deviations for each channel

    # calculate joint mean and std
    joints = joints[:, :, :2]
    joint_mean += joints.mean(1).sum(0)  # Sum of means for each coordinate
    joint_std += joints.std(1).sum(0)  # Sum of std deviations for each coordinate

    n += batch_size


# Calculate the mean and standard deviation across all images
image_mean /= n
image_std /= n

print("Channel Means: ", image_mean)
print("Channel Standard Deviations: ", image_std)


# Calculate the mean and standard deviation across all joints
joint_mean /= n
joint_std /= n

print("Joint Coordinate Means: ", joint_mean)
print("Joint Coordinate Std: ", joint_std)

Training set size: 8000
Validation set size: 1000
Test set size: 1000
Shape of training set: (8000, 14, 3)


  0%|          | 0/125 [00:00<?, ?it/s]

Channel Means:  tensor([72.0667, 81.2554, 67.7689])
Channel Standard Deviations:  tensor([65.6007, 64.9408, 62.9515])
Joint Coordinate Means:  tensor([82.2304, 82.2798])
Joint Coordinate Std:  tensor([70.2122, 70.1325])


In [2]:
## create the datasets

IMAGE_SIZE = 220
JOINT_DIR = "lsp/joints.mat"
IMAGE_DIR = "lsp/images"

train = LSPDataset(
    image_path=IMAGE_DIR,
    image_indexes=train_indices,
    joints_path=JOINT_DIR,
    joints_indexes=train_indices,
    transforms=LSPTransforms(
        IMAGE_SIZE,
        image_mean.numpy(),
        image_std.numpy(),
        joint_mean.numpy(),
        joint_std.numpy(),
    ),
)

test = LSPDataset(
    image_path=IMAGE_DIR,
    image_indexes=test_indices,
    joints_path=JOINT_DIR,
    joints_indexes=test_indices,
    transforms=LSPTransforms(
        IMAGE_SIZE,
        image_mean.numpy(),
        image_std.numpy(),
        joint_mean.numpy(),
        joint_std.numpy(),
    ),
)

val = LSPDataset(
    image_path=IMAGE_DIR,
    image_indexes=val_indices,
    joints_path=JOINT_DIR,
    joints_indexes=val_indices,
    transforms=LSPTransforms(
        IMAGE_SIZE,
        image_mean.numpy(),
        image_std.numpy(),
        joint_mean.numpy(),
        joint_std.numpy(),
    ),
)


train_loader = DataLoader(train, batch_size=1, shuffle=True, num_workers=0)
test_loader = DataLoader(test, batch_size=1, shuffle=False, num_workers=0)
val_loader = DataLoader(val, batch_size=1, shuffle=False, num_workers=0)

In [3]:
def masked_mse_loss(output, target):
    target_coords = target[:, :, :2]
    target_mask = target[:, :, 2]
    target_mask = target_mask.unsqueeze(-1)
    # Reshape output to match target_coords shape
    output = output.view(target_coords.shape)

    error = (output - target_coords) ** 2
    print("error")
    print(error)
    masked_square_error = error * target_mask
    print("masked_square_error")
    print(masked_square_error)
    loss = masked_square_error.sum() / target_mask.sum()
    return loss

In [4]:
from ConvNet import ConvNet
import torch.nn as nn
import torchsummary

model = ConvNet(dropout=0.6, batchnorm=True)

torchsummary.summary(model, input_size=(3, 220, 220))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 96, 53, 53]          34,944
       BatchNorm2d-2           [-1, 96, 53, 53]             192
              ReLU-3           [-1, 96, 53, 53]               0
         MaxPool2d-4           [-1, 96, 26, 26]               0
            Conv2d-5          [-1, 256, 26, 26]         614,656
       BatchNorm2d-6          [-1, 256, 26, 26]             512
              ReLU-7          [-1, 256, 26, 26]               0
         MaxPool2d-8          [-1, 256, 12, 12]               0
            Conv2d-9          [-1, 384, 12, 12]         885,120
      BatchNorm2d-10          [-1, 384, 12, 12]             768
             ReLU-11          [-1, 384, 12, 12]               0
           Conv2d-12          [-1, 384, 12, 12]       1,327,488
      BatchNorm2d-13          [-1, 384, 12, 12]             768
             ReLU-14          [-1, 384,

In [5]:
image, joints, _ = train[0]
flattened_joints = joints[:, :2].flatten()
mask = joints[:, 2]

masked_mse_loss(flattened_joints, joints)

IndexError: too many indices for tensor of dimension 2

In [None]:
for index, (image, joints, joint_transforms) in enumerate(train_loader):
    flattened_joints = joints[:, :, :2].flatten(start_dim=1)
    random_joints = torch.rand(flattened_joints.shape)
    loss = masked_mse_loss(random_joints, joints)
    print(loss)
    break

error
tensor([[[2.4633e-01, 9.2474e-02],
         [4.0198e-02, 9.5143e-02],
         [8.5194e-01, 5.9714e-02],
         ...,
         [2.6365e-02, 6.3627e-02],
         [1.5891e+01, 6.8466e+00],
         [1.9050e-01, 1.1751e+00]],

        [[1.5283e-01, 1.2459e-03],
         [2.5769e-01, 2.4223e-04],
         [4.5663e-01, 3.0939e-01],
         ...,
         [2.6249e-01, 5.8232e-01],
         [6.8496e+00, 1.0871e+01],
         [1.6545e-01, 3.0289e-01]],

        [[1.2474e+00, 4.1059e-01],
         [8.6614e-01, 6.6004e-01],
         [2.1333e-04, 3.2342e-03],
         ...,
         [5.3671e-04, 7.6558e-01],
         [2.8320e-01, 7.9927e-01],
         [2.4081e-02, 2.9656e-01]],

        ...,

        [[1.2458e-02, 1.1487e+00],
         [4.0251e-03, 2.1648e-01],
         [1.4447e-01, 9.2552e-01],
         ...,
         [2.9329e-02, 5.1358e-01],
         [2.5156e-03, 5.9420e-04],
         [1.1679e-01, 5.1431e-02]],

        [[1.3921e+00, 1.7594e-01],
         [7.4764e-02, 2.2647e-01],
      