# Compare the keypoints on the original and the upsampled images

For the comparison, we use torchvision's built-in, pre-trained Keypoint R-CNN network, because the keypoints of the original dataset (FDF256) were generated with this model.


**From PyTorch documentation**:
During inference, the model requires only the input tensors, and returns the post-processed predictions as a List[Dict[Tensor]], one for each input image. The fields of the Dict are as follows, where N is the number of detected instances:
- boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with 0 <= x1 < x2 <= W and 0 <= y1 < y2 <= H.
- labels (Int64Tensor[N]): the predicted labels for each instance
- scores (Tensor[N]): the scores of each instance
- keypoints (FloatTensor[N, K, 3]): the locations of the predicted keypoints, in [x, y, v] format.


In [1]:
import torch
import torchvision
from torchvision.models.detection import keypointrcnn_resnet50_fpn

In [12]:
import numpy as np

# One seed shared by every random number generator used in this notebook.
SEED = 42
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
np.random.seed(SEED)

# Turn off cuDNN autotuning and request its deterministic mode.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [2]:
# Build the pre-trained Keypoint R-CNN through the constructor imported at the
# top of the notebook (the same callable as the fully qualified
# torchvision.models.detection path).
model = keypointrcnn_resnet50_fpn(pretrained=True)
# Switch to inference mode; kept as the last expression so the notebook still
# displays the module structure.
model.eval()

KeypointRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(640, 672, 704, 736, 768, 800), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.

In [29]:
# Smoke test: run the detector on two random images of different sizes and
# inspect the shape of the keypoints predicted for the first one.
print(torch.cuda.is_available())
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
# Inference only: disabling autograd avoids recording a graph for the forward
# pass, which would otherwise cost memory and time for no benefit.
with torch.no_grad():
    predictions = model(x)
print(predictions[0]['keypoints'].shape)

True
torch.Size([1, 17, 3])


In [4]:
from torchvision.io import read_image
from torch.utils.data import DataLoader
from fdf256dataset import FDF256Dataset

# Root directory of the FDF validation split read by FDF256Dataset.
dataset_path = '/datadrive/FDF/dataset/val'

# Ask the dataset for keypoints and image paths as well; no transform is applied.
dataset = FDF256Dataset(
    dirpath=dataset_path,
    load_keypoints=True,
    transform=None,
    load_impath=True,
)

# Single-sample, in-order loader backed by persistent worker processes.
dataloader = DataLoader(
    dataset,
    batch_size=1,
    shuffle=False,
    num_workers=22,
    prefetch_factor=2,
    persistent_workers=True,
    pin_memory=True,
)

Could not load pyspng. Defaulting to pillow image backend.
Dataset loaded from: /datadrive/FDF/dataset/val. Number of samples:6531


In [13]:
# Directory from which the images to be re-evaluated by the keypoint model are read.
# NOTE(review): this is the `images` folder under the same validation directory
# as dataset_path - confirm it holds the upsampled images rather than the originals.
predictions_path = '/datadrive/FDF/dataset/val/images'

In [37]:
from tqdm import tqdm
import os
from PIL import Image
from einops import rearrange

# For every dataset sample, load the image with the same file name from
# predictions_path, run the keypoint detector on it and fetch the keypoints of
# the first detected instance, next to the keypoints stored in the dataset.
for i in tqdm(range(len(dataset))):
    # Index the dataset once per iteration: every dataset[i] goes through
    # __getitem__ again, so repeated lookups would repeat the loading work.
    sample = dataset[i]
    original_image = sample['img'] # presumably shape (256, 256, 3) - TODO confirm
    original_kpts = sample['keypoints'] # shape (7, 2) according to the printed output
    print(original_kpts)
    imname = os.path.basename(str(sample['impath']))
    print(imname)

    scaled_image_path = os.path.join(predictions_path, imname)
    # Skip samples without a counterpart image. Checking before opening is
    # required: Image.open raises on a missing file and never returns None.
    if not os.path.isfile(scaled_image_path):
        continue
    with Image.open(scaled_image_path) as fp:
        # Force three channels so that grayscale / RGBA files still yield an
        # (h, w, 3) array for the rearrange below.
        image = np.array(fp.convert("RGB"), dtype=np.float32)
    # 8-bit pixel values span 0..255, so dividing by 255 maps them onto [0, 1],
    # the input range the torchvision detection models expect.
    image = image / 255.0
    image = rearrange(image, "h w c -> c h w")

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        predictions = model([torch.from_numpy(image)])
    detected_kpts = predictions[0]['keypoints'] # (N, 17, 3), N = detected instances
    # The detector may find no instance at all; indexing [0] would then fail.
    if detected_kpts.shape[0] == 0:
        continue
    pred_kpts = detected_kpts[0] # tensor of shape (17, 3)
    print(pred_kpts.shape)

    # Development aid: stop after the first successfully processed sample.
    break
    


  0%|          | 0/6531 [00:00<?, ?it/s]

[[ 0.5746097   0.5696869 ]
 [ 0.6633309   0.43651295]
 [ 0.43749496  0.4405485 ]
 [ 0.74801946  0.43651295]
 [ 0.19149499  0.48897544]
 [ 0.8770686   0.9490306 ]
 [-0.05853773  0.97727966]]
0.png


  0%|          | 0/6531 [00:03<?, ?it/s]

torch.Size([17, 3])



