## Explore dataset

In [1]:
# List the files available in the competition data directory.
!ls data/facial-keypoints-detection

IdLookupTable.csv    test.csv             training.csv
SampleSubmission.csv test.zip             training.zip


In [2]:
# Relative path to the CSV holding the test images.
TEST_DATASET_PATH = 'data/facial-keypoints-detection/test.csv'

In [21]:
import pandas as pd


# Load the test CSV and preview its first rows (the preview shows the columns
# ImageId and Image, the latter a space-separated string of pixel values).
df = pd.read_csv(TEST_DATASET_PATH)
df.head()

Unnamed: 0,ImageId,Image
0,1,182 183 182 182 180 180 176 169 156 137 124 10...
1,2,76 87 81 72 65 59 64 76 69 42 31 38 49 58 58 4...
2,3,177 176 174 170 169 169 168 166 166 166 161 14...
3,4,176 174 174 175 174 174 176 176 175 171 165 15...
4,5,50 47 44 101 144 149 120 58 48 42 35 35 37 39 ...


## Build the PyTorch DataLoader

In [33]:
import numpy as np


class Sample:
    """One image from the facial-keypoints CSV together with its identifier.

    The image is stored as a ``uint8`` array of shape ``Sample.image_shape()``;
    ``image_from_str`` builds it by replicating the single-channel source pixels
    on all three channels.
    """

    def __init__(self, image: np.ndarray, idx: int):
        """Validate and store the image and its identifier.

        Args:
            image: ``np.uint8`` array of shape ``Sample.image_shape()``.
            idx: identifier of the image (``from_series`` passes the row's
                ``ImageId`` value).

        Raises:
            AssertionError: if ``image`` is not an ndarray, or has the wrong
                shape or dtype.
        """
        assert isinstance(image, np.ndarray), f"Invalid image: {type(image)}"
        assert image.shape == Sample.image_shape(), f"Invalid shape: {image.shape}"
        assert image.dtype == np.uint8, f"Invalid dtype: {image.dtype}"
        self._img = image
        self._idx = idx

    @staticmethod
    def keypoints_names():
        """Return the set of keypoint names.

        The result is a ``set``, so it has no defined iteration order; do not
        rely on it to order model outputs.
        """
        return {
            'left_eye_center', 'right_eye_center',
            'left_eye_inner_corner', 'left_eye_outer_corner',
            'right_eye_inner_corner', 'right_eye_outer_corner',
            'left_eyebrow_inner_end', 'left_eyebrow_outer_end',
            'right_eyebrow_inner_end', 'right_eyebrow_outer_end',
            'nose_tip', 'mouth_left_corner', 'mouth_right_corner',
            'mouth_center_top_lip', 'mouth_center_bottom_lip',
        }

    @staticmethod
    def image_shape():
        """Return the expected image shape as (height, width, channels)."""
        return (96, 96, 3)

    @staticmethod
    def image_from_str(s):
        """Parse a whitespace-separated string of pixel values into an image.

        Args:
            s: string with exactly height * width integer pixel values, in
                row-major order, separated by whitespace.

        Returns:
            ``np.uint8`` array of shape ``Sample.image_shape()`` in which the
            three channels are identical copies of the parsed plane.

        Raises:
            ValueError: if a token is not an integer or the number of values
                does not match height * width.
        """
        height, width = Sample.image_shape()[:2]
        # split() without an argument tolerates leading/trailing whitespace and
        # runs of spaces; split(' ') would yield empty tokens that int() rejects.
        pixels = np.asarray([int(token) for token in s.split()])
        if pixels.size != height * width:
            raise ValueError(
                f"Expected {height * width} pixel values, got {pixels.size}"
            )
        plane = pixels.reshape(height, width)
        # Replicate the single plane on a new trailing axis to get 3 channels.
        image = np.repeat(np.expand_dims(plane, -1), 3, -1)
        # The cast performs no range check on the parsed values.
        return image.astype(np.uint8)

    @staticmethod
    def from_series(series, hint=None):
        """Build a Sample from a row exposing ``"Image"`` and ``"ImageId"``.

        Args:
            series: mapping-like row; ``series["Image"]`` is the pixel string
                and ``series["ImageId"]`` the identifier.
            hint: unused; kept so existing callers keep working.
        """
        image = Sample.image_from_str(series["Image"])
        idx = series["ImageId"]
        return Sample(
            image=image,
            idx=idx,
        )

    @property
    def image(self):
        """The stored image array."""
        return self._img

    @property
    def idx(self):
        """The stored image identifier."""
        return self._idx


In [51]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset


class KeyPointsDataset(Dataset):
    """Torch dataset serving images from a facial-keypoints CSV file.

    Rows with missing values are dropped when the file is loaded. Every item
    is a dict holding the image under ``'image'`` and the row's identifier
    under ``'idx'``; if a transform was supplied, the dict is passed through
    it before being returned.
    """

    def __init__(self, csv_file, transform=None):
        """Read ``csv_file`` and remember the optional ``transform`` callable."""
        frame = pd.read_csv(csv_file)
        self._df = frame.dropna()
        self._transform = transform

    def __len__(self):
        """Number of rows left after dropping incomplete ones."""
        return len(self._df)

    def __getitem__(self, idx):
        """Return the sample at positional index ``idx`` (int or tensor)."""
        position = idx.tolist() if torch.is_tensor(idx) else idx

        parsed = Sample.from_series(self._df.iloc[position])
        item = {'image': parsed.image, 'idx': parsed.idx}

        # No transform configured: hand back the raw numpy-based dict.
        if not self._transform:
            return item
        return self._transform(item)

    
class ToTensor:
    """Transform turning a numpy-based sample dict into torch tensors."""

    def __call__(self, sample):
        """Convert ``sample['image']`` to float32 and ``sample['idx']`` to int32.

        The image keeps its array layout; only the dtype changes.
        """
        image = sample['image']
        idx = sample['idx']

        image_tensor = torch.from_numpy(image).to(torch.float32)
        # np.asarray wraps a scalar id in a 0-d array so from_numpy accepts it.
        idx_tensor = torch.from_numpy(np.asarray(idx)).to(torch.int32)

        return {'image': image_tensor, 'idx': idx_tensor}

In [52]:
# Dataset over the test CSV; ToTensor converts each sample's image and id to torch tensors.
dataset = KeyPointsDataset(TEST_DATASET_PATH, transform=ToTensor())

In [53]:
import torch


# Batches of 16 samples, loaded in the main process (num_workers=0).
# NOTE(review): shuffle=True makes the batch order differ between runs; the
# prediction loop keys results by image id, so ordering looks irrelevant for
# inference -- consider shuffle=False for reproducible cell outputs.
dataloader = torch.utils.data.DataLoader(dataset, batch_size=16, shuffle=True, num_workers=0)

In [54]:
# Pull a single batch to check tensor shapes and dtypes before running the model.
it = iter(dataloader)
batch = next(it)
imgs_batch, idx_batch = batch['image'], batch['idx']
# Cell result: image batch shape, image dtype, and the ids contained in the batch.
imgs_batch.shape, imgs_batch.dtype, idx_batch

(torch.Size([16, 96, 96, 3]),
 torch.float32,
 tensor([1070, 1281, 1591, 1332, 1583, 1408,  480, 1759,  184,  164, 1178,  740,
         1589,  269, 1294, 1313], dtype=torch.int32))

In [55]:
import yaml

from model.keypoints_regressor import build_model


deploy_config = 'configs/deploy.yaml'
checkpoint = 'checkpoints/26-Nov-2022-13-50-01/state_dict_epoch_99_final'

# Build the network from the deployment configuration.
with open(deploy_config, 'rt') as f:
    # safe_load accepts the open stream directly; no need to read it into a string.
    config = yaml.safe_load(f)
model = build_model(config)
# map_location='cpu' lets a checkpoint whose tensors were saved on a GPU be
# deserialized on a CPU-only machine; load_state_dict then copies the values
# into the model's existing parameters.
model.load_state_dict(torch.load(checkpoint, map_location='cpu'))
# Switch layers such as BatchNorm to evaluation behaviour. As the last
# expression of the cell this also displays the module structure.
model.eval()



KeypointsRegressor(
  (preprocessor): Preprocessor(
    (_normalize): Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
  )
  (core): ModelCore(
    (backbone): MobileNetV2(
      (features): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): InvertedResidual(
          (conv): Sequential(
            (0): Conv2dNormActivation(
              (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
              (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, trac

In [56]:
# Default call on the sample batch: the displayed result is a tensor with 30
# values per image (presumably 15 keypoints x 2 coordinates -- confirm in the model code).
model(imgs_batch)



tensor([[35.3043, 63.6925, 36.2361, 57.3584, 71.3809, 61.1587, 67.9841, 46.4500,
         36.7024, 22.8989, 28.9271, 76.8702, 35.9782, 29.3494, 28.0546, 37.0407,
         78.5478, 46.8620, 71.6979, 33.5923, 56.4348, 44.3751, 36.7381, 35.3563,
         27.7292, 53.2309, 29.1551, 16.4275, 35.8894, 70.2847],
        [38.1928, 63.0361, 39.3858, 55.8437, 76.4693, 61.0526, 76.5677, 45.3471,
         38.7263, 20.2209, 32.1163, 77.0672, 38.0382, 27.9887, 31.9161, 38.0139,
         80.2802, 45.5125, 76.0491, 29.5341, 60.1795, 45.3648, 39.3476, 35.4728,
         32.1306, 53.9716, 31.3090, 14.4804, 38.9390, 70.9580],
        [39.3644, 64.5978, 40.2081, 57.3690, 76.3040, 61.0421, 75.8854, 45.4584,
         37.4304, 23.0431, 34.6551, 79.1130, 37.3073, 30.7761, 31.9817, 41.0296,
         79.7447, 45.3106, 73.8315, 29.4848, 61.4714, 46.3289, 39.1072, 38.0739,
         33.1281, 55.7794, 30.0678, 17.9651, 40.6153, 72.5675],
        [40.4780, 65.8592, 39.8696, 58.6680, 74.0926, 56.6729, 67.7126, 42.4591

In [57]:
# With ret_raw=False the displayed result is a list with one dict per image,
# mapping each keypoint name to a pair of floats.
model(imgs_batch, ret_raw=False)

[{'left_eye_center': (35.304344, 63.692497),
  'right_eye_center': (36.236076, 57.358444),
  'left_eye_inner_corner': (71.38088, 61.15867),
  'left_eye_outer_corner': (67.984116, 46.449974),
  'right_eye_inner_corner': (36.702362, 22.898897),
  'right_eye_outer_corner': (28.927053, 76.870186),
  'left_eyebrow_inner_end': (35.978218, 29.349426),
  'left_eyebrow_outer_end': (28.054579, 37.04071),
  'right_eyebrow_inner_end': (78.547775, 46.86204),
  'right_eyebrow_outer_end': (71.69785, 33.592274),
  'nose_tip': (56.43479, 44.375122),
  'mouth_left_corner': (36.73812, 35.356277),
  'mouth_right_corner': (27.729233, 53.23085),
  'mouth_center_top_lip': (29.155083, 16.427452),
  'mouth_center_bottom_lip': (35.889435, 70.28465)},
 {'left_eye_center': (38.192787, 63.03613),
  'right_eye_center': (39.38582, 55.84365),
  'left_eye_inner_corner': (76.469315, 61.05259),
  'left_eye_outer_corner': (76.56773, 45.347073),
  'right_eye_inner_corner': (38.726273, 20.220913),
  'right_eye_outer_corner

## Prepare predictions

In [90]:
from collections import defaultdict


# Run the model over every batch and collect the per-image keypoint dicts,
# keyed by the image id carried in batch['idx'].
predictions = {}
# Inference only: disable autograd so no computation graph is recorded per batch.
with torch.no_grad():
    for batch in dataloader:
        imgs_batch, idx_batch = batch['image'], batch['idx']
        preds = model(imgs_batch, ret_raw=False)
        # tolist() yields plain Python ints, which serve as the dict keys.
        for image_pred, image_idx in zip(preds, idx_batch.tolist()):
            predictions[image_idx] = image_pred



In [93]:
# Spot-check the stored prediction for the image whose id is 2.
predictions[2]

{'left_eye_center': (36.18582, 66.12157),
 'right_eye_center': (37.310413, 59.63777),
 'left_eye_inner_corner': (76.02757, 63.668007),
 'left_eye_outer_corner': (72.544754, 48.25981),
 'right_eye_inner_corner': (39.38438, 20.576767),
 'right_eye_outer_corner': (28.43425, 79.46573),
 'left_eyebrow_inner_end': (38.20011, 27.119938),
 'left_eyebrow_outer_end': (30.850714, 36.181896),
 'right_eyebrow_inner_end': (85.47078, 48.87603),
 'right_eyebrow_outer_end': (77.34552, 33.21126),
 'nose_tip': (57.542732, 46.71447),
 'mouth_left_corner': (38.67534, 33.900158),
 'mouth_right_corner': (29.658188, 57.02594),
 'mouth_center_top_lip': (31.650848, 14.025801),
 'mouth_center_bottom_lip': (36.674168, 73.16548)}

In [127]:
# Sample submission file; the preview shows the columns RowId and Location.
example_df = pd.read_csv('data/facial-keypoints-detection/SampleSubmission.csv')
example_df.head()

Unnamed: 0,RowId,Location
0,1,0
1,2,0
2,3,0
3,4,0
4,5,0


In [128]:
# Lookup table; the preview shows that each RowId is tied to an ImageId and a
# FeatureName such as left_eye_center_x, with an empty Location column.
lookup = pd.read_csv('data/facial-keypoints-detection/IdLookupTable.csv')
lookup.head()

Unnamed: 0,RowId,ImageId,FeatureName,Location
0,1,1,left_eye_center_x,
1,2,1,left_eye_center_y,
2,3,1,right_eye_center_x,
3,4,1,right_eye_center_y,
4,5,1,left_eye_inner_corner_x,


In [129]:
from collections import defaultdict

# Resolve every requested RowId to its (ImageId, FeatureName) with a single
# indexed lookup instead of scanning the whole lookup table once per row.
# drop_duplicates keeps the first match per RowId, as the per-row scan did.
lookup_by_row = lookup.drop_duplicates("RowId").set_index("RowId")
row_ids = list(example_df["RowId"])
# .loc with a list of labels raises KeyError if any RowId is missing from the table.
requested = lookup_by_row.loc[row_ids, ["ImageId", "FeatureName"]]

data = {"RowId": row_ids, "Location": []}
for image_id, feature_name in zip(requested["ImageId"], requested["FeatureName"]):
    # "left_eye_center_x" -> keypoint "left_eye_center", coordinate "x".
    keypoint_name, coord = feature_name.rsplit("_", 1)
    pred = predictions[image_id][keypoint_name]
    # NOTE(review): element 1 is used for "x" and element 0 for "y", i.e. this
    # assumes the model returns each keypoint as (y, x) -- confirm against the
    # model's output convention.
    location = pred[0] if coord == 'y' else pred[1]
    data["Location"].append(location)

ans_df = pd.DataFrame.from_dict(data)
ans_df.head()

Unnamed: 0,RowId,Location
0,1,65.460831
1,2,36.115658
2,3,59.164566
3,4,36.816216
4,5,61.841137


In [130]:
# Write the submission file; index=False leaves the DataFrame index out of the CSV.
ans_df.to_csv("submission.csv", index=False)

In [131]:
# Number of rows in the submission (one per RowId of the sample submission).
len(ans_df)

27124