# I. Data Preprocessing

In [1]:
import xml.etree.ElementTree as ET 
import numpy as np
import os 

tree = ET.parse('ibug_300W_large_face_landmark_dataset/labels_ibug_300W_train.xml')
root = tree.getroot()
root_dir = 'ibug_300W_large_face_landmark_dataset'

bboxes = [] # face bounding box used to crop the image
landmarks = [] # the facial keypoints/landmarks for the whole training dataset
img_filenames = [] # the image names for the whole dataset

for filename in root[2]:
	img_filenames.append(os.path.join(root_dir, filename.attrib['file']))
	box = filename[0].attrib
	# x, y for the top left corner of the box, w, h for box width and height
	bboxes.append([box['left'], box['top'], box['width'], box['height']]) 

	landmark = []
	for num in range(68):
		x_coordinate = int(filename[0][num].attrib['x'])
		y_coordinate = int(filename[0][num].attrib['y'])
		landmark.append([x_coordinate, y_coordinate])
	landmarks.append(landmark) # relative? 

landmarks = np.array(landmarks).astype('float32')
bboxes = np.array(bboxes).astype('float32') 

In [2]:
# build a dataset
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as transforms

import numpy
import random

import albumentations as A
from albumentations.pytorch import ToTensorV2

def seed_everything(seed):
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    
seed_everything(42)

In [3]:
class IbugTrainingHeatmapDataset(Dataset):
    def __init__(self, img_filenames, bboxes, landmarks, normalize=True, basic_transform=None, albu_transform=None, sigma=1):
        self.img_filenames = img_filenames
        self.bboxes = bboxes
        self.landmarks = landmarks
        self.basic_transform = basic_transform # resize, totensor, normalize
        self.albu_transform = albu_transform # albumentations
        self.normalize = normalize
        self.sigma = sigma
        if not self.normalize:
            print('Not normalizing the image')
        if not self.basic_transform:
            print('No basic transformation')
        
    def __len__(self):
        return len(self.img_filenames)
    
    def __getitem__(self, idx):
        img_path = self.img_filenames[idx]
        opened_img = Image.open(img_path).convert('L') # range [0, 255] # shape (H, W)
        bounding_box = self.bboxes[idx]
        landmark_ori = self.landmarks[idx] # (68, 2)
        x, y, w, h = bounding_box # left, top, width, height
        cropped_by_bbox = opened_img.crop((x, y, x+w, y+h)) # shape (h, w)

        cropped_by_bbox = np.array(cropped_by_bbox) # range [0, 255] # shape (H, W)
        cropped_by_bbox = np.expand_dims(cropped_by_bbox, axis=2) # shape (H, W, 1)
        # to float32
        cropped_by_bbox = cropped_by_bbox.astype(np.float32) # shape (H, W, C)
        if self.normalize:
            cropped_by_bbox = cropped_by_bbox / 255.0 - 0.5 # range [-0.5, 0.5]
            # print(cropped_by_bbox.dtype)
        # adjust the landmark
        # landmark2 = landmark - [x, y] # FIXME: broadcast?
        landmark = np.zeros_like(landmark_ori)
        landmark[:, 0] = landmark_ori[:, 0] - x
        landmark[:, 1] = landmark_ori[:, 1] - y
        # assert np.all(landmark == landmark2)
        
        
        # to relative coordinates
        if self.albu_transform:
            transformed = self.albu_transform(image=cropped_by_bbox, keypoints=landmark)
            tfed_im = transformed['image'] # 68 tokens and each token has 224*224 classes
        landmark[:, 1] = landmark[:, 1] / h 
        landmark[:, 0] = landmark[:, 0] / w 
        # print(tfed_im.shape, w, h)
        # tfed_im = torch.tensor(tfed_im)
        # print(tfed_im.shape)
        if self.basic_transform:
            tfed_im = self.basic_transform(tfed_im) # tfed_im: (C, 224, 224)
        else:
            tfed_im = torch.tensor(tfed_im)
        # heatmap should be (68, 224, 224)
        heatmap = self._gaussian_heatmap(landmark * 224, 224, 224, 68)
        heatmap = torch.tensor(heatmap)
        heatmap = heatmap.float()
        landmark = torch.tensor(landmark)
        return tfed_im, heatmap, landmark # tfed_im: (C=1, 224, 224), heatmap: (68, 224, 224), landmark: (68, 2)
    
    def _gaussian_heatmap(self, landmark, height, width, channels):
        heatmap = np.ones((channels, height, width)).astype(np.float32)
        # landmark: (68, 2)
        for i, (x, y) in enumerate(landmark):
            if x < 0 or y < 0 or x >= width or y >= height:
                # all 1s so that loss is 0
                continue
            x_coords = np.arange(width) # from 0 to width-1
            y_coords = np.arange(height) # 68 tokens and each token has 224*224 classes
            x_coords, y_coords = np.meshgrid(x_coords, y_coords)
            heatmap[i] = np.exp(-((x_coords - x) ** 2 + (y_coords - y) ** 2) / (2 * self.sigma ** 2))
            # optional: normalize the heatmap by max or sum
            # heatmap[i] = heatmap[i] / np.sum(heatmap[i])
        return heatmap
    
    
basic_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)), # From (C, H, W) to (C, 224, 224)
    # grayscale
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
    # transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

transform = A.Compose([
    A.Affine(rotate=(-15, 15), translate_percent={'x': 0.1, 'y': 0.1}),
    # A.GaussNoise(p=0.5), # DO WE NEED THIS?
    A.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
    ToTensorV2(),
], keypoint_params=A.KeypointParams(format='xy', remove_invisible=False))

dataset = IbugTrainingHeatmapDataset(img_filenames, bboxes, landmarks, basic_transform=basic_transform, albu_transform=transform, normalize=True, sigma=1)

# II. Model Architecture
We use a UNet predicting the probability density heatmap (shape [68, 224, 224]) of the face landmarks, and then find the expected position of the landmarks w.r.t. the heatmap,
i.e. a pixelwise classification problem.

In [4]:
# unet
import torch
import torch.nn as nn
import torch.nn.functional as F
# load unet
from unet import PixelwiseClassificationUNet

In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = PixelwiseClassificationUNet(1, 68, 72).to(device)
def init_weights(m):
    if type(m) == nn.Conv2d:
        nn.init.kaiming_normal_(m.weight)
        nn.init.zeros_(m.bias)
model.apply(init_weights)
n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'Number of parameters: {n_params}')

Number of parameters: 2409908


# III. Training

In [6]:

from tqdm import tqdm
LR = 1e-3
BS = 16
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
num_epochs = 30
criterion = nn.MSELoss()
mae = nn.L1Loss()

train_dataset, val_dataset = torch.utils.data.random_split(dataset, [int(len(dataset) * 0.8), len(dataset) - int(len(dataset) * 0.8)])
train_loader = DataLoader(train_dataset, batch_size=BS, shuffle=True, num_workers=16)
val_loader = DataLoader(val_dataset, batch_size=BS, shuffle=False, num_workers=16)

In [7]:
import warnings
warnings.filterwarnings("ignore")
# train
train_losses = []
val_losses = []

for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    train_mae = 0
    loop = tqdm(enumerate(train_loader), total=len(train_loader), leave=True)
    loop.set_description(f'Epoch [{epoch+1}/{num_epochs}]')
    for i, (images, heatmaps, landmark) in loop:
        # bad heatmap -> jump to next iteration
        # i.e. < 1e-4
        # if torch.sum(heatmaps) < 1e-4:
        #     continue
        images = images.to(device) # shape: (B, C, H, W)
        landmark = landmark.to(device) # shape: (B, 68, 2)
        
        # Forward pass
        predicted_landmark = model(images) # shape: (B, 68, 2)
        loss = criterion(predicted_landmark, landmark)
        # learn the heatmap: not what we are doing now
        # pred_heatmap = model(images, return_heatmap=True) # shape: (B, 68, H*W)
        # do BCE
        # heatmaps = heatmaps.view(-1, 68, 224*224) # shape: (B, 68, H*W)
        # loss = bce(pred_heatmap, heatmaps)
        with torch.no_grad():
            mae_loss = mae(predicted_landmark, landmark) * 224
            train_mae += mae_loss.item()
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        loop.set_postfix(train_loss=loss.item(), mae=mae_loss.item())

    train_loss /= len(train_loader)
    train_losses.append(train_loss)
    train_mae /= len(train_loader)
    # validation
    model.eval()
    val_loss = 0
    val_mae = 0
    loop = tqdm(enumerate(val_loader), total=len(val_loader), leave=True)
    loop.set_description(f'Epoch [{epoch+1}/{num_epochs}]')
    with torch.no_grad():
        for i, (images, heatmaps, landmark) in loop:
            images = images.to(device) # shape: (B, C, H, W)
            heatmaps = heatmaps.to(device) # shape: (B, 68, H, W)
            landmark = landmark.to(device) # shape: (B, 68, 2)
            # Forward pass
            predicted_landmark = model(images)
            loss = criterion(predicted_landmark, landmark)
            mae_loss = mae(predicted_landmark, landmark) * 224
            val_mae += mae_loss.item()
            val_loss += loss.item()
            loop.set_postfix(val_loss=loss.item(), mae=mae_loss.item())
            
    val_loss /= len(val_loader)
    val_mae /= len(val_loader)
    val_losses.append(val_loss)
    print(f'Epoch {epoch+1}, Train Loss: {train_loss}, Val Loss: {val_loss}, Train MAE: {train_mae}, Val MAE: {val_mae}')
        

Epoch [1/30]: 100%|██████████| 334/334 [02:17<00:00,  2.42it/s, mae=16.5, train_loss=0.00905]
Epoch [1/30]: 100%|██████████| 84/84 [00:24<00:00,  3.49it/s, mae=12.2, val_loss=0.00481]

Epoch 1, Train Loss: 0.010969726476835053, Val Loss: 0.005435942245336871



Epoch [2/30]: 100%|██████████| 334/334 [02:19<00:00,  2.39it/s, mae=15.3, train_loss=0.0067] 
Epoch [2/30]: 100%|██████████| 84/84 [00:23<00:00,  3.58it/s, mae=12.3, val_loss=0.00504]

Epoch 2, Train Loss: 0.00563011615408841, Val Loss: 0.0056964997811952515



Epoch [3/30]: 100%|██████████| 334/334 [02:20<00:00,  2.37it/s, mae=12.5, train_loss=0.00494]
Epoch [3/30]: 100%|██████████| 84/84 [00:24<00:00,  3.40it/s, mae=11.7, val_loss=0.0045] 

Epoch 3, Train Loss: 0.005531313762899689, Val Loss: 0.005507634123898156



Epoch [4/30]: 100%|██████████| 334/334 [02:20<00:00,  2.38it/s, mae=10.2, train_loss=0.00326]
Epoch [4/30]: 100%|██████████| 84/84 [00:24<00:00,  3.50it/s, mae=11.5, val_loss=0.00438]

Epoch 4, Train Loss: 0.005411045891400092, Val Loss: 0.005481550212217761



Epoch [5/30]: 100%|██████████| 334/334 [02:21<00:00,  2.36it/s, mae=9.8, train_loss=0.00332] 
Epoch [5/30]: 100%|██████████| 84/84 [00:24<00:00,  3.48it/s, mae=12.1, val_loss=0.00488]

Epoch 5, Train Loss: 0.005379487749211445, Val Loss: 0.005374456470322218



Epoch [6/30]: 100%|██████████| 334/334 [02:19<00:00,  2.39it/s, mae=11.4, train_loss=0.00384]
Epoch [6/30]: 100%|██████████| 84/84 [00:24<00:00,  3.49it/s, mae=11.4, val_loss=0.00435]

Epoch 6, Train Loss: 0.0052626588469661196, Val Loss: 0.005115697027317115



Epoch [7/30]: 100%|██████████| 334/334 [02:19<00:00,  2.40it/s, mae=13, train_loss=0.00562]  
Epoch [7/30]: 100%|██████████| 84/84 [00:24<00:00,  3.48it/s, mae=11.9, val_loss=0.00463]

Epoch 7, Train Loss: 0.0051296811298506256, Val Loss: 0.005005192225022863



Epoch [8/30]: 100%|██████████| 334/334 [02:20<00:00,  2.37it/s, mae=11.9, train_loss=0.00443]
Epoch [8/30]: 100%|██████████| 84/84 [00:23<00:00,  3.53it/s, mae=11, val_loss=0.00406]  

Epoch 8, Train Loss: 0.00499377280130737, Val Loss: 0.004896362289963733



Epoch [9/30]: 100%|██████████| 334/334 [02:20<00:00,  2.38it/s, mae=9.51, train_loss=0.00289]
Epoch [9/30]: 100%|██████████| 84/84 [00:24<00:00,  3.49it/s, mae=11.4, val_loss=0.00443]

Epoch 9, Train Loss: 0.004856695126167716, Val Loss: 0.004731630875972942



Epoch [10/30]: 100%|██████████| 334/334 [02:20<00:00,  2.37it/s, mae=12.2, train_loss=0.00473]
Epoch [10/30]: 100%|██████████| 84/84 [00:23<00:00,  3.54it/s, mae=11.6, val_loss=0.00449]

Epoch 10, Train Loss: 0.004699312693517499, Val Loss: 0.0047028449702165315



Epoch [11/30]: 100%|██████████| 334/334 [02:19<00:00,  2.39it/s, mae=12.3, train_loss=0.00471]
Epoch [11/30]: 100%|██████████| 84/84 [00:23<00:00,  3.55it/s, mae=10.5, val_loss=0.00371]

Epoch 11, Train Loss: 0.00455637540581809, Val Loss: 0.0044474554541964265



Epoch [12/30]: 100%|██████████| 334/334 [02:20<00:00,  2.37it/s, mae=11.9, train_loss=0.00536]
Epoch [12/30]: 100%|██████████| 84/84 [00:23<00:00,  3.54it/s, mae=11.9, val_loss=0.00456]

Epoch 12, Train Loss: 0.0044137343386478145, Val Loss: 0.0045533192571296935



Epoch [13/30]: 100%|██████████| 334/334 [02:19<00:00,  2.40it/s, mae=9.14, train_loss=0.00376]
Epoch [13/30]: 100%|██████████| 84/84 [00:23<00:00,  3.52it/s, mae=10.6, val_loss=0.00401]

Epoch 13, Train Loss: 0.0042368402811949575, Val Loss: 0.004155690759597789



Epoch [14/30]: 100%|██████████| 334/334 [02:19<00:00,  2.40it/s, mae=10.2, train_loss=0.00423]
Epoch [14/30]: 100%|██████████| 84/84 [00:24<00:00,  3.44it/s, mae=10.7, val_loss=0.00386]

Epoch 14, Train Loss: 0.004085329037806588, Val Loss: 0.004101650390241828



Epoch [15/30]: 100%|██████████| 334/334 [02:20<00:00,  2.38it/s, mae=9.3, train_loss=0.00318] 
Epoch [15/30]: 100%|██████████| 84/84 [00:24<00:00,  3.47it/s, mae=10.7, val_loss=0.00394]

Epoch 15, Train Loss: 0.003929522860248735, Val Loss: 0.003949147080891721



Epoch [16/30]: 100%|██████████| 334/334 [02:20<00:00,  2.38it/s, mae=6.18, train_loss=0.00124]
Epoch [16/30]: 100%|██████████| 84/84 [00:23<00:00,  3.54it/s, mae=9.89, val_loss=0.00345]

Epoch 16, Train Loss: 0.0037887787051125244, Val Loss: 0.003947572408443583



Epoch [17/30]: 100%|██████████| 334/334 [02:19<00:00,  2.40it/s, mae=9.43, train_loss=0.00295]
Epoch [17/30]: 100%|██████████| 84/84 [00:23<00:00,  3.53it/s, mae=9.91, val_loss=0.00342]

Epoch 17, Train Loss: 0.003713293431862118, Val Loss: 0.0038041300944141334



Epoch [18/30]: 100%|██████████| 334/334 [02:20<00:00,  2.38it/s, mae=9.61, train_loss=0.00329]
Epoch [18/30]: 100%|██████████| 84/84 [00:23<00:00,  3.58it/s, mae=10.1, val_loss=0.0036] 

Epoch 18, Train Loss: 0.0035923162404124342, Val Loss: 0.0037173776321911384



Epoch [19/30]: 100%|██████████| 334/334 [02:20<00:00,  2.37it/s, mae=9.37, train_loss=0.00302]
Epoch [19/30]: 100%|██████████| 84/84 [00:24<00:00,  3.50it/s, mae=9.35, val_loss=0.00328]

Epoch 19, Train Loss: 0.0034981165771389346, Val Loss: 0.0036517329863272607



Epoch [20/30]: 100%|██████████| 334/334 [02:20<00:00,  2.38it/s, mae=9.03, train_loss=0.00289]
Epoch [20/30]: 100%|██████████| 84/84 [00:24<00:00,  3.44it/s, mae=9.48, val_loss=0.0034] 

Epoch 20, Train Loss: 0.0033992103692757603, Val Loss: 0.0037289736044060972



Epoch [21/30]: 100%|██████████| 334/334 [02:20<00:00,  2.38it/s, mae=7.07, train_loss=0.00176]
Epoch [21/30]: 100%|██████████| 84/84 [00:23<00:00,  3.53it/s, mae=10.2, val_loss=0.00365]

Epoch 21, Train Loss: 0.0033108054396757732, Val Loss: 0.003460317916635956



Epoch [22/30]: 100%|██████████| 334/334 [02:20<00:00,  2.37it/s, mae=11.2, train_loss=0.00418]
Epoch [22/30]: 100%|██████████| 84/84 [00:23<00:00,  3.57it/s, mae=9.5, val_loss=0.0033]  

Epoch 22, Train Loss: 0.0032651684052467035, Val Loss: 0.003353077225342748



Epoch [23/30]: 100%|██████████| 334/334 [02:20<00:00,  2.38it/s, mae=8.33, train_loss=0.00239]
Epoch [23/30]: 100%|██████████| 84/84 [00:23<00:00,  3.51it/s, mae=9.33, val_loss=0.00319]

Epoch 23, Train Loss: 0.003161968355217647, Val Loss: 0.003371865494132397



Epoch [24/30]: 100%|██████████| 334/334 [02:20<00:00,  2.38it/s, mae=7.25, train_loss=0.00186]
Epoch [24/30]: 100%|██████████| 84/84 [00:24<00:00,  3.45it/s, mae=8.94, val_loss=0.003]  

Epoch 24, Train Loss: 0.0030927631988686448, Val Loss: 0.0032916192431002855



Epoch [25/30]: 100%|██████████| 334/334 [02:20<00:00,  2.37it/s, mae=7.91, train_loss=0.00249]
Epoch [25/30]: 100%|██████████| 84/84 [00:24<00:00,  3.42it/s, mae=9.06, val_loss=0.00312]

Epoch 25, Train Loss: 0.0030378443387638174, Val Loss: 0.003241983477935372



Epoch [26/30]: 100%|██████████| 334/334 [02:21<00:00,  2.36it/s, mae=7.27, train_loss=0.00189]
Epoch [26/30]: 100%|██████████| 84/84 [00:24<00:00,  3.42it/s, mae=9.02, val_loss=0.003]  

Epoch 26, Train Loss: 0.0029920081787151533, Val Loss: 0.0032905920053876584



Epoch [27/30]: 100%|██████████| 334/334 [02:20<00:00,  2.38it/s, mae=9.11, train_loss=0.00274]
Epoch [27/30]: 100%|██████████| 84/84 [00:25<00:00,  3.33it/s, mae=9.19, val_loss=0.0032] 

Epoch 27, Train Loss: 0.0029195069361890773, Val Loss: 0.003187960997733864



Epoch [28/30]: 100%|██████████| 334/334 [02:20<00:00,  2.38it/s, mae=8.72, train_loss=0.00247]
Epoch [28/30]: 100%|██████████| 84/84 [00:24<00:00,  3.38it/s, mae=9.05, val_loss=0.0031] 

Epoch 28, Train Loss: 0.0028941785933416404, Val Loss: 0.0031570998447326324



Epoch [29/30]: 100%|██████████| 334/334 [02:20<00:00,  2.38it/s, mae=8.46, train_loss=0.00304]
Epoch [29/30]: 100%|██████████| 84/84 [00:24<00:00,  3.41it/s, mae=8.69, val_loss=0.00292]

Epoch 29, Train Loss: 0.0028453234267354147, Val Loss: 0.003122649538064642



Epoch [30/30]: 100%|██████████| 334/334 [02:20<00:00,  2.38it/s, mae=9.44, train_loss=0.003]  
Epoch [30/30]: 100%|██████████| 84/84 [00:23<00:00,  3.52it/s, mae=8.83, val_loss=0.00301]

Epoch 30, Train Loss: 0.0027959737215681882, Val Loss: 0.0031119356890918597





In [8]:


# %pip install pandas
import xml.etree.ElementTree as ET 
import numpy as np
import pandas as pd
import os 

tree = ET.parse('ibug_300W_large_face_landmark_dataset/labels_ibug_300W_test_parsed.xml')
root = tree.getroot()
root_dir = 'ibug_300W_large_face_landmark_dataset'

bboxes = [] # face bounding box used to crop the image
img_filenames = [] # the image names for the whole dataset

for filename in root[2]:
	img_filenames.append(os.path.join(root_dir, filename.attrib['file']))
	box = filename[0].attrib
	# x, y for the top left corner of the box, w, h for box width and height
	bboxes.append([box['left'], box['top'], box['width'], box['height']]) 

bboxes = np.array(bboxes).astype('float32') 
print(bboxes.shape)
print(len(img_filenames))
print(img_filenames[0])

# we now have img_filenames and bboxes
# for every i in range(len(img_filenames)), 
# we crop the image, resize it to (224, 224), and then feed it to the model
# the output is the landmarks in shape (68, 2)
class IBugTestDataset(Dataset):
    def __init__(self, img_filenames, bboxes, normalize=True, basic_transform=None):
        self.img_filenames = img_filenames
        self.bboxes = bboxes
        self.basic_transform = basic_transform # resize, totensor
        self.normalize = normalize
        
    def __len__(self):
        return len(self.img_filenames)
    
    def __getitem__(self, idx):
        img_path = self.img_filenames[idx]
        # grayscale
        opened_img = Image.open(img_path).convert('RGB') # range [0, 255] # shape (H, W, C)
        bounding_box = self.bboxes[idx]
        x, y, w, h = bounding_box # left, top, width, height
        cropped_by_bbox = opened_img.crop((x, y, x+w, y+h))
        cropped_by_bbox = np.array(cropped_by_bbox) # range [0, 255]
        if self.normalize:
            cropped_by_bbox = cropped_by_bbox / 255.0 - 0.5
            
        if self.basic_transform:
            cropped_by_bbox = self.basic_transform(cropped_by_bbox)
        else:
            cropped_by_bbox = torch.tensor(cropped_by_bbox).permute(2, 0, 1) # (C, H, W)
        return cropped_by_bbox, bounding_box
    
basic_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)), # From (C, H, W) to (C, 224, 224)
    # grayscale
    transforms.Grayscale(num_output_channels=1), # From (C, H, W) to (1, H, W)
    transforms.ToTensor(),
    # transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

test_dataset = IBugTestDataset(img_filenames, bboxes, basic_transform=basic_transform)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BS, shuffle=False)

# test
model.eval()
preds = []
with torch.no_grad():
    for (images, bbox) in tqdm(test_loader):
        images = images.to(device)
        predicted_landmarks = model(images) # [0, 1]
        x, y, w, h = bbox[:, 0], bbox[:, 1], bbox[:, 2], bbox[:, 3]
        # to devices
        x, y, w, h = x.to(device), y.to(device), w.to(device), h.to(device)
        # adjust the landmark
        predicted_landmarks[:, :, 0] = predicted_landmarks[:, :, 0] * w[:, None] + x[:, None]
        predicted_landmarks[:, :, 1] = predicted_landmarks[:, :, 1] * h[:, None] + y[:, None]
        preds.append(predicted_landmarks)
preds = torch.cat(preds, dim=0) # (N, 68, 2)
# preds = preds.cpu().numpy()
# preds = preds.tolist()



(1008, 4)
1008
ibug_300W_large_face_landmark_dataset/helen/trainset/146827737_1.jpg


100%|██████████| 63/63 [00:32<00:00,  1.95it/s]


In [9]:
alternating_names = [] # should be 136 * test_size(1008) = 137088
alternating_values = []
counter = 0
for i in range(len(preds)):
    pred = preds[i] # (68, 2)
    # to list
    pred = pred.cpu().numpy().tolist()
    
    for j in range(68):
        # alternating_names.append(f'image_{i+1}_keypoints_{j+1}_x')
        alternating_names.append(str(counter))
        counter += 1
        alternating_names.append(str(counter))
        counter += 1
        # alternating_names.append(f'image_{i+1}_keypoints_{j+1}_y')
        
        alternating_values.append(pred[j][0]) # x
        alternating_values.append(pred[j][1]) # y
        
        
        
df = pd.DataFrame({'Id': alternating_names, 'Predicted': alternating_values})

os.makedirs('4', exist_ok=True)
df.to_csv('4/submission.csv', index=False)
!cd 4 && ls -l && kaggle competitions submit -c cs194-26-fall-2022-project-5 -f submission.csv -m "UNet, 30 epochs, 1e-3 LR, 16 BS, 64 hidden channels"


total 3224
-rw-rw-r-- 1 it it 3299329 12月  1 23:02 submission.csv
100%|██████████████████████████████████████| 3.15M/3.15M [00:01<00:00, 1.79MB/s]
Successfully submitted to CS194-26 Fall 2022 Project 5