In [1]:
from warnings import filterwarnings
import os
import glob
import pandas as pd
from IPython.display import clear_output
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as img
from pathlib import Path
import cv2
filterwarnings('ignore')
%matplotlib inline

In [2]:
# Read the training keypoint annotations into a DataFrame for inspection.
key_points = pd.read_csv(filepath_or_buffer='data/training_frames_keypoints.csv')

In [3]:
# Show the first two annotation rows.
key_points.iloc[:2]

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7,8,...,126,127,128,129,130,131,132,133,134,135
0,Luis_Fonsi_21.jpg,45.0,98.0,47.0,106.0,49.0,110.0,53.0,119.0,56.0,...,83.0,119.0,90.0,117.0,83.0,119.0,81.0,122.0,77.0,122.0
1,Lincoln_Chafee_52.jpg,41.0,83.0,43.0,91.0,45.0,100.0,47.0,108.0,51.0,...,85.0,122.0,94.0,120.0,85.0,122.0,83.0,122.0,79.0,122.0


In [4]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
#pip install pillow==6.1

In [5]:
class FaceKeyPointDataset(Dataset):
    """Dataset pairing face images with their annotated keypoints.

    Each row of the annotation CSV holds an image file name in its first
    column, followed by the flattened keypoint values.

    Args:
        file: Path of the annotation CSV (read with ``pandas.read_csv``).
        image_folder: Folder containing the images; it is joined onto the
            current working directory at construction time.
        transform: Optional callable applied to the image only. The
            keypoints are returned exactly as stored in the CSV, so any
            geometric change made by ``transform`` is NOT reflected in them.
    """

    def __init__(self, file, image_folder, transform=None):
        self.file = pd.read_csv(file)
        self.path = Path(os.getcwd()) / image_folder
        self.transform = transform

    def __len__(self):
        """Return the number of annotated images (rows in the CSV)."""
        return len(self.file)

    def __getitem__(self, x):
        """Load sample ``x``.

        Returns:
            dict with ``'image'`` (the loaded image, passed through
            ``transform`` when one is set) and ``'keypoints'`` (a float
            array of shape ``(-1, 2)`` built from the remaining columns).
        """
        row = self.file.iloc[x]
        # The row is indexed by column *labels*, so use positional access
        # explicitly instead of relying on ``row[0]`` falling back to it.
        image_location = os.path.join(self.path, row.iloc[0])
        image = img.imread(image_location)

        # Drop the alpha channel of 4-channel images. Grayscale images come
        # back 2-D (no channel axis), so check the rank before indexing it.
        if image.ndim == 3 and image.shape[2] == 4:
            image = image[:, :, 0:3]

        key_points = row.iloc[1:].values.astype('float').reshape(-1, 2)
        if self.transform is not None:
            image = self.transform(image)
        return {'image': image, 'keypoints': key_points}

In [6]:
# Image-only preprocessing applied by the datasets below.
tranformations = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Grayscale(),
    # Resize the whole frame to 255x255 instead of taking a random 50x50
    # crop: a random patch has no fixed relation to the keypoint labels,
    # which describe the full frame. 255x255 is the input size that yields
    # the 64 * 26 * 26 features expected by the network's first linear layer.
    # NOTE(review): these transforms touch the image only; the keypoint
    # labels stay in original-image pixel coordinates and presumably need
    # the same rescaling -- confirm and handle where the labels are built.
    transforms.Resize((255, 255)),
    transforms.ToTensor(),
    # mean 0 / std 1 leaves the pixel values unchanged.
    transforms.Normalize(mean=[0], std=[1]),
])

In [7]:
# Training and test splits share the same image preprocessing pipeline.
face_datasets = FaceKeyPointDataset(
    file='data/training_frames_keypoints.csv',
    image_folder='data/training',
    transform=tranformations,
)
test_datasets = FaceKeyPointDataset(
    file='data/test_frames_keypoints.csv',
    image_folder='data/test/',
    transform=tranformations,
)

In [8]:
# (training size, test size)
tuple(len(split) for split in (face_datasets, test_datasets))

(3462, 770)

In [9]:
import torch.nn as nn
import torch.nn.functional as F

In [10]:
class Net(nn.Module):
    """Convolutional regressor mapping a 1-channel image to 136 values.

    The first linear layer is sized for ``64 * 26 * 26`` features, which is
    what a 255x255 input produces after the two (5x5 conv, 3x3 max-pool)
    stages: 255 -> 251 -> 83 -> 79 -> 26. Other input sizes will fail at
    ``line1``.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 32, 5),
            nn.MaxPool2d(3, 3),
            nn.ReLU(),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, 5),
            nn.MaxPool2d(3, 3),
            nn.ReLU(),
        )

        self.line1 = nn.Sequential(nn.Linear(64 * 26 * 26, 10000), nn.ReLU())
        self.line2 = nn.Sequential(nn.Linear(10000, 1000), nn.ReLU())
        # Regression head: no activation after the last layer. A trailing
        # ReLU clamps predictions at 0 and passes no gradient to any output
        # whose pre-activation is negative, so such outputs cannot recover.
        # Kept as a Sequential so the parameter names (line3.0.*) are unchanged.
        self.line3 = nn.Sequential(nn.Linear(1000, 136))

    def forward(self, x):
        """Return a ``(batch, 136)`` tensor for a ``(batch, 1, 255, 255)`` input."""
        x = self.conv1(x)
        x = self.conv2(x)
        # Flatten everything but the batch dimension for the linear layers.
        x = x.view(x.size(0), -1)
        x = self.line1(x)
        x = self.line2(x)
        x = self.line3(x)
        return x

In [11]:
# Instantiate the keypoint-regression network defined by the Net class.
net = Net()

In [12]:
# Training batches are reshuffled on every pass over the data.
train_loader = DataLoader(face_datasets, batch_size=64, shuffle=True, num_workers=5)
# Evaluate on the held-out test split (not the training set), in a fixed
# order so results from different epochs are comparable.
test_loader = DataLoader(test_datasets, batch_size=64, shuffle=False, num_workers=5)

In [13]:
import torch.optim as optim

# Mean-squared error is the regression objective; RMSprop with momentum
# performs the parameter updates.
criterion = nn.MSELoss()
optimizer = optim.RMSprop(params=net.parameters(), lr=1e-4, momentum=0.9)

In [14]:
# Train for 30 epochs, printing the mean batch loss and a quick error check
# after each one.
for epoch in range(30):
    # The check at the end of each epoch switches to eval mode, so re-enter
    # training mode at the start of every epoch.
    net.train()
    running_loss = 0.0
    for data in train_loader:
        image = data['image']
        keypoint = data['keypoints']
        # Flatten the per-sample (points, 2) targets into one row each so
        # they line up with the network's flat output.
        keypoint = keypoint.view(keypoint.size(0), -1).float()
        output = net(image)
        loss = criterion(output, keypoint)
        optimizer.zero_grad()
        loss.backward()
        # Must be *called*: a bare `optimizer.step` is a no-op expression
        # and the weights would never be updated.
        optimizer.step()
        running_loss += loss.item()

    # `running_loss` is a sum of per-batch mean losses, so average it over
    # the number of batches rather than a hard-coded sample count.
    print('Epoch: {}, Avg. Loss: {}'.format(epoch + 1, running_loss / len(train_loader)), end="")

    total = 0
    error = 0.0
    net.eval()
    # No gradients are needed while scoring; skip building the autograd graph.
    with torch.no_grad():
        for data in test_loader:
            image = data['image']
            keypoint = data['keypoints']
            keypoint = keypoint.view(keypoint.size(0), -1).float()
            outputs = net(image)
            total += keypoint.size(0)
            # NOTE(review): the fixed /64 matches the loader batch size; the
            # metric is kept as originally defined -- confirm it is intended.
            error += (((keypoint - outputs) ** 2) / 64).sum().item()
            # Only the first batch is scored, as in the original loop.
            # NOTE(review): remove this `break` for a full pass over the loader.
            break
    print(f'\tError : {100 * error / total}')

KeyboardInterrupt: 