In [1]:
import torchvision
import torch.nn as nn
import torch
import torch.nn.functional as F
from torchvision import transforms, models, datasets
from torchsummary import summary
from torch.utils.data import Dataset, DataLoader, TensorDataset

from copy import deepcopy
import numpy as np
import pandas as pd
import os, glob, cv2

import matplotlib.pyplot as plt
from sklearn import cluster

device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

cuda


In [2]:
!git clone https://github.com/udacity/P1_Facial_Keypoints.git
!cd P1_Facial_Keypoints
root_dir = 'P1_Facial_Keypoints/data/training/'
all_img_paths = glob.glob(os.path.join(root_dir, '*.jpg'))
data = pd.read_csv('P1_Facial_Keypoints/data/training_frames_keypoints.csv')

fatal: destination path 'P1_Facial_Keypoints' already exists and is not an empty directory.


In [3]:
data.shape #even cols: x-coordinate, odd cols: y-coordinate // total 68 key points

(3462, 137)

In [4]:
#define dataset class
class FacesDataset(Dataset):
  def __init__(self, df) -> None:
    super().__init__()
    self.df = df
    self.normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                              std=[0.229, 0.224, 0.225])
    
  def __len__(self): return len(self.df)

  def __getitem__(self, index):
    image_path = "P1_Facial_Keypoints/data/training/" + self.df.iloc[index, 0]
    img = cv2.imread(image_path, cv2.COLOR_BGR2RGB)/255.
    raw_keypoints = deepcopy(self.df.iloc[index, 1:].tolist())
    x_coords = (np.array(raw_keypoints[0::2])/img.shape[1]).tolist()
    y_coords = (np.array(raw_keypoints[1::2])/img.shape[0]).tolist()
    merged_coords = x_coords + y_coords
    merged_coords = torch.tensor(merged_coords)
    img = self.perprocess_input(img)
    return img, merged_coords

  def perprocess_input(self, img):
    img = cv2.resize(img, (224, 224))
    img = torch.tensor(img).permute(2, 0, 1)
    img = self.normalize(img).float()
    return img.to(device)

  def load_img(self, ix):
        img_path = 'P1_Facial_Keypoints/data/training/' + self.df.iloc[ix,0]        
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)/255.
        img = cv2.resize(img, (224,224))
        return img 

In [5]:
from sklearn.model_selection import train_test_split
train_data, test_data = train_test_split(data, test_size=0.2, random_state=101, shuffle=True)

train_dataset = FacesDataset(train_data.reset_index(drop=True))
test_dataset = FacesDataset(test_data.reset_index(drop=True))

train_dataloader = DataLoader(train_dataset, batch_size = 32)
test_dataloader = DataLoader(test_dataset, batch_size = 32)

In [6]:
#define model
def get_model():
  model = models.vgg16(weights='VGG16_Weights.IMAGENET1K_V1')
  for param in model.parameters():
    param.requires_grads = False
  
  model.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 512),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(512, 136),
            nn.Sigmoid()
  )
  loss_fn = nn.L1Loss()
  optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
  return model.to(device), loss_fn, optimizer

In [7]:
def train_batch(imgs, kps, model, loss_fn, optimizer):
  model.train()
  optimizer.zero_grad()

  preds = model(imgs)
  loss = loss_fn(preds, kps.to(device))
  loss.backward()

  optimizer.step()
  return loss.item()

@torch.no_grad()
def validate_batch(imgs, kps, model, loss_fn):
  model.eval()
  preds = model(imgs)
  loss = loss_fn(preds, kps.to(device))
  return preds, loss.item()

In [8]:
VGG_model, loss_fn, optimizer = get_model()

In [9]:
import time
train_loss, test_loss = [], []
n_epochs = 50

for epoch in range(n_epochs):
  start = time.time()
  epoch_train_loss, epoch_test_loss = [],  []

  for idx, batch in enumerate(train_dataloader):
    imgs, kps = batch
    batch_loss = train_batch(imgs, kps, VGG_model, loss_fn, optimizer)
    epoch_train_loss.append(batch_loss)
  train_loss.append(np.mean(epoch_train_loss))

  for idx, batch in enumerate(test_dataloader):
    imgs, kps = batch
    batch_preds, batch_loss = validate_batch(imgs, kps, VGG_model, loss_fn)
    epoch_test_loss.append(batch_loss)
  test_loss.append(np.mean(epoch_test_loss))

  print(f"Epoch [{epoch+1}] :  Train Loss: {epoch_train_loss[-1]:0.3f}   Test Loss: {epoch_test_loss[-1]:0.3f} Time: {time.time() - start}")


Epoch [1] :  Train Loss: 0.041   Test Loss: 0.033 Time: 282.570538520813
Epoch [2] :  Train Loss: 0.032   Test Loss: 0.023 Time: 301.5078809261322
Epoch [3] :  Train Loss: 0.026   Test Loss: 0.018 Time: 306.96053767204285
Epoch [4] :  Train Loss: 0.026   Test Loss: 0.016 Time: 287.42065262794495


KeyboardInterrupt: 