# W207 Final Project

## Submission by Sirisha Bhupathi and Abhi Sharma

This project is for the Kaggle competition listed here: https://www.kaggle.com/c/facial-keypoints-detection

## Objective

The objective of this project is to predict keypoint positions on face images.

## Introduction

The images are 96 x 96 pixels, and the key points are given as location co-ordinates for each image. 
Each key point has 2 co-ordinates - one for x and one for y. 
There are a total of 30 location co-ordinates per image (15 key points), 15 each for the x and y axes.
Out of the 30 co-ordinates in the test dataset for a single image, some co-ordinates are present and others need to be predicted. 
The number and type of co-ordinates that need to be predicted vary per test example.


## Imports and Settings

In [1]:
# Imports

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import sys
from datetime import datetime
import warnings
warnings.filterwarnings("ignore")
from random import seed
from random import randint
import time
import math

# import keras, tf and image dependencies
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.utils import class_weight
import tensorflow_hub as hub
from tensorflow.keras import layers
from PIL import Image, ImageDraw, ImageOps, ImageEnhance
import cv2

In [None]:
# We verify if the GPU is working and available with the following commands
# Adapted from here: https://www.tensorflow.org/guide/gpu

gpus = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpus))
#tf.debugging.set_log_device_placement(True)

# Memory growth has to be configured BEFORE anything initialises the GPUs
# (creating a tensor or listing logical devices does), so this runs first.
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)

# TF1-style session used by the Keras backend; it also allocates GPU memory
# on demand, and refuses to silently fall back to another device.
tf_config = tf.compat.v1.ConfigProto(allow_soft_placement=False)
tf_config.gpu_options.allow_growth = True
s = tf.compat.v1.Session(config=tf_config)
tf.compat.v1.keras.backend.set_session(s)

# Create some tensors and multiply them as a smoke test of the device
a = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
b = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
c = tf.matmul(a, b)

print(c)

## Data Loading

In [None]:
# Constants

# Images are square (IMG_DIM x IMG_DIM) with pixel intensities up to PIX_MAX
IMG_DIM = 96
PIX_MAX = 255

# Name of the column holding the space-separated pixel string
IMAGE = 'Image'

# Facial key points; each one contributes an "<name>_x" and an "<name>_y"
# label column, in this order.
_KEYPOINT_NAMES = (
    'left_eye_center',
    'right_eye_center',
    'left_eye_inner_corner',
    'left_eye_outer_corner',
    'right_eye_inner_corner',
    'right_eye_outer_corner',
    'left_eyebrow_inner_end',
    'left_eyebrow_outer_end',
    'right_eyebrow_inner_end',
    'right_eyebrow_outer_end',
    'nose_tip',
    'mouth_left_corner',
    'mouth_right_corner',
    'mouth_center_top_lip',
    'mouth_center_bottom_lip',
)
COLUMNS = [name + '_' + axis for name in _KEYPOINT_NAMES for axis in ('x', 'y')]

# Working directory that identifies the project environment
CWD = '/project/notebooks'

# Augmentation names accepted by augment_images
AUGMENTATIONS = [
    "90R",
    "90L",
    "180H",
    "180V",
    "NoiseGaussian",
    "NoiseSaltPepper",
    "NoisePoisson",
    "NoiseSpeckle",
    "IncreaseBright",
    "IncreaseDark",
]

In [None]:
# Root of the data folders: an absolute path when the notebook runs from the
# project directory (CWD), otherwise the parent of the working directory.
folder_prefix = '/project/kaggle' if os.getcwd() == CWD else '..'

In [None]:
# Load the competition files: training set (key points + image), test set
# (ImageId + image) and the lookup table used to build the submission.
# pandas decompresses the .zip archives on the fly.
train = pd.read_csv(folder_prefix + '/input/facial-keypoints-detection/training.zip')
test = pd.read_csv(folder_prefix + '/input/facial-keypoints-detection/test.zip')
idlookup = pd.read_csv(folder_prefix + '/input/facial-keypoints-detection/IdLookupTable.csv')

In [None]:
# Report (rows, columns) of the raw train and test frames
print('Train shape:',train.shape)
print('Test shape:',test.shape)

In [None]:
# Preview the first training rows, transposed so each column reads as a row
train.head().T

In [None]:
# Preview the first test rows
test.head()

## Data Validation

In [None]:
# Check for missing values
def check_missing_vals(data):
    """Count the columns of ``data`` with and without missing values.

    Returns a Series indexed by boolean: the entry for ``True`` is the number
    of columns containing at least one null, the entry for ``False`` the
    number of columns with none. A key is absent when its count is zero.
    """
    column_has_null = data.isna().any(axis=0)
    return column_has_null.value_counts()
    
def fill_missing_with_col_mean(data, columns=None):
    """Replace missing values with the mean of their column.

    Args:
        data: DataFrame to fix. It is modified IN PLACE and also returned,
            so both ``df = fill(df)`` and a bare ``fill(df)`` work.
        columns: Column names to process. Defaults to the key-point columns
            listed in ``COLUMNS``.

    Returns:
        The same DataFrame object that was passed in.

    The mean ignores missing entries; a column that is entirely missing has
    no mean and is therefore left unchanged.
    """
    if columns is None:
        columns = COLUMNS
    for col in columns:
        data[col] = data[col].fillna(data[col].mean())
    return data

In [None]:
# How many training columns contain at least one missing value?
check_missing_vals(train)

In [None]:
# Same check for the test set
check_missing_vals(test)

In [None]:
# Impute missing key points with the column mean, then re-run the check to
# confirm what is left
train = fill_missing_with_col_mean(train)
check_missing_vals(train)

## Feature Engineering and Augmentation

In [None]:
# Split image column and label columns

# Double brackets keep train_images a one-column DataFrame (not a Series)
train_images = train[[IMAGE]]
# Everything except the image column is a key-point label
train_labels = train.drop(IMAGE, axis=1)

train_labels.head()

In [None]:
# Split train image pixels from string to 1 pixel per column and convert each pixel from string to float

# expand=True yields one column per pixel, labelled 0, 1, 2, ...
train_images = train_images[IMAGE].str.split(' ', expand=True)
train_images = train_images.astype(float)

train_images.head()

In [None]:
# Split test image pixels from string to 1 pixel per column and convert each pixel from string to float

# Work on a copy so the raw `test` frame (and its ImageId column) stays intact
test_images = test.copy()
# Rows are keyed by ImageId from here on
test_images = test_images.set_index('ImageId')
test_images = test_images[IMAGE].str.split(' ', expand=True)
test_images = test_images.astype(float)

test_images.head()

In [None]:
# Number of original (pre-augmentation) training images; later cells use it
# as the stride between augmentation groups
num_train_examples = train_images.shape[0]
num_train_examples

In [None]:
# Utility function to reshape image(s) to 2d
# Note that a dataframe is converted into a numpy array as a result of this transform
# Thus it is the user's responsibility to convert the array back to a dataframe if need be

def img_reshape_2d(data, width=IMG_DIM, height=IMG_DIM):
    """Reshape one flattened image into a 2-D array of shape (width, height).

    Args:
        data: The ``width * height`` pixel values of a single image, as a
            pandas Series/DataFrame or anything NumPy can convert
            (ndarray, list).
        width: Size of the first axis of the result.
        height: Size of the second axis of the result.

    Returns:
        A NumPy array of shape ``(width, height)``; pandas input is converted
        to a plain array.
    """
    return np.asarray(data).reshape(width, height)

def multiple_img_reshape_2d(data, width=IMG_DIM, height=IMG_DIM):
    """Reshape a table of flattened images into a 4-D image batch.

    Args:
        data: One flattened image per row, as a pandas DataFrame or anything
            NumPy can convert (ndarray, nested list).
        width: Size of the second axis of the result.
        height: Size of the third axis of the result.

    Returns:
        A NumPy array of shape ``(rows, width, height, 1)``; the trailing
        axis is the single colour channel.
    """
    return np.asarray(data).reshape(-1, width, height, 1)

In [None]:
# Plot images and keypoints function
# if img_num is a valid value, we display that image only

def plot_images(train_images, train_labels, start_index=0, end_offset=1000, img_num=-1):
    """Show images with their key points overlaid.

    Args:
        train_images: DataFrame with one flattened IMG_DIM x IMG_DIM image
            per row.
        train_labels: DataFrame with the matching key points per row, laid
            out as x0, y0, x1, y1, ...
        start_index: First row eligible for random selection.
        end_offset: Random rows are drawn from
            ``[start_index, start_index + end_offset]``, capped at the last
            available row.
        img_num: When not -1, show only this row (one 5x5 inch panel).
            Otherwise a 4 x 4 grid of randomly chosen rows is shown.
    """
    single = img_num != -1
    rows, cols = (1, 1) if single else (4, 4)
    multiplier = 5 if single else 10
    dot_size = 50 if single else 100

    # randint includes both ends; cap the range so a random pick can never
    # point past the last image.
    last_index = min(start_index + end_offset, len(train_images) - 1)

    # figsize is (width, height): columns drive the width, rows the height
    fig = plt.figure(figsize=(cols * multiplier, rows * multiplier), constrained_layout=False)
    for i in range(1, cols * rows + 1):
        ax = fig.add_subplot(rows, cols, i)
        image_no = img_num if single else randint(start_index, last_index)
        img = np.asarray(train_images.iloc[image_no]).reshape(IMG_DIM, IMG_DIM)
        ax.imshow(img, cmap='gray')
        # Plain array => purely positional access, whatever the column labels
        keypoints = np.asarray(train_labels.iloc[image_no], dtype=float)
        # One scatter call per key point so each keeps its own colour
        for x, y in zip(keypoints[0::2], keypoints[1::2]):
            ax.scatter(x, y, s=dot_size)
    plt.show()

    
def plot_image_with_label_optional(img, labels=None):
    """Draw one image, optionally with its key points, and return the figure.

    Args:
        img: Pixel data with IMG_DIM * IMG_DIM values in any shape (flat,
            2-D, or with a trailing channel axis).
        labels: Optional key points laid out as x0, y0, x1, y1, ... (Series,
            array or list). ``None`` or an empty container draws the image
            alone.

    Returns:
        The matplotlib Figure; the caller decides when to show it.
    """
    fig = plt.figure(figsize=(5, 5), constrained_layout=False)
    # reshapes array in case it is given as flat representation, ie, 9216 pixels = 96 x 96 image
    img = np.asarray(img).reshape(IMG_DIM, IMG_DIM)
    ax = fig.add_subplot(1, 1, 1)
    ax.imshow(img, cmap='gray')
    if labels is not None:
        # Plain array => purely positional access, whatever the index labels
        keypoints = np.asarray(labels, dtype=float).ravel()
        # One scatter call per key point so each keeps its own colour
        for x, y in zip(keypoints[0::2], keypoints[1::2]):
            ax.scatter(x, y, s=50)
    return fig

In [None]:
# Display a single training image (row 20) with its key points, selected by index
plot_images(train_images, train_labels, img_num=20)

In [None]:
# Display another image (row 155) by passing its pixels and labels directly
plot_image_with_label_optional(train_images.iloc[155], train_labels.iloc[155]).show()

In [None]:
# Display the same image (row 155) without any labels
plot_image_with_label_optional(train_images.iloc[155]).show()

In [None]:
# Display a 4 x 4 grid of randomly chosen training images with their key points
plot_images(train_images, train_labels)

In [None]:
# Augmentation function to generate more images

def _mirror_keypoints(labels, columns):
    """Return the key points that match a left-right mirrored image.

    ``labels`` is a flat x0, y0, x1, y1, ... sequence and ``columns`` the
    matching label names. Every x is reflected about the vertical centre line
    (x -> IMG_DIM - x, the same centre the rotations use). Values of
    'left'/'right' column pairs are then exchanged, because the point that
    was on one side of the face now sits where its counterpart used to be.
    """
    reflected = list(labels)
    for i in range(0, len(reflected), 2):
        reflected[i] = IMG_DIM - reflected[i]

    position = {str(name): i for i, name in enumerate(columns)}
    mirrored = list(reflected)
    for name, i in position.items():
        if 'left' in name:
            partner = name.replace('left', 'right')
        elif 'right' in name:
            partner = name.replace('right', 'left')
        else:
            continue
        j = position.get(partner)
        if j is not None:
            mirrored[i] = reflected[j]
    return mirrored


def augment_images(train_img_set, train_label_set, aug_list):
    """Create augmented copies of every image for each requested augmentation.

    Args:
        train_img_set: DataFrame with one flattened IMG_DIM x IMG_DIM image
            per row.
        train_label_set: DataFrame with the matching key points per row
            (x0, y0, x1, y1, ...). Rows are paired with the images by
            position, not by index label.
        aug_list: Augmentation names, each one of ``AUGMENTATIONS``.

    Returns:
        ``(flat_images, images_2d, labels)`` as NumPy arrays of shape
        ``(n, IMG_DIM * IMG_DIM)``, ``(n, IMG_DIM, IMG_DIM, 1)`` and
        ``(n, number_of_label_columns)`` with
        ``n = len(aug_list) * len(train_img_set)``. Results are grouped by
        augmentation, in ``aug_list`` order.

    Raises:
        NameError: an entry of ``aug_list`` is not a known augmentation.
        ValueError: images and labels differ in length.

    Rotations and the mirror move the key points together with the pixels;
    noise and brightness changes leave the key points untouched.
    """
    tick = time.perf_counter()
    for aug in aug_list:
        if aug not in AUGMENTATIONS:
            raise NameError("Augmentation -- {0} -- not valid".format(aug))

    if len(train_img_set) != len(train_label_set):
        raise ValueError("Train image set size must match label set size")

    # Convert once up front instead of building a Series per row
    images = np.asarray(train_img_set, dtype=float).reshape(-1, IMG_DIM, IMG_DIM)
    label_rows = np.asarray(train_label_set, dtype=float)
    label_columns = list(train_label_set.columns)
    center = (IMG_DIM / 2, IMG_DIM / 2)

    # name -> (PIL angle, key-point angle). PIL rotates counter-clockwise,
    # while rotate() turns points clockwise on screen because the image
    # y axis points down; hence the differing values.
    rotations = {"90R": (270, 90), "90L": (90, -90), "180V": (180, 180)}
    noise_kinds = ("NoiseGaussian", "NoiseSaltPepper", "NoisePoisson", "NoiseSpeckle")
    brightness_factors = {"IncreaseBright": 1.5, "IncreaseDark": 0.5}

    output_imgs_flat = []
    output_imgs_2d = []
    output_label_set = []
    for aug in aug_list:
        for input_image, label_row in zip(images, label_rows):
            labels = list(label_row)

            if aug in rotations:
                pil_angle, label_angle = rotations[aug]
                output_image = np.array(Image.fromarray(input_image).rotate(pil_angle))
                labels = rotate(center, labels, label_angle)
            elif aug == "180H":
                # horizontal flip: the key points have to be mirrored too
                output_image = np.array(ImageOps.mirror(Image.fromarray(input_image)))
                labels = _mirror_keypoints(labels, label_columns)
            elif aug in noise_kinds:
                output_image = np.asarray(add_noise_to_img(input_image, aug))
            else:
                # Brightness change = scale every pixel and clamp to the
                # valid range, which is what PIL's ImageEnhance.Brightness
                # computes. Done in NumPy because the pixels here are floats
                # and the PIL enhancer is meant for 8-bit images.
                output_image = np.clip(input_image * brightness_factors[aug], 0, PIX_MAX)

            output_imgs_flat.append(output_image.reshape(IMG_DIM * IMG_DIM))
            output_imgs_2d.append(output_image)
            output_label_set.append(labels)

    output_imgs_flat = np.array(output_imgs_flat)
    output_imgs_2d = np.array(output_imgs_2d).reshape(-1, IMG_DIM, IMG_DIM, 1)
    output_label_set = np.array(output_label_set)
    tock = time.perf_counter()
    print("Time elapsed (sec) for augmentation is {0}".format(tock-tick))

    return output_imgs_flat, output_imgs_2d, output_label_set



In [None]:
# Sanity check of the reshape helper: turn row 11 into a 2-D array, confirm
# its shape, then display it without key points
img = img_reshape_2d(train_images.iloc[11])
print(img.shape)
plot_image_with_label_optional(img).show()

In [None]:
# Utility function for adding noise to the image. Note that the keypoints are not altered in such a case

def add_noise_to_img(image, noise_type="NoiseGaussian", max_value=None):
    """Return a noisy copy of a 2-D grey-scale image.

    Args:
        image: 2-D array of pixel intensities. It is not modified.
        noise_type: One of "NoiseGaussian", "NoiseSaltPepper",
            "NoisePoisson" or "NoiseSpeckle".
        max_value: Intensity written for "salt" pixels. Defaults to PIX_MAX,
            the brightest value of the un-normalised images.

    Returns:
        A new float array with the same shape as ``image``. Values are not
        clipped, so they may fall outside the original intensity range.

    Raises:
        ValueError: ``noise_type`` is not one of the supported names.
    """
    image = np.asarray(image, dtype=float)
    row, col = image.shape

    if noise_type == "NoiseGaussian":
        # NOTE(review): a mean of 100 also brightens the whole image, not
        # just adds noise - confirm that this offset is intended.
        mean = 100
        var = 100
        sigma = var ** 0.5
        noisy = image + np.random.normal(mean, sigma, (row, col))

    elif noise_type == "NoiseSaltPepper":
        if max_value is None:
            max_value = PIX_MAX
        s_vs_p = 0.5
        amount = 0.04
        noisy = np.copy(image)

        # Salt mode. The per-axis index arrays must be combined into a TUPLE:
        # indexing with a list of arrays selects whole rows instead of
        # individual pixels. randint's upper bound is exclusive, so the axis
        # length itself is the right bound to reach the last row/column.
        num_salt = int(np.ceil(amount * image.size * s_vs_p))
        salt = tuple(np.random.randint(0, dim, num_salt) for dim in image.shape)
        noisy[salt] = max_value

        # Pepper mode
        num_pepper = int(np.ceil(amount * image.size * (1. - s_vs_p)))
        pepper = tuple(np.random.randint(0, dim, num_pepper) for dim in image.shape)
        noisy[pepper] = 0

    elif noise_type == "NoisePoisson":
        # Scale by the next power of two above the number of distinct
        # intensities, sample, then scale back
        vals = len(np.unique(image))
        vals = 2 ** np.ceil(np.log2(vals))
        noisy = np.random.poisson(image * vals) / float(vals)

    elif noise_type == "NoiseSpeckle":
        # Multiplicative noise: each pixel is perturbed in proportion to its value
        noisy = image + image * np.random.randn(row, col)

    else:
        raise ValueError("Noise type -- {0} -- not valid".format(noise_type))

    return noisy

In [None]:
# https://stackoverflow.com/questions/34372480/rotate-point-about-another-point-in-degrees-python
def rotate(origin, points, angle):
    """Rotate a flat list of 2-D points about ``origin``.

    Args:
        origin: ``(x, y)`` centre of rotation.
        points: Flat sequence ``[x0, y0, x1, y1, ...]``.
        angle: Rotation angle in degrees.

    Returns:
        A new flat list of rotated co-ordinates. For ``angle == 0`` the
        ``points`` object itself is returned, not a copy.
    """
    if angle == 0:
        return points

    theta = math.radians(angle)
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)
    ox, oy = origin

    rotated = []
    for i in range(0, len(points), 2):
        # Offset of the point from the centre of rotation
        dx = points[i] - ox
        dy = points[i + 1] - oy
        rotated.extend((
            ox + cos_t * dx - sin_t * dy,
            oy + sin_t * dx + cos_t * dy,
        ))
    return rotated

In [None]:
# Generate seven augmented copies of the training set. Results come back
# grouped per augmentation in the order listed here, which the plotting
# cells below rely on when they index into the output.
temp_train_img_flat, temp_train_img_2d, temp_train_labels = augment_images(train_images, train_labels, ["90L", "90R", "180V", "NoiseGaussian", "NoiseSpeckle", "NoisePoisson", "NoiseSaltPepper"])

# Shapes of the flat images, the 4-D image batch and the labels
print(temp_train_img_flat.shape)
print(temp_train_img_2d.shape)
print(temp_train_labels.shape)

In [None]:
# Wrap the augmented labels in a DataFrame with the key-point column names
temp_train_labels_df = pd.DataFrame(temp_train_labels, columns = COLUMNS)

# Plot left rotation for single image
# (row 155 falls in the first augmentation group, "90L")
plot_image_with_label_optional(temp_train_img_flat[155], temp_train_labels_df.iloc[155]).show()

In [None]:
# Plot left rotation for single image with 2d numpy array instead of 1d numpy array
# (same sample as above; the plot helper reshapes either representation)
plot_image_with_label_optional(temp_train_img_2d[155], temp_train_labels_df.iloc[155]).show()

In [None]:
# Plot right rotation for single image
# Offsetting by num_train_examples jumps to the second augmentation group
# ("90R"); this relies on num_train_examples still being the pre-augmentation
# row count.
plot_image_with_label_optional(temp_train_img_2d[155+num_train_examples], temp_train_labels_df.iloc[155+num_train_examples]).show()

In [None]:
# Plot upside down image rotation for single image
# Two groups further on is the third augmentation in the list ("180V")
plot_image_with_label_optional(temp_train_img_flat[155+(num_train_examples*2)], temp_train_labels_df.iloc[155+(num_train_examples*2)]).show()

In [None]:
# Plot mirror image rotation for single image
# plot_image_with_label_optional(temp_train_img_flat[155+(num_train_examples*3)], temp_train_labels_df.iloc[155+(num_train_examples*3)]).show()

## Data Preparation

In [None]:
# We will need to append the augmented data to the end of our training sets (both images and labels)
# Compare shape and container type of the original and augmented images ...
print(train_images.shape)
print(temp_train_img_flat.shape)
print(type(train_images))
print(type(temp_train_img_flat))

# ... and of the original and augmented labels
print(train_labels.shape)
print(temp_train_labels_df.shape)
print(type(train_labels))
print(type(temp_train_labels_df))


In [None]:
# Wrap the augmented pixel array in a DataFrame with one integer-labelled
# column per pixel, ready to be appended to the training images
temp_train_img_flat = pd.DataFrame(temp_train_img_flat, columns = range(0, IMG_DIM * IMG_DIM))
temp_train_img_flat.head()

In [None]:
# Append the augmented samples after the original ones and renumber the rows.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
train_images = pd.concat([train_images, temp_train_img_flat], ignore_index=True)
train_labels = pd.concat([train_labels, temp_train_labels_df], ignore_index=True)

print(train_images.shape)
print(train_labels.shape)

In [None]:
# Preview the combined (original + augmented) image table
train_images.head()

In [None]:
# Preview the combined (original + augmented) label table
train_labels.head()

In [None]:
# Convert the flat pixel tables into (rows, 96, 96, 1) image batches
train_images_2d = multiple_img_reshape_2d(train_images)
test_images_2d = multiple_img_reshape_2d(test_images)

print(train_images_2d.shape)
print(test_images_2d.shape)

**Deep learning models are known to work well with normalized data. Thus, we normalize the image pixel values by the highest pixel intensity, i.e., 255. We also normalize the labels by the image width (96 pixels).**

In [None]:
# Normalize train and test data

# Pixels: divide by the maximum intensity, in both the flat tables and the
# image batches
train_images = train_images / PIX_MAX
test_images = test_images / PIX_MAX
train_images_2d = train_images_2d / PIX_MAX
test_images_2d = test_images_2d / PIX_MAX

# Labels: divide by the image size; predictions are scaled back by IMG_DIM
# when the submission is built
train_labels = train_labels / IMG_DIM

In [None]:
# We now have more samples than the standard training set, which will aid us in our training of the model
# num_train_examples is overwritten here with the augmented row count; the
# training cell derives its batch size from this value
num_train_examples = train_images.shape[0]
num_train_examples

## Modeling and Training

In [None]:
# Use the Keras that ships with TensorFlow throughout: the session/backend
# set up earlier is the tf.keras one, and `keras.layers.advanced_activations`
# no longer exists in recent standalone Keras releases. The same names as
# before are imported, each only once.
from tensorflow.keras import optimizers
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import (
    Activation, BatchNormalization, Conv2D, Convolution2D, Dense, Dropout,
    Flatten, LeakyReLU, MaxPool2D, MaxPooling2D, ZeroPadding2D,
)
import tensorflow as tf


def _add_conv_block(model, filters, **conv_kwargs):
    """Append a 3x3 'same' convolution -> LeakyReLU(0.1) -> batch norm to ``model``."""
    model.add(Conv2D(filters, (3, 3), padding='same', use_bias=False, **conv_kwargs))
    model.add(LeakyReLU(alpha=0.1))
    model.add(BatchNormalization())


model = Sequential()

# Six stages of two convolution blocks each. Every stage except the last ends
# with 2x2 max pooling, so the feature maps go
# 96 -> 48 -> 24 -> 12 -> 6 -> 3 pixels per side while the channels grow.
stage_filters = [32, 64, 96, 128, 256, 512]
for stage, filters in enumerate(stage_filters):
    # Only the very first layer declares the input: (None, 96, 96, 1)
    first_layer_kwargs = {'input_shape': (IMG_DIM, IMG_DIM, 1)} if stage == 0 else {}
    _add_conv_block(model, filters, **first_layer_kwargs)
    _add_conv_block(model, filters)
    if stage < len(stage_filters) - 1:
        model.add(MaxPool2D(pool_size=(2, 2)))

# Regression head. Input dimensions: (None, 3, 3, 512)
model.add(Flatten())
model.add(Dense(512,activation='relu'))
model.add(Dropout(0.1))
# One linear output per key-point co-ordinate
model.add(Dense(30))


model.compile(optimizer='adam', 
              loss='mean_squared_error',
              metrics=['mse','mae'])

# One thousandth of the data per batch, but never a batch size of zero
batch_size = max(1, num_train_examples // 1000)

# NOTE(review): validation_split holds out the LAST 20% of the rows without
# shuffling, and the augmented samples were appended after the originals, so
# the validation set contains augmented images only. Consider shuffling the
# data (or passing validation_data explicitly) before training.
history = model.fit(train_images_2d, train_labels, epochs = 50, batch_size = batch_size, validation_split = 0.2)

In [None]:
# Get training and validation loss histories
# ('val_loss' is measured on the validation split held out by model.fit, not
# on the Kaggle test set, so it is labelled accordingly)
training_loss = history.history['loss']
validation_loss = history.history['val_loss']

# Create count of the number of epochs
epoch_count = range(1, len(training_loss) + 1)

# Visualize loss history
plt.plot(epoch_count, training_loss, 'r--')
plt.plot(epoch_count, validation_loss, 'b-')
plt.legend(['Training Loss', 'Validation Loss'])
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show()

In [None]:
# Predict for test
pred = model.predict(test_images_2d)
# Range check of the predictions, which are in normalized label units
# (co-ordinate / IMG_DIM)
print(np.min(pred))
print(np.max(pred))
# NOTE: this runs inference over the whole training set only to print the
# smallest predicted value
print(np.min(model.predict(train_images_2d)))

## Submission to Kaggle for Scoring

In [None]:
# Look up table
print(idlookup.head(2))
# Drop the Location column; the predicted values are merged in later under
# the same name.
# NOTE: re-running this cell raises a KeyError because the column is already gone
idlookup = idlookup.drop('Location',axis=1)
print(idlookup.head(2))

In [None]:
# Wide table of predictions: one row per test image, one column per key-point
# co-ordinate, scaled back from normalized units to pixels
feature_names = train_labels.columns
predictions = pd.DataFrame(pred, columns = feature_names)
predictions = predictions * IMG_DIM

# Long format: one row per (image position, feature name) pair
predictions = predictions.stack().reset_index()
predictions.columns = ['index','FeatureName','Location']

# Map each row position to its ImageId. The index is reset first so that the
# positions line up with the rows of `pred` whatever index `test` carries.
imageids = test['ImageId'].reset_index(drop=True).reset_index()

predictions = predictions.merge(imageids, left_on='index', right_on='index')
predictions = predictions.drop('index',axis=1)

predictions

In [None]:
# Keep only the (ImageId, FeatureName) pairs listed in the lookup table and
# attach the predicted Location to each requested RowId
submission = idlookup.merge(predictions, left_on=['FeatureName','ImageId'], right_on=['FeatureName','ImageId'])
# The submission consists of RowId (as the index) and Location only
submission = submission[['RowId','Location']]
submission = submission.set_index('RowId')
submission

In [None]:
# Save submission result

if os.getcwd() == CWD:
    # In the project environment, write a timestamped file under
    # /project/submissions. The path is built explicitly rather than by
    # changing the working directory, so a failed write cannot leave the
    # notebook in a different directory.
    submissions_dir = os.path.join('/project', 'submissions')
    # make submissions directory if it doesnt exist
    os.makedirs(submissions_dir, exist_ok=True)

    # Timestamp as DDMMYYYYHHMMSS
    dt_string = datetime.now().strftime("%d%m%Y%H%M%S")
    submission.to_csv(os.path.join(submissions_dir, 'submission-{0}.csv'.format(dt_string)))

else:
    # Anywhere else, write next to the notebook
    submission.to_csv('submission.csv')