# Project - Computer Vision #2
 
                                 by ARYAN JAIN

#### SUMMARY

<b>Context:</b>
    Company X owns a movie application and repository that streams movies to millions of users on a
    subscription basis. The company wants to automate the retrieval of cast and crew information for each scene of a
    movie, so that when a user pauses the movie and clicks the cast information button, the app shows
    details of the actors in the scene. The company has in-house computer vision and multimedia experts who need to detect faces in screenshots
    taken from movie scenes.
    
    
<b>Data Description:</b>    
    The dataset comprises images and, for each image, the mask marking where a human face appears
    
    File name: Part1-Traindata-images.npy
    
<b>Domain:</b>
 Entertainment
    
    
<b>Objectives:</b>
    Face detection from training images
 
<b>Key Tasks:</b>

    - Import the data
    - Create features (images) and labels (mask) using that data
    - Design a face mask detection model, using U-net along with pre-trained transfer learning models
    - Design own Dice Coefficient and Loss function. Train, tune and test the model
    - Evaluate the model using testing data
    - Use the “Prediction image” as an input to your designed model and display the output of the image

### Importing Libraries

In [None]:
# data analysis
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# Deep Learning
import cv2
from skimage import io,transform
from tensorflow.keras.applications.mobilenet import preprocess_input
from tensorflow.keras.layers import Concatenate, UpSampling2D, Conv2D, Reshape, Activation, BatchNormalization, SpatialDropout2D
from tensorflow.python.keras.preprocessing.image import image, load_img, ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam

from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.backend import log, epsilon

import tensorflow as tf

# Python Imaging Library; for opening, manipulating, and saving many different image file formats#
import PIL
from PIL import Image

# warnings
import warnings
warnings.simplefilter("ignore")

## PART 1

In [None]:
# Understand the dataset file

In [None]:
# Dataset file name: Part1-Traindata-images.npy
# NPY file means file created by NumPy python library
# It contains an array saved in the NumPy file format. 
# These NPY files contain the metadata needed to reconstruct the array, such as its dtype and shape

In [None]:
# Load training dataset

In [None]:
# allow_pickle = True must be kept, otherwise np.load throws an error
# (presumably because the file stores Python objects — TODO confirm).
# NOTE(review): unpickling can execute arbitrary code; only load this file from a trusted source.
data = np.load('Part 1- Train data - images.npy', allow_pickle=True) 

In [None]:
# Investigating the data size
data.shape

In [None]:
# The training data file has 409 rows, 2 columns
# Based on project description, it seems the file contains 409 images & corresponding detail

# I will investigate to validate

In [None]:
# Display first column

# NOTE: cv2 and matplotlib.pyplot are already imported at the top of the notebook;
# these re-imports are harmless and only keep the cell runnable on its own.
import cv2
import matplotlib.pyplot as plt

# Show the first column of sample 7
plt.imshow(data[7][0]);

In [None]:
# The first column contains image

In [None]:
# Display 2nd column

data[10][1]

In [None]:
# The 2nd column holds the bounding-box coordinates (plus the label) of each human face in the image

In [None]:
# Display few more images to confirm

# Draw sample 98 on a large canvas with the axes hidden
fig, ax = plt.subplots(figsize = (15, 7.2))
ax.axis('off')
ax.imshow(data[98][0])
plt.show()

In [None]:
plt.imshow(data[147][0]);

In [None]:
plt.imshow(data[252][0]);

In [None]:
# Display 2nd column for one of the above images, expecting it to be bounding box co-ordinates

data[100][1]

In [None]:
# Create features & labels

# Based on the project description and understanding the training dataset:
        # the feature means images
        # labels means face mask (identified using box coordinates)

In [None]:
# Instantiate numpy arrays for facemasks and images with zero
# Use the array size = number of images in the training data set

# The intent is to use MobileNet for the pre-trained layers:
        # Input size should be 224x224 for masks
        # input size should be 224x224x3 for images, last dimension is for channels
        # We will also have to resize images to 224x224

# library used:
    # from tensorflow.keras.applications.mobilenet import preprocess_input

# Instantiating numpy arrays
# Zero-initialised arrays with one slot per sample in the training data
num_samples = int(data.shape[0])
masks  = np.zeros((num_samples, 224, 224))              # face masks, 224x224, 0 = background
images = np.zeros((num_samples, 224, 224, 3))           # MobileNet-ready images, 224x224x3

for i in range(num_samples):
    # Resize every image to the 224x224 input size used for MobileNet
    img = data[i][0]
    img = cv2.resize(img, dsize = (224, 224), interpolation = cv2.INTER_CUBIC)

    # An image without a channel axis (e.g. grayscale) cannot be sliced to 3 channels.
    # It is skipped explicitly, leaving its image and mask slots all-zero, instead of
    # hiding every possible error behind a bare `except`.
    if img.ndim != 3:
        continue
    img = img[:, :, :3]                                  # keep the first three channels only
    images[i] = preprocess_input(np.array(img, dtype = np.float32)) # pre-process the input as needed by Mobile Net

    for index in data[i][1]:
        # The code scales the stored box corners by 224 to get pixel positions
        # (presumably they are fractions of the image size — TODO confirm)
        x1 = int(index['points'][0]['x'] * 224)
        x2 = int(index['points'][1]['x'] * 224)
        y1 = int(index['points'][0]['y'] * 224)
        y2 = int(index['points'][1]['y'] * 224)
        # Mark the face rectangle with 1; everything else keeps its initial 0
        masks[i][y1:y2, x1:x2] = 1

In [None]:
# Let's look at shape of the image and mask arrays created

images.shape, masks.shape

In [None]:
# We have resized the images to 224x224x3, stored in a NumPy array called images
# The face masks built from the bounding-box co-ordinates are stored in a NumPy array called masks

In [None]:
# Let's display a few images from the arrays:

    # I will look at same images displayed in earlier steps, to understand impact of resizing

In [None]:
# images[] holds MobileNet-preprocessed floats in [-1, 1]; map them back to [0, 1] so imshow does not clip them
plt.imshow((images[7] + 1) / 2);

In [None]:
# images[] holds MobileNet-preprocessed floats in [-1, 1]; map them back to [0, 1] so imshow does not clip them
plt.imshow((images[98] + 1) / 2);

In [None]:
# Location of the face is yellow

plt.imshow(masks[98]);

In [None]:
# What's the strategy:

    # Define variables
    # Splitting the data
    # Using MobileNet with ImageNet weights, U-Net layers at the end
    # The U-Net model using pre-trained ImageNet as backbone
    # Upsampling in the final layers

In [None]:
# setting values for variables
random_s = 0               # random_state passed to train_test_split
test_s = 0.2               # test_size passed to train_test_split (validation share)

alpha_s = 1 
# Alpha value of 1 to get the entire MobileNet

img_h  = 224   # Image height
img_w  = 224   # Image width
img_c = 3      # Image channels (third dimension of the model input shape)
msk_h  = 224   # Mask height
msk_w  = 224   # Mask width

In [None]:
# Split the dataset

from sklearn.model_selection import train_test_split
# NOTE(review): with shuffle = False the samples are split in their stored order,
# so random_state has no effect on this call.
X_train, X_val, y_train, y_val = train_test_split(images, masks, test_size = test_s, random_state = random_s, shuffle = False)

# Display training and validation data shapes

X_train.shape, X_val.shape, y_train.shape, y_val.shape

In [None]:
# Per above steps, I will be using MobileNet as the base model
    # Let's take a look at base MobileNet model summary:
    
from tensorflow.keras.applications.mobilenet import MobileNet

# Inspection only: this instance is built with include_top=True and just summarised;
# create_model constructs its own MobileNet with include_top = False.
mn = MobileNet(input_shape=(img_h, img_w, 3), include_top=True, alpha=alpha_s, weights="imagenet")
mn.summary()

In [None]:
# Next: creating the Mask detection model

# Libraries (already declared in the beginning of this notebook)

# from tensorflow.keras.layers import Concatenate, Conv2D, Reshape, UpSampling2D, BatchNormalization
# from tensorflow.keras.models import Model

In [None]:
def conv_block(prevlayer, filters, prefix, strides=(1, 1)):
    """Apply a 3x3 convolution, batch normalization and ReLU to `prevlayer`.

    Args:
        prevlayer: Tensor the block is applied to.
        filters: Number of filters of the convolution.
        prefix: String prepended to the name of each of the three layers.
        strides: Strides of the convolution.

    Returns:
        The output tensor of the ReLU activation.
    """
    stages = (
        Conv2D(filters, (3, 3), padding = 'same', kernel_initializer = 'he_normal', strides = strides, name = prefix + '_conv'),
        BatchNormalization(name = prefix + 'BatchNormalization'),
        Activation('relu', name = prefix + 'ActivationLayer'),
    )
    out = prevlayer
    for stage in stages:
        out = stage(out)
    return out

In [None]:
# Library to use, already imported in earlier step
# from tensorflow.keras.applications.mobilenet import MobileNet


# Function to create model

def create_model(trainable = True):
    """Build the U-Net style mask model on top of an ImageNet MobileNet encoder.

    The encoder is MobileNet without its top. Each decoder stage upsamples the
    current feature map, concatenates it with an encoder activation (the last
    stage uses the model input itself) and applies two conv_block calls.
    Spatial dropout, a 1x1 sigmoid convolution and a reshape to
    (img_h, img_w) finish the network.

    Args:
        trainable: Value assigned to `trainable` on every MobileNet layer.

    Returns:
        A Model taking the MobileNet input and producing the reshaped sigmoid map.
    """
    encoder = MobileNet(input_shape = (img_h, img_w, img_c), include_top = False, alpha = alpha_s, weights = 'imagenet')
    for encoder_layer in encoder.layers:
        encoder_layer.trainable = trainable

    # One entry per decoder stage: the tensor concatenated after upsampling,
    # then (layer-name prefix, filters) for each of the two conv blocks.
    decoder_plan = [
        (encoder.get_layer('conv_pw_11_relu').output, ('Conv_6_1', 256), ('Conv_6_2', 256)),
        (encoder.get_layer('conv_pw_5_relu').output, ('Conv_7_1', 256), ('Conv_7_2', 256)),
        (encoder.get_layer('conv_pw_3_relu').output, ('Conv_8_1', 192), ('Conv_8_2', 128)),
        (encoder.get_layer('conv_pw_1_relu').output, ('Conv_9_1', 96), ('Conv_9_2', 64)),
        (encoder.input, ('Conv_10_1', 48), ('Conv_10_2', 32)),
    ]

    # Decoding starts from the conv_pw_13_relu activation of the encoder
    features = encoder.get_layer('conv_pw_13_relu').output
    for skip, (first_name, first_filters), (second_name, second_filters) in decoder_plan:
        upsampled = UpSampling2D()(features)
        merged = Concatenate()([upsampled, skip])
        features = conv_block(merged, first_filters, first_name)
        features = conv_block(features, second_filters, second_name)
    features = SpatialDropout2D(0.2)(features)

    # One sigmoid value per pixel, then drop the trailing channel axis
    per_pixel = Conv2D(1, (1, 1), activation = 'sigmoid')(features)
    mask_output = Reshape((img_h, img_w))(per_pixel)
    return Model(inputs = encoder.input, outputs = mask_output)

In [None]:
# Calling the create_model function

model = create_model(True)
model.summary()

In [None]:
# Dice Coefficient and Loss function; define functions to calculate:

# Dice Coefficient = (2*|X ∩ Y|) / (|X| + |Y|)   # X = predicted set of pixels, Y = ground truth

   # Dice Coefficient 
   # Loss using binary cross-entropy function from keras.losses and the calculated dice co-efficient

In [None]:
def dice_coefficient(y_true, y_pred):
    """Return the Dice coefficient of `y_true` and `y_pred`.

    Computed over all elements of both tensors at once as
    2 * sum(y_true * y_pred) / (sum(y_true + y_pred) + epsilon).

    Args:
        y_true: Ground-truth mask values.
        y_pred: Predicted mask values.

    Returns:
        The coefficient as a single value.
    """
    overlap = tf.reduce_sum(y_true * y_pred)
    total = tf.reduce_sum(y_true + y_pred)

    # epsilon guards against a division by zero
    return (2 * overlap) / (total + tf.keras.backend.epsilon())

In [None]:
def loss(y_true, y_pred):
    """Return binary cross-entropy minus the log of the Dice coefficient.

    Args:
        y_true: Ground-truth mask values.
        y_pred: Predicted mask values.

    Returns:
        binary_crossentropy(y_true, y_pred) - log(dice_coefficient + epsilon).
    """
    cross_entropy = binary_crossentropy(y_true, y_pred)
    # epsilon keeps the logarithm defined when the Dice coefficient is 0
    log_dice = log(dice_coefficient(y_true, y_pred) + epsilon())
    return cross_entropy - log_dice

In [None]:
# Next: Compile the model
    # Optimizer => adam
    # Metrics => dice coefficient
    # Loss => binary cross-entropy minus log(dice coefficient), i.e. the loss() function defined above

In [None]:
# Define optimizer and compile the model

# `learning_rate` is the supported argument name (`lr` is a deprecated alias), and
# `decay` / `epsilon = None` are rejected or mishandled by newer Keras optimizers.
# Leaving epsilon and decay at their defaults keeps the same update rule.
adam = Adam(learning_rate = 1e-4, beta_1 = 0.9, beta_2 = 0.999, amsgrad = False)
model.compile(loss = loss, optimizer = adam, metrics = [dice_coefficient])

<font color='Blue'>We will use the Adam optimizer and use our defined loss function and dice_coefficient as metric</font>

In [None]:
# Define Callbacks

In [None]:
# Define Checkpoint 

# The best performing model weights will be saved (see first parameter in ModelCheckpoint)

# Save weights only, and only when val_loss improves.
# NOTE: the file name is formatted with the training `loss` of that epoch, not val_loss.
# The deprecated `period = 1` argument is dropped; checking after every epoch is the default.
checkpoint = ModelCheckpoint('model_{loss:.2f}.h5', monitor = 'val_loss', verbose = 1, 
                             save_best_only = True, save_weights_only = True, mode = 'min')

# Stop training after 5 epochs without a better val_loss
stop = EarlyStopping(monitor = 'val_loss', patience = 5, mode = 'min')

# NOTE(review): this patience equals the EarlyStopping patience, so a learning-rate
# reduction will usually coincide with the end of training — consider a smaller value here.
reduce_lr = ReduceLROnPlateau(monitor = 'val_loss', factor = 0.2, patience = 5, min_lr = 1e-6, verbose = 1, mode = 'min')

In [None]:
# Fit / Train the model

# NOTE(review): with batch_size = 1 every BatchNormalization layer sees a single sample per step — confirm this is intended
model.fit(X_train, y_train, epochs = 30, batch_size = 1, callbacks = [checkpoint, reduce_lr, stop], 
          validation_data = (X_val, y_val))

In [None]:
# Evaluating the model

model.evaluate(X_val, y_val, verbose = 1)

In [None]:
# As mentioned above: 
     # the best performing model weights is saved in model_0.29.h5
     # I will evaluate performance with this weight as well

In [None]:
# Let's predict the Mask for the test image

In [None]:
import matplotlib.pyplot as plt
filename = 'Part 1Test Data - Prediction Image.jpeg' # load the test image

# cv2.imread returns None instead of raising when the file cannot be read
p_img = cv2.imread(filename)
if p_img is None:
    raise FileNotFoundError('Could not read test image: ' + filename)

# OpenCV delivers pixels in BGR order, while matplotlib expects RGB
# (presumably the training images are RGB as well — TODO confirm)
p_img = cv2.cvtColor(p_img, cv2.COLOR_BGR2RGB)

# plot the original test image
plt.imshow(p_img);

In [None]:
# Lets predict the face mask using our model .
# Working earlier, now error

# Bring the test image to the network input format: resized, first 3 channels, MobileNet pre-processing
test_img = cv2.resize(p_img, dsize = (img_w, img_h), interpolation = cv2.INTER_CUBIC)[:, :, :3]
MNet_scaled = preprocess_input(np.array(test_img, dtype=np.float32))

# Predict on a batch of one and binarise the scores with 0.5 as the threshold
scores = model.predict(x=np.array([MNet_scaled]))[0]
binary = 1.0*(scores > 0.5)
pred_mask = cv2.resize(binary, (img_w,img_h))

# # commenting below due to error
# image_mask = test_img

# image_mask[:,:,0] = pred_mask*image[:,:,0]
# image_mask[:,:,1] = pred_mask*image[:,:,1]
# image_mask[:,:,2] = pred_mask*image[:,:,2]

# plt.imshow(image_mask);

In [None]:
# plt.imshow(pred_mask); # show the image with mask.

In [None]:
# Lets predict the face mask using our model 

In [None]:
# Resize, keep the first 3 channels and apply the MobileNet pre-processing
resized = cv2.resize(p_img, dsize = (img_w, img_h), interpolation = cv2.INTER_CUBIC)
test_img = preprocess_input(np.array(resized[:, :, :3], dtype = np.float32))

# Wrap the single image in a batch of one
images_to_predict = np.array([test_img])

test_img.shape, images_to_predict.shape

In [None]:
# Display the image to be predicted

# test_img was scaled to [-1, 1] by preprocess_input; imshow clips floats outside [0, 1],
# so it is mapped back to [0, 1] before being drawn and saved.
fig = plt.figure(figsize = (15, 7.2))
ax = fig.add_subplot(1, 1, 1)
plt.axis('off')
plt.imshow((test_img + 1.0) / 2.0);
plt.savefig('image.jpg', bbox_inches = 'tight', pad_inches = 0)

In [None]:
# Load the model weight from training and predict on the test image

# NOTE(review): 'model_0.29.h5' is a checkpoint name written by one particular training run
# (ModelCheckpoint formats the name from that epoch's loss); another run may produce a different file name.
model.load_weights('model_0.29.h5')
y_pred = model.predict(np.array(images_to_predict))

In [None]:
# Generate the face mask image
# Scores above 0.1 are treated as face pixels
pred_mask = cv2.resize(1.0*(y_pred[0] > 0.1), (224, 224))

# Build the masked image as a new array: the former `image2 = test_img` was only an
# alias, so writing the channels back also overwrote test_img itself.
# The pixels are first mapped from [-1, 1] back to [0, 1], so masked-out regions
# (value 0) render as black and imshow does not clip the remaining pixels.
image2 = ((test_img + 1.0) / 2.0) * pred_mask[:, :, np.newaxis]
out_image = image2

# Masked image
fig = plt.figure(figsize = (15, 7.2))
ax = fig.add_subplot(1, 1, 1)
plt.axis('off')
plt.imshow(out_image)

# Mask alone, saved for the overlay step
fig = plt.figure(figsize = (15, 7.2))
ax = fig.add_subplot(1, 1, 1)
plt.axis('off')
plt.imshow(pred_mask, alpha = 1)
plt.savefig('mask.jpg', bbox_inches = 'tight', pad_inches = 0)

In [None]:
# We will now show the face mask on the original image

img  = cv2.imread('image.jpg', 1)
mask = cv2.imread('mask.jpg', 1)
# cv2.imread returns None instead of raising when a file cannot be read
if img is None or mask is None:
    raise FileNotFoundError('image.jpg / mask.jpg could not be read - run the cells that save them first')

# cv2.add needs equally sized inputs; guard in case the two saved figures differ in pixel size
if mask.shape != img.shape:
    mask = cv2.resize(mask, (img.shape[1], img.shape[0]))
img  = cv2.add(img, mask)

# The former cv2.addWeighted call used alpha=1, beta=0.0, gamma=0 and therefore returned
# img unchanged, so it is dropped. OpenCV loads BGR; convert so matplotlib shows true colours.
imposed_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

fig = plt.figure(figsize = (15, 7.2))
ax = fig.add_subplot(1, 1, 1)
plt.axis('off')
plt.imshow(imposed_img, alpha = 1)

In [None]:
# Save the trained model to an .h5 file (written by Keras' model.save, not by pickle)
model.save("Face Detection Model by Aryan Jain.h5")

### END OF PART 1 OF THE PROJECT