In [1]:
import numpy as np 
import pandas as pd 
import math
import time
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from PIL import Image
import os
import glob
import cv2
from os import walk
import matplotlib.image as mpimg

In [2]:
def relu(z):
    """Rectified linear unit: keep the positive entries of ``z``, zero the rest."""
    is_positive = z > 0
    return np.where(is_positive, z, 0)

def sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``, applied element-wise."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def grad_sigmoid(z):
    """Derivative of the logistic function at ``z``: ``s * (1 - s)`` with ``s = sigmoid(z)``."""
    s = sigmoid(z)
    return s * (1 - s)

def grad_relu(z):
    """Derivative of ReLU: 1 where ``z`` is strictly positive, 0 elsewhere."""
    is_positive = z > 0
    return np.where(is_positive, 1, 0)

def normalize(z, center=63.5, scale=63.5):
    """Shift and rescale ``z`` element-wise: ``(z - center) / scale``.

    Parameters
    ----------
    z : array-like supporting arithmetic (e.g. ``np.ndarray``)
        Values to normalise; any shape is accepted.
    center : float, optional
        Value subtracted from every element (default 63.5, the constant the
        notebook originally hard-coded).
    scale : float, optional
        Value every shifted element is divided by (default 63.5).

    Returns
    -------
    Same type/shape as ``z`` after broadcasting with the scalars.
    """
    # The previous version also computed an unused mean and read z.shape[1],
    # which only served to make 1-D inputs raise; both were dropped.
    return (z - center) / scale

In [3]:
# Read every PNG of each class into memory.
mask_images = [
    mpimg.imread(img_path)
    for img_path in glob.glob('../input/face-mask-detection/Dataset/with_mask/*.png')
]

unmask_images = [
    mpimg.imread(img_path)
    for img_path in glob.glob('../input/face-mask-detection/Dataset/without_mask/*.png')
]

# Masked faces come first, unmasked faces second; later cells rely on this order.
images = mask_images + unmask_images
data_len = len(images)

# Flatten each image into a single column vector
# (assumes every image is a 3-D array, since three shape entries are read).
imgs = [
    im.reshape(im.shape[0] * im.shape[1] * im.shape[2], 1)
    for im in images
]


In [4]:
# Show two hand-picked samples from the "with_mask" class side by side.
print("Few images of faces with mask :-")
img1 = plt.imread('../input/face-mask-detection/Dataset/with_mask/101.png')
img2 = plt.imread('../input/face-mask-detection/Dataset/with_mask/1024.png')

fig, ax = plt.subplots(nrows=1, ncols=2)
left_ax, right_ax = ax
left_ax.imshow(img1)
right_ax.imshow(img2)

In [5]:
# Show two hand-picked samples from the "without_mask" class side by side.
print("Few images of faces without mask :-")
img3 = plt.imread('../input/face-mask-detection/Dataset/without_mask/100.png')
img4 = plt.imread('../input/face-mask-detection/Dataset/without_mask/1020.png')

fig, ax = plt.subplots(nrows=1, ncols=2)
left_ax, right_ax = ax
left_ax.imshow(img3)
right_ax.imshow(img4)

In [6]:
# Stack the flattened images into one (num_images, num_features) float64 matrix.
# A single vectorised stack replaces the former element-by-element Python loop,
# which was very slow and assigned size-1 arrays into scalar slots (a conversion
# NumPy has deprecated). np.stack raises if the images differ in size instead of
# silently leaving zero padding.
X1 = np.stack([np.ravel(flat_img) for flat_img in imgs]).astype(np.float64)

length = data_len//2

In [7]:
X = X1

# Rows of X1 follow `images = mask_images + unmask_images`, so the label column
# is 1 for every masked image followed by 0 for every unmasked image. Sizing the
# two runs from the actual class lists keeps labels aligned even when the
# classes are not perfectly balanced (the old `data_len//2` split assumed they were).
y = np.concatenate(
    [np.ones((len(mask_images), 1)), np.zeros((len(unmask_images), 1))],
    axis=0,
)

# Hold out 10% of the samples for evaluation.
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.1)

In [8]:
# Switch every split to column-per-sample layout (features x samples).
# NOTE: this rebinds the same names, so re-running this cell flips them back.
X_train, Y_train = X_train.T, Y_train.T
X_test, Y_test = X_test.T, Y_test.T

print("X_train shape =", X_train.shape)
print("Y_train shape =", Y_train.shape)

In [9]:
def initialize(layers):
    """Create the weights, biases and zeroed moment buffers for every layer.

    Parameters
    ----------
    layers : sequence of int
        Layer widths, input layer first.

    Returns
    -------
    parameters : dict
        ``"W<l>"`` (random normal scaled by 0.12, shape ``(layers[l], layers[l-1])``)
        and ``"b<l>"`` (zeros, shape ``(layers[l], 1)``) for ``l = 1 .. len(layers)-1``.
    v, s : dict
        Zero arrays keyed ``"dW<l>"`` / ``"db<l>"`` with the matching shapes.
    """
    parameters, v, s = {}, {}, {}

    for l, (n_in, n_out) in enumerate(zip(layers[:-1], layers[1:]), start=1):
        weight_shape = (n_out, n_in)
        bias_shape = (n_out, 1)

        parameters[f"W{l}"] = np.random.randn(*weight_shape) * 0.12
        parameters[f"b{l}"] = np.zeros(bias_shape)

        # Each buffer gets its own array so v and s never share memory.
        v[f"dW{l}"] = np.zeros(weight_shape)
        v[f"db{l}"] = np.zeros(bias_shape)
        s[f"dW{l}"] = np.zeros(weight_shape)
        s[f"db{l}"] = np.zeros(bias_shape)

    return parameters, v, s

In [10]:
def cost(y_pred, y, lamda, parameters):
    """Cross-entropy cost with an L2 weight penalty.

    Parameters
    ----------
    y_pred : np.ndarray
        Predicted probabilities, same shape as ``y``.
    y : np.ndarray
        Targets with one column per sample (``y.shape[1]`` is the sample count).
    lamda : float
        L2 regularisation strength.
    parameters : dict
        Holds ``"W<l>"`` for ``l = 1 .. len(parameters)//2``; only the weights
        are penalised.

    Returns
    -------
    float
        ``-(1/m) * sum(y*log(p) + (1-y)*log(1-p)) + lamda/(2m) * sum(W**2)``.
    """
    m = y.shape[1]

    # Keep probabilities strictly inside (0, 1): a saturated sigmoid would
    # otherwise produce log(0) and turn the cost into nan/inf.
    eps = 1e-15
    p = np.clip(y_pred, eps, 1 - eps)

    cross_entropy = -np.sum(y * np.log(p) + (1 - y) * np.log(1 - p)) / m

    n_layers = len(parameters) // 2
    squared_weights = sum(
        np.sum(np.square(parameters["W" + str(i)])) for i in range(1, n_layers + 1)
    )

    return cross_entropy + (lamda / (2 * m)) * squared_weights

In [11]:
def forward_prop(X, parameters, layers):
    """Run the network forward: ReLU on every hidden layer, sigmoid on the output.

    Parameters
    ----------
    X : np.ndarray
        Input batch with one column per sample.
    parameters : dict
        ``"W<l>"`` and ``"b<l>"`` for ``l = 1 .. len(layers)-1``.
    layers : sequence
        Layer widths; only its length is used here.

    Returns
    -------
    a : np.ndarray
        Output-layer activations.
    cache : dict
        ``cache["A"][l-1]`` is the input fed to layer ``l`` (``X`` for the first
        layer) and ``cache["Z"][l-1]`` is that layer's pre-activation.
    """
    A = []
    Z = []
    a = X
    L = len(layers)

    # Hidden layers. The bias is added here because backward_prop computes db
    # and update_parameters trains b; with the bias left out of the forward
    # pass those updates had no effect on the network output.
    for l in range(1, L-1):
        A.append(a)
        z = np.dot(parameters["W" + str(l)], a) + parameters["b" + str(l)]
        Z.append(z)
        a = relu(z)

    # Output layer.
    A.append(a)
    z = np.dot(parameters["W" + str(L-1)], a) + parameters["b" + str(L-1)]
    Z.append(z)
    a = sigmoid(z)

    cache = {"A": A, "Z": Z}

    return a, cache
        

In [12]:
def backward_prop(AL, parameters, cache, y, lamda):
    """Back-propagate through the network and collect the gradients.

    Parameters
    ----------
    AL : np.ndarray
        Output-layer activations from the forward pass.
    parameters : dict
        ``"W<l>"`` / ``"b<l>"`` entries; the layer count is ``len(parameters)//2``.
    cache : dict
        ``cache["A"]`` (per-layer inputs) and ``cache["Z"]`` (per-layer
        pre-activations) as lists indexed from the first layer.
    y : np.ndarray
        Targets, one column per sample.
    lamda : float
        L2 regularisation strength, added to the weight gradients only.

    Returns
    -------
    dict
        ``"dZ<l>"``, ``"dW<l>"`` and ``"db<l>"`` for every layer.
    """
    layer_inputs, pre_activations = cache["A"], cache["Z"]
    m = y.shape[1]
    L = len(parameters)//2
    grads = {}

    def _record(layer, dZ):
        # Store dZ and derive dW (with the L2 term) and db for this layer.
        grads[f"dZ{layer}"] = dZ
        grads[f"dW{layer}"] = np.dot(dZ, layer_inputs[layer-1].T)/m + lamda*parameters[f"W{layer}"]/m
        grads[f"db{layer}"] = np.sum(dZ, axis=1, keepdims=True)/m

    # Output layer: the error term is simply prediction minus target.
    _record(L, AL - y)

    # Hidden layers, walking from the last hidden layer back to the first.
    for layer in range(L-1, 0, -1):
        upstream = np.dot(parameters[f"W{layer+1}"].T, grads[f"dZ{layer+1}"])
        _record(layer, upstream*grad_relu(pre_activations[layer-1]))

    return grads

In [13]:
def update_parameters(parameters, v, s, grad, alpha, t=2, beta1=0.9, beta2=0.999, eps=1e-8):
    """Apply one Adam step to every weight and bias (updates happen in place).

    Parameters
    ----------
    parameters : dict
        ``"W<l>"`` / ``"b<l>"`` arrays; modified in place.
    v, s : dict
        Running first / second moment estimates keyed ``"dW<l>"`` / ``"db<l>"``.
    grad : dict
        Gradients under the same ``"dW<l>"`` / ``"db<l>"`` keys.
    alpha : float
        Learning rate.
    t : int, optional
        Step number used for bias correction.
    beta1, beta2 : float, optional
        Decay rates of the first and second moment estimates.
    eps : float, optional
        Small constant added to the denominator.

    Returns
    -------
    (parameters, v, s) after the step.
    """
    L = len(parameters)//2

    # Each moment is corrected with its own decay rate. The second moment was
    # previously divided by (1 - beta1**t), which mis-scaled every step.
    v_correction = 1 - np.power(beta1, t)
    s_correction = 1 - np.power(beta2, t)

    for l in range(1, L+1):
        for kind in ("W", "b"):
            p_key = kind + str(l)
            g_key = "d" + p_key
            g = grad[g_key]

            v[g_key] = beta1*v[g_key] + (1-beta1)*g
            s[g_key] = beta2*s[g_key] + (1-beta2)*np.square(g)

            v_corrected = v[g_key]/v_correction
            s_corrected = s[g_key]/s_correction

            parameters[p_key] -= alpha*v_corrected/(np.sqrt(s_corrected) + eps)

    return parameters, v, s
    

In [14]:
def mini_batch(X, y, size=64):
    """Shuffle the samples (columns) and cut them into consecutive mini-batches.

    Parameters
    ----------
    X : np.ndarray
        Features with one column per sample.
    y : np.ndarray
        Targets with one column per sample, aligned with ``X``.
    size : int, optional
        Number of samples per batch; the final batch holds whatever is left over.

    Returns
    -------
    list of (X_batch, y_batch) tuples covering every sample exactly once.
    """
    m = X.shape[1]

    # One shared permutation keeps features and targets aligned.
    order = list(np.random.permutation(m))
    X_shuffled = X[:, order]
    y_shuffled = y[:, order]

    # Slicing past the end is clipped, so the last chunk is the remainder batch.
    return [
        (X_shuffled[:, start:start + size], y_shuffled[:, start:start + size])
        for start in range(0, m, size)
    ]
        

In [26]:
def model(X, y, alpha, layers, iterations, lamda, X_test, y_test):
    """Train the network with mini-batch Adam and return the learned parameters.

    Parameters
    ----------
    X, y : np.ndarray
        Training features / targets, one column per sample.
    alpha : float
        Learning rate.
    layers : sequence of int
        Layer widths, input layer first.
    iterations : int
        Number of passes over the training data.
    lamda : float
        L2 regularisation strength.
    X_test, y_test :
        Accepted for interface compatibility; not used during training.

    Returns
    -------
    parameters : dict
        Trained weights and biases.
    costs : list of float
        Mean mini-batch cost of each iteration (one entry per iteration).
    """
    parameters, v, s = initialize(layers)
    costs = []
    step = 0  # Adam step counter; drives the bias correction

    for i in range(iterations):
        batches = mini_batch(X, y)
        epoch_cost = 0

        for batch_X, batch_y in batches:
            A, cache = forward_prop(batch_X, parameters, layers)
            epoch_cost += cost(A, batch_y, lamda, parameters)

            grads = backward_prop(A, parameters, cache, batch_y, lamda)

            # Advance the step count on every update; it used to stay at the
            # default t=2, so the bias correction never decayed.
            step += 1
            parameters, v, s = update_parameters(parameters, v, s, grads, alpha, t=step)

        # Log one averaged value per iteration. Appending the running sum after
        # every batch produced a sawtooth that reset at each iteration.
        if batches:
            costs.append(epoch_cost / len(batches))

        if i%10==0:
            print("iteration", i)

    return parameters, costs
    
    
    

In [16]:
def predict(X_test, Y_test, alpha, layers, parameters):
    """Classify ``X_test`` and score the predictions against ``Y_test``.

    The predicted class of each sample is the row index of the largest output
    activation. ``alpha`` is accepted but not used.

    Returns
    -------
    accu : float
        Fraction of entries where the prediction equals ``Y_test``.
    y_pred : np.ndarray
        Predicted class indices, shape ``(1, n_samples)``.
    """
    outputs, _ = forward_prop(X_test, parameters, layers)
    n_samples = outputs.shape[1]

    y_pred = np.argmax(outputs, axis=0).reshape(1, n_samples)
    matches = y_pred == Y_test
    accu = matches.mean()

    return accu, y_pred

In [17]:
def onehotEncoding(y, num_classes=None):
    """One-hot encode a row of integer class labels.

    Parameters
    ----------
    y : np.ndarray
        Labels in its first row (``y[0, i]`` is the class of sample ``i``).
    num_classes : int, optional
        Number of classes. When omitted, the notebook-level ``outcome``
        variable is used, as before.

    Returns
    -------
    np.ndarray
        Float array of shape ``(num_classes, y.shape[1])`` where entry
        ``[k, i]`` is 1.0 if ``y[0, i] == k`` and 0.0 otherwise.
    """
    if num_classes is None:
        num_classes = outcome

    # Class ids 0..num_classes-1 as a column; the old hard-coded [0, 1] only
    # worked for exactly two classes.
    class_ids = np.arange(num_classes).reshape(num_classes, 1)

    # Broadcasting compares every class id with every label in one shot.
    return (class_ids == y[0, :]).astype(float)

In [23]:
def plot(costs):
    """Draw the recorded cost values as a line chart on the current axes."""
    axes = plt.gca()
    axes.plot(costs)
    axes.set_title("Cost vs iterations")
    axes.set_xlabel("Iterations")
    axes.set_ylabel("Cost")

In [24]:
# Hyper-parameters.
alpha = 0.001      # learning rate
iterations = 100   # passes over the training data
lamda = 0.4        # L2 regularisation strength
outcome = 2        # number of output classes (also read by onehotEncoding)

# Train on the training split; X_test / Y_test are used as they already are.
X, y = X_train, Y_train
layers = [X.shape[0], 50, outcome]

y_ = onehotEncoding(y)
parameters, costs = model(X, y_, alpha, layers, iterations, lamda, X_test, Y_test)
plot(costs)
accu, y_pred = predict(X_test, Y_test, alpha, layers, parameters)

print("Accuracy =", accu)


In [25]:
# Side-by-side table of true and predicted labels for the test split.
# Both arrays are flattened to 1-D integer columns first, so every cell holds a
# plain int rather than a one-element array that has to be coerced afterwards.
df = pd.DataFrame({
    'Actual value': np.ravel(Y_test).astype(int),
    'Predicted Value': np.ravel(y_pred).astype(int),
})
df