In [1]:
import pandas as pd
import numpy as np

In [2]:
def sigmoid(Z):
    """Element-wise logistic sigmoid, evaluated without floating-point overflow.

    Parameters
    ----------
    Z : numpy.ndarray or scalar
        Pre-activation values.

    Returns
    -------
    sig : numpy.ndarray
        ``1 / (1 + exp(-Z))`` for every entry of ``Z`` (0-d array for a
        scalar input).
    Z : same object that was passed in
        Returned untouched so the caller can store it as the activation cache.
    """
    z = np.asarray(Z)

    # exp(-|z|) always lies in (0, 1], so it can never overflow. The two
    # branches below are algebraically identical to 1 / (1 + exp(-z)); each is
    # only selected for the sign of z on which it is numerically safe.
    e = np.exp(-np.abs(z))
    sig = np.where(z >= 0, 1 / (1 + e), e / (1 + e))

    return sig, Z

In [3]:
def relu(Z):
    """Rectified linear unit: replace every negative entry of ``Z`` with zero.

    Returns a pair ``(activation, Z)``; ``Z`` is handed back unchanged so the
    caller can keep it as the activation cache for the backward pass.
    """
    activation = np.maximum(0, Z)
    return activation, Z


In [None]:
def tanh(Z):
    """Element-wise hyperbolic tangent activation.

    Parameters
    ----------
    Z : numpy.ndarray or scalar
        Pre-activation values.

    Returns
    -------
    A : numpy.ndarray
        ``tanh(Z)`` for every entry of ``Z``.
    Z : same object that was passed in
        Returned untouched so the caller can store it as the activation cache.
    """
    # np.tanh saturates cleanly at +/-1. The explicit
    # (e^Z - e^-Z) / (e^Z + e^-Z) form overflows to inf/inf = nan once |Z|
    # exceeds roughly 710 in float64.
    A = np.tanh(Z)

    return A, Z

In [4]:
def initialize_parameters(layer_dims, seed=1):
    """Create the weight matrices and bias vectors for every layer.

    Parameters
    ----------
    layer_dims : sequence of int
        Number of units in each layer, input layer first.
    seed : int or None, default 1
        Value passed to ``np.random.seed`` before drawing the weights. The
        default reproduces the previously hard-coded seed. Pass ``None`` to
        leave NumPy's global random state untouched.

    Returns
    -------
    dict
        ``"W<l>"`` of shape ``(layer_dims[l], layer_dims[l-1])`` drawn from a
        standard normal and divided by ``sqrt(layer_dims[l-1])``, and
        ``"b<l>"`` of shape ``(layer_dims[l], 1)`` filled with zeros, for
        ``l = 1 .. len(layer_dims) - 1``.
    """
    if seed is not None:
        # Note: this reseeds NumPy's *global* generator.
        np.random.seed(seed)

    parameters = {}

    for l in range(1, len(layer_dims)):
        fan_in = layer_dims[l - 1]
        fan_out = layer_dims[l]
        parameters["W" + str(l)] = np.random.randn(fan_out, fan_in) / np.sqrt(fan_in)
        parameters["b" + str(l)] = np.zeros((fan_out, 1))

    return parameters

In [5]:
def linear_forward(A,W,b):
    """Affine part of a layer: ``Z = W . A + b``.

    Returns ``(Z, cache)`` where ``cache`` is the tuple ``(A, W, b)`` of the
    exact objects passed in, kept for the backward pass.
    """
    pre_activation = np.dot(W, A) + b
    return pre_activation, (A, W, b)

In [6]:
def linear_act_forward(A_prev, W, b, act_function="sigmoid"):
    """Forward pass through one layer: affine step followed by an activation.

    Parameters
    ----------
    A_prev : numpy.ndarray
        Activations coming from the previous layer (or the input data).
    W, b : numpy.ndarray
        Weights and bias of this layer.
    act_function : {"sigmoid", "relu", "tanh"}, default "sigmoid"
        Name of the activation to apply.

    Returns
    -------
    A : numpy.ndarray
        Output of the activation.
    cache : tuple
        ``(linear_cache, activation_cache)`` as produced by ``linear_forward``
        and the chosen activation function.

    Raises
    ------
    ValueError
        If ``act_function`` is not one of the supported names.
    """
    # Validate up front: an unknown name used to fall through every branch
    # and fail later with an unhelpful UnboundLocalError.
    if act_function not in ("sigmoid", "relu", "tanh"):
        raise ValueError(
            "Unknown act_function %r; expected 'sigmoid', 'relu' or 'tanh'" % (act_function,)
        )

    # The affine step is identical for every activation.
    Z, linear_cache = linear_forward(A_prev, W, b)

    if act_function == "sigmoid":
        A, activation_cache = sigmoid(Z)
    elif act_function == "relu":
        A, activation_cache = relu(Z)
    else:
        A, activation_cache = tanh(Z)

    cache = (linear_cache, activation_cache)

    return A, cache
    

In [7]:
def linear_model(X, parameters,act_pref="relu"):
    """Full forward pass: hidden layers with ``act_pref``, then a sigmoid layer.

    The number of layers is taken as ``len(parameters) // 2``. Layers
    ``1 .. L-1`` use ``act_pref`` and layer ``L`` always uses ``"sigmoid"``.

    Returns ``(AL, caches)``: the output of the last layer and a list with one
    cache per layer, in forward order.
    """
    n_layers = len(parameters) // 2
    caches = []
    activation = X

    # Hidden layers 1 .. L-1.
    for idx in range(1, n_layers):
        weights = parameters["W" + str(idx)]
        bias = parameters["b" + str(idx)]
        activation, layer_cache = linear_act_forward(activation, weights, bias, act_function=act_pref)
        caches.append(layer_cache)

    # Output layer L.
    weights = parameters["W" + str(n_layers)]
    bias = parameters["b" + str(n_layers)]
    AL, layer_cache = linear_act_forward(activation, weights, bias, act_function="sigmoid")
    caches.append(layer_cache)

    return AL, caches
    

In [8]:
def compute_cost(AL, Y):
    """Mean binary cross-entropy between predictions ``AL`` and labels ``Y``.

    Parameters
    ----------
    AL : numpy.ndarray
        Predicted probabilities; must broadcast against ``Y``.
    Y : numpy.ndarray
        Labels, 2-D; ``Y.shape[1]`` is used as the number of examples.

    Returns
    -------
    numpy.ndarray
        The cost after ``np.squeeze`` (a 0-d array).
    """
    m = Y.shape[1]

    # Keep probabilities strictly inside (0, 1). A saturated sigmoid output of
    # exactly 0 or 1 would otherwise give log(0) = -inf and, multiplied by a
    # zero label term, NaN. Values already inside the interval are unchanged.
    eps = 1e-15
    AL_safe = np.clip(np.asarray(AL, dtype=np.float64), eps, 1 - eps)

    cost = -np.sum(Y * np.log(AL_safe) + (1 - Y) * np.log(1 - AL_safe)) / m

    cost = np.squeeze(cost)

    return cost

In [9]:
def linear_backward(dZ, cache):
    """Backward pass through the affine part of one layer.

    ``cache[0]`` is read as the layer's input activations and ``cache[1]`` as
    its weights; ``dZ.shape[1]`` is used as the number of examples ``m``.

    Returns ``(dW, db, dA_prev)`` where ``dW`` and ``db`` are scaled by
    ``1/m`` and ``dA_prev`` is ``W.T . dZ``.
    """
    n_examples = dZ.shape[1]
    inputs, weights = cache[0], cache[1]
    scale = 1 / n_examples

    grad_w = np.dot(dZ, inputs.T) * scale
    grad_b = np.sum(dZ, axis=1, keepdims=True) * scale
    grad_inputs = np.dot(weights.T, dZ)

    return grad_w, grad_b, grad_inputs

In [10]:
def sigmoid_backward(dA, cache):
    """Gradient of the cost with respect to ``Z`` for a sigmoid activation.

    Parameters
    ----------
    dA : numpy.ndarray
        Gradient of the cost with respect to the activation output.
    cache : numpy.ndarray
        The pre-activation ``Z`` stored during the forward pass.

    Returns
    -------
    numpy.ndarray
        ``dA * s * (1 - s)`` with ``s = sigmoid(Z)``.
    """
    z = np.asarray(cache)

    # Overflow-free sigmoid: exp(-|z|) is always in (0, 1]; each branch equals
    # 1 / (1 + exp(-z)) and is selected only where it is numerically safe.
    e = np.exp(-np.abs(z))
    s = np.where(z >= 0, 1 / (1 + e), e / (1 + e))

    dZ = dA * s * (1 - s)

    return dZ

In [11]:
def relu_backward(dA, cache):
    """Gradient of the cost with respect to ``Z`` for a ReLU activation.

    ``cache`` is the pre-activation ``Z``. The result is a copy of ``dA`` in
    which every position where ``Z <= 0`` has been set to zero; ``dA`` itself
    is not modified.
    """
    inactive = cache <= 0
    grad = np.array(dA, copy=True)
    grad[inactive] = 0
    return grad
    

In [None]:
def tanh_backward(dA, cache):
    """Gradient of the cost with respect to ``Z`` for a tanh activation.

    Parameters
    ----------
    dA : numpy.ndarray
        Gradient of the cost with respect to the activation output.
    cache : numpy.ndarray
        The pre-activation ``Z`` stored during the forward pass.

    Returns
    -------
    numpy.ndarray
        ``dA * (1 - tanh(Z)**2)``.
    """
    Z = cache
    # np.tanh saturates at +/-1 for large |Z|, so the derivative goes to 0
    # instead of turning into NaN as the explicit exponential formula does.
    dZ = dA * (1 - np.tanh(Z) ** 2)

    return dZ

In [12]:
def linear_act_backward(dA, cache, act_function="sigmoid"):
    """Backward pass through one layer: activation gradient, then affine gradient.

    Parameters
    ----------
    dA : numpy.ndarray
        Gradient of the cost with respect to this layer's activation output.
    cache : tuple
        ``(linear_cache, activation_cache)`` stored by the forward pass.
    act_function : {"sigmoid", "relu", "tanh"}, default "sigmoid"
        Name of the activation that was used in the forward pass.

    Returns
    -------
    tuple
        ``(dW, db, dA_prev)`` as returned by ``linear_backward``.

    Raises
    ------
    ValueError
        If ``act_function`` is not one of the supported names.
    """
    # Validate up front: an unknown name used to fall through every branch
    # and fail at the return statement with an UnboundLocalError.
    if act_function not in ("sigmoid", "relu", "tanh"):
        raise ValueError(
            "Unknown act_function %r; expected 'sigmoid', 'relu' or 'tanh'" % (act_function,)
        )

    linear_cache, activation_cache = cache

    if act_function == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
    elif act_function == "relu":
        dZ = relu_backward(dA, activation_cache)
    else:
        dZ = tanh_backward(dA, activation_cache)

    # The affine backward step is the same for every activation.
    dW, db, dA_prev = linear_backward(dZ, linear_cache)

    return dW, db, dA_prev

In [13]:
def backward_model(AL,Y,caches,act_pref="relu"):
    """Back-propagate through the whole network.

    Parameters
    ----------
    AL : numpy.ndarray
        Output of the final (sigmoid) layer from the forward pass.
    Y : numpy.ndarray
        Labels; must broadcast against ``AL``.
    caches : list
        One cache per layer, in forward order, as returned by ``linear_model``.
    act_pref : str, default "relu"
        Activation that was used for the hidden layers.

    Returns
    -------
    dict
        ``"dW<l>"`` and ``"db<l>"`` for every layer ``l = 1 .. L`` and
        ``"dA<l>"`` for ``l = 0 .. L-1``, where ``L = len(caches)``.
    """
    grads = dict()

    # Derivative of the cross-entropy cost w.r.t. AL. AL is clipped away from
    # exactly 0 and 1 first; otherwise a saturated output produces a division
    # by zero (inf, or NaN for 0/0) that then poisons every gradient. Values
    # already inside the interval are unchanged.
    eps = 1e-15
    AL_safe = np.clip(AL, eps, 1 - eps)
    dAL = - (np.divide(Y, AL_safe) - np.divide(1 - Y, 1 - AL_safe))

    L = len(caches)

    # Output layer: always sigmoid.
    grads["dW"+str(L)], grads["db"+str(L)], grads["dA"+str(L-1)] = linear_act_backward(
        dAL, caches[L-1], act_function="sigmoid")

    # Hidden layers, from L-1 down to 1; layer l consumes dA<l> and emits dA<l-1>.
    for l in reversed(range(1, L)):
        current_cache = caches[l-1]
        dW_temp, db_temp, dA_temp = linear_act_backward(
            grads["dA"+str(l)], current_cache, act_function=act_pref)
        grads["dW"+str(l)] = dW_temp
        grads["db"+str(l)] = db_temp
        grads["dA"+str(l-1)] = dA_temp

    return grads

In [14]:
def update_parameters(parameters,grads,learning_rate):
    """Apply one gradient-descent step to every weight matrix and bias vector.

    The number of layers is ``len(parameters) // 2``. For each layer the
    entries ``"W<l>"`` and ``"b<l>"`` of ``parameters`` are rebound to
    ``old - learning_rate * grads["dW<l>"]`` (resp. ``"db<l>"``). The
    ``parameters`` dict is modified in place and also returned.
    """
    n_layers = len(parameters) // 2

    for layer in range(1, n_layers + 1):
        w_key = "W" + str(layer)
        b_key = "b" + str(layer)
        parameters[w_key] = parameters[w_key] - learning_rate * (grads["d" + w_key])
        parameters[b_key] = parameters[b_key] - learning_rate * (grads["d" + b_key])

    return parameters

### without early stopping

In [21]:
def dnn_model(X,
              Y, 
              layer_dims,
              learning_rate=0.0075,
              iteration=2500,
              print_cost=False,
              act_pref="relu"):
    """Train the network with plain gradient descent.

    Parameters
    ----------
    X, Y : numpy.ndarray
        Training inputs and labels, passed straight to ``linear_model`` and
        ``compute_cost``.
    layer_dims : sequence of int
        Layer sizes handed to ``initialize_parameters``.
    learning_rate : float, default 0.0075
        Step size for ``update_parameters``.
    iteration : int, default 2500
        Number of gradient-descent steps.
    print_cost : bool, default False
        When true, the training cost is printed and recorded every 100
        iterations, and the recorded costs are plotted at the end.
    act_pref : str, default "relu"
        Activation used for the hidden layers.

    Returns
    -------
    dict
        The trained parameters.
    """
    # Imported here because the notebook only imports matplotlib in a later
    # cell; the function must not depend on that cell having been run.
    import matplotlib.pyplot as plt

    costs = []

    parameters = initialize_parameters(layer_dims)

    for i in range(0, iteration):

        AL, caches = linear_model(X, parameters, act_pref)

        cost = compute_cost(AL, Y)

        grads = backward_model(AL, Y, caches, act_pref)

        parameters = update_parameters(parameters, grads, learning_rate)

        if print_cost and i % 100 == 0:
            print ("Cost after iteration %i: %f" %(i, cost))
            costs.append(cost)

    # Only draw the learning curve when something was recorded; otherwise an
    # empty figure would be shown on every silent run.
    if costs:
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('iterations (per hundreds)')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()

    return parameters

### with early stopping

In [None]:
def dnn_model(X,
              Y,
              layer_dims,
              learning_rate=0.0075,
              iteration=2500,
              print_cost=False,
              act_pref="relu",
              early_stopping=False,
              eval_set=None):
    """Train the network, optionally tracking accuracy on a held-out set.

    Parameters
    ----------
    X, Y : numpy.ndarray
        Training inputs and labels.
    layer_dims : sequence of int
        Layer sizes handed to ``initialize_parameters``.
    learning_rate : float, default 0.0075
        Step size for ``update_parameters``.
    iteration : int, default 2500
        Number of gradient-descent steps.
    print_cost : bool, default False
        When true, progress is printed and recorded every 100 iterations and
        the recorded curves are plotted at the end.
    act_pref : str, default "relu"
        Activation used for the hidden layers.
    early_stopping : bool, default False
        When true (and ``print_cost`` is true), accuracy on ``eval_set`` is
        computed at every reporting step. NOTE: despite the name, training is
        never halted early; all ``iteration`` steps always run.
    eval_set : sequence or None
        ``[x_eval, y_eval]``; required when ``early_stopping`` is true.

    Returns
    -------
    dict
        The trained parameters.

    Raises
    ------
    ValueError
        If ``early_stopping`` is true but ``eval_set`` does not provide both
        inputs and labels.
    """
    # Imported here because the notebook only imports matplotlib in a later
    # cell; the function must not depend on that cell having been run.
    import matplotlib.pyplot as plt

    x_eval = y_eval = None
    if early_stopping:
        if eval_set is None or len(eval_set) < 2:
            raise ValueError("early_stopping=True requires eval_set=[x_eval, y_eval]")
        x_eval, y_eval = eval_set[0], eval_set[1]

    costs = []
    accuracy_scores = []

    parameters = initialize_parameters(layer_dims)

    for i in range(0, iteration):

        AL, caches = linear_model(X, parameters, act_pref)

        cost = compute_cost(AL, Y)

        grads = backward_model(AL, Y, caches, act_pref)

        parameters = update_parameters(parameters, grads, learning_rate)

        if print_cost and i % 100 == 0:
            costs.append(cost)

            if early_stopping:
                print ("Train cost after iteration %i: %f" %(i, cost), end="   ")

                # Evaluate on the caller-supplied eval_set (not on notebook
                # globals) and with the same hidden activation the model is
                # being trained with.
                probs, _ = linear_model(x_eval, parameters, act_pref)
                y_pred_eval = (probs >= 0.5).astype(float)
                accuracy, _ = accuracy_score_my(y_pred_eval, y_eval)

                print("Test set accuracy: %f" %(accuracy))
                accuracy_scores.append(accuracy)
            else:
                print ("Train cost after iteration %i: %f" %(i, cost))

    # Each curve is drawn only if it has data, so no empty figures are shown.
    if costs:
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('iterations (per hundreds)')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()

    if accuracy_scores:
        plt.plot(np.squeeze(accuracy_scores))
        plt.ylabel('accuracy score for test set')
        plt.xlabel('iterations (per hundreds)')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()

    return parameters

In [151]:
def predict(X, parameters, act_pref="relu"):
    """Predict binary labels by thresholding the network output at 0.5.

    Parameters
    ----------
    X : numpy.ndarray
        Inputs passed to ``linear_model``.
    parameters : dict
        Trained parameters.
    act_pref : str, default "relu"
        Hidden-layer activation to use in the forward pass. It must match the
        activation the parameters were trained with; the default keeps the
        previous behaviour, which always used ``"relu"``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(1, m)`` holding 1.0 where the first row of the
        network output is ``>= 0.5`` and 0.0 elsewhere.
    """
    probs, _ = linear_model(X, parameters, act_pref)

    # Vectorised threshold on the first output row, replacing the
    # element-by-element Python loop.
    y_pred = (probs[0] >= 0.5).astype(float).reshape(1, -1)

    return y_pred
    

In [158]:
def accuracy_score_my(y_pred,y_true):
    """Fraction and count of positions where ``y_pred`` equals ``y_true``.

    ``y_true.shape[1]`` is used as the number of examples.

    Returns ``(acc, trues)``: the number of matching entries divided by the
    number of examples, and the number of matching entries itself.
    """
    n_examples = y_true.shape[1]
    matches = (y_pred == y_true)
    trues = np.sum(matches)
    acc = trues / n_examples
    return acc, trues

In [199]:
def confusion_matrix(y_pred,y_true):
    """Build a 2x2 confusion matrix for 0/1 predictions as a DataFrame.

    Layout of the returned frame (row and column labels are ``0`` and ``1``)::

        [[true negatives,  false positives],
         [false negatives, true positives ]]

    so rows correspond to the true label and columns to the predicted label.
    """
    agree = (y_pred == y_true)
    differ = (y_pred != y_true)
    predicted_pos = (y_pred == 1)
    predicted_neg = (y_pred == 0)

    counts = [
        np.sum(agree & predicted_neg),    # true negatives
        np.sum(differ & predicted_pos),   # false positives
        np.sum(differ & predicted_neg),   # false negatives
        np.sum(agree & predicted_pos),    # true positives
    ]

    matrix = np.array(counts).reshape(2, 2)
    return pd.DataFrame(matrix, index=[0, 1], columns=[0, 1])

In [1]:
import seaborn as sns
import matplotlib.pyplot as plt
def plot_confusion_matrix(pred_y, true_y, title='Confusion Matrix', figsize=(8,6)):
    """Plot the 2x2 confusion matrix of ``pred_y`` against ``true_y`` as a heatmap.

    Parameters
    ----------
    pred_y, true_y : numpy.ndarray
        Predicted and true 0/1 labels, forwarded to ``confusion_matrix``.
    title : str, default 'Confusion Matrix'
        Figure title.
    figsize : tuple, default (8, 6)
        Figure size passed to ``plt.figure``.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    # confusion_matrix already returns a 2x2 DataFrame labelled 0/1. It is used
    # as-is: re-indexing it by np.unique(true_y) would silently drop rows and
    # columns (hiding misclassifications) whenever true_y lacks one of the
    # classes.
    conf_df = confusion_matrix(pred_y, true_y)
    conf_df.index.name = 'Actual'
    conf_df.columns.name = 'Predicted'
    plt.figure(figsize = figsize)
    plt.title(title)
    # Note: sns.set changes seaborn/matplotlib settings globally.
    sns.set(font_scale=1.4)
    sns.heatmap(conf_df, cmap="Blues", annot=True, 
                annot_kws={"size": 16}, fmt='g')
    plt.show()
    return

In [1]:
# Grid search over learning rates. Relies on the notebook-level variables
# train_x, train_y, layers_dims, test_x and test_y, and on the dnn_model
# definition that accepts early_stopping / eval_set.

def learning_rate_search(learning_rate_grid=[0.0075]):
    """Train one model per learning rate and report the best test accuracy.

    Parameters
    ----------
    learning_rate_grid : sequence of float, default [0.0075]
        Learning rates to try; must not be empty.

    Returns
    -------
    tuple
        ``(best_accuracy, best_learning_rate)``. On ties the first learning
        rate in the grid wins.

    Raises
    ------
    ValueError
        If ``learning_rate_grid`` is empty.
    """
    if len(learning_rate_grid) == 0:
        raise ValueError("learning_rate_grid must contain at least one learning rate")

    acc_list = []
    for lr in learning_rate_grid:
        result = dnn_model(train_x, train_y, layers_dims, learning_rate=lr, iteration=500,
                           print_cost=False, early_stopping=True, eval_set=[test_x, test_y])

        if isinstance(result, tuple):
            # A dnn_model variant that already returns (parameters, max_acc).
            params, acc = result
        else:
            # dnn_model returns only the parameters, so score the trained
            # model on the test set here instead of unpacking a second value.
            params = result
            acc, _ = accuracy_score_my(predict(test_x, params), test_y)

        acc_list.append(acc)

    ix = int(np.argmax(acc_list))

    return acc_list[ix], learning_rate_grid[ix]