In [1]:
import pandas as pd
import numpy as np

from pandas import Series
from numpy.random import randn

from sklearn.preprocessing import LabelEncoder 

import matplotlib.pyplot as plt # for vizualization
from matplotlib.pyplot import figure # for figuresize

In [2]:
train = pd.read_csv(r"C:\Users\daniel\Documents\GitHub\datasets\titanic\train.csv")
test =  pd.read_csv(r"C:\Users\daniel\Documents\GitHub\datasets\titanic\test.csv")

In [3]:
le = LabelEncoder()
train["Sex"] = le.fit_transform(train["Sex"])
train["Embarked"] = le.fit_transform(train["Embarked"].astype(str))
train.drop(['Name', 'Ticket', 'Cabin'], axis=1, inplace=True)

In [4]:
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,1,0,3,1,22.0,1,0,7.25,2
1,2,1,1,0,38.0,1,0,71.2833,0
2,3,1,3,0,26.0,0,0,7.925,2
3,4,1,1,0,35.0,1,0,53.1,2
4,5,0,3,1,35.0,0,0,8.05,2


In [5]:
test["Sex"] = le.fit_transform(test["Sex"])
test["Embarked"] = le.fit_transform(test["Embarked"].astype(str))
test.drop(['Name', 'Ticket', 'Cabin'], axis=1, inplace=True)
test.head()

Unnamed: 0,PassengerId,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,892,3,1,34.5,0,0,7.8292,1
1,893,3,0,47.0,1,0,7.0,2
2,894,2,1,62.0,0,0,9.6875,1
3,895,3,1,27.0,0,0,8.6625,2
4,896,3,0,22.0,1,1,12.2875,2


In [6]:
x_train = train
y_train = train["Survived"].to_frame()
x_train.drop(["Survived"], axis=1, inplace=True)
x_test = test
y_test = pd.DataFrame(0, index=np.arange(len(x_test)), columns=["Survived"])
#y_train.reshape(-1,1)

In [7]:
x_train = x_train.T
x_test = x_test.T
y_train = y_train.T
y_test = y_test.T

In [8]:
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(8, 891)
(1, 891)
(8, 418)
(1, 418)


In [9]:
def initialize_parameters_and_layer_sizes_NN(x_train, y_train):
    
    parameters = {"W1": np.random.randn(2,x_train.shape[0]) * 0.1,
                  "b1": np.zeros((2,1)),
                  "W2": np.random.randn(2,2) * 0.1,
                  "b2": np.zeros((2,1)),
                  "W3": np.random.randn(1,2) * 0.1,
                  "b3": np.zeros((1,1))}
    
    return parameters

parameters = initialize_parameters_and_layer_sizes_NN(x_train, y_train)

In [10]:
print(parameters["W1"].shape)
print(parameters["W2"].shape)
print(parameters["W3"].shape)

(2, 8)
(2, 2)
(1, 2)


In [11]:
print(parameters["b1"].shape)
print(parameters["b2"].shape)
print(parameters["b3"].shape)

(2, 1)
(2, 1)
(1, 1)


In [12]:
def sigmoid(z):
    return 1/(1 + np.exp(-z))

In [13]:
def forward_propagation_NN(x_train, parameters):
    
    Z1 = np.dot(parameters["W1"],x_train) + parameters["b1"]
    A1 = np.tanh(Z1) # tanh is used as activation function 1
    Z2 = np.dot(parameters["W2"],A1) + parameters["b2"]
    A2 = np.tanh(Z2) # tanh is used as activation function 2
    Z3 = np.dot(parameters["W3"],A2) + parameters["b3"]
    A3 = sigmoid(Z3)

    cache = {"Z1": Z1,
             "A1": A1,
             "W1": parameters["W1"],
             "Z2": Z2,
             "A2": A2,
             "W2": parameters["W2"],
             "Z3": Z3,
             "A3": A3,
             "W3": parameters["W3"]}
    
    return A3, cache

A3, cache = forward_propagation_NN(x_train, parameters)

In [24]:
parameters

{'W1': array([[-0.11911589, -0.07313619,  0.04058722, -0.02618509, -0.14228985,
         -0.04627016, -0.06243265, -0.09654437],
        [-0.08154831,  0.18408929, -0.00699826,  0.05467432, -0.07296852,
         -0.16641984,  0.05392652, -0.21948938]]), 'b1': array([[0.],
        [0.]]), 'W2': array([[ 0.01192398, -0.05695352],
        [ 0.13829789,  0.08644977]]), 'b2': array([[0.],
        [0.]]), 'W3': array([[-0.27228641,  0.02117837]]), 'b3': array([[0.]])}

In [22]:
A3

array([[0.50402612, 0.50440675, 0.50413425, 0.50440455, 0.50426378,
               nan, 0.50440635, 0.50140076, 0.50358031, 0.50420521,
        0.50049823, 0.5043966 , 0.50232463, 0.50420982, 0.50042489,
        0.50434839, 0.50077504,        nan, 0.50362928,        nan,
        0.50401679, 0.50266513, 0.49886502, 0.50372253, 0.49737335,
        0.50309117,        nan, 0.50440682,        nan,        nan,
        0.50384345,        nan,        nan, 0.50383639, 0.50439284,
        0.50419026,        nan, 0.49625084, 0.49641018, 0.49625021,
        0.49774664, 0.49686816,        nan, 0.49645785, 0.49596516,
               nan,        nan,        nan,        nan, 0.49589557,
        0.49592516, 0.49581722, 0.50436002, 0.49613099, 0.50433934,
               nan, 0.49578772, 0.49586378, 0.49577419, 0.49584365,
        0.49579612, 0.50309822, 0.50398914, 0.49576988,        nan,
               nan, 0.49577654, 0.49576976, 0.49576723, 0.49577045,
        0.4957737 , 0.49578582, 0.49694959, 0.49

In [23]:
A3.shape

(1, 891)

In [26]:
np.nan_to_num(A3)

array([[0.50402612, 0.50440675, 0.50413425, 0.50440455, 0.50426378,
        0.        , 0.50440635, 0.50140076, 0.50358031, 0.50420521,
        0.50049823, 0.5043966 , 0.50232463, 0.50420982, 0.50042489,
        0.50434839, 0.50077504, 0.        , 0.50362928, 0.        ,
        0.50401679, 0.50266513, 0.49886502, 0.50372253, 0.49737335,
        0.50309117, 0.        , 0.50440682, 0.        , 0.        ,
        0.50384345, 0.        , 0.        , 0.50383639, 0.50439284,
        0.50419026, 0.        , 0.49625084, 0.49641018, 0.49625021,
        0.49774664, 0.49686816, 0.        , 0.49645785, 0.49596516,
        0.        , 0.        , 0.        , 0.        , 0.49589557,
        0.49592516, 0.49581722, 0.50436002, 0.49613099, 0.50433934,
        0.        , 0.49578772, 0.49586378, 0.49577419, 0.49584365,
        0.49579612, 0.50309822, 0.50398914, 0.49576988, 0.        ,
        0.        , 0.49577654, 0.49576976, 0.49576723, 0.49577045,
        0.4957737 , 0.49578582, 0.49694959, 0.49

In [14]:
def compute_cost_NN(A3, Y, parameters):
    
    logprobs = np.multiply(np.log(A3),Y)
    cost = -np.sum(logprobs)/Y.shape[1]
    
    return cost

cost = compute_cost_NN(A3, y_train, parameters)

In [28]:
np.nan_to_num(A3)

array([[0.50402612, 0.50440675, 0.50413425, 0.50440455, 0.50426378,
        0.        , 0.50440635, 0.50140076, 0.50358031, 0.50420521,
        0.50049823, 0.5043966 , 0.50232463, 0.50420982, 0.50042489,
        0.50434839, 0.50077504, 0.        , 0.50362928, 0.        ,
        0.50401679, 0.50266513, 0.49886502, 0.50372253, 0.49737335,
        0.50309117, 0.        , 0.50440682, 0.        , 0.        ,
        0.50384345, 0.        , 0.        , 0.50383639, 0.50439284,
        0.50419026, 0.        , 0.49625084, 0.49641018, 0.49625021,
        0.49774664, 0.49686816, 0.        , 0.49645785, 0.49596516,
        0.        , 0.        , 0.        , 0.        , 0.49589557,
        0.49592516, 0.49581722, 0.50436002, 0.49613099, 0.50433934,
        0.        , 0.49578772, 0.49586378, 0.49577419, 0.49584365,
        0.49579612, 0.50309822, 0.50398914, 0.49576988, 0.        ,
        0.        , 0.49577654, 0.49576976, 0.49576723, 0.49577045,
        0.4957737 , 0.49578582, 0.49694959, 0.49

In [29]:
def backward_propagation_NN(parameters, cache, X, Y):
    
    dimension = X.shape[0] # it is 20 for our case
    print("dimension "+str(dimension))
    dZ3 = cache["A3"] - Y # d(cost)/d(Z3)
    dW3 = 1/dimension * np.dot(dZ3,cache["A2"].T) # d(cost)/d(W3)
    db3 = 1/dimension * np.sum(dZ3, axis=1) # d(cost)/d(b3)
    dZ2 = np.multiply(np.dot(dZ3.T, cache["W3"]).T , 1-np.power(cache["A2"],2)) # d(cost)/d(Z2)
    dW2 = 1/dimension * np.dot(cache["A1"], dZ2.T) # d(cost)/d(W2)
    db2 = 1/dimension * np.sum(dZ2, axis=1) # d(cost)/d(b2)
    dZ1 = np.multiply(np.dot(dZ2.T, cache["W2"].T).T,1-np.power(cache["A1"],2)) # d(cost)/d(Z1)
    dW1 = 1/dimension * np.dot(dZ1, X.T) # d(cost)/d(W1)
    db1 = 1/dimension * np.sum(dZ1,axis=1) # d(cost)/d(b1)
    grads = {'dW3':dW3, 
             'db3':db3,
             'dW2':dW2,
             'db2':db2,
             'dW1':dW1,
             'db1':db1}
    
    return grads

grads = backward_propagation_NN(parameters, cache, x_train, y_train)

dimension 8


In [30]:
Learning_Rate = 0.001

In [31]:
parameters["b3"]

array([[0.]])

In [32]:
grads["db3"]

Survived    8.031149
dtype: float64

In [33]:
grads

{'dW3': array([[nan, nan]]), 'db3': Survived    8.031149
 dtype: float64, 'dW2': array([[nan, nan],
        [nan, nan]]), 'db2': array([nan, nan]), 'dW1': array([[nan, nan, nan, nan, nan, nan, nan, nan],
        [nan, nan, nan, nan, nan, nan, nan, nan]]), 'db1': array([nan, nan])}

In [34]:
parameters["b3"]-Learning_Rate*grads["db3"]

Exception: Data must be 1-dimensional

In [None]:
def update_parameters_NN(parameters, grads, learning_rate = Learning_Rate):
    parameters = {"W1": parameters["W1"]-learning_rate*grads["dW1"],
                  "b1": parameters["b1"]-learning_rate*grads["db1"],
                  "W2": parameters["W2"]-learning_rate*grads["dW2"],
                  "b2": parameters["b2"]-learning_rate*grads["db2"],
                  "W3": parameters["W3"]-learning_rate*grads["dW3"],
                  "b3": parameters["b3"][0]-learning_rate*grads["db3"]}
    
    return parameters

parameters = update_parameters_NN(parameters, grads, learning_rate = Learning_Rate)

In [None]:
def predict_NN(parameters,x_test):
    # x_test is the input for forward propagation
    A3, cache = forward_propagation_NN(x_test,parameters)
    Y_prediction = np.zeros((1,x_test.shape[1]))

    for i in range(A3.shape[1]):
        if A3[0,i]<= 0.5: # if smaller than 0.5, predict it as 0
            Y_prediction[0,i] = 0
        else: # if greater than 0.5, predict it as 1
            Y_prediction[0,i] = 1

    return Y_prediction

Y_prediction = predict_NN(parameters,x_test)

In [None]:
def three_layer_neural_network(x_train, y_train,x_test,y_test, num_iterations):
    cost_list = []
    index_list = []
    
    #initialize parameters and layer sizes
    parameters = initialize_parameters_and_layer_sizes_NN(x_train, y_train)

    for i in range(0, num_iterations):
        # forward propagation
        A3, cache = forward_propagation_NN(x_train,parameters)
        # compute cost
        cost = compute_cost_NN(A3, y_train, parameters)
        # backward propagation
        grads = backward_propagation_NN(parameters, cache, x_train, y_train)
        # update parameters
        parameters = update_parameters_NN(parameters, grads)
        
        if i % 100 == 0: # to visualize data in each 100 iteration
            cost_list.append(cost)
            index_list.append(i)

    figure(num=None, figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k')
    plt.plot(index_list,cost_list)
    plt.xticks(index_list,rotation='vertical')
    plt.xlabel("Number of Iterarions", fontsize = 14)
    plt.ylabel("Cost", fontsize = 14)
    plt.show()
    
    # predict
    y_prediction_test = predict_NN(parameters,x_test)
    y_prediction_train = predict_NN(parameters,x_train)

    # Print train/test Accuracies
    print("train accuracy: %{}".format(round(100 - np.mean(np.abs(y_prediction_train - y_train)) * 100,3)))
    print("test accuracy: %{}".format(round(100 - np.mean(np.abs(y_prediction_test - y_test)) * 100,3)))
    return parameters

parameters = three_layer_neural_network(x_train, y_train,x_test,y_test, num_iterations=5000)
