In [1]:
import pandas as pd
import numpy as np

from pandas import Series
from numpy.random import randn

from sklearn.preprocessing import LabelEncoder 

import matplotlib.pyplot as plt # for vizualization
from matplotlib.pyplot import figure # for figuresize

In [2]:
train = pd.read_csv(r"C:\Users\daniel\Documents\GitHub\datasets\titanic\train.csv")
test =  pd.read_csv(r"C:\Users\daniel\Documents\GitHub\datasets\titanic\test.csv")

In [3]:
le = LabelEncoder()
train["Sex"] = le.fit_transform(train["Sex"])
train["Embarked"] = le.fit_transform(train["Embarked"].astype(str))
train.drop(['Name', 'Ticket', 'Cabin'], axis=1, inplace=True)

In [4]:
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,1,0,3,1,22.0,1,0,7.25,2
1,2,1,1,0,38.0,1,0,71.2833,0
2,3,1,3,0,26.0,0,0,7.925,2
3,4,1,1,0,35.0,1,0,53.1,2
4,5,0,3,1,35.0,0,0,8.05,2


In [5]:
test["Sex"] = le.fit_transform(test["Sex"])
test["Embarked"] = le.fit_transform(test["Embarked"].astype(str))
test.drop(['Name', 'Ticket', 'Cabin'], axis=1, inplace=True)
test.head()

Unnamed: 0,PassengerId,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,892,3,1,34.5,0,0,7.8292,1
1,893,3,0,47.0,1,0,7.0,2
2,894,2,1,62.0,0,0,9.6875,1
3,895,3,1,27.0,0,0,8.6625,2
4,896,3,0,22.0,1,1,12.2875,2


In [6]:
def prep_data(df):
    # Drop unwanted features
    # df = df.drop(['Name', 'Ticket', 'Cabin'], axis=1)
    
    # Fill missing data: Age and Fare with the mean, Embarked with most frequent value
    df[['Age']] = df[['Age']].fillna(value=df[['Age']].mean())
    df[['Fare']] = df[['Fare']].fillna(value=df[['Fare']].mean())
    df[['Embarked']] = df[['Embarked']].fillna(value=df['Embarked'].value_counts().idxmax())
    
    # Convert categorical  features into numeric
    # df['Sex'] = df['Sex'].map( {'female': 1, 'male': 0} ).astype(int)
      
    # Convert Embarked to one-hot
    enbarked_one_hot = pd.get_dummies(df['Embarked'], prefix='Embarked')
    df = df.drop('Embarked', axis=1)
    df = df.join(enbarked_one_hot)

    return df

In [7]:
train = prep_data(train)
test = prep_data(test)

In [8]:
x_train = train
y_train = train["Survived"].to_frame()
x_train.drop(["Survived"], axis=1, inplace=True)
x_test = test
y_test = pd.DataFrame(0, index=np.arange(len(x_test)), columns=["Survived"])
#y_train.reshape(-1,1)

In [9]:
x_train = x_train.T
x_test = x_test.T
y_train = y_train.T
y_test = y_test.T

In [10]:
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(11, 891)
(1, 891)
(8, 418)
(1, 418)


In [11]:
def initialize_parameters_and_layer_sizes_NN(x_train, y_train):
    
    parameters = {"W1": np.random.randn(2,x_train.shape[0]) * 0.1,
                  "b1": np.zeros((2,1)),
                  "W2": np.random.randn(2,2) * 0.1,
                  "b2": np.zeros((2,1)),
                  "W3": np.random.randn(1,2) * 0.1,
                  "b3": np.zeros((1,1))}
    
    return parameters

parameters = initialize_parameters_and_layer_sizes_NN(x_train, y_train)

In [12]:
print(parameters["W1"].shape)
print(parameters["W2"].shape)
print(parameters["W3"].shape)

(2, 11)
(2, 2)
(1, 2)


In [13]:
print(parameters["b1"].shape)
print(parameters["b2"].shape)
print(parameters["b3"].shape)

(2, 1)
(2, 1)
(1, 1)


In [14]:
def sigmoid(z):
    return 1/(1 + np.exp(-z))

In [15]:
def forward_propagation_NN(x_train, parameters):
    
    Z1 = np.dot(parameters["W1"],x_train) + parameters["b1"]
    A1 = np.tanh(Z1) # tanh is used as activation function 1
    Z2 = np.dot(parameters["W2"],A1) + parameters["b2"]
    A2 = np.tanh(Z2) # tanh is used as activation function 2
    Z3 = np.dot(parameters["W3"],A2) + parameters["b3"]
    A3 = sigmoid(Z3)

    cache = {"Z1": Z1,
             "A1": A1,
             "W1": parameters["W1"],
             "Z2": Z2,
             "A2": A2,
             "W2": parameters["W2"],
             "Z3": Z3,
             "A3": A3,
             "W3": parameters["W3"]}
    
    return A3, cache

A3, cache = forward_propagation_NN(x_train, parameters)

In [16]:
parameters

{'W1': array([[ 0.02443655,  0.18282576, -0.11199662,  0.04980434, -0.09905   ,
          0.15798178,  0.0382615 ,  0.04107357,  0.04164038, -0.00996996,
          0.06411254],
        [ 0.03375048, -0.05877678,  0.1074968 , -0.1951223 , -0.05563584,
          0.11705543,  0.02206659,  0.04186943, -0.15491309,  0.00262248,
         -0.05495038]]), 'b1': array([[0.],
        [0.]]), 'W2': array([[-0.01268856, -0.11962667],
        [ 0.0575027 ,  0.05200076]]), 'b2': array([[0.],
        [0.]]), 'W3': array([[0.26442579, 0.10014938]]), 'b3': array([[0.]])}

In [17]:
A3

array([[0.50713936, 0.50717954, 0.50716459, 0.50717911, 0.50717301,
        0.50717013, 0.50717967, 0.49904441, 0.50717445, 0.50686244,
        0.50156461, 0.50717954, 0.50714382, 0.50717953, 0.50700297,
        0.5071794 , 0.49753856, 0.50717308, 0.50717707, 0.50717504,
        0.50717841, 0.50717637, 0.5070366 , 0.5071721 , 0.50351021,
        0.50717945, 0.50717502, 0.49405817, 0.50717653, 0.5071752 ,
        0.50717918, 0.5066219 , 0.50717678, 0.50717964, 0.50708026,
        0.50717956, 0.50717553, 0.50711617, 0.50697701, 0.50630942,
        0.50717908, 0.50716788, 0.50717516, 0.49430671, 0.50705789,
        0.50717483, 0.50717556, 0.50717647, 0.5071726 , 0.50672383,
        0.49542923, 0.50702991, 0.50717967, 0.50716576, 0.50717973,
        0.50715148, 0.50696808, 0.50716622, 0.49445949, 0.49621245,
        0.50698612, 0.50717129, 0.50717888, 0.49426829, 0.50714006,
        0.5071577 , 0.50716076, 0.50627293, 0.50555098, 0.50711981,
        0.50717002, 0.49977615, 0.50072707, 0.50

In [18]:
A3.shape

(1, 891)

In [19]:
np.nan_to_num(A3)

array([[0.50713936, 0.50717954, 0.50716459, 0.50717911, 0.50717301,
        0.50717013, 0.50717967, 0.49904441, 0.50717445, 0.50686244,
        0.50156461, 0.50717954, 0.50714382, 0.50717953, 0.50700297,
        0.5071794 , 0.49753856, 0.50717308, 0.50717707, 0.50717504,
        0.50717841, 0.50717637, 0.5070366 , 0.5071721 , 0.50351021,
        0.50717945, 0.50717502, 0.49405817, 0.50717653, 0.5071752 ,
        0.50717918, 0.5066219 , 0.50717678, 0.50717964, 0.50708026,
        0.50717956, 0.50717553, 0.50711617, 0.50697701, 0.50630942,
        0.50717908, 0.50716788, 0.50717516, 0.49430671, 0.50705789,
        0.50717483, 0.50717556, 0.50717647, 0.5071726 , 0.50672383,
        0.49542923, 0.50702991, 0.50717967, 0.50716576, 0.50717973,
        0.50715148, 0.50696808, 0.50716622, 0.49445949, 0.49621245,
        0.50698612, 0.50717129, 0.50717888, 0.49426829, 0.50714006,
        0.5071577 , 0.50716076, 0.50627293, 0.50555098, 0.50711981,
        0.50717002, 0.49977615, 0.50072707, 0.50

In [20]:
def compute_cost_NN(A3, Y, parameters):
    
    logprobs = np.multiply(np.log(A3),Y)
    cost = -np.sum(logprobs)/Y.shape[1]
    
    return cost

cost = compute_cost_NN(A3, y_train, parameters)

In [21]:
np.nan_to_num(A3)

array([[0.50713936, 0.50717954, 0.50716459, 0.50717911, 0.50717301,
        0.50717013, 0.50717967, 0.49904441, 0.50717445, 0.50686244,
        0.50156461, 0.50717954, 0.50714382, 0.50717953, 0.50700297,
        0.5071794 , 0.49753856, 0.50717308, 0.50717707, 0.50717504,
        0.50717841, 0.50717637, 0.5070366 , 0.5071721 , 0.50351021,
        0.50717945, 0.50717502, 0.49405817, 0.50717653, 0.5071752 ,
        0.50717918, 0.5066219 , 0.50717678, 0.50717964, 0.50708026,
        0.50717956, 0.50717553, 0.50711617, 0.50697701, 0.50630942,
        0.50717908, 0.50716788, 0.50717516, 0.49430671, 0.50705789,
        0.50717483, 0.50717556, 0.50717647, 0.5071726 , 0.50672383,
        0.49542923, 0.50702991, 0.50717967, 0.50716576, 0.50717973,
        0.50715148, 0.50696808, 0.50716622, 0.49445949, 0.49621245,
        0.50698612, 0.50717129, 0.50717888, 0.49426829, 0.50714006,
        0.5071577 , 0.50716076, 0.50627293, 0.50555098, 0.50711981,
        0.50717002, 0.49977615, 0.50072707, 0.50

In [22]:
def backward_propagation_NN(parameters, cache, X, Y):
    
    dimension = X.shape[0] # it is 20 for our case
    print("dimension "+str(dimension))
    dZ3 = cache["A3"] - Y # d(cost)/d(Z3)
    dW3 = 1/dimension * np.dot(dZ3,cache["A2"].T) # d(cost)/d(W3)
    db3 = 1/dimension * np.sum(dZ3, axis=1) # d(cost)/d(b3)
    dZ2 = np.multiply(np.dot(dZ3.T, cache["W3"]).T , 1-np.power(cache["A2"],2)) # d(cost)/d(Z2)
    dW2 = 1/dimension * np.dot(cache["A1"], dZ2.T) # d(cost)/d(W2)
    db2 = 1/dimension * np.sum(dZ2, axis=1) # d(cost)/d(b2)
    dZ1 = np.multiply(np.dot(dZ2.T, cache["W2"].T).T,1-np.power(cache["A1"],2)) # d(cost)/d(Z1)
    dW1 = 1/dimension * np.dot(dZ1, X.T) # d(cost)/d(W1)
    db1 = 1/dimension * np.sum(dZ1,axis=1) # d(cost)/d(b1)
    grads = {'dW3':dW3, 
             'db3':db3,
             'dW2':dW2,
             'db2':db2,
             'dW1':dW1,
             'db1':db1}
    
    return grads

grads = backward_propagation_NN(parameters, cache, x_train, y_train)

dimension 11


In [23]:
Learning_Rate = 0.001

In [24]:
parameters["b3"]

array([[0.]])

In [25]:
grads["db3"]

Survived    9.104745
dtype: float64

In [26]:
grads

{'dW3': array([[-0.63743956,  0.74882357]]), 'db3': Survived    9.104745
 dtype: float64, 'dW2': array([[2.37027241, 0.90301299],
        [1.14017181, 0.43291377]]), 'db2': array([2.3729473 , 0.90403043]), 'dW1': array([[-4.06727853e-03, -9.55942547e-04, -4.32039862e-04,
         -3.42771670e-03, -8.18696534e-04, -1.34058659e-04,
         -4.86895361e-03,  3.31632041e-05, -3.30258691e-05,
         -2.98965620e-04,  4.02697865e-09],
        [ 2.81141398e+00,  5.27177261e-02,  2.41742161e-02,
          5.01017682e-01,  2.49088552e-02,  8.87601828e-03,
          3.36465492e-01,  3.67826378e-04,  8.44406985e-04,
          1.71495084e-02, -2.34885906e-06]]), 'db1': array([-0.00029882,  0.01835939])}

In [27]:
parameters["b3"]-Learning_Rate*grads["db3"]

Exception: Data must be 1-dimensional

In [28]:
def update_parameters_NN(parameters, grads, learning_rate = Learning_Rate):
    parameters = {"W1": parameters["W1"]-learning_rate*grads["dW1"],
                  "b1": parameters["b1"]-learning_rate*grads["db1"],
                  "W2": parameters["W2"]-learning_rate*grads["dW2"],
                  "b2": parameters["b2"]-learning_rate*grads["db2"],
                  "W3": parameters["W3"]-learning_rate*grads["dW3"],
                  "b3": parameters["b3"][0]-learning_rate*grads["db3"]}
    
    return parameters

parameters = update_parameters_NN(parameters, grads, learning_rate = Learning_Rate)

In [29]:
def predict_NN(parameters,x_test):
    # x_test is the input for forward propagation
    A3, cache = forward_propagation_NN(x_test,parameters)
    Y_prediction = np.zeros((1,x_test.shape[1]))

    for i in range(A3.shape[1]):
        if A3[0,i]<= 0.5: # if smaller than 0.5, predict it as 0
            Y_prediction[0,i] = 0
        else: # if greater than 0.5, predict it as 1
            Y_prediction[0,i] = 1

    return Y_prediction

Y_prediction = predict_NN(parameters,x_test)

ValueError: shapes (2,11) and (8,418) not aligned: 11 (dim 1) != 8 (dim 0)

In [30]:
def three_layer_neural_network(x_train, y_train,x_test,y_test, num_iterations):
    cost_list = []
    index_list = []
    
    #initialize parameters and layer sizes
    parameters = initialize_parameters_and_layer_sizes_NN(x_train, y_train)

    for i in range(0, num_iterations):
        # forward propagation
        A3, cache = forward_propagation_NN(x_train,parameters)
        # compute cost
        cost = compute_cost_NN(A3, y_train, parameters)
        # backward propagation
        grads = backward_propagation_NN(parameters, cache, x_train, y_train)
        # update parameters
        parameters = update_parameters_NN(parameters, grads)
        
        if i % 100 == 0: # to visualize data in each 100 iteration
            cost_list.append(cost)
            index_list.append(i)

    figure(num=None, figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k')
    plt.plot(index_list,cost_list)
    plt.xticks(index_list,rotation='vertical')
    plt.xlabel("Number of Iterarions", fontsize = 14)
    plt.ylabel("Cost", fontsize = 14)
    plt.show()
    
    # predict
    y_prediction_test = predict_NN(parameters,x_test)
    y_prediction_train = predict_NN(parameters,x_train)

    # Print train/test Accuracies
    print("train accuracy: %{}".format(round(100 - np.mean(np.abs(y_prediction_train - y_train)) * 100,3)))
    print("test accuracy: %{}".format(round(100 - np.mean(np.abs(y_prediction_test - y_test)) * 100,3)))
    return parameters

parameters = three_layer_neural_network(x_train, y_train,x_test,y_test, num_iterations=5000)


dimension 11


ValueError: operands could not be broadcast together with shapes (2,891) (2,2) 