In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import copy

In [3]:
def init_parameters(layer_dims):
    np.random.seed(3)
    parameters = {}
    L = len(layer_dims)
    
    for l in range(1,L):
        #parameters['W' + str(l)] = np.random.randn(layer_dims[l],layer_dims[l-1])*0.01
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) / np.sqrt(layer_dims[l-1]) #*0.01
        parameters['b' + str(l)] = np.zeros((layer_dims[l],1))
    return parameters

In [4]:
def sigmoid_func(Z):
    A = 1/(1+np.exp(-Z))
    cache = Z
    return A, cache

def relu_func(Z):
    A = np.maximum(0,Z)
    cache = Z 
    return A, cache

In [5]:
def relu_backward_func(dA, cache):
    Z = cache
    dZ = np.array(dA, copy=True) 
    dZ[Z <= 0] = 0
    return dZ

def sigmoid_backward_func(dA, cache):
    Z = cache
    s = 1/(1+np.exp(-Z))
    dZ = dA * s * (1-s)
    return dZ

In [6]:
def forward_prog(X, parameters):
    caches = []
    A = X
    L = len(parameters) // 2 

    for l in range(1,L):
        A_prev = A
        A, linear_cache, activation_cache = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)], act_func='relu')
        cache = (linear_cache, activation_cache)
        caches.append(cache)
        
    AL, linear_cache, activation_cache = linear_activation_forward(A, parameters['W' + str(L)], parameters['b' + str(L)], act_func='sigmoid')
    cache = (linear_cache, activation_cache)
    caches.append(cache)

    return AL, caches

In [7]:
def linear_forward(A, W, b):
    Z = np.dot(W,A) + b
    cache = (A, W, b)
    return Z, cache

def linear_activation_forward(A_prev, W, b, act_func):
    Z, linear_cache = linear_forward(A_prev,W,b)
    if act_func == 'sigmoid':
        A, activation_cache = sigmoid_func(Z)

    elif act_func == 'relu':
        A, activation_cache = relu_func(Z)
        
    return A, linear_cache, activation_cache

def forward_prog_D(X, parameters, drop_prob = 0.5):
    caches=[]
    A = X
    L = len(parameters)//2
    np.random.seed(1)
    for l in range(1,L):
        A_prev = A
        A, linear_cache, activation_cache = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)], act_func='relu')
        D = np.random.rand(A.shape[0], A.shape[1])
        D = D < drop_prob
        A = np.multiply(D,A)
        A = A/drop_prob
        cache = (linear_cache, activation_cache,D)
        caches.append(cache)
        
    AL, linear_cache, activation_cache = linear_activation_forward(A, parameters['W' + str(L)], parameters['b' + str(L)] , act_func='sigmoid')
    cache = (linear_cache, activation_cache,D)
    caches.append(cache)

    return AL, caches

In [8]:
def compute_cost(AL, Y):
    epsilon = 1e-15
    m = Y.shape[1]
    cost = np.dot((Y*np.log(AL+epsilon) + (1-Y)*np.log(1-AL+epsilon)),np.ones((m,1)))*(-1/m)
    cost = np.squeeze(cost)
    """m = Y.shape[1]
    
    logprobs = np.multiply(-np.log(AL+ epsilon),Y) + np.multiply(-np.log(1 - AL + epsilon), 1 - Y)
    cost = 1./m * np.nansum(logprobs)"""
    return cost

def compute_with_reg(AL, Y, parameters, lambd):
    m = Y.shape[1]
    L = len(parameters) // 2
    tmp_sum = 0
    for l in range(1,L+1):
        tmp_sum = np.sum(np.square(parameters['W'+str(l)])) + tmp_sum
    L2_cost = lambd*tmp_sum/(2*m)
    cost = compute_cost(AL, Y)
    total_cost = cost + L2_cost
    return total_cost

In [9]:
def linear_backward(dZ, cache, lambd=0):
    A_prev, W, b = cache
    m = A_prev.shape[1]
    dW = np.dot(dZ, A_prev.T)/m + lambd*W/m
    db = np.sum(dZ, axis = 1, keepdims = True)/m
    dA_prev = np.dot(W.T,dZ)
    
    return dA_prev, dW, db 

def linear_activation_backward(dA, cache, act_func,lambd=0):
    linear_cache, activation_cache = cache
    if act_func == 'relu':
        dZ = relu_backward_func(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache,lambd=lambd)
    elif act_func == 'sigmoid':
        dZ = sigmoid_backward_func(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache,lambd=lambd)
    return dA_prev, dW, db


def back_prog(AL, Y, caches,lambd):
    grads = {}
    L = len(caches)
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)
    epsilon = 1e-15
    dAL = -(np.divide(Y, AL + epsilon) -np.divide(1 - Y, 1-AL + epsilon))

    linear_cache, activation_cache = caches[L-1]
    current_cache = (linear_cache, activation_cache)
    
    dA_prev_temp, dW_temp, db_temp = linear_activation_backward(dAL, current_cache, act_func='sigmoid',lambd=lambd)
    grads['dA' + str(L-1)] = dA_prev_temp
    grads['dW' + str(L)] = dW_temp
    grads['db' + str(L)] = db_temp

    for l in reversed(range(L-1)):
        linear_cache, activation_cache = caches[l]
        current_cache = (linear_cache, activation_cache)
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(dA_prev_temp, current_cache, act_func='relu',lambd=lambd)
        grads['dA' + str(l)] = dA_prev_temp
        grads['dW' + str(l + 1)] = dW_temp
        grads['db' + str(l + 1)] = db_temp
        
        
    return grads

In [10]:
def back_prog_D(AL, Y, caches, drop_prob):
    grads = {}
    L = len(caches)
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)
    epsilon = 1e-15
    dAL = -(np.divide(Y, AL + epsilon) -np.divide(1 - Y, 1-AL + epsilon))

    linear_cache, activation_cache, D = caches[L-1]
    
    current_cache = (linear_cache, activation_cache)
    dA_prev_temp, dW_temp, db_temp = linear_activation_backward(dAL, current_cache, act_func='sigmoid')
    grads['dW' + str(L)] = dW_temp
    grads['db' + str(L)] = db_temp
    
    
    for l in reversed(range(L-1)):
        linear_cache, activation_cache, D = caches[l]
        dA_prev_temp = np.multiply(D, dA_prev_temp)
        dA_prev_temp = dA_prev_temp/drop_prob
        grads['dA' + str(l+1)] = dA_prev_temp
        current_cache = (linear_cache, activation_cache)
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(dA_prev_temp, current_cache, act_func='relu')
        grads['dW' + str(l + 1)] = dW_temp
        grads['db' + str(l + 1)] = db_temp
        
    return grads

In [11]:
def update_parameters(params, grads, learning_rate):
    parameters = copy.deepcopy(params)
    L = len(parameters) // 2
    for l in range(L):
        parameters['W' + str(l+1)] = parameters['W' + str(l+1)] - grads['dW' + str(l+1)]*learning_rate
        parameters['b' + str(l+1)] = parameters['b' + str(l+1)] - grads['db' + str(l+1)]*learning_rate
    
    return parameters

In [12]:
def deep_NN(X,Y, layers_dims, learning_rate = 0.3, iter_no = 30000, pr_cost = False, drop_prob=1, lambd=0):
    np.random.seed(1)
    costs=[]
    parameters = init_parameters(layers_dims)
    for i in range(0, iter_no):
        if drop_prob == 1:
            AL, caches = forward_prog(X, parameters)
        else:
            AL, caches = forward_prog_D(X, parameters, drop_prob)

        if lambd == 0:
            cost = compute_cost(AL,Y)
        else:
            cost = compute_with_reg(AL, Y, parameters,lambd)

        if drop_prob == 1:
            grads = back_prog(AL, Y, caches,lambd)
        else:
            grads = back_prog_D(AL, Y, caches,drop_prob)
            
        parameters = update_parameters(parameters, grads, learning_rate = learning_rate)
        
        if pr_cost and i%1000 == 0 or i == iter_no-1:
            print("{} - Cost = {}".format(i,np.squeeze(cost)))
        if i%1000 == 0 or i == iter_no:
            costs.append(cost)
    return parameters, costs

In [13]:
def cross_validation(train_set,layers ,hyperparameters, n=5 ,percent = 0.3,random=42,pr=False):
    train_acc = 0
    valid_acc = 0
    
    for i in range(1,n+1):
        valid = df.sample(n = int(len(train_set)*percent),random_state=i*random)
        train = df.drop(valid.index)
        X_train, y_train = prepare_data(train)
        X_valid, y_valid = prepare_data(valid)
        layers_dims=[X_train.shape[0]]+layers
        parameters, cache = deep_NN(X_train, y_train, layers_dims,learning_rate=hyperparameters[0],iter_no=hyperparameters[1],
                                    pr_cost=pr,drop_prob=hyperparameters[2],lambd=hyperparameters[3])
        prediction, acc = predict(X_train, y_train, parameters)
        train_acc = train_acc + acc
        prediction, acc = predict(X_valid, y_valid, parameters)
        valid_acc = valid_acc + acc
        print("\n")
        
    mean_train = train_acc/n
    mean_valid = valid_acc/n
    print("Mean train acc: ", mean_train)
    print("Mean valid acc: ", mean_valid)
    return mean_train, mean_valid

In [14]:
def predict(X, y, parameters):
    m = X.shape[1]
    n = len(parameters) // 2 
    p = np.zeros((1,m))
    probas, caches = forward_prog(X, parameters)
    preds = (probas > 0.5).astype(int)
    accuracy = np.mean((preds == y).astype(int)).astype(float)
    print("Accuracy:",accuracy)
    
    return preds, accuracy

In [15]:
def prepare_data(df):
    X = df.drop(columns = 'Survived').values
    y = df['Survived'].values.reshape((-1,1))
    maxX = np.max(X,axis=0)
    X=X/maxX
    return X.T,y.T
def prepare_test_data(df):
    X = df.values
    maxX = np.max(X,axis=0)
    X=X/maxX
    return X.T

In [42]:
df = pd.read_csv('Data/train.csv')
df_test = pd.read_csv('Data/test.csv')

In [43]:
df.drop(columns='Cabin', inplace= True)
df_test.drop(columns='Cabin', inplace= True)

df.drop(columns='Name', inplace= True)
df_test.drop(columns='Name', inplace= True)

df.drop(columns='PassengerId', inplace= True)
df_test.drop(columns='PassengerId', inplace= True)

df.drop(columns='Ticket', inplace= True)
df_test.drop(columns='Ticket', inplace= True)

In [44]:
df.loc[df['Pclass'] == 1, 'Age'] = df.loc[df['Pclass'] == 1, 'Age'].fillna(df[df['Pclass'] == 1]['Age'].mean())
df.loc[df['Pclass'] == 2, 'Age'] = df.loc[df['Pclass'] == 2, 'Age'].fillna(df[df['Pclass'] == 2]['Age'].mean())
df.loc[df['Pclass'] == 3, 'Age'] = df.loc[df['Pclass'] == 3, 'Age'].fillna(df[df['Pclass'] == 3]['Age'].mean())
df['Embarked'] = df['Embarked'].fillna('S')

df_test.loc[df_test['Pclass'] == 1, 'Age'] = df_test.loc[df_test['Pclass'] == 1, 'Age'].fillna(df_test[df_test['Pclass'] == 1]['Age'].mean())
df_test.loc[df_test['Pclass'] == 2, 'Age'] = df_test.loc[df_test['Pclass'] == 2, 'Age'].fillna(df_test[df_test['Pclass'] == 2]['Age'].mean())
df_test.loc[df_test['Pclass'] == 3, 'Age'] = df_test.loc[df_test['Pclass'] == 3, 'Age'].fillna(df_test[df_test['Pclass'] == 3]['Age'].mean())

df_test['Fare'] = df_test['Fare'].fillna(df_test['Fare'].mean())

In [45]:
df['Fare']=df['Fare']/(df['SibSp'] + df['Parch'] +1)
df_test['Fare']=df_test['Fare']/(df_test['SibSp'] + df_test['Parch'] +1)

In [46]:
print(df.isnull().sum(),df_test.isnull().sum())

Survived    0
Pclass      0
Sex         0
Age         0
SibSp       0
Parch       0
Fare        0
Embarked    0
dtype: int64 Pclass      0
Sex         0
Age         0
SibSp       0
Parch       0
Fare        0
Embarked    0
dtype: int64


In [47]:
for label, content in df.items():
    if not pd.api.types.is_numeric_dtype(content):
        df[label] = content.astype("category").cat.as_ordered()
        df[label] = pd.Categorical(content).codes

for label, content in df_test.items():
    if not pd.api.types.is_numeric_dtype(content):
        df_test[label] = content.astype("category").cat.as_ordered()
        df_test[label] = pd.Categorical(content).codes

In [22]:
df['S_P_W']=df['Pclass']*df['Pclass']*(df['Sex'] + 1)
df_test['S_P_W']=df_test['Pclass']*df_test['Pclass']*(df_test['Sex'] + 1)

In [48]:
#np.random.seed(0)
valid = df.sample(n = int(len(df)*0.3),random_state=1)
train = df.drop(valid.index)

In [49]:
X_train, y_train = prepare_data(train)
X_valid, y_valid = prepare_data(valid)
X_Train, y_Train = prepare_data(df)
test = prepare_test_data(df_test)

In [50]:
layers_dims=[X_train.shape[0],25,13,6,1]

In [55]:
hyperparameters=[0.2,2000,1,0.7]
layers=[20, 3, 1]
t,v =cross_validation(df,layers ,hyperparameters,n=5,random=13)

1999 - Cost = 0.4075956444692329
Accuracy: 0.8413461538461539
Accuracy: 0.8164794007490637


1999 - Cost = 0.4092292148920553
Accuracy: 0.8333333333333334
Accuracy: 0.8202247191011236


1999 - Cost = 0.4312642838840399
Accuracy: 0.8237179487179487
Accuracy: 0.846441947565543


1999 - Cost = 0.4206803155517163
Accuracy: 0.8221153846153846
Accuracy: 0.8239700374531835


1999 - Cost = 0.43237919868563907
Accuracy: 0.8269230769230769
Accuracy: 0.8239700374531835


Mean train acc:  0.8294871794871795
Mean valid acc:  0.8262172284644196


In [56]:
hyperparameters=[0.2,2000,1,0.7]
layers=[30, 15, 6, 1]
t,v =cross_validation(df,layers ,hyperparameters,n=5,random=13)

1999 - Cost = 0.40852093710375165
Accuracy: 0.844551282051282
Accuracy: 0.8014981273408239


1999 - Cost = 0.39144842748153397
Accuracy: 0.8493589743589743
Accuracy: 0.797752808988764


1999 - Cost = 0.4383980119412696
Accuracy: 0.8044871794871795
Accuracy: 0.8389513108614233


1999 - Cost = 0.4204302060871519
Accuracy: 0.8285256410256411
Accuracy: 0.8164794007490637


1999 - Cost = 0.43516844786257114
Accuracy: 0.842948717948718
Accuracy: 0.8164794007490637


Mean train acc:  0.8339743589743589
Mean valid acc:  0.8142322097378276


In [57]:
layers=[X_train[0],20, 3, 1]
parameters, cache = deep_NN(X_train, y_train,layers_dims,learning_rate=0.2,iter_no=2000,pr_cost=True,lambd=0.7)

0 - Cost = 0.7250522813049226
1000 - Cost = 0.4103557952241273
1999 - Cost = 0.3921358619991559


In [58]:
prediction, _ = predict(test,1,parameters)
prediction = prediction.astype(int)[0]
print(prediction)

Accuracy: 0.34688995215311
[0 0 0 0 0 0 1 0 1 0 0 0 1 0 1 1 0 0 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 1
 1 0 0 0 0 0 1 1 0 0 0 1 1 0 0 1 1 0 0 0 0 0 1 0 0 0 1 0 1 1 0 0 1 1 0 1 0
 1 0 0 1 0 1 0 0 0 0 0 0 1 1 1 1 0 0 1 0 0 0 1 0 1 0 1 0 0 0 1 0 0 0 0 0 0
 1 1 1 1 0 0 1 0 1 1 0 1 0 0 1 0 1 0 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0
 0 0 1 0 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1 1 0 0 0 0 0 1 1 0 1 1 0 0 1 0 1
 0 1 0 0 0 0 0 0 0 1 0 1 1 0 1 1 1 0 1 0 0 1 0 1 0 0 0 0 1 0 0 1 0 1 0 1 0
 1 0 1 1 0 1 0 0 0 1 0 0 0 0 0 0 1 1 1 1 0 0 0 0 1 0 1 1 1 0 0 0 0 0 0 0 1
 0 0 0 1 1 0 0 0 0 1 0 0 0 1 1 0 1 0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 1 0 0 0 0
 1 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0
 1 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 1 1 0 0 0 1 0 1 0 0 0 0 1 1 0 1 0 0 1 1 0
 0 1 0 0 1 1 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 1 1 0 0 1 0 1 0 0 1 0 1 1 0 0 0
 0 1 1 1 1 1 0 1 0 0 0]


In [40]:
write= pd.read_csv("Data/test.csv")
result=pd.DataFrame({"PassengerId":write['PassengerId'],"Survived":prediction})
result.to_csv("resultdeepNN.csv", index=False)

In [41]:
df

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,S_P_W
0,0,3,1,22.00000,1,0,3.62500,2,18
1,1,1,0,38.00000,1,0,35.64165,0,1
2,1,3,0,26.00000,0,0,7.92500,2,9
3,1,1,0,35.00000,1,0,26.55000,2,1
4,0,3,1,35.00000,0,0,8.05000,2,18
...,...,...,...,...,...,...,...,...,...
886,0,2,1,27.00000,0,0,13.00000,2,8
887,1,1,0,19.00000,0,0,30.00000,2,1
888,0,3,0,25.14062,1,2,5.86250,2,9
889,1,1,1,26.00000,0,0,30.00000,0,2
