In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import copy

In [5]:
def dictionary_to_vector(parameters):
    keys = []
    count = 0
    L = len(parameters)//2
    for l in range(1, L + 1):
        new_vector = np.reshape(parameters['W' + str(l)], (-1,1))
        key = 'W' + str(l)
        keys = keys + [key]*new_vector.shape[0]
        
        if count == 0:
            theta = new_vector
        else:
            theta = np.concatenate((theta, new_vector), axis=0)

        new_vector = np.reshape(parameters['b' + str(l)], (-1,1))
        keys = keys + [key]*new_vector.shape[0]
        key = 'b' + str(l)
        theta = np.concatenate((theta, new_vector), axis=0)
        count = count + 1

    return theta, keys

def vector_to_dictionary(theta,layers_dims):
    parameters = {}
    L = len(layers_dims)
    Wstart = 0
    for l in range(1, L):
        print(Wstart, Wstart + layers_dims[l]*layers_dims[l - 1],(layers_dims[l],layers_dims[l-1]))
        parameters["W" + str(l)] = theta[Wstart: Wstart + layers_dims[l]*layers_dims[l - 1]].reshape((layers_dims[l],layers_dims[l-1]))
        parameters["b" + str(l)] = theta[Wstart + layers_dims[l]*layers_dims[l-1]:Wstart + layers_dims[l]*layers_dims[l-1] + layers_dims[l]].reshape((layers_dims[l],1))
        print(Wstart + layers_dims[l]*layers_dims[l-1],Wstart + layers_dims[l]*layers_dims[l-1] + layers_dims[l],(layers_dims[l],1))
        Wstart = Wstart + layers_dims[l]*layers_dims[l-1] + layers_dims[l]
    #print(parameters)
    return parameters

def gradients_to_vector(gradients):
    count = 0
    L = len(parameters)//2
    for l in range(1,L + 1):
        new_vector = np.reshape(gradients['dW' + str(l)], (-1,1))
        
        if count == 0:
            theta = new_vector
        else:
            theta = np.concatenate((theta, new_vector), axis=0)

        new_vector = np.reshape(gradients['db' + str(l)], (-1,1))
        theta = np.concatenate((theta, new_vector), axis=0)
        count = count + 1

    return theta


def gradient_check_n(parameters, gradients, X, Y, layer_dims, epsilon = 1e-7):
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    print(parameters_values.shape, grad.shape)
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))
    for i in range(num_parameters):
        thetaplus =  np.copy(parameters_values)                                      
        thetaplus[i][0] = thetaplus[i][0]+epsilon
        #print(thetaplus, vector_to_dictionary(thetaplus,layer_dims))
        J_plus[i], _ = forward_prog_gd(X,vector_to_dictionary(thetaplus,layer_dims))
        thetaminus = np.copy(parameters_values)                                     
        thetaminus[i][0] = thetaplus[i][0]-epsilon                                       
        J_minus[i], _ = forward_prog_gd(X,vector_to_dictionary(thetaminus,layer_dims))
        gradapprox[i] = (J_plus[i]-J_minus[i])/(2*epsilon)
    numerator = np.linalg.norm(gradapprox-grad)                                           
    denominator = np.linalg.norm(grad)+np.linalg.norm(gradapprox)                                         
    difference = numerator/denominator

    if difference > 1e-7:
        print ("There is a mistake in the backward propagation! difference = " + str(difference))
    else:
        print ("Your backward propagation works perfectly fine! difference = " + str(difference))
    
    return difference

In [6]:
def init_parameters(layer_dims):
    np.random.seed(3)
    parameters = {}
    L = len(layer_dims)
    
    for l in range(1,L):
        #parameters['W' + str(l)] = np.random.randn(layer_dims[l],layer_dims[l-1])*0.01
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) / np.sqrt(layer_dims[l-1]) #*0.01
        parameters['b' + str(l)] = np.zeros((layer_dims[l],1))
    return parameters

def initialize_parameters_zeros(layer_dims):
    np.random.seed(3)
    parameters = {}
    L = len(layer_dims)
    
    for l in range(1,L):
        #parameters['W' + str(l)] = np.random.randn(layer_dims[l],layer_dims[l-1])*0.01
        parameters['W' + str(l)] = np.zeros((layer_dims[l],layer_dims[l-1]))
        parameters['b' + str(l)] = np.zeros((layer_dims[l],1))
    return parameters

def initialize_parameters_random(layer_dims):
    np.random.seed(3)
    parameters = {}
    L = len(layer_dims)
    
    for l in range(1,L):
        #parameters['W' + str(l)] = np.random.randn(layer_dims[l],layer_dims[l-1])*0.01
        parameters['W' + str(l)] = np.random.randn(layer_dims[l],layer_dims[l-1])*10
        parameters['b' + str(l)] = np.zeros((layer_dims[l],1))
    return parameters

def initialize_parameters_he(layer_dims):
    np.random.seed(3)
    parameters = {}
    L = len(layer_dims)
    
    for l in range(1,L):
        #parameters['W' + str(l)] = np.random.randn(layer_dims[l],layer_dims[l-1])*0.01
        parameters['W' + str(l)] = np.random.randn(layer_dims[l],layer_dims[l-1])*np.sqrt(2./layer_dims[l-1])
        parameters['b' + str(l)] = np.zeros((layer_dims[l],1))
    return parameters

In [7]:
def sigmoid_func(Z):
    A = 1/(1+np.exp(-Z))
    cache = Z
    return A, cache

def relu_func(Z):
    A = np.maximum(0,Z)
    cache = Z 
    return A, cache

In [8]:
def relu_backward_func(dA, cache):
    Z = cache
    dZ = np.array(dA, copy=True) 
    dZ[Z <= 0] = 0
    return dZ

def sigmoid_backward_func(dA, cache):
    Z = cache
    s = 1/(1+np.exp(-Z))
    dZ = dA * s * (1-s)
    return dZ

In [32]:
def forward_prog(X, parameters):
    caches = []
    A = X
    L = len(parameters) // 2 

    for l in range(1,L):
        A_prev = A
        A, linear_cache, activation_cache = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)], act_func='relu')
        cache = (linear_cache, activation_cache)
        caches.append(cache)
        
    AL, linear_cache, activation_cache = linear_activation_forward(A, parameters['W' + str(L)], parameters['b' + str(L)], act_func='sigmoid')
    cache = (linear_cache, activation_cache)
    caches.append(cache)

    return AL, caches

In [10]:
def forward_prog_gd(X, parameters):
    caches = []
    A = X
    L = len(parameters) // 2 
    for l in range(1,L):
        print(parameters['W' + str(l)].shape,  parameters['b' + str(l)].shape)
    
    for l in range(1,L):
        A_prev = A
        print(A_prev.shape, parameters['W' + str(l)].shape)
        A, linear_cache, activation_cache = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)], act_func='relu')
        cache = (activation_cache, linear_cache)
        caches.append(cache)
        
    AL, linear_cache, activation_cache = linear_activation_forward(A, parameters['W' + str(L)], parameters['b' + str(L)], act_func='sigmoid')
    cache = (activation_cache, linear_cache)
    caches.append(cache)

    return AL, caches

In [11]:
def linear_forward(A, W, b):
    Z = np.dot(W,A) + b
    cache = (A, W, b)
    return Z, cache

def linear_activation_forward(A_prev, W, b, act_func):
    Z, linear_cache = linear_forward(A_prev,W,b)
    if act_func == 'sigmoid':
        A, activation_cache = sigmoid_func(Z)

    elif act_func == 'relu':
        A, activation_cache = relu_func(Z)
        
    return A, linear_cache, activation_cache

def forward_prog_D(X, parameters, drop_prob = 0.5):
    caches=[]
    A = X
    L = len(parameters)//2
    np.random.seed(1)
    for l in range(1,L):
        A_prev = A
        A, linear_cache, activation_cache = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)], act_func='relu')
        D = np.random.rand(A.shape[0], A.shape[1])
        D = D < drop_prob
        A = np.multiply(D,A)
        A = A/drop_prob
        cache = (linear_cache, activation_cache,D)
        caches.append(cache)
        
    AL, linear_cache, activation_cache = linear_activation_forward(A, parameters['W' + str(L)], parameters['b' + str(L)] , act_func='sigmoid')
    cache = (linear_cache, activation_cache,D)
    caches.append(cache)

    return AL, caches

In [12]:
def compute_cost(AL, Y):
    epsilon = 1e-15
    m = Y.shape[1]
    cost = np.dot((Y*np.log(AL+epsilon) + (1-Y)*np.log(1-AL+epsilon)),np.ones((m,1)))*(-1/m)
    cost = np.squeeze(cost)
    """m = Y.shape[1]
    
    logprobs = np.multiply(-np.log(AL+ epsilon),Y) + np.multiply(-np.log(1 - AL + epsilon), 1 - Y)
    cost = 1./m * np.nansum(logprobs)"""
    return cost

def compute_with_reg(AL, Y, parameters, lambd):
    m = Y.shape[1]
    L = len(parameters) // 2
    tmp_sum = 0
    for l in range(1,L+1):
        tmp_sum = np.sum(np.square(parameters['W'+str(l)])) + tmp_sum
    L2_cost = lambd*tmp_sum/(2*m)
    cost = compute_cost(AL, Y)
    total_cost = cost + L2_cost
    return total_cost

In [13]:
def linear_backward(dZ, cache, lambd=0):
    A_prev, W, b = cache
    m = A_prev.shape[1]
    dW = np.dot(dZ, A_prev.T)/m + lambd*W/m
    db = np.sum(dZ, axis = 1, keepdims = True)/m
    dA_prev = np.dot(W.T,dZ)
    
    return dA_prev, dW, db 

def linear_activation_backward(dA, cache, act_func,lambd=0):
    linear_cache, activation_cache = cache
    if act_func == 'relu':
        dZ = relu_backward_func(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache,lambd=lambd)
    elif act_func == 'sigmoid':
        dZ = sigmoid_backward_func(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache,lambd=lambd)
    return dA_prev, dW, db


def back_prog(AL, Y, caches,lambd):
    grads = {}
    L = len(caches)
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)
    epsilon = 1e-15
    dAL = -(np.divide(Y, AL + epsilon) -np.divide(1 - Y, 1-AL + epsilon))

    linear_cache, activation_cache = caches[L-1]
    current_cache = (linear_cache, activation_cache)
    
    dA_prev_temp, dW_temp, db_temp = linear_activation_backward(dAL, current_cache, act_func='sigmoid',lambd=lambd)
    grads['dA' + str(L-1)] = dA_prev_temp
    grads['dW' + str(L)] = dW_temp
    grads['db' + str(L)] = db_temp

    for l in reversed(range(L-1)):
        linear_cache, activation_cache = caches[l]
        current_cache = (linear_cache, activation_cache)
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(dA_prev_temp, current_cache, act_func='relu',lambd=lambd)
        grads['dA' + str(l)] = dA_prev_temp
        grads['dW' + str(l + 1)] = dW_temp
        grads['db' + str(l + 1)] = db_temp
        
        
    return grads

In [14]:
def back_prog_D(AL, Y, caches, drop_prob):
    grads = {}
    L = len(caches)
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)
    epsilon = 1e-15
    dAL = -(np.divide(Y, AL + epsilon) -np.divide(1 - Y, 1-AL + epsilon))

    linear_cache, activation_cache, D = caches[L-1]
    
    current_cache = (linear_cache, activation_cache)
    dA_prev_temp, dW_temp, db_temp = linear_activation_backward(dAL, current_cache, act_func='sigmoid')
    grads['dW' + str(L)] = dW_temp
    grads['db' + str(L)] = db_temp
    
    
    for l in reversed(range(L-1)):
        linear_cache, activation_cache, D = caches[l]
        dA_prev_temp = np.multiply(D, dA_prev_temp)
        dA_prev_temp = dA_prev_temp/drop_prob
        grads['dA' + str(l+1)] = dA_prev_temp
        current_cache = (linear_cache, activation_cache)
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(dA_prev_temp, current_cache, act_func='relu')
        grads['dW' + str(l + 1)] = dW_temp
        grads['db' + str(l + 1)] = db_temp
        
    return grads

In [15]:
def update_parameters(params, grads, learning_rate):
    parameters = copy.deepcopy(params)
    L = len(parameters) // 2
    for l in range(L):
        parameters['W' + str(l+1)] = parameters['W' + str(l+1)] - grads['dW' + str(l+1)]*learning_rate
        parameters['b' + str(l+1)] = parameters['b' + str(l+1)] - grads['db' + str(l+1)]*learning_rate
    
    return parameters

In [16]:
def random_mini_batches(X, Y, mini_batch_size = 64, seed = 0):
   
    np.random.seed(seed)     
    m = X.shape[1]   
    mini_batches = []
        
    permutation = list(np.random.permutation(m))
    shuffled_X = X[:, permutation]
    shuffled_Y = Y[:, permutation].reshape((1,m))

    num_complete_minibatches = math.floor(m/mini_batch_size) 
    for k in range(0, num_complete_minibatches):
        mini_batch_X = shuffled_X[:, k*mini_batch_size : (k+1)*mini_batch_size]
        mini_batch_Y = shuffled_Y[:, k*mini_batch_size : (k+1)*mini_batch_size]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)
    
    if m % mini_batch_size != 0:
        mini_batch_X = shuffled_X[:, num_complete_minibatches *mini_batch_size : ]
        mini_batch_Y = shuffled_Y[:, num_complete_minibatches *mini_batch_size : ]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)
    
    return mini_batches


def initialize_velocity(parameters):
    L = len(parameters) // 2 
    v = {}
    for l in range(L):
        v["dW" + str(l+1)] = np.zeros((parameters['W' + str(l+1)].shape[0],parameters['W' + str(l+1)].shape[1]))
        v["db" + str(l+1)] = np.zeros((parameters['b' + str(l+1)].shape[0],parameters['b' + str(l+1)].shape[1]))
        
    return v

def update_parameters_with_momentum(parameters, grads, v, beta, learning_rate):

    L = len(parameters) // 2 
    for l in range(L):
        v["dW" + str(l+1)] = beta*v["dW" + str(l+1)]+(1-beta)*grads['dW' + str(l+1)]
        v["db" + str(l+1)] = beta*v["db" + str(l+1)]+(1-beta)*grads['db' + str(l+1)]
        parameters["W" + str(l+1)] = parameters["W" + str(l+1)]-learning_rate*v["dW" + str(l+1)]
        parameters["b" + str(l+1)] = parameters["b" + str(l+1)]-learning_rate*v["db" + str(l+1)]
        
    return parameters, v

def initialize_adam(parameters) :
    L = len(parameters) // 2 
    v = {}
    s = {}
    
    for l in range(L):
        v["dW" + str(l+1)] = np.zeros((parameters["W" + str(l+1)].shape[0],parameters["W" + str(l+1)].shape[1]))
        v["db" + str(l+1)] = np.zeros((parameters["b" + str(l+1)].shape[0],parameters["b" + str(l+1)].shape[1]))
        s["dW" + str(l+1)] = np.zeros((parameters["W" + str(l+1)].shape[0],parameters["W" + str(l+1)].shape[1]))
        s["db" + str(l+1)] = np.zeros((parameters["b" + str(l+1)].shape[0],parameters["b" + str(l+1)].shape[1]))
    
    return v, s

def update_parameters_with_adam(parameters, grads, v, s, t, learning_rate = 0.01,beta1 = 0.9, beta2 = 0.999,  epsilon = 1e-8):
    
    L = len(parameters) // 2     
    v_corrected = {} 
    s_corrected = {} 
    for l in range(L):
        v["dW" + str(l+1)] = beta1*v["dW" + str(l+1)]+(1-beta1)*grads['dW'+str(l+1)]
        v["db" + str(l+1)] = beta1*v["db" + str(l+1)]+(1-beta1)*grads['db'+str(l+1)]
        v_corrected["dW" + str(l+1)] = v["dW" + str(l+1)]/(1-pow(beta1,t))
        v_corrected["db" + str(l+1)] = v["db" + str(l+1)]/(1-pow(beta1,t))
        s["dW" + str(l+1)] = beta2*s["dW" + str(l+1)]+(1-beta2)*np.power(grads['dW'+str(l+1)],2)
        s["db" + str(l+1)] = beta2*s["db" + str(l+1)]+(1-beta2)*np.power(grads['db'+str(l+1)],2)
        s_corrected["dW" + str(l+1)] = s["dW" + str(l+1)]/(1-pow(beta2,t))
        s_corrected["db" + str(l+1)] = s["db" + str(l+1)]/(1-pow(beta2,t))
        parameters["W" + str(l+1)] = parameters["W" + str(l+1)]-learning_rate*np.divide(v_corrected["dW" + str(l+1)],np.sqrt(s_corrected["dW" + str(l+1)])+epsilon)
        parameters["b" + str(l+1)] = parameters["b" + str(l+1)]-learning_rate*np.divide(v_corrected["db" + str(l+1)],np.sqrt(s_corrected["db" + str(l+1)])+epsilon)
       
    return parameters, v, s

In [17]:
def deep_NN(X,Y, layers_dims, learning_rate = 0.3, iter_no = 30000, pr_cost = False, drop_prob=1, lambd=0, initialization = 'random', 
            gd_check = 0, optimizer = "gd", beta = 0.9, beta1 = 0.9, beta2 = 0.999,  epsilon = 1e-8):
    np.random.seed(1)
    costs=[]
    t = 0
    if initialization == "zeros":
        parameters = initialize_parameters_zeros(layers_dims)
    elif initialization == "random":
        parameters = initialize_parameters_random(layers_dims)
    elif initialization == "he":
        parameters = initialize_parameters_he(layers_dims)

    if optimizer == "gd":
        pass 
    elif optimizer == "momentum":
        v = initialize_velocity(parameters)
    elif optimizer == "adam":
        v, s = initialize_adam(parameters)
        
    for i in range(0, iter_no):
        if drop_prob == 1:
            AL, caches = forward_prog(X, parameters)
        else:
            AL, caches = forward_prog_D(X, parameters, drop_prob)

        if lambd == 0:
            cost = compute_cost(AL,Y)
        else:
            cost = compute_with_reg(AL, Y, parameters,lambd)

        if drop_prob == 1:
            grads = back_prog(AL, Y, caches,lambd)
        else:
            grads = back_prog_D(AL, Y, caches,drop_prob)

        if(gd_check == 1):
            difference = gradient_check_n(parameters, grads, X, Y, layers_dims)
        
        if optimizer == "gd":
            parameters = update_parameters(parameters, grads, learning_rate)
        elif optimizer == "momentum":
            parameters, v = update_parameters_with_momentum(parameters, grads, v, beta, learning_rate)
        elif optimizer == "adam":
            t = t + 1 
            parameters, v, s = update_parameters_with_adam(parameters, grads, v, s,
                                                   t, learning_rate, beta1, beta2,  epsilon)
            
        
        if pr_cost and i%1000 == 0 or i == iter_no-1:
            print("{} - Cost = {}".format(i,np.squeeze(cost)))
        if i%1000 == 0 or i == iter_no:
            costs.append(cost)
    return parameters, costs

In [76]:
def cross_validation(train_set,layers ,hyperparameters, n=5 ,percent = 0.3,random=42,pr=False):
    train_acc = 0
    valid_acc = 0
    
    for i in range(1,n+1):
        valid = df.sample(n = int(len(train_set)*percent),random_state=i*random)
        train = df.drop(valid.index)
        X_train, y_train = prepare_data(train)
        X_valid, y_valid = prepare_data(valid)
        layers_dims=[X_train.shape[0]]+layers
        parameters, cache = deep_NN(X_train, y_train, layers_dims,learning_rate=hyperparameters[0],iter_no=hyperparameters[1],
                                    pr_cost=pr,drop_prob=hyperparameters[2],lambd=hyperparameters[3], 
                                    initialization=hyperparameters[4], optimizer = hyperparameters[5])
        prediction, acc = predict(X_train, y_train, parameters)
        train_acc = train_acc + acc
        prediction, acc = predict(X_valid, y_valid, parameters)
        valid_acc = valid_acc + acc
        print("\n")
        
    mean_train = train_acc/n
    mean_valid = valid_acc/n
    print("Mean train acc: ", mean_train)
    print("Mean valid acc: ", mean_valid)
    return mean_train, mean_valid

In [19]:
def predict(X, y, parameters):
    m = X.shape[1]
    n = len(parameters) // 2 
    p = np.zeros((1,m))
    probas, caches = forward_prog(X, parameters)
    preds = (probas > 0.5).astype(int)
    accuracy = np.mean((preds == y).astype(int)).astype(float)
    print("Accuracy:",accuracy)
    
    return preds, accuracy

In [20]:
def prepare_data(df):
    X = df.drop(columns = 'Survived').values
    y = df['Survived'].values.reshape((-1,1))
    maxX = np.max(X,axis=0)
    X=X/maxX
    return X.T,y.T
def prepare_test_data(df):
    X = df.values
    maxX = np.max(X,axis=0)
    X=X/maxX
    return X.T

In [21]:
df = pd.read_csv('Data/train.csv')
df_test = pd.read_csv('Data/test.csv')

In [22]:
df.drop(columns='Cabin', inplace= True)
df_test.drop(columns='Cabin', inplace= True)

df.drop(columns='Name', inplace= True)
df_test.drop(columns='Name', inplace= True)

df.drop(columns='PassengerId', inplace= True)
df_test.drop(columns='PassengerId', inplace= True)

df.drop(columns='Ticket', inplace= True)
df_test.drop(columns='Ticket', inplace= True)

In [23]:
df.loc[df['Pclass'] == 1, 'Age'] = df.loc[df['Pclass'] == 1, 'Age'].fillna(df[df['Pclass'] == 1]['Age'].mean())
df.loc[df['Pclass'] == 2, 'Age'] = df.loc[df['Pclass'] == 2, 'Age'].fillna(df[df['Pclass'] == 2]['Age'].mean())
df.loc[df['Pclass'] == 3, 'Age'] = df.loc[df['Pclass'] == 3, 'Age'].fillna(df[df['Pclass'] == 3]['Age'].mean())
df['Embarked'] = df['Embarked'].fillna('S')

df_test.loc[df_test['Pclass'] == 1, 'Age'] = df_test.loc[df_test['Pclass'] == 1, 'Age'].fillna(df_test[df_test['Pclass'] == 1]['Age'].mean())
df_test.loc[df_test['Pclass'] == 2, 'Age'] = df_test.loc[df_test['Pclass'] == 2, 'Age'].fillna(df_test[df_test['Pclass'] == 2]['Age'].mean())
df_test.loc[df_test['Pclass'] == 3, 'Age'] = df_test.loc[df_test['Pclass'] == 3, 'Age'].fillna(df_test[df_test['Pclass'] == 3]['Age'].mean())

df_test['Fare'] = df_test['Fare'].fillna(df_test['Fare'].mean())

In [24]:
df['Fare']=df['Fare']/(df['SibSp'] + df['Parch'] +1)
df_test['Fare']=df_test['Fare']/(df_test['SibSp'] + df_test['Parch'] +1)

In [25]:
print(df.isnull().sum(),df_test.isnull().sum())

Survived    0
Pclass      0
Sex         0
Age         0
SibSp       0
Parch       0
Fare        0
Embarked    0
dtype: int64 Pclass      0
Sex         0
Age         0
SibSp       0
Parch       0
Fare        0
Embarked    0
dtype: int64


In [26]:
for label, content in df.items():
    if not pd.api.types.is_numeric_dtype(content):
        df[label] = content.astype("category").cat.as_ordered()
        df[label] = pd.Categorical(content).codes

for label, content in df_test.items():
    if not pd.api.types.is_numeric_dtype(content):
        df_test[label] = content.astype("category").cat.as_ordered()
        df_test[label] = pd.Categorical(content).codes

In [27]:
df['S_P_W']=df['Pclass']*df['Pclass']*(df['Sex'] + 1)
df_test['S_P_W']=df_test['Pclass']*df_test['Pclass']*(df_test['Sex'] + 1)

In [28]:
#np.random.seed(0)
valid = df.sample(n = int(len(df)*0.3),random_state=1)
train = df.drop(valid.index)

In [29]:
X_train, y_train = prepare_data(train)
X_valid, y_valid = prepare_data(valid)
X_Train, y_Train = prepare_data(df)
test = prepare_test_data(df_test)

In [30]:
layers_dims=[X_train.shape[0],25,13,6,1]

In [109]:
hyperparameters=[0.0001,4000,1,0.7,'he', 'adam']
layers=[30, 15, 6, 1]
t,v =cross_validation(df,layers ,hyperparameters,n=5,random= 121)

3999 - Cost = 0.4371381689248058
Accuracy: 0.842948717948718
Accuracy: 0.8202247191011236


3999 - Cost = 0.40808885697706654
Accuracy: 0.8413461538461539
Accuracy: 0.8202247191011236


3999 - Cost = 0.4415161030910267
Accuracy: 0.8221153846153846
Accuracy: 0.8389513108614233


3999 - Cost = 0.4221981730761701
Accuracy: 0.842948717948718
Accuracy: 0.8314606741573034


3999 - Cost = 0.4282992623859263
Accuracy: 0.8381410256410257
Accuracy: 0.850187265917603


Mean train acc:  0.8375
Mean valid acc:  0.8322097378277153


In [110]:
hyperparameters=[0.0001,5000,1,0.8,'he', 'adam']
layers=[30, 15, 6, 1]
t,v =cross_validation(df,layers ,hyperparameters,n=5,random = 121)

4999 - Cost = 0.43167850035142435
Accuracy: 0.8365384615384616
Accuracy: 0.8277153558052435


4999 - Cost = 0.40077600315758516
Accuracy: 0.8525641025641025
Accuracy: 0.8202247191011236


4999 - Cost = 0.43541467780915416
Accuracy: 0.8253205128205128
Accuracy: 0.8426966292134831


4999 - Cost = 0.41432746574424784
Accuracy: 0.844551282051282
Accuracy: 0.8277153558052435


4999 - Cost = 0.4229135004940923
Accuracy: 0.8365384615384616
Accuracy: 0.846441947565543


Mean train acc:  0.839102564102564
Mean valid acc:  0.8329588014981274


In [94]:
layers=[X_train[0],20, 3, 1]
parameters, cache = deep_NN(X_train, y_train,layers_dims,learning_rate=0.0001,iter_no=4000,pr_cost=True,initialization='he',optimizer='adam')

0 - Cost = 0.7978285477066022
1000 - Cost = 0.47874981625896484
2000 - Cost = 0.39003065623583183
3000 - Cost = 0.35848508225533465
3999 - Cost = 0.34472422713664086


In [95]:
pr, _ = predict(X_valid, y_valid, parameters)
pred = pr.astype(int)[0]
print(pred)

Accuracy: 0.7752808988764045
[1 0 1 1 1 0 0 1 0 1 0 0 0 0 1 0 0 0 0 1 0 0 1 0 1 0 1 1 0 1 0 0 0 0 0 0 0
 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0
 1 0 1 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0
 0 0 0 0 0 0 0 1 0 1 0 0 0 1 1 1 0 0 0 0 0 1 0 0 1 1 0 0 0 1 0 1 0 0 0 0 0
 0 0 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 1 0 0 0 1 0 1 1 0 0 1
 0 0 1 0 1 0 0 1 1 1 1 0 1 0 0 0 1 0 0 0 1 1 0 0 0 1 0 0 1 0 0 0 1 1 0 0 0
 0 0 0 0 1 1 1 0 1 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 1 1 1 0 1 0
 0 1 0 1 1 0 0 0]


In [None]:
layers=[X_train[0],20, 3, 1]
parameters, cache = deep_NN(X_Train, y_Train,layers_dims,learning_rate=0.0001,iter_no=5000,pr_cost=True,
                           lambd=0.8, initialization='he',optimizer='adam')

0 - Cost = 0.828698644913157
1000 - Cost = 0.5392379347729824
2000 - Cost = 0.4635748151237043
3000 - Cost = 0.4370948700184106
4000 - Cost = 0.4262344850814438


In [97]:
prediction, _ = predict(test,1,parameters)
prediction = prediction.astype(int)[0]
print(prediction)

Accuracy: 0.33253588516746413
[0 0 0 0 0 0 1 0 1 0 0 0 1 0 1 1 0 0 0 1 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0
 1 0 0 0 0 0 1 1 0 0 0 1 0 1 0 1 1 0 0 0 0 0 1 0 0 0 1 1 1 1 0 0 1 1 0 0 0
 1 1 0 1 0 1 1 0 0 0 0 0 1 1 1 1 0 0 1 0 0 0 1 0 1 0 1 0 0 0 1 0 0 0 0 0 0
 1 1 1 1 0 0 1 0 1 1 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0
 0 0 1 0 0 0 0 0 1 1 0 1 1 1 1 0 0 0 0 0 1 1 0 0 0 0 0 1 1 0 1 1 0 0 1 0 1
 0 1 0 0 0 0 0 0 0 1 0 1 1 0 0 1 1 0 1 0 0 1 0 1 0 0 0 0 1 0 0 1 0 1 0 1 0
 1 0 1 1 0 1 0 0 0 1 0 0 0 0 0 0 1 1 1 1 0 0 0 0 1 0 1 1 1 0 0 0 0 0 0 0 1
 0 0 0 1 1 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 1 1 1 1 1 0 0 1 0 0 0 1 0 0 0 0
 1 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0
 1 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 1 1 0 0 0 1 0 1 0 0 0 0 1 1 0 1 0 0 0 1 0
 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 1 0 0 1 0 1 1 0 0 0
 0 1 1 1 1 0 0 1 0 0 0]


In [98]:
write= pd.read_csv("Data/test.csv")
result=pd.DataFrame({"PassengerId":write['PassengerId'],"Survived":prediction})
result.to_csv("resultdeepNN.csv", index=False)

In [61]:
df

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,S_P_W
0,0,3,1,22.00000,1,0,3.62500,2,18
1,1,1,0,38.00000,1,0,35.64165,0,1
2,1,3,0,26.00000,0,0,7.92500,2,9
3,1,1,0,35.00000,1,0,26.55000,2,1
4,0,3,1,35.00000,0,0,8.05000,2,18
...,...,...,...,...,...,...,...,...,...
886,0,2,1,27.00000,0,0,13.00000,2,8
887,1,1,0,19.00000,0,0,30.00000,2,1
888,0,3,0,25.14062,1,2,5.86250,2,9
889,1,1,1,26.00000,0,0,30.00000,0,2
