In [35]:
import numpy as np
import copy
import seaborn as sns
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
import sklearn.linear_model
from data import *
from data_to_csv import *


In [36]:
# Load the arrays used below: X and Y are passed to model() for training and
# test_data is passed to predict().
# NOTE(review): data_loading is not defined in this notebook; presumably it
# comes from one of the star imports in the import cell — confirm.
X,Y,test_data = data_loading()

In [37]:
# Show the dimensions of X as a quick sanity check before training.
X.shape


(4, 891)

In [38]:
def sigmoid(z):
    """Element-wise logistic function 1 / (1 + exp(-z)).

    Evaluated as exp(-log(1 + exp(-z))) through ``np.logaddexp`` so that
    large-magnitude negative inputs do not overflow ``np.exp`` (the naive
    form emits a RuntimeWarning for such inputs).

    Args:
        z: scalar or NumPy array of pre-activation values.

    Returns:
        NumPy scalar or array shaped like ``z`` with values in [0, 1].
    """
    # logaddexp(0, -z) == log(exp(0) + exp(-z)), computed without overflow.
    sig = np.exp(-np.logaddexp(0, -z))
    return sig

In [39]:
def layer_sizes(X,Y):
    """Return the (input, hidden, output) layer widths as a 3-tuple.

    The input and output widths are the row counts of X and Y. The hidden
    width is a fixed 50; note that model() only reads the first and last
    entries and takes its hidden width from its own n_h argument.
    """
    hidden_units = 50
    return (X.shape[0], hidden_units, Y.shape[0])

In [40]:
def initialize_parameters(n_x,n_h,n_y):
    """Create the starting weights and biases for the two-layer network.

    Weights are standard-normal draws scaled by 0.01; biases start at zero.

    Args:
        n_x: width of the input layer.
        n_h: width of the hidden layer.
        n_y: width of the output layer.

    Returns:
        dict with 'W1' (n_h, n_x), 'b1' (n_h, 1), 'W2' (n_y, n_h), 'b2' (n_y, 1).
    """
    scale = 0.01
    # Draw W1 before W2 so the global random stream is consumed in a fixed order.
    hidden_weights = np.random.randn(n_h, n_x) * scale
    output_weights = np.random.randn(n_y, n_h) * scale

    return {
        'W1': hidden_weights,
        'b1': np.zeros((n_h, 1)),
        'W2': output_weights,
        'b2': np.zeros((n_y, 1)),
    }

In [41]:
def forward_propagation(X,parameters):
    """Run one forward pass: linear -> tanh -> linear -> sigmoid.

    Args:
        X: input array with one example per column.
        parameters: dict holding 'W1', 'b1', 'W2', 'b2'.

    Returns:
        (A2, cache) where A2 is the sigmoid output and cache is a dict with
        the intermediate values 'Z1', 'A1', 'Z2', 'A2'.
    """
    W1, b1, W2, b2 = (parameters[key] for key in ('W1', 'b1', 'W2', 'b2'))

    hidden_pre = np.dot(W1, X) + b1
    hidden_act = np.tanh(hidden_pre)
    output_pre = np.dot(W2, hidden_act) + b2
    A2 = sigmoid(output_pre)

    # Exactly one output row is expected, with one column per input example.
    assert A2.shape == (1, X.shape[1])

    return A2, dict(Z1=hidden_pre, A1=hidden_act, Z2=output_pre, A2=A2)

In [42]:
def compute_cost(A2,Y):
    """Cross-entropy cost averaged over the examples.

    Args:
        A2: predicted probabilities; combined element-wise with Y.
        Y: targets (presumably 0/1 labels); the example count is Y.shape[1].

    Returns:
        The averaged cost as a Python float.

    The activations are clipped away from exactly 0 and 1 before the logs are
    taken. Without that, a saturated output produces log(0) = -inf and the
    product 0 * -inf turns the whole cost into nan.
    """
    m = Y.shape[1]

    eps = 1e-15
    A2_safe = np.clip(A2, eps, 1 - eps)

    logprobs = np.multiply(Y, np.log(A2_safe)) + np.multiply(1 - Y, np.log(1 - A2_safe))
    cost = -np.sum(logprobs) / m

    # squeeze + float turns the NumPy scalar into a plain Python float.
    return float(np.squeeze(cost))

In [43]:
def back_propagation(parameters,cache,X,Y):
    """Compute the cost gradients for both layers of the network.

    Args:
        parameters: dict holding 'W2' (the output-layer weights); other
            entries are not read here.
        cache: dict holding the forward-pass activations 'A1' and 'A2'.
        X: input array with one example per column (m = X.shape[1]).
        Y: targets, subtracted element-wise from A2.

    Returns:
        dict with the gradients 'dW1', 'db1', 'dW2', 'db2', each averaged
        over the m examples.
    """
    m = X.shape[1]

    W2 = parameters['W2']
    A1 = cache['A1']
    A2 = cache['A2']

    # Output layer: with a sigmoid output and cross-entropy cost the error
    # term reduces to (prediction - target).
    dZ2 = A2 - Y
    dW2 = np.dot(dZ2, A1.T) / m
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m

    # Hidden layer: 1 - A1**2 is the tanh derivative written in terms of its output.
    dZ1 = np.dot(W2.T, dZ2) * (1 - np.power(A1, 2))
    dW1 = np.dot(dZ1, X.T) / m
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m

    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}

    return grads
    

In [44]:
def update_parameters(prarameters,grads,learning_rate=1.5):
    """Apply one gradient-descent step: value - learning_rate * gradient.

    Args:
        prarameters: dict holding 'W1', 'b1', 'W2', 'b2'. The misspelt name is
            kept so that any keyword callers keep working.
        grads: dict holding the matching gradients 'dW1', 'db1', 'dW2', 'db2'.
        learning_rate: step size multiplied into every gradient.

    Returns:
        A new dict with the updated 'W1', 'b1', 'W2', 'b2'. The subtraction
        already allocates fresh arrays, so the inputs stay untouched without
        any explicit deep copy.
    """
    return {
        name: prarameters[name] - learning_rate * grads['d' + name]
        for name in ('W1', 'b1', 'W2', 'b2')
    }

In [45]:
def model(X,Y,n_h,num_iterations = 10000,print_cost = False,learning_rate = 1.5):
    """Train the two-layer network with plain gradient descent.

    Args:
        X: training inputs, one example per column.
        Y: training targets.
        n_h: number of hidden units.
        num_iterations: number of gradient-descent steps.
        print_cost: when True, print the cost every 1000 iterations.
        learning_rate: step size forwarded to update_parameters; the default
            of 1.5 matches that function's own default.

    Returns:
        dict of learned parameters 'W1', 'b1', 'W2', 'b2'.
    """
    # Only the input and output widths come from the data; the hidden width
    # is the caller-supplied n_h.
    n_x, _, n_y = layer_sizes(X, Y)

    parameters = initialize_parameters(n_x, n_h, n_y)

    for i in range(num_iterations):
        A2, cache = forward_propagation(X, parameters)
        grads = back_propagation(parameters, cache, X, Y)
        parameters = update_parameters(parameters, grads, learning_rate)

        if print_cost and i % 1000 == 0:
            # The cost is only needed for reporting, so it is computed on
            # demand from the activations of this iteration's forward pass.
            cost = compute_cost(A2, Y)
            print ("Cost after iteration %i: %f" %(i, cost))

    return parameters

In [46]:
def predict(parameters,X):
    """Classify each column of X with the trained network.

    Runs a forward pass and thresholds the output activations at 0.5.

    Returns:
        Boolean array, True where the activation is strictly greater than 0.5.
    """
    probabilities, _ = forward_propagation(X, parameters)
    return probabilities > 0.5
    

In [47]:
# Seed NumPy's global generator first: initialize_parameters draws the
# starting weights from it, so without a seed every kernel restart trains a
# different network and produces different predictions.
np.random.seed(42)
nn_model = model(X, Y, n_h = 4, num_iterations = 30000, print_cost=False)


In [48]:
def accuracy(parameters=None, features=None, labels=None):
    """Print and return the fraction of examples the network gets right.

    Args:
        parameters: trained parameter dict; defaults to the notebook-level
            ``nn_model``.
        features: inputs to classify; defaults to the notebook-level ``X``.
        labels: expected outputs; defaults to the notebook-level ``Y``.

    Returns:
        Number of matching predictions divided by labels.shape[1].
    """
    # The trained weights live in nn_model; no notebook-level variable named
    # `parameters` exists, so the defaults are resolved here at call time.
    if parameters is None:
        parameters = nn_model
    if features is None:
        features = X
    if labels is None:
        labels = Y

    # predict() returns booleans; cast to int before comparing with the labels.
    predicted = np.array(predict(parameters, features), dtype=int)
    score = np.sum(predicted == labels) / labels.shape[1]
    print(score)
    return score

In [49]:
# Classify the test inputs with the trained network; predict() returns a
# boolean array (activation > 0.5).
predictions_on_test = predict(nn_model, test_data)

In [56]:
# Collapse the predictions to one dimension so they can be used as a single
# DataFrame column further down.
predictions_on_test = predictions_on_test.flatten()

In [57]:
# Display the flattened boolean predictions.
predictions_on_test

array([False,  True, False, False,  True, False,  True, False,  True,
       False, False, False,  True, False,  True,  True, False, False,
        True,  True, False, False,  True, False,  True, False,  True,
       False, False, False, False, False, False, False, False, False,
        True,  True, False, False, False, False, False,  True,  True,
       False, False, False,  True, False, False, False,  True,  True,
       False, False, False, False, False,  True, False, False, False,
        True, False,  True,  True, False, False,  True,  True, False,
        True, False,  True, False, False,  True, False,  True, False,
       False, False, False, False, False,  True,  True,  True, False,
        True, False,  True, False, False, False,  True, False,  True,
       False,  True, False, False, False,  True, False, False, False,
       False, False, False,  True,  True,  True,  True, False, False,
       False, False,  True,  True, False,  True, False, False, False,
       False, False,

In [58]:
# Read result.csv; its 'PassengerId' column is reused for the output table.
# NOTE(review): `pd` is not imported in the visible import cell; presumably it
# is provided by one of the star imports — confirm, or import pandas explicitly.
data = pd.read_csv("result.csv")

In [59]:
# Two-column table: the ids read from result.csv next to the network's
# boolean predictions for the test inputs.
output = pd.DataFrame(dict(
    PassengerId=data['PassengerId'],
    Prediction=predictions_on_test,
))

In [60]:
# Write the table to new_file.csv, leaving out the DataFrame index column.
output.to_csv('new_file.csv', index=False)