In [768]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import copy

In [769]:
def init_parameters(layer_dims):
    """Build the initial weights and biases for every layer of the network.

    The global NumPy RNG is re-seeded with 1 on each call, so the same
    ``layer_dims`` always produces the same parameters.

    Parameters
    ----------
    layer_dims : sequence of int
        Number of units per layer; entry 0 is the input size.

    Returns
    -------
    dict
        For ``l = 1 .. len(layer_dims) - 1``: ``'W<l>'`` of shape
        ``(layer_dims[l], layer_dims[l-1])`` drawn from a standard normal and
        divided by ``sqrt(layer_dims[l-1])``, and ``'b<l>'`` zeros of shape
        ``(layer_dims[l], 1)``.
    """
    np.random.seed(1)
    parameters = {}
    # Walk consecutive (previous size, current size) pairs, numbering layers from 1.
    size_pairs = zip(layer_dims[:-1], layer_dims[1:])
    for idx, (fan_in, fan_out) in enumerate(size_pairs, start=1):
        weights = np.random.randn(fan_out, fan_in)
        parameters['W{}'.format(idx)] = weights / np.sqrt(fan_in)
        parameters['b{}'.format(idx)] = np.zeros((fan_out, 1))
    return parameters

In [770]:
def sigmoid_func(Z):
    """Apply the logistic sigmoid element-wise.

    Returns
    -------
    tuple
        ``(A, cache)`` where ``A = 1 / (1 + exp(-Z))`` and ``cache`` is ``Z``
        itself, kept for the backward pass.
    """
    denominator = 1 + np.exp(-Z)
    return 1 / denominator, Z

def relu_func(Z):
    """Apply the rectified linear unit element-wise.

    Returns
    -------
    tuple
        ``(A, cache)`` where ``A = max(0, Z)`` and ``cache`` is ``Z`` itself,
        kept for the backward pass.
    """
    return np.maximum(0, Z), Z

In [771]:
def linear_forward(A, W, b):
    """Compute the affine part of a layer, ``Z = W . A + b``.

    Returns
    -------
    tuple
        ``(Z, cache)`` where ``cache`` is the tuple ``(A, W, b)`` of the
        inputs, kept for the backward pass.
    """
    pre_activation = np.dot(W, A) + b
    return pre_activation, (A, W, b)

def linear_activation_forward(A_prev, W, b, act_func):
    """Run one layer forward: affine transform followed by an activation.

    Parameters
    ----------
    A_prev : array
        Activations coming from the previous layer (or the input data).
    W, b : array
        Weights and bias of this layer.
    act_func : str
        Either ``'sigmoid'`` or ``'relu'``.

    Returns
    -------
    tuple
        ``(A, cache)`` where ``A`` is the layer output and ``cache`` is
        ``(linear_cache, activation_cache)`` for the backward pass.

    Raises
    ------
    ValueError
        If ``act_func`` is not one of the supported names. Previously this
        fell through both branches and failed with an UnboundLocalError.
    """
    # Resolve the activation first so an unsupported name fails fast and clearly.
    if act_func == 'sigmoid':
        activation = sigmoid_func
    elif act_func == 'relu':
        activation = relu_func
    else:
        raise ValueError(
            "Unsupported act_func {!r}; expected 'sigmoid' or 'relu'".format(act_func)
        )

    Z, linear_cache = linear_forward(A_prev, W, b)
    A, activation_cache = activation(Z)

    return A, (linear_cache, activation_cache)

def NN_model_forward(X, parameters):
    """Forward pass through the whole network.

    Layers ``1 .. L-1`` use ReLU and layer ``L`` uses sigmoid, where
    ``L = len(parameters) // 2`` (one ``W``/``b`` pair per layer).

    Returns
    -------
    tuple
        ``(AL, caches)``: the output of the last layer and the list of
        per-layer caches in layer order.
    """
    num_layers = len(parameters) // 2
    caches = []
    activation = X

    # Hidden layers: ReLU.
    for layer in range(1, num_layers):
        weights = parameters['W' + str(layer)]
        bias = parameters['b' + str(layer)]
        activation, layer_cache = linear_activation_forward(activation, weights, bias, act_func='relu')
        caches.append(layer_cache)

    # Output layer: sigmoid.
    out_weights = parameters['W' + str(num_layers)]
    out_bias = parameters['b' + str(num_layers)]
    AL, out_cache = linear_activation_forward(activation, out_weights, out_bias, act_func='sigmoid')
    caches.append(out_cache)

    return AL, caches

In [773]:
def compute_cost(AL, Y, i=None):
    """Mean binary cross-entropy between predictions ``AL`` and labels ``Y``.

    Parameters
    ----------
    AL : array, shape (1, m)
        Predicted probabilities.
    Y : array, shape (1, m)
        Labels; ``m`` is taken from ``Y.shape[1]``.
    i : any, optional
        Unused; kept so existing calls that pass the iteration index still work.

    Returns
    -------
    numpy scalar array
        The cost, squeezed to zero dimensions.
    """
    m = Y.shape[1]
    # Keep predictions strictly inside (0, 1): log(0) would otherwise give
    # -inf, and 0 * -inf gives NaN for perfectly confident predictions.
    eps = 1e-15
    AL_safe = np.clip(AL, eps, 1 - eps)
    log_likelihood = Y * np.log(AL_safe) + (1 - Y) * np.log(1 - AL_safe)
    cost = -np.sum(log_likelihood, axis=1, keepdims=True) / m
    return np.squeeze(cost)

In [331]:
def relu_backward_func(dA, cache):
    """Backward pass of ReLU.

    ``cache`` is the pre-activation ``Z`` stored by the forward pass. The
    gradient passes through unchanged where ``Z > 0`` and is zeroed elsewhere.
    ``dA`` itself is not modified; a copy is returned.
    """
    inactive = cache <= 0
    grad = np.array(dA, copy=True)
    grad[inactive] = 0
    return grad

def sigmoid_backward_func(dA, cache):
    """Backward pass of the sigmoid.

    ``cache`` is the pre-activation ``Z`` stored by the forward pass. Returns
    ``dA * s * (1 - s)`` with ``s = sigmoid(Z)``.
    """
    sig = 1 / (1 + np.exp(-cache))
    return dA * sig * (1 - sig)

In [332]:
def linear_backward(dZ, cache):
    """Backward pass of the affine part of a layer.

    Parameters
    ----------
    dZ : array
        Gradient of the cost with respect to this layer's pre-activation.
    cache : tuple
        ``(A_prev, W, b)`` stored by ``linear_forward``.

    Returns
    -------
    tuple
        ``(dA_prev, dW, db)``. ``dW`` and ``db`` are averaged over the
        ``A_prev.shape[1]`` columns; ``dA_prev`` is not.
    """
    A_prev, W, _ = cache
    batch = A_prev.shape[1]

    grad_A_prev = np.dot(W.T, dZ)
    grad_W = np.dot(dZ, A_prev.T) / batch
    grad_b = np.sum(dZ, axis=1, keepdims=True) / batch

    return grad_A_prev, grad_W, grad_b

def linear_activation_backward(dA, cache, act_func):
    """Run one layer backward: activation gradient, then affine gradient.

    Parameters
    ----------
    dA : array
        Gradient of the cost with respect to this layer's output.
    cache : tuple
        ``(linear_cache, activation_cache)`` from the forward pass.
    act_func : str
        Either ``'relu'`` or ``'sigmoid'``.

    Returns
    -------
    tuple
        ``(dA_prev, dW, db)`` as produced by ``linear_backward``.

    Raises
    ------
    ValueError
        If ``act_func`` is not one of the supported names. Previously this
        fell through both branches and failed with an UnboundLocalError.
    """
    # Resolve the activation gradient first so an unsupported name fails clearly.
    if act_func == 'relu':
        activation_backward = relu_backward_func
    elif act_func == 'sigmoid':
        activation_backward = sigmoid_backward_func
    else:
        raise ValueError(
            "Unsupported act_func {!r}; expected 'sigmoid' or 'relu'".format(act_func)
        )

    linear_cache, activation_cache = cache
    dZ = activation_backward(dA, activation_cache)
    return linear_backward(dZ, linear_cache)

def NN_model_backward(AL, Y, caches):
    """Backward pass through the whole network.

    The last cache is treated as a sigmoid layer and all earlier caches as
    ReLU layers, mirroring ``NN_model_forward``.

    Parameters
    ----------
    AL : array
        Output of the forward pass.
    Y : array
        Labels; reshaped to ``AL.shape``.
    caches : list
        Per-layer caches returned by ``NN_model_forward``.

    Returns
    -------
    dict
        ``'dW<l>'`` and ``'db<l>'`` for ``l = 1 .. L`` and ``'dA<l>'`` for
        ``l = 0 .. L-1``, where ``L = len(caches)``.
    """
    grads = {}
    L = len(caches)
    Y = Y.reshape(AL.shape)

    # Keep predictions strictly inside (0, 1) so a saturated output (exactly
    # 0 or 1) does not cause a division by zero and NaN/inf gradients.
    eps = 1e-15
    AL_safe = np.clip(AL, eps, 1 - eps)
    dAL = -(np.divide(Y, AL_safe) - np.divide(1 - Y, 1 - AL_safe))

    # Output layer (sigmoid).
    current_cache = caches[L-1]
    dA_prev_temp, dW_temp, db_temp = linear_activation_backward(dAL, current_cache, act_func='sigmoid')
    grads['dA' + str(L-1)] = dA_prev_temp
    grads['dW' + str(L)] = dW_temp
    grads['db' + str(L)] = db_temp

    # Hidden layers (ReLU), from the last hidden layer back to the first.
    for l in reversed(range(L-1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(dA_prev_temp, current_cache, act_func='relu')
        grads['dA' + str(l)] = dA_prev_temp
        grads['dW' + str(l + 1)] = dW_temp
        grads['db' + str(l + 1)] = db_temp

    return grads

In [333]:
def update_parameters(params, grads, learning_rate):
    """Apply one gradient-descent step and return the new parameters.

    ``params`` is deep-copied first, so the dictionary passed in is left
    untouched. For every layer ``l`` the update is
    ``W<l> -= learning_rate * dW<l>`` and ``b<l> -= learning_rate * db<l>``.
    """
    updated = copy.deepcopy(params)
    num_layers = len(updated) // 2
    for layer in range(1, num_layers + 1):
        for prefix in ('W', 'b'):
            key = prefix + str(layer)
            updated[key] = updated[key] - grads['d' + key] * learning_rate

    return updated

In [496]:
def NN_model(X,Y, layers_dims, learning_rate = 0.0075, iter_no = 1000000, pr_cost = False):
    """Train the network with full-batch gradient descent.

    Parameters
    ----------
    X, Y : array
        Training inputs and labels, passed unchanged to the forward/backward
        helpers.
    layers_dims : sequence of int
        Layer sizes handed to ``init_parameters``.
    learning_rate : float
        Step size for ``update_parameters``.
    iter_no : int
        Number of gradient-descent iterations.
    pr_cost : bool
        When True, print the cost at every checkpoint.

    Returns
    -------
    tuple
        ``(parameters, costs)``: the trained parameters and the costs recorded
        at each checkpoint (every 1000th iteration and the final one). Each
        cost is measured before that iteration's parameter update.
    """
    np.random.seed(1)
    costs=[]
    parameters = init_parameters(layers_dims)
    last_iter = iter_no - 1
    for i in range(0, iter_no):
        AL, caches = NN_model_forward(X, parameters)
        cost = compute_cost(AL,Y,i)
        grads = NN_model_backward(AL, Y, caches)
        parameters = update_parameters(parameters, grads, learning_rate = learning_rate)

        # A checkpoint is every 1000th iteration plus the very last one.
        # The old test `i == iter_no` could never be true inside range(iter_no),
        # so the final cost was never recorded.
        checkpoint = i % 1000 == 0 or i == last_iter
        # Explicit grouping: `pr_cost and A or B` used to print the final cost
        # even when pr_cost was False.
        if pr_cost and checkpoint:
            print("{} - Cost = {}".format(i,np.squeeze(cost)))
        if checkpoint:
            costs.append(cost)
    return parameters, costs

In [753]:
def predict_test(X, y, parameters):
    """Predict binary labels for ``X`` and print the match rate against ``y``.

    Parameters
    ----------
    X : array
        Inputs with one sample per column.
    y : array or scalar
        Values the predictions are compared with. The printed number is the
        fraction of predictions equal to ``y``; with a scalar ``y`` it is
        simply the share of samples predicted as that value, even though the
        label still reads "Accuracy".
    parameters : dict
        Trained parameters for ``NN_model_forward``.

    Returns
    -------
    array, shape (1, m)
        1.0 where the predicted probability is above 0.5, otherwise 0.0.
    """
    m = X.shape[1]
    probas, _ = NN_model_forward(X, parameters)
    # Vectorized threshold on the first (only) output row instead of a Python loop.
    p = np.where(probas[0:1, :] > 0.5, 1.0, 0.0)
    print("Accuracy: "  + str(np.sum((p == y)/m)))

    return p

In [481]:
def prepare_data(df):
    """Turn a labelled frame into ``(X, y)`` arrays for the network.

    Every feature column is divided by its own maximum within ``df``. A column
    whose maximum is 0 is left unscaled; dividing by it would turn an all-zero
    column into NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric frame containing a ``'Survived'`` column.

    Returns
    -------
    tuple
        ``X`` with shape ``(n_features, n_rows)`` and ``y`` with shape
        ``(1, n_rows)``.
    """
    X = df.drop(columns = 'Survived').values
    y = df['Survived'].values.reshape((-1,1))
    maxX = np.max(X,axis=0)
    # Guard against division by zero for columns whose maximum is 0.
    scale = np.where(maxX == 0, 1, maxX)
    X = X / scale
    return X.T,y.T
def prepare_test_data(df):
    """Turn an unlabelled frame into the feature array for the network.

    Every column is divided by its own maximum within ``df``. A column whose
    maximum is 0 is left unscaled; dividing by it would turn an all-zero
    column into NaN.

    Returns
    -------
    array
        Features with shape ``(n_features, n_rows)``.
    """
    X = df.values
    maxX = np.max(X,axis=0)
    # Guard against division by zero for columns whose maximum is 0.
    scale = np.where(maxX == 0, 1, maxX)
    X = X / scale
    return X.T

In [815]:
# Load the labelled and unlabelled CSV files; paths are relative to the
# notebook's working directory.
df = pd.read_csv('Data/train.csv')
df_test = pd.read_csv('Data/test.csv')

In [816]:
# Columns removed from both frames (in place) before any further processing.
unused_columns = ['Cabin', 'Name', 'PassengerId', 'Ticket']
for frame in (df, df_test):
    frame.drop(columns=unused_columns, inplace=True)

In [817]:
# Fill missing ages with the mean age of the same passenger class, computed
# separately inside each frame.
for frame in (df, df_test):
    for pclass in (1, 2, 3):
        in_class = frame['Pclass'] == pclass
        class_ages = frame.loc[in_class, 'Age']
        frame.loc[in_class, 'Age'] = class_ages.fillna(class_ages.mean())

# Missing embarkation ports in the training frame default to 'S'.
df['Embarked'] = df['Embarked'].fillna('S')

# Missing fares in the test frame are replaced by the test frame's mean fare.
df_test['Fare'] = df_test['Fare'].fillna(df_test['Fare'].mean())

In [818]:
# Divide the fare by the travelling party size (siblings/spouses + parents/children + self).
for frame in (df, df_test):
    party_size = frame['SibSp'] + frame['Parch'] + 1
    frame['Fare'] = frame['Fare'] / party_size

In [819]:
# Sanity check: per-column count of missing values in both frames, printed in one call.
print(*(frame.isnull().sum() for frame in (df, df_test)))

Survived    0
Pclass      0
Sex         0
Age         0
SibSp       0
Parch       0
Fare        0
Embarked    0
dtype: int64 Pclass      0
Sex         0
Age         0
SibSp       0
Parch       0
Fare        0
Embarked    0
dtype: int64


In [820]:
# Replace every non-numeric column with integer category codes. Each frame is
# encoded independently, so the codes only line up between the two frames when
# both contain the same set of values in a column.
# The former intermediate `astype("category").cat.as_ordered()` assignment was
# overwritten on the very next line and has been removed.
for frame in (df, df_test):
    for label, content in frame.items():
        if not pd.api.types.is_numeric_dtype(content):
            frame[label] = pd.Categorical(content).codes

In [821]:
# Engineered feature: squared passenger class scaled by (encoded sex + 1).
for frame in (df, df_test):
    pclass_squared = frame['Pclass'] * frame['Pclass']
    frame['S_P_W'] = pclass_squared * (frame['Sex'] + 1)

In [961]:
# Hold out 30% of the rows for validation; the fixed random_state makes the
# split reproducible without touching the global NumPy seed.
n_valid = int(len(df) * 0.3)
valid = df.sample(n=n_valid, random_state=77)
train = df.drop(index=valid.index)

In [962]:
# Convert each frame to network inputs. Every call scales columns by the maxima
# of the frame it receives, so the splits are not scaled by a shared reference.
X_train, y_train = prepare_data(train)
X_valid, y_valid = prepare_data(valid)
# NOTE: X_Train / y_Valid (capitalised) hold the features and labels of the FULL
# labelled frame `df`; they differ from X_train / y_valid only by letter case.
X_Train, y_Valid = prepare_data(df)
test = prepare_test_data(df_test)

In [963]:
# Layer sizes: input width (number of feature rows) followed by three hidden
# layers and a single output unit.
# layer_dims_dev is not referenced elsewhere in the visible part of the notebook.
layer_dims_dev=[X_train.shape[0],21,12,5,1]
layer_dims=[X_train.shape[0],25,13,6,1]

In [965]:
# Train on the training split. NN_model returns (parameters, costs), so `cache`
# here is the list of recorded costs. `paramerers` (sic) is the name the later
# cells rely on.
paramerers, cache = NN_model(X_train,y_train,layer_dims,learning_rate=0.025,iter_no=6100,pr_cost=True)

0 - Cost = 0.6357176344895739
1000 - Cost = 0.47471230449017554
2000 - Cost = 0.4382877076439965
3000 - Cost = 0.4203198891461802
4000 - Cost = 0.41151643192026816
5000 - Cost = 0.40487162393150167
6000 - Cost = 0.3987586702521141
6099 - Cost = 0.39823000038706174


In [966]:
# Calls with real labels print the share of predictions that match them.
pred_T = predict_test(X_train,y_train,paramerers)
# With y=1 the printed "Accuracy" is actually the fraction of samples predicted
# as 1, not an accuracy.
_ = predict_test(X_train,1,paramerers).astype(int)
pred_V = predict_test(X_valid,y_valid,paramerers)
_ = predict_test(X_valid,1,paramerers).astype(int)

Accuracy: 0.8397435897435896
Accuracy: 0.34455128205128205
Accuracy: 0.7940074906367042
Accuracy: 0.32958801498127344


In [969]:
# Retrain from scratch on the full labelled data (X_Train / y_Valid come from
# prepare_data(df)), overwriting the parameters fitted on the training split.
paramerers, cache = NN_model(X_Train,y_Valid,layer_dims,learning_rate=0.05,iter_no=6100,pr_cost=True)

0 - Cost = 0.6294604467817047
1000 - Cost = 0.4315565559379658
2000 - Cost = 0.4071065350023799
3000 - Cost = 0.40434692666378486
4000 - Cost = 0.4019374606482131
5000 - Cost = 0.3967457951886351
6000 - Cost = 0.395354588157561
6099 - Cost = 0.3921182632926092


In [970]:
# The test set has no labels; y=1 is a placeholder, so the printed "Accuracy"
# is only the fraction of passengers predicted as 1.
prediction = predict_test(test,1,paramerers).astype(int)

Accuracy: 0.28708133971291866


In [971]:
# Re-read the test file to recover PassengerId, which was dropped from df_test earlier.
write= pd.read_csv("Data/test.csv")
# prediction has shape (1, m); row 0 is the per-passenger label vector.
result=pd.DataFrame({"PassengerId":write['PassengerId'],"Survived":prediction[0]})
result.to_csv("resultNN.csv", index=False)

In [972]:
# Display the full prediction array as the cell output.
prediction

array([[0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0,
        1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
        1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1,
        1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0,
        1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
        0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
        0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,
        1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
        0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0,
        1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
        0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
        0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 