
# Titanic

In [31]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [32]:
test_df = pd.read_csv('test.csv', index_col='PassengerId')
train_df = pd.read_csv('train.csv', index_col='PassengerId')

In [33]:
train.isna().sum()

Survived      0
Pclass        0
Name          0
Sex           0
Age         177
SibSp         0
Parch         0
Ticket        0
Fare          0
Cabin       687
Embarked      2
dtype: int64

In [34]:
# fill in missing values

#mean of age 
print('The mean of age is %.2f' % train_df['Age'].mean())
#media of age 
print('The median of age is %.2f' % train_df['Age'].median())

train_data = train_df.copy()
train_data['Age'].fillna(train_df['Age'].median(), inplace= True)
train_data['Embarked'].fillna(train_df['Embarked'].value_counts().idxmax(), inplace=True)
train_data.drop('Cabin', axis = 1)

The mean of age is 29.70
The median of age is 28.00


Unnamed: 0_level_0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,S
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C
3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,S
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,S
5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,S
...,...,...,...,...,...,...,...,...,...,...
887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,S
888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,S
889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,28.0,1,2,W./C. 6607,23.4500,S
890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C


In [71]:
combined = [test_set_df, train_set_df]

In [72]:
rep = {"male":0,"female":1}
for x in combined:
    x['Sex']=x['Sex'].map(rep)

In [73]:
train_set_df.head()

Unnamed: 0_level_0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,0,3,"Braund, Mr. Owen Harris",0,22.0,1,0,A/5 21171,7.25,,S
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,38.0,1,0,PC 17599,71.2833,C85,C
3,1,3,"Heikkinen, Miss. Laina",1,26.0,0,0,STON/O2. 3101282,7.925,,S
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,35.0,1,0,113803,53.1,C123,S
5,0,3,"Allen, Mr. William Henry",0,35.0,0,0,373450,8.05,,S


In [49]:
train_y_df = train_set_df['Survived']
train_x_df = train_set_df.drop('Survived', 1)
test_x_df = test_set_df

train_x = train_x_df.to_numpy()
train_y = train_y_df.to_numpy()
test_x = test_x_df.to_numpy()

In [50]:
test_x.shape, test_y.shape

((418, 11), (891,))

In [51]:
def sigmoid(x):
    return 1/(1+np.exp(-x))

In [52]:
def initialize_with_zeros(dim):
    w = np.zeros((dim,1))
    b = 0
    
    assert(w.shape == (dim,1))
    assert(isinstance(b,float) or isinstance(b,int))
    
    return w,b

In [59]:
def propogate(w, b, X, Y):
    m = X.shape[1]
    
    print(w)
    print(X)
    A = sigmoid(np.dot(w.T, X) + b)
    cost = -(1/m)*np.sum(Y*np.log(A)+(1-Y)*np.log(1-A))
    
    dw = np.dot((1/m)*X,(A-Y).T)
    db = 1/m*np.sum(A-Y)
    
    assert(dw.shape == w.shape)
    assert(db.dtype == float)
    cost = np.squeeze(cost)
    assert(cost.shape == ())
    
    grads = {'dw': dw, 'db': db}
    
    return grads

In [54]:
def optimize(w, b, X, Y, num_iterations, lr, print_cost = False):
    costs = []

    for i in range(num_iterations):
        grads, cost = propogate(w, b, X, Y)
        
        dw = grads['dw']
        db = grads['db']
        
        w = w - lr*dw
        b = b - lr*db
        
        if i % 100 == 0:
            costs.append(cost)
            
        if print_cost and i%100==0:
            print('Cost after iteration %i: %f' %(i, cost))
            
        params = {'w': w, 'b': b}
        grads = {'dw': dw, 'db': db}
        
        return params, grads, costs

In [55]:
def predict(w, b, X):
    m = x.shape[1]
    y_prediction = np.zeros((1,m))
    w = w.reshape(x.shape[0],1)
    
    A = sigmoid(np.dot(w.T, X) + b)
    
    for i in range(A.shape[1]):
        y_prediction[0,i] = 1 if A[0,i] > 0.5 else 0
    
    assert(y_prediction == (1,m))
    
    return y_prediction

In [56]:
def model(x_train, y_train, x_test, num_iterations=2000, lr=0.5, print_cost=True):
    w,b = initialize_with_zeros(x_train.shape[0])
    
    params, grads, costs = optimize(w, b, x_train, y_train, num_iterations, lr, print_cost=print_cost)
    
    w = params['w']
    b = params['b']
    
#     y_prediction_test = predict(w, b, x_test)
    y_prediction_train = predict(w, b, x_train)
    
    print("Train acc: {} %".format(100 - np.mean(np.abs(y_prediction_train - y_train)) * 100))
#     print("Test acc: {} %".format(100 - np.mean(np.abs(y_prediction_test - y_test)) * 100))
    
    d = {'costs': costs, 'y_prediction_train': y_prediction_train, 'w': w, 'b': b, 'learning_rate': lr, 
         'num_iterations': num_iterations}
    
    return d

In [60]:
d = model(train_x, train_y, test_x)

[[0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.

TypeError: can't multiply sequence by non-int of type 'float'