In [1]:
import numpy as np
import pandas as pd
import os

In [2]:
from sklearn import preprocessing as pp

In [3]:
from sklearn import cross_validation as cv
from sklearn import metrics
from sklearn.cross_validation import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression



In [5]:
from sklearn.svm import SVC
from sklearn import tree
from sklearn.cross_validation import cross_val_score

In [6]:
def get_final_output(clf, X, y, X_test, out_file):
    clf.fit(X, y)
    y_ = clf.predict(X_test)
    y_results = pd.DataFrame()
    
    y_results["PassengerId"] = X_test.index
    y_results["Survived"] = y_
    
    if(out_file):
        y_results.to_csv(out_file, encoding='utf-8', index=False)

In [7]:
def report_train_accuracy(clf, X, y):
    clf.fit(X, y)
    y_pred = clf.predict(X)
    return metrics.accuracy_score(y, y_pred)

In [8]:
def report_train_test_accuracy(clf, X, y, test_size, random_state):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= test_size, random_state = random_state)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    return metrics.accuracy_score(y_test, y_pred)

### Strip input data 

In [360]:
train = pd.read_csv("train.csv", index_col=0)
test = pd.read_csv("test.csv", index_col=0)
print(train.shape, test.shape)

(891, 11) (418, 10)


In [361]:
train.head()

Unnamed: 0_level_0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [362]:
cols_to_drop = ["Name", "Ticket", "Cabin"]

In [363]:
train.drop(cols_to_drop, axis= 1, inplace=True)
test.drop(cols_to_drop, axis= 1, inplace=True)

In [364]:
gender_mapping = {'female':0, 'male':1}
train['Sex'] = train['Sex'].map(gender_mapping)
test['Sex'] = test['Sex'].map(gender_mapping)

In [365]:
embarked_mapping = {'S':0, 'C':1, 'Q': 2}
train['Embarked'] = train['Embarked'].map(embarked_mapping)
test['Embarked'] = test['Embarked'].map(embarked_mapping)

In [366]:
test.describe()

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
count,418.0,418.0,332.0,418.0,418.0,417.0,418.0
mean,2.26555,0.636364,30.27259,0.447368,0.392344,35.627188,0.464115
std,0.841838,0.481622,14.181209,0.89676,0.981429,55.907576,0.685516
min,1.0,0.0,0.17,0.0,0.0,0.0,0.0
25%,1.0,0.0,21.0,0.0,0.0,7.8958,0.0
50%,3.0,1.0,27.0,0.0,0.0,14.4542,0.0
75%,3.0,1.0,39.0,1.0,0.0,31.5,1.0
max,3.0,1.0,76.0,8.0,9.0,512.3292,2.0


In [367]:
train.describe()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
count,891.0,891.0,891.0,714.0,891.0,891.0,891.0,889.0
mean,0.383838,2.308642,0.647587,29.699118,0.523008,0.381594,32.204208,0.362205
std,0.486592,0.836071,0.47799,14.526497,1.102743,0.806057,49.693429,0.636157
min,0.0,1.0,0.0,0.42,0.0,0.0,0.0,0.0
25%,0.0,2.0,0.0,20.125,0.0,0.0,7.9104,0.0
50%,0.0,3.0,1.0,28.0,0.0,0.0,14.4542,0.0
75%,1.0,3.0,1.0,38.0,1.0,0.0,31.0,1.0
max,1.0,3.0,1.0,80.0,8.0,6.0,512.3292,2.0


#### Dealing with missing values

In [368]:
test.Age.fillna(inplace=True, value=test.Age.mean())
test.Fare.fillna(inplace=True, value=test.Fare.mean())

In [369]:
test.describe()

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
count,418.0,418.0,418.0,418.0,418.0,418.0,418.0
mean,2.26555,0.636364,30.27259,0.447368,0.392344,35.627188,0.464115
std,0.841838,0.481622,12.634534,0.89676,0.981429,55.8405,0.685516
min,1.0,0.0,0.17,0.0,0.0,0.0,0.0
25%,1.0,0.0,23.0,0.0,0.0,7.8958,0.0
50%,3.0,1.0,30.27259,0.0,0.0,14.4542,0.0
75%,3.0,1.0,35.75,1.0,0.0,31.5,1.0
max,3.0,1.0,76.0,8.0,9.0,512.3292,2.0


In [370]:
train.dropna(inplace=True)
test.dropna(inplace=True)
print(train.shape, test.shape)

(712, 8) (418, 7)


In [371]:
y = train.Survived
train.drop("Survived", axis= 1, inplace=True)
X = train
X_final = test

In [372]:
print(X.shape, X_final.shape, y.shape)

(712, 7) (418, 7) (712,)


In [373]:
y.head()

PassengerId
1    0
2    1
3    1
4    1
5    0
Name: Survived, dtype: int64

###  using NN to imporve the results 

In [374]:
import keras

In [375]:
keras.__version__

'2.0.9'

In [376]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from keras.optimizers import SGD

In [377]:
batch_size = 16    #we have only 712 elements
num_classes = 2    #Survived or not
epochs=200

In [378]:
test_size = 0.2
random_state = 4

In [379]:
X.max().astype(np.float64)

Pclass        3.0000
Sex           1.0000
Age          80.0000
SibSp         5.0000
Parch         6.0000
Fare        512.3292
Embarked      2.0000
dtype: float64

In [380]:
X_max = X.append(X_final).max().astype(np.float64)
print(X_max)
X_scaled = X/X.max().astype(np.float64)
X_final  = X_final/X.max().astype(np.float64)

Pclass        3.0000
Sex           1.0000
Age          80.0000
SibSp         8.0000
Parch         9.0000
Fare        512.3292
Embarked      2.0000
dtype: float64


In [381]:
X_scaled.head()

Unnamed: 0_level_0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1.0,1.0,0.275,0.2,0.0,0.014151,0.0
2,0.333333,0.0,0.475,0.2,0.0,0.139136,0.5
3,1.0,0.0,0.325,0.0,0.0,0.015469,0.0
4,0.333333,0.0,0.4375,0.2,0.0,0.103644,0.0
5,1.0,1.0,0.4375,0.0,0.0,0.015713,0.0


In [382]:
y.head()

PassengerId
1    0
2    1
3    1
4    1
5    0
Name: Survived, dtype: int64

In [383]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size= test_size, random_state = random_state)

In [384]:
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
print(type(X_train), type(X_test), type(y_train), type(y_test))

(569, 7) (143, 7) (569,) (143,)
<class 'pandas.core.frame.DataFrame'> <class 'pandas.core.frame.DataFrame'> <class 'pandas.core.series.Series'> <class 'pandas.core.series.Series'>


In [385]:
y_train = keras.utils.to_categorical(np.array(y_train), num_classes)
y_test = keras.utils.to_categorical(np.array(y_test), num_classes)

#y_train = np.array(y_train)
#y_test = np.array(y_test)

In [386]:
X_train = (np.matrix(X_train))
X_test = (np.matrix(X_test))

In [387]:
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
print(type(X_train), type(X_test), type(y_train), type(y_test))

(569, 7) (143, 7) (569, 2) (143, 2)
<class 'numpy.matrixlib.defmatrix.matrix'> <class 'numpy.matrixlib.defmatrix.matrix'> <class 'numpy.ndarray'> <class 'numpy.ndarray'>


In [388]:
y_train

array([[ 0.,  1.],
       [ 1.,  0.],
       [ 1.,  0.],
       ..., 
       [ 0.,  1.],
       [ 0.,  1.],
       [ 1.,  0.]])

###  Build the Architecture

In [389]:
#Instatiaate a model
model = Sequential()

In [390]:
#Add later to it
model.add(Dense(70, activation="relu", input_shape=(7,)))
model.add(Dense(70, activation="relu", input_shape=(70,)))
model.add(Dense(70, activation="relu", input_shape=(70,)))
model.add(Dense(70, activation="relu", input_shape=(70,)))
model.add(Dense(2, activation='softmax'))

In [391]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_69 (Dense)             (None, 70)                560       
_________________________________________________________________
dense_70 (Dense)             (None, 70)                4970      
_________________________________________________________________
dense_71 (Dense)             (None, 70)                4970      
_________________________________________________________________
dense_72 (Dense)             (None, 70)                4970      
_________________________________________________________________
dense_73 (Dense)             (None, 2)                 142       
Total params: 15,612
Trainable params: 15,612
Non-trainable params: 0
_________________________________________________________________


In [392]:
sgd = SGD(lr= 0.01)

In [393]:
#Complie the model for running
model.compile(loss='categorical_crossentropy',
             optimizer = sgd,
             metrics =['accuracy'])

In [394]:
model.fit(X_train, y_train, batch_size= batch_size, epochs=180, verbose=1, validation_data=(X_test, y_test))

Train on 569 samples, validate on 143 samples
Epoch 1/180
Epoch 2/180
Epoch 3/180
Epoch 4/180
Epoch 5/180
Epoch 6/180
Epoch 7/180
Epoch 8/180
Epoch 9/180
Epoch 10/180
Epoch 11/180
Epoch 12/180
Epoch 13/180
Epoch 14/180
Epoch 15/180
Epoch 16/180
Epoch 17/180
Epoch 18/180
Epoch 19/180
Epoch 20/180
Epoch 21/180
Epoch 22/180
Epoch 23/180
Epoch 24/180
Epoch 25/180
Epoch 26/180
Epoch 27/180
Epoch 28/180
Epoch 29/180
Epoch 30/180
Epoch 31/180
Epoch 32/180
Epoch 33/180
Epoch 34/180
Epoch 35/180
Epoch 36/180
Epoch 37/180
Epoch 38/180
Epoch 39/180
Epoch 40/180
Epoch 41/180
Epoch 42/180
Epoch 43/180
Epoch 44/180
Epoch 45/180
Epoch 46/180
Epoch 47/180
Epoch 48/180
Epoch 49/180
Epoch 50/180
Epoch 51/180
Epoch 52/180
Epoch 53/180
Epoch 54/180
Epoch 55/180
Epoch 56/180
Epoch 57/180
Epoch 58/180
Epoch 59/180
Epoch 60/180
Epoch 61/180


Epoch 62/180
Epoch 63/180
Epoch 64/180
Epoch 65/180
Epoch 66/180
Epoch 67/180
Epoch 68/180
Epoch 69/180
Epoch 70/180
Epoch 71/180
Epoch 72/180
Epoch 73/180
Epoch 74/180
Epoch 75/180
Epoch 76/180
Epoch 77/180
Epoch 78/180
Epoch 79/180
Epoch 80/180
Epoch 81/180
Epoch 82/180
Epoch 83/180
Epoch 84/180
Epoch 85/180
Epoch 86/180
Epoch 87/180
Epoch 88/180
Epoch 89/180
Epoch 90/180
Epoch 91/180
Epoch 92/180
Epoch 93/180
Epoch 94/180
Epoch 95/180
Epoch 96/180
Epoch 97/180
Epoch 98/180
Epoch 99/180
Epoch 100/180
Epoch 101/180
Epoch 102/180
Epoch 103/180
Epoch 104/180
Epoch 105/180
Epoch 106/180
Epoch 107/180
Epoch 108/180
Epoch 109/180
Epoch 110/180
Epoch 111/180
Epoch 112/180
Epoch 113/180
Epoch 114/180
Epoch 115/180
Epoch 116/180
Epoch 117/180
Epoch 118/180
Epoch 119/180
Epoch 120/180
Epoch 121/180


Epoch 122/180
Epoch 123/180
Epoch 124/180
Epoch 125/180
Epoch 126/180
Epoch 127/180
Epoch 128/180
Epoch 129/180
Epoch 130/180
Epoch 131/180
Epoch 132/180
Epoch 133/180
Epoch 134/180
Epoch 135/180
Epoch 136/180
Epoch 137/180
Epoch 138/180
Epoch 139/180
Epoch 140/180
Epoch 141/180
Epoch 142/180
Epoch 143/180
Epoch 144/180
Epoch 145/180
Epoch 146/180
Epoch 147/180
Epoch 148/180
Epoch 149/180
Epoch 150/180
Epoch 151/180
Epoch 152/180
Epoch 153/180
Epoch 154/180
Epoch 155/180
Epoch 156/180
Epoch 157/180
Epoch 158/180
Epoch 159/180
Epoch 160/180
Epoch 161/180
Epoch 162/180
Epoch 163/180
Epoch 164/180
Epoch 165/180
Epoch 166/180
Epoch 167/180
Epoch 168/180
Epoch 169/180
Epoch 170/180
Epoch 171/180
Epoch 172/180
Epoch 173/180
Epoch 174/180
Epoch 175/180
Epoch 176/180
Epoch 177/180
Epoch 178/180
Epoch 179/180
Epoch 180/180


<keras.callbacks.History at 0x7f14947a55c0>

In [395]:
score = model.evaluate(X_test, y_test, verbose=0)

In [396]:
print('Test Loss', score[0])
print('Test Accuracy', score[1])

Test Loss 0.395596502127
Test Accuracy 0.853146853564


### Retrain the model 

In [397]:
X_scaled = np.matrix(X_scaled)
y_scaled = keras.utils.to_categorical(np.array(y), num_classes)
print(type(X_scaled), type(y_scaled))

<class 'numpy.matrixlib.defmatrix.matrix'> <class 'numpy.ndarray'>


In [398]:
model = Sequential()

#Add later to it
model.add(Dense(70, activation="relu", input_shape=(7,)))
model.add(Dense(70, activation="relu", input_shape=(70,)))
model.add(Dense(70, activation="relu", input_shape=(70,)))
model.add(Dense(70, activation="relu", input_shape=(70,)))
model.add(Dense(2, activation='softmax'))

model.compile(loss='categorical_crossentropy',
             optimizer = sgd,
             metrics =['accuracy'])

model.fit(X_scaled, y_scaled, batch_size= batch_size, epochs=220, verbose=1)

Epoch 1/220
Epoch 2/220
Epoch 3/220
Epoch 4/220
Epoch 5/220
Epoch 6/220
Epoch 7/220
Epoch 8/220
Epoch 9/220
Epoch 10/220
Epoch 11/220
Epoch 12/220
Epoch 13/220
Epoch 14/220
Epoch 15/220
Epoch 16/220
Epoch 17/220
Epoch 18/220
Epoch 19/220
Epoch 20/220
Epoch 21/220
Epoch 22/220
Epoch 23/220
Epoch 24/220
Epoch 25/220
Epoch 26/220
Epoch 27/220
Epoch 28/220
Epoch 29/220
Epoch 30/220
Epoch 31/220
Epoch 32/220
Epoch 33/220
Epoch 34/220
Epoch 35/220
Epoch 36/220
Epoch 37/220
Epoch 38/220
Epoch 39/220
Epoch 40/220
Epoch 41/220
Epoch 42/220
Epoch 43/220
Epoch 44/220
Epoch 45/220
Epoch 46/220
Epoch 47/220
Epoch 48/220
Epoch 49/220
Epoch 50/220
Epoch 51/220
Epoch 52/220
Epoch 53/220
Epoch 54/220
Epoch 55/220
Epoch 56/220
Epoch 57/220
Epoch 58/220
Epoch 59/220
Epoch 60/220
Epoch 61/220
Epoch 62/220
Epoch 63/220
Epoch 64/220
Epoch 65/220
Epoch 66/220
Epoch 67/220
Epoch 68/220
Epoch 69/220
Epoch 70/220
Epoch 71/220
Epoch 72/220
Epoch 73/220
Epoch 74/220
Epoch 75/220
Epoch 76/220
Epoch 77/220
Epoch 78

Epoch 83/220
Epoch 84/220
Epoch 85/220
Epoch 86/220
Epoch 87/220
Epoch 88/220
Epoch 89/220
Epoch 90/220
Epoch 91/220
Epoch 92/220
Epoch 93/220
Epoch 94/220
Epoch 95/220
Epoch 96/220
Epoch 97/220
Epoch 98/220
Epoch 99/220
Epoch 100/220
Epoch 101/220
Epoch 102/220
Epoch 103/220
Epoch 104/220
Epoch 105/220
Epoch 106/220
Epoch 107/220
Epoch 108/220
Epoch 109/220
Epoch 110/220
Epoch 111/220
Epoch 112/220
Epoch 113/220
Epoch 114/220
Epoch 115/220
Epoch 116/220
Epoch 117/220
Epoch 118/220
Epoch 119/220
Epoch 120/220
Epoch 121/220
Epoch 122/220
Epoch 123/220
Epoch 124/220
Epoch 125/220
Epoch 126/220
Epoch 127/220
Epoch 128/220
Epoch 129/220
Epoch 130/220
Epoch 131/220
Epoch 132/220
Epoch 133/220
Epoch 134/220
Epoch 135/220
Epoch 136/220
Epoch 137/220
Epoch 138/220
Epoch 139/220
Epoch 140/220
Epoch 141/220
Epoch 142/220
Epoch 143/220
Epoch 144/220
Epoch 145/220
Epoch 146/220
Epoch 147/220
Epoch 148/220
Epoch 149/220
Epoch 150/220
Epoch 151/220
Epoch 152/220
Epoch 153/220
Epoch 154/220
Epoch 155

Epoch 163/220
Epoch 164/220
Epoch 165/220
Epoch 166/220
Epoch 167/220
Epoch 168/220
Epoch 169/220
Epoch 170/220
Epoch 171/220
Epoch 172/220
Epoch 173/220
Epoch 174/220
Epoch 175/220
Epoch 176/220
Epoch 177/220
Epoch 178/220
Epoch 179/220
Epoch 180/220
Epoch 181/220
Epoch 182/220
Epoch 183/220
Epoch 184/220
Epoch 185/220
Epoch 186/220
Epoch 187/220
Epoch 188/220
Epoch 189/220
Epoch 190/220
Epoch 191/220
Epoch 192/220
Epoch 193/220
Epoch 194/220
Epoch 195/220
Epoch 196/220
Epoch 197/220
Epoch 198/220
Epoch 199/220
Epoch 200/220
Epoch 201/220
Epoch 202/220
Epoch 203/220
Epoch 204/220
Epoch 205/220
Epoch 206/220
Epoch 207/220
Epoch 208/220
Epoch 209/220
Epoch 210/220
Epoch 211/220
Epoch 212/220
Epoch 213/220
Epoch 214/220
Epoch 215/220
Epoch 216/220
Epoch 217/220
Epoch 218/220
Epoch 219/220
Epoch 220/220


<keras.callbacks.History at 0x7f149462b7f0>

In [399]:
X_final = np.matrix(X_final)

print(X_final.shape, type(X_final), )

(418, 7) <class 'numpy.matrixlib.defmatrix.matrix'>


In [400]:
y_final_pred = model.predict_classes(X_final)



In [401]:
y_final_pred

array([0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1,
       1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,
       1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0,
       1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1,
       0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0,

In [420]:
y_final = pd.DataFrame()

In [421]:
print(y_final_pred.shape, y_final.shape)

(418,) (0, 0)


In [422]:
y_final.head()

In [423]:
y_final["PassengerId"] = test.index
y_final["Survived"] = pd.Series(y_final_pred)

In [424]:
y_final.head()

Unnamed: 0,PassengerId,Survived
0,892,0
1,893,0
2,894,0
3,895,0
4,896,1


In [425]:
y_final.to_csv("titanic_results_06_nn.csv", encoding='utf-8', index=False)