In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib notebook

In [2]:
df = pd.read_csv('train.csv')
df_test = pd.read_csv('test.csv')

In [3]:
df = df.drop(['Name', 'Ticket', 'Cabin', 'Fare'], axis= 1).set_index('PassengerId')
df_test = df_test.drop(['Name', 'Ticket', 'Cabin', 'Fare'], axis= 1).set_index('PassengerId')

In [4]:
df['Age'] = df['Age'].fillna(df['Age'].median())
df_test['Age'] = df_test['Age'].fillna(df_test['Age'].median())

In [5]:
df['Embarked'] = df['Embarked'].fillna('S')
df_test['Embarked'] = df_test['Embarked'].fillna('S')

In [6]:
X_df = df.drop('Survived', axis = 1)
Y = df['Survived'].copy()

In [7]:
X_df = X_df.join(pd.get_dummies(X_df[['Embarked', 'Sex']])).drop(['Embarked', 'Sex'], axis = 1)
df_test = df_test.join(pd.get_dummies(df_test[['Embarked', 'Sex']])).drop(['Embarked', 'Sex'], axis = 1)

In [8]:
age_group = [-1,0,5,12,18,35,60,100]
age_label = ["Missing","Infant","Child","Teenager","Young Adult","Adult","Senior"]

X_df['Age'] = pd.cut(X_df['Age'], age_group, labels=age_label)
X_df = X_df.join(pd.get_dummies(X_df['Age'])).drop('Age', axis = 1)

df_test['Age'] = pd.cut(df_test['Age'], age_group, labels=age_label)
df_test = df_test.join(pd.get_dummies(df_test['Age'])).drop('Age', axis = 1)

In [10]:
X_train

Unnamed: 0_level_0,Pclass,SibSp,Parch,Embarked_C,Embarked_Q,Embarked_S,Sex_female,Sex_male,Missing,Infant,Child,Teenager,Young Adult,Adult,Senior
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
47,3,1,0,0,1,0,0,1,0,0,0,0,1,0,0
228,3,0,0,0,0,1,0,1,0,0,0,0,1,0,0
185,3,0,2,0,0,1,1,0,0,1,0,0,0,0,0
206,3,0,1,0,0,1,1,0,0,1,0,0,0,0,0
844,3,0,0,1,0,0,0,1,0,0,0,0,1,0,0
4,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0
454,1,1,0,1,0,0,0,1,0,0,0,0,0,1,0
67,2,0,0,0,0,1,1,0,0,0,0,0,1,0,0
176,3,1,1,0,0,1,0,1,0,0,0,1,0,0,0
714,3,0,0,0,0,1,0,1,0,0,0,0,1,0,0


In [11]:
Y_train

PassengerId
47     0
228    0
185    1
206    0
844    0
4      1
454    1
67     1
176    0
714    0
362    0
586    1
243    0
823    0
798    1
477    0
653    0
578    1
700    0
127    0
640    0
795    0
505    1
60     0
890    1
2      1
319    1
680    1
311    1
58     0
      ..
94     0
292    1
478    0
796    0
356    0
32     1
368    1
760    1
778    1
886    0
15     0
554    1
272    1
180    0
711    1
9      1
660    0
30     0
451    0
410    0
487    1
597    1
889    0
317    1
647    0
267    0
513    1
812    0
107    1
721    1
Name: Survived, Length: 801, dtype: int64

In [9]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X_df, Y, train_size = 0.9)



In [417]:
from sklearn.neural_network import MLPClassifier
clf = MLPClassifier(hidden_layer_sizes=(100,100,100,100), learning_rate_init=0.0001)

In [418]:
clf.fit(X_train, Y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100, 100, 100, 100), learning_rate='constant',
       learning_rate_init=0.0001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [419]:
clf.score(X_train, Y_train)

0.8064918851435705

In [420]:
clf.score(X_test, Y_test)

0.8111111111111111

In [422]:
pd.DataFrame(clf.predict(df_test), columns=['Survived']).set_index(df_test.index).to_csv('Titanic_1')

In [425]:
clf.predict(df_test)

array([0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0,
       1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1,
       1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1,
       0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1,
       1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0,
       1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
       0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,