# Neural Network Titanic Data approach

This is my initial attempt to get a Neural Net up and running around the titanic data.

In [70]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report,confusion_matrix

In [7]:
passengers_data = pd.read_csv("titanic_data/train.csv", index_col="PassengerId")
passengers_data.head()

Unnamed: 0_level_0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


The first thing we can do is look at gender and age. We can translater `gender` to `isMale` and we can scale and normalize `age`.

In [15]:
passengers_data[["Survived", "Sex", "Age"]].head()

Unnamed: 0_level_0,Survived,Sex,Age
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,0,male,22.0
2,1,female,38.0
3,1,female,26.0
4,1,female,35.0
5,0,male,35.0


In [21]:
features = passengers_data[["Sex", "Age"]]
result = passengers_data[["Survived"]]

X_train, X_test, y_train, y_test = train_test_split(features, result)

In [38]:
scaler = StandardScaler()
X_train_only_age = X_train[["Age"]].fillna(value=0, axis=1)
# X_train_only_age.Age.unique()
scaler.fit(X_train_only_age)
scaled_X_train_only_age = scaler.transform(X_train_only_age)
scaled_X_test_only_age = scaler.transform(X_test[["Age"]].fillna(value=0, axis=1))

print(scaled_X_train_only_age.shape)
print(scaled_X_test_only_age.shape)

(668, 1)
(223, 1)


In [52]:
X_train_is_male = X_train[["Sex"]].replace({"Sex": {"male": True, "female": False}}).values
X_test_is_male = X_test[["Sex"]].replace({"Sex": {"male": True, "female": False}}).values


X_test_is_male.shape

(223, 1)

In [64]:
import numpy as np

a = np.array([[1], [2], [3]])
b = np.array([[4], [5], [6]])

X_train_scaled = np.concatenate((scaled_X_train_only_age, X_train_is_male), axis=1)
X_test_scaled = np.concatenate((scaled_X_test_only_age, X_test_is_male), axis=1)

X_train_scaled.shape

(668, 2)

In [66]:
mlp = MLPClassifier(hidden_layer_sizes=(13),max_iter=500,verbose=True)
mlp.fit(X_train_scaled,y_train)

Iteration 1, loss = 0.69693142
Iteration 2, loss = 0.69251794
Iteration 3, loss = 0.68859848
Iteration 4, loss = 0.68472816
Iteration 5, loss = 0.68119525
Iteration 6, loss = 0.67776609
Iteration 7, loss = 0.67467497
Iteration 8, loss = 0.67162491
Iteration 9, loss = 0.66869537
Iteration 10, loss = 0.66590128
Iteration 11, loss = 0.66308964
Iteration 12, loss = 0.66048421
Iteration 13, loss = 0.65810817
Iteration 14, loss = 0.65550279
Iteration 15, loss = 0.65300476
Iteration 16, loss = 0.65063779
Iteration 17, loss = 0.64825791
Iteration 18, loss = 0.64597722
Iteration 19, loss = 0.64360583
Iteration 20, loss = 0.64145640
Iteration 21, loss = 0.63920050
Iteration 22, loss = 0.63704875
Iteration 23, loss = 0.63496230
Iteration 24, loss = 0.63280947
Iteration 25, loss = 0.63095469
Iteration 26, loss = 0.62885212
Iteration 27, loss = 0.62693646
Iteration 28, loss = 0.62503501
Iteration 29, loss = 0.62306251
Iteration 30, loss = 0.62111932
Iteration 31, loss = 0.61920767
Iteration 32, los

  y = column_or_1d(y, warn=True)


MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=13, learning_rate='constant',
       learning_rate_init=0.001, max_iter=500, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=True, warm_start=False)

In [68]:
predictions = mlp.predict(X_test_scaled)
predictions

array([0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1,
       0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0,
       1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1,
       0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0,
       1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1,
       0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0,
       1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0])

In [71]:
print(classification_report(y_test,predictions))

             precision    recall  f1-score   support

          0       0.77      0.83      0.80       130
          1       0.73      0.65      0.69        93

avg / total       0.75      0.75      0.75       223

