In [1]:
import numpy as np
import pandas as pd

from nutshell import ModelData, Learner, Predictor

Using TensorFlow backend.


## Train model

In [2]:
# Load the Titanic training set and describe its columns to ModelData.
data = ModelData(pd.read_csv('titanic_train.csv'))
# 'Survived' is listed as a category column here and also set as the label below.
data.category_columns = ['Survived', 'Pclass', 'Sex', 'Embarked']
data.numeric_columns = ['Age', 'SibSp', 'Parch', 'Fare']
data.label_column = 'Survived'
data.key_column = 'PassengerId'
# Per the cell output: tokenizes the category columns, then imputes and scales
# the numeric columns.
data.prepare_data()
# Hold out 10% of the rows for validation (802 training / 89 validation
# examples in the recorded run).
data.validation_split = .10
# NOTE(review): shuffle=True with no seed visible in this notebook, so the
# train/validation split may differ between runs. Confirm whether ModelData
# accepts a seed.
data.split_data(shuffle=True)

Tokenizing category columns...
Survived 2
Pclass 3
Sex 2
Embarked 4
Imputing and scaling numeric columns...
Age
SibSp
Parch
Fare
Done preparing data
Training examples: 802
Validation examples: 89


In [8]:
# Configure a Learner on the prepared training data for the binary label.
learner = Learner(data)
learner.label_type = 'binary'
# NOTE(review): presumably the number of hidden layers and the dropout fraction
# used in them. Confirm against nutshell's Learner.
learner.hidden_layers = 1
learner.dropout_rate = .30
# Builds the model; the cell output shows the resulting layer summary.
learner.build_model()

Non-Sequential Merge Layer Shape: (?, 11)
____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_Pclass (InputLayer)        (None, 1)             0                                            
____________________________________________________________________________________________________
input_Sex (InputLayer)           (None, 1)             0                                            
____________________________________________________________________________________________________
input_Embarked (InputLayer)      (None, 1)             0                                            
____________________________________________________________________________________________________
embed_Pclass (Embedding)         (None, 1, 2)          10          input_Pclass[0][0]               
_________________________________________________

In [9]:
# Train for up to 10 epochs in each of 4 "super epochs". In the recorded run the
# learning rate drops tenfold per super epoch (0.001 -> 1e-06) and the later
# super epochs stop before reaching epoch 10.
# NOTE(review): presumably `filename` is the prefix of the saved 'titanic_model'
# and 'titanic_settings' artifacts that the scoring cells load. Confirm.
learner.train_model(filename='titanic', epochs=10, super_epochs=4)

Super Epoch: 1
Learning Rate: 0.001
Train on 802 samples, validate on 89 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Super Epoch: 2
Learning Rate: 0.0001
Train on 802 samples, validate on 89 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Super Epoch: 3
Learning Rate: 1e-05
Train on 802 samples, validate on 89 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Super Epoch: 4
Learning Rate: 1.0000000000000002e-06
Train on 802 samples, validate on 89 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


## Use trained model to score test data

In [10]:
# Prepare the test set for scoring. Passing settings_filename makes ModelData
# reuse the stored token map and impute/scale metadata (see the cell output)
# instead of deriving them from the test rows.
test_frame = pd.read_csv('titanic_test.csv')
test_data = ModelData(test_frame, settings_filename='titanic_settings')
test_data.prepare_data()

# Pair the saved 'titanic_model' with the prepared test data.
predictor = Predictor('titanic_model', test_data)


Tokenizing category columns...
** Using pre-defined token map **
Pclass 3
Sex 2
Embarked 4
Imputing and scaling numeric columns...
** Using pre-defined impute/scale metadata **
Age
SibSp
Parch
Fare
Done preparing data


In [11]:
# Score the prepared test rows. The 'score' column read below is expected to be
# populated by this call (it appears in the printed output).
predictor.score()

# Convert the probability score to a 0 or 1 for submission.
# np.where is vectorized (no per-row Python lambda) and keeps the original rule:
# score < threshold -> 0, everything else -> 1.
SURVIVAL_THRESHOLD = .5
scored = predictor.modeldata.prep_data
scored['Survived'] = np.where(scored['score'] < SURVIVAL_THRESHOLD, 0, 1)

# Preview the first ten predictions.
print(scored[['PassengerId', 'score', 'Survived']].head(10))

 32/418 [=>............................] - ETA: 1s
Done scoring
   PassengerId     score  Survived
0          892  0.169405         0
1          893  0.587477         1
2          894  0.217067         0
3          895  0.177683         0
4          896  0.502400         1
5          897  0.243897         0
6          898  0.702010         1
7          899  0.354489         0
8          900  0.713587         1
9          901  0.146013         0


In [13]:
# Write the submission file: pass the PassengerId and Survived columns of the
# scored test data to write_csv, targeting 'titanic_submission1.csv'.
# NOTE(review): relies on the 'Survived' column assigned in the scoring cell,
# so that cell must have run first.
predictor.modeldata.write_csv(['PassengerId', 'Survived'], 'titanic_submission1.csv')