In [76]:
import pandas as pd
import numpy as np
import tensorflow as tf

In [77]:
dataset_df = pd.read_csv('./input/train.csv')

In [78]:
dataset_df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [79]:
dataset_df.describe()

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
count,891.0,891.0,891.0,714.0,891.0,891.0,891.0
mean,446.0,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208
std,257.353842,0.486592,0.836071,14.526497,1.102743,0.806057,49.693429
min,1.0,0.0,1.0,0.42,0.0,0.0,0.0
25%,223.5,0.0,2.0,20.125,0.0,0.0,7.9104
50%,446.0,0.0,3.0,28.0,0.0,0.0,14.4542
75%,668.5,1.0,3.0,38.0,1.0,0.0,31.0
max,891.0,1.0,3.0,80.0,8.0,6.0,512.3292


In [80]:
msk = np.random.rand(len(dataset_df)) < 0.9
train_df = dataset_df[msk]
test_df = dataset_df[~msk]

print("trains size is ", len(train_df.index))
print("test size is ", len(test_df.index))
print("total size is ", len(dataset_df.index))

trains size is  808
test size is  83
total size is  891


In [81]:
age_column = tf.feature_column.numeric_column(key="age")
sex_column = tf.feature_column.categorical_column_with_vocabulary_list(key="sex", vocabulary_list=["male", "female"])
pclass_column = tf.feature_column.categorical_column_with_identity(key="pclass", num_buckets=4)

feature_columns = [
    tf.feature_column.numeric_column(key="fare"),
    tf.feature_column.bucketized_column(source_column=age_column,boundaries=[12]),
    tf.feature_column.indicator_column(sex_column),
    tf.feature_column.indicator_column(pclass_column),
]

In [82]:
estimator = tf.estimator.DNNClassifier(
    n_classes=2,
    hidden_units=[10,10],
    feature_columns=feature_columns,
    model_dir="./output/logs"
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': './output/logs', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f750ffac198>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [83]:
def pd_input_fn(df, shuffle, batch):
    
    def input_fn():
        features = {
            "fare" : df.Fare.values.tolist(),
            "age" : df.Age.values.tolist(),
            "sex" : df.Sex.values.tolist(),
            "pclass" : df.Pclass.values.tolist()
        }
        labels = df.Survived.values.tolist()
        dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
        if shuffle:
            dataset = dataset.shuffle(1000)
        dataset = dataset.repeat().batch(batch)
        return dataset  
    
    return input_fn

In [100]:
estimator.train(input_fn=pd_input_fn(train_df, True, 300), steps=1000)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./output/logs/model.ckpt-6000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 6000 into ./output/logs/model.ckpt.
INFO:tensorflow:loss = 122.7231, step = 6001
INFO:tensorflow:global_step/sec: 453.843
INFO:tensorflow:loss = 122.89024, step = 6101 (0.221 sec)
INFO:tensorflow:global_step/sec: 596.847
INFO:tensorflow:loss = 107.30429, step = 6201 (0.168 sec)
INFO:tensorflow:global_step/sec: 616.288
INFO:tensorflow:loss = 129.3673, step = 6301 (0.162 sec)
INFO:tensorflow:global_step/sec: 552.136
INFO:tensorflow:loss = 116.63866, step = 6401 (0.182 sec)
INFO:tensorflow:global_step/sec: 561.043
INFO:tensorflow:loss = 121.26926, step = 6501 (0.179 sec)
INFO:tensorflow:global_step/sec: 602.676
INFO:tensorflow:loss = 112.69357, st

<tensorflow_estimator.python.estimator.canned.dnn.DNNClassifier at 0x7f750ffacdd8>

In [101]:
estimator.evaluate(input_fn=pd_input_fn(test_df, False, len(test_df.index)), steps=1)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-03-25T14:21:05Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./output/logs/model.ckpt-7000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [1/1]
INFO:tensorflow:Finished evaluation at 2019-03-25-14:21:05
INFO:tensorflow:Saving dict for global step 7000: accuracy = 0.85542166, accuracy_baseline = 0.6626506, auc = 0.862013, auc_precision_recall = 0.7791461, average_loss = 0.50701916, global_step = 7000, label/mean = 0.33734939, loss = 42.082592, precision = 0.76666665, prediction/mean = 0.40839216, recall = 0.8214286
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 7000: ./output/logs/model.ckpt-7000


{'accuracy': 0.85542166,
 'accuracy_baseline': 0.6626506,
 'auc': 0.862013,
 'auc_precision_recall': 0.7791461,
 'average_loss': 0.50701916,
 'global_step': 7000,
 'label/mean': 0.33734939,
 'loss': 42.082592,
 'precision': 0.76666665,
 'prediction/mean': 0.40839216,
 'recall': 0.8214286}

In [102]:
estimator.evaluate(input_fn=pd_input_fn(train_df, False, len(train_df.index)), steps=1)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-03-25T14:21:09Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./output/logs/model.ckpt-7000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [1/1]
INFO:tensorflow:Finished evaluation at 2019-03-25-14:21:10
INFO:tensorflow:Saving dict for global step 7000: accuracy = 0.82054454, accuracy_baseline = 0.6113862, auc = 0.8812791, auc_precision_recall = 0.8678181, average_loss = 0.393929, global_step = 7000, label/mean = 0.38861385, loss = 318.29465, precision = 0.8506224, prediction/mean = 0.37955648, recall = 0.65286624
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 7000: ./output/logs/model.ckpt-7000


{'accuracy': 0.82054454,
 'accuracy_baseline': 0.6113862,
 'auc': 0.8812791,
 'auc_precision_recall': 0.8678181,
 'average_loss': 0.393929,
 'global_step': 7000,
 'label/mean': 0.38861385,
 'loss': 318.29465,
 'precision': 0.8506224,
 'prediction/mean': 0.37955648,
 'recall': 0.65286624}

In [103]:
predict_df = pd.read_csv('./input/test.csv')

In [104]:
predict_df.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


In [105]:
def pd_predict_input_fn(df):
    
    def input_fn():
        features = {
            "fare" : df.Fare.values.tolist(),
            "age" : df.Age.values.tolist(),
            "sex" : df.Sex.values.tolist(),
            "pclass" : df.Pclass.values.tolist()
        }
        dataset = tf.data.Dataset.from_tensor_slices((dict(features)))
        dataset = dataset.batch(len(df.index))
        return dataset  
    
    return input_fn

In [106]:
predictions = estimator.predict(input_fn=pd_predict_input_fn(predict_df))
predictions_list = list(predictions)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./output/logs/model.ckpt-7000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [107]:
result_df = pd.DataFrame(
    columns=['PassengerId', 'Survived'], 
    data={
        'PassengerId' : predict_df.PassengerId.values.tolist(),
        'Survived' : [prediction['class_ids'][0] for prediction in predictions_list]
    }
)

In [108]:
result_df.to_csv('./output/prediction.csv', index=False)