In [112]:
import tensorflow as tf
import pandas as pd
from IPython.display import display
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import Imputer
from sklearn.preprocessing import LabelEncoder
import re

# Only surface TensorFlow log messages at WARN level or above.
tf.logging.set_verbosity(tf.logging.WARN)

# Closed vocabulary of honorifics the title encoder is fitted on. process()
# passes the titles it parses out of the "Name" column through this encoder.
_KNOWN_TITLES = [
    "Mr", "Mrs", "Capt", "Col", "Countess", "Don",
    "Dr", "Jonkheer", "Lady", "Major", "Master",
    "Miss", "Mlle", "Mme", "Ms", "Rev", "Sir", "Dona",
]
leTitle = LabelEncoder().fit(_KNOWN_TITLES)
def process(df):
    """Turn a raw passenger frame into an all-numeric, [0, 1]-scaled feature frame.

    Steps, in order:
      1. Parse the honorific (the word before the first "<word>." that follows
         a space) out of ``Name`` and encode it with the module-level
         ``leTitle`` encoder into a new ``Title`` column.
      2. Drop ``PassengerId``, ``Name``, ``Cabin`` and ``Ticket``.
      3. Map ``Sex`` (female/male -> 0/1) and ``Embarked`` (Q/C/S -> 0/1/2).
      4. Impute missing values and min-max scale every feature column.

    Args:
        df: DataFrame with at least the columns PassengerId, Name, Cabin,
            Ticket, Sex, Embarked, Age, SibSp, Parch, Fare and Pclass.
            A ``Survived`` column, if present, is passed through unchanged.

    Returns:
        A new DataFrame; the frame passed in is left unmodified.

    Raises:
        ValueError: if a title cannot be parsed from one or more names.

    NOTE: the imputer and scaler are fitted on whatever frame is passed in, so
    two separate calls (e.g. one for train, one for test) each use their own
    independently fitted statistics.
    """
    # Work on a copy so the caller's raw frame survives and the cell that
    # calls this can be re-run without hitting missing columns.
    df = df.copy()

    # Vectorised equivalent of re.search(...).group(1) per row; rows with no
    # match come back as NaN instead of raising an opaque AttributeError.
    titles = df["Name"].str.extract(r' ([A-Za-z]+)\.', expand=False)
    untitled = titles.isnull()
    if untitled.any():
        raise ValueError(
            "could not extract a title from Name value(s): {!r}".format(
                df.loc[untitled, "Name"].tolist()[:5]
            )
        )
    df["Title"] = leTitle.transform(titles)

    df = df.drop(labels=["PassengerId", "Name", "Cabin", "Ticket"], axis=1)

    # Assign the result back rather than calling replace(inplace=True) on a
    # column selection: that chained form is deprecated and has no effect on
    # the parent frame under pandas copy-on-write.
    df["Sex"] = df["Sex"].replace(["female", "male"], [0, 1])
    df["Embarked"] = df["Embarked"].replace(["Q", "C", "S"], [0, 1, 2])

    for column in ["Age", "SibSp", "Parch", "Fare", "Embarked", "Sex", "Pclass", "Title"]:
        # A fresh imputer/scaler per column, fitted on this frame only.
        values = df[column].values.reshape(-1, 1)
        values = Imputer().fit_transform(values)
        values = MinMaxScaler().fit_transform(values)
        df[column] = values.ravel()  # back to 1-D for a plain column assignment

    if "Survived" in df:
        # NOTE(review): SibSp has already been imputed above, so this filter
        # looks like a no-op; kept to preserve the original behaviour.
        df = df[pd.notnull(df['SibSp'])]
    return df

def eval_input_fn(features, labels, batch_size):
    """Build a batched, single-pass dataset for evaluation or prediction.

    Args:
        features: anything ``dict()`` accepts (e.g. a DataFrame), mapping
            feature name to its values.
        labels: label values aligned with ``features``, or ``None`` to build a
            features-only dataset (prediction).
        batch_size: number of examples per batch; must not be ``None``.

    Returns:
        A ``tf.data.Dataset`` of batches, neither shuffled nor repeated.
    """
    feature_map = dict(features)
    # Prediction has no labels, so slice the feature dict on its own.
    slices = feature_map if labels is None else (feature_map, labels)
    dataset = tf.data.Dataset.from_tensor_slices(slices)
    assert batch_size is not None, "batch_size must not be None"
    return dataset.batch(batch_size)

def train_input_fn(features, labels, batch_size):
    """Build a shuffled, endlessly repeating, batched dataset for training.

    Args:
        features: anything ``dict()`` accepts (e.g. a DataFrame), mapping
            feature name to its values.
        labels: label values aligned with ``features``.
        batch_size: number of examples per batch.

    Returns:
        A ``tf.data.Dataset`` yielding ``(features, labels)`` batches. Because
        it repeats without limit, the caller has to bound training by steps.
    """
    examples = (dict(features), labels)
    dataset = tf.data.Dataset.from_tensor_slices(examples)
    dataset = dataset.shuffle(1000)  # shuffle buffer of 1000 elements
    dataset = dataset.repeat()
    return dataset.batch(batch_size)

In [113]:
# Load and preprocess both CSVs. The test PassengerId column is saved before
# preprocessing because the frame returned by process() no longer has it.
train_data = process(df=pd.read_csv(r"../input/train.csv"))
test_data = pd.read_csv(r"../input/test.csv")
pid = test_data["PassengerId"]
test_data = process(df=test_data)

# Split the label off the training frame. pop() removes the column in place,
# so train_x (the same object as train_data) is left with features only.
train_y = train_data.pop("Survived")
train_x = train_data

# Every remaining column is fed to the estimator as a plain numeric feature.
my_feature_columns = [
    tf.feature_column.numeric_column(key=key) for key in train_x.keys()
]

display(test_data.head(15))

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Title
0,1.0,1.0,0.452723,0.0,0.0,0.015282,0.0,0.8
1,1.0,0.0,0.617566,0.125,0.0,0.013663,1.0,0.866667
2,0.5,1.0,0.815377,0.0,0.0,0.018909,0.0,0.8
3,1.0,1.0,0.353818,0.0,0.0,0.016908,1.0,0.8
4,1.0,0.0,0.287881,0.125,0.111111,0.023984,1.0,0.866667
5,1.0,1.0,0.182382,0.0,0.0,0.018006,1.0,0.8
6,1.0,0.0,0.39338,0.0,0.0,0.014891,0.0,0.6
7,0.5,1.0,0.34063,0.125,0.111111,0.056604,1.0,0.8
8,1.0,0.0,0.235131,0.0,0.0,0.01411,0.5,0.866667
9,1.0,1.0,0.274693,0.25,0.0,0.047138,1.0,0.8


In [115]:
# Proximal Adagrad with a small L1 penalty, driving a DNN classifier with
# three hidden layers of 66 units each.
optimizer = tf.train.ProximalAdagradOptimizer(
    learning_rate=0.1,
    l1_regularization_strength=0.001,
)
classifier = tf.estimator.DNNClassifier(
    feature_columns=my_feature_columns,
    hidden_units=[66] * 3,
    optimizer=optimizer,
)


def _train_batches():
    """Shuffled, repeating batches of 500 training rows."""
    return train_input_fn(train_x, train_y, 500)


def _train_eval_batches():
    """Single pass over the training rows in batches of 100."""
    return eval_input_fn(train_x, train_y, 100)


# The training dataset repeats forever, so `steps` is what ends training.
classifier.train(input_fn=_train_batches, steps=5000)

# NOTE: this evaluates on the same rows the model was trained on, so the
# reported metrics describe training fit, not performance on held-out data.
eval_result = classifier.evaluate(input_fn=_train_eval_batches)
print(eval_result)

{'accuracy': 0.9023569, 'accuracy_baseline': 0.6161616, 'auc': 0.9584918, 'auc_precision_recall': 0.9511346, 'average_loss': 0.23288493, 'label/mean': 0.3838384, 'loss': 23.055609, 'prediction/mean': 0.40533426, 'global_step': 5000}


In [127]:
def _test_batches():
    """Unlabeled test features in batches of 100, in file order."""
    return eval_input_fn(test_data, labels=None, batch_size=100)


predictions = classifier.predict(input_fn=_test_batches)
template = '{},{}'
# Pair each prediction with the PassengerId saved before preprocessing and
# print one "<PassengerId>,<predicted class>" line per passenger.
for pred_dict, p in zip(predictions, pid):
    class_id = pred_dict['class_ids'][0]
    # Probability of the predicted class; not used by the print below.
    probability = pred_dict['probabilities'][class_id]
    line = template.format(p, class_id)
    print(line)

892,0
893,0
894,0
895,0
896,0
897,0
898,0
899,1
900,1
901,0
902,0
903,0
904,1
905,0
906,1
907,1
908,0
909,0
910,0
911,1
912,1
913,1
914,1
915,1
916,0
917,0
918,1
919,0
920,0
921,0
922,0
923,0
924,1
925,1
926,1
927,0
928,0
929,1
930,0
931,1
932,0
933,1
934,0
935,1
936,1
937,0
938,0
939,0
940,1
941,1
942,0
943,0
944,1
945,1
946,0
947,0
948,0
949,0
950,0
951,1
952,0
953,0
954,0
955,1
956,1
957,1
958,0
959,0
960,1
961,0
962,1
963,0
964,0
965,0
966,1
967,0
968,0
969,1
970,0
971,1
972,1
973,0
974,1
975,0
976,0
977,0
978,1
979,1
980,1
981,1
982,0
983,0
984,1
985,0
986,0
987,0
988,1
989,0
990,1
991,0
992,1
993,0
994,0
995,0
996,1
997,0
998,0
999,0
1000,0
1001,0
1002,0
1003,0
1004,1
1005,0
1006,1
1007,0
1008,0
1009,1
1010,0
1011,1
1012,1
1013,0
1014,1
1015,0
1016,0
1017,0
1018,0
1019,1
1020,1
1021,0
1022,0
1023,0
1024,0
1025,0
1026,0
1027,0
1028,0
1029,0
1030,0
1031,0
1032,0
1033,1
1034,0
1035,1
1036,1
1037,0
1038,0
1039,0
1040,1
1041,1
1042,1
1043,0
1044,0
1045,1
1046,0
1047,0
1048,1
1049,1
10