In [None]:
import pandas as pd
import tensorflow as tf
import csv
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

### Load the Titanic train and test files

In [None]:
# Read the Kaggle Titanic training and test CSV files into DataFrames
base, test = map(
    pd.read_csv,
    ('../input/titanic/train.csv', '../input/titanic/test.csv'),
)

In [None]:
base.head()

In [None]:
test.head()

### Inspect the unique values of each column

In [None]:
base['Pclass'].unique()

In [None]:
base['Sex'].unique()

In [None]:
base['SibSp'].unique()

In [None]:
base['Parch'].unique()

In [None]:
base['Ticket'].unique()

In [None]:
base['Cabin'].unique()

In [None]:
base['Embarked'].unique()

### Replace the NaN values in the columns: Embarked, Cabin

In [None]:
base['Embarked'] = base['Embarked'].fillna('miss')

In [None]:
test['Embarked'] = test['Embarked'].fillna('miss')

In [None]:
base['Embarked'].unique()

In [None]:
base['Cabin'] = base['Cabin'].fillna('miss')

In [None]:
test['Cabin'] = test['Cabin'].fillna('miss')

In [None]:
base['Cabin'].unique()

### Sets the x and y values

In [None]:
# X values are all columns except for Survived
X = base.drop('Survived', axis=1)

In [None]:
X.head()

In [None]:
### y values are only the Survived column
y = base['Survived']

In [None]:
y.head()

In [None]:
# empty list that the following sections fill with TensorFlow feature columns
feature_columns = list()

### Separate bucketized age column

In [None]:
# Graph shows how many people have by age ranges
age_hist = X.Age.hist()

In [None]:
# saves age ranges in the variable
age_boundaries = age_hist.get_xticks()

In [None]:
# convert the tick positions to a list of plain Python floats.
# A comprehension is used instead of ndarray.tolist() so that this cell can be
# re-run after age_boundaries has already become a list (a list has no
# .tolist(), so the previous form raised AttributeError on a second run).
age_boundaries = [float(boundary) for boundary in age_boundaries]

In [None]:
# remove negative values from the list: an age cannot be negative, so such a
# tick is not a useful bucket boundary.
# Filtering (rather than pop(0)) keeps this cell idempotent: re-running it no
# longer drops the next, valid boundary each time it is executed.
age_boundaries = [boundary for boundary in age_boundaries if boundary >= 0]

In [None]:
# create age feature column
age_fc = tf.feature_column.numeric_column('Age')

In [None]:
# create bucketized age column
age_categorical = tf.feature_column.bucketized_column(
    age_fc,
    boundaries=age_boundaries
)

In [None]:
feature_columns.append(age_categorical)

### Separates categorical columns

In [None]:
list_columns_vocabulary = ['Sex', 'Embarked', 'Ticket', 'Cabin']

In [None]:
# create vocabulary list columns
vocabulary_columns = [
    tf.feature_column.categorical_column_with_vocabulary_list(
        key=c,
        vocabulary_list=X[c].unique()
    ) for c in list_columns_vocabulary
]

In [None]:
# add categorical to embedding columns.
# The embedding width is derived from each column's own vocabulary size
# (fourth root, with a floor of 2) instead of base.shape[0]. The number of
# training rows is unrelated to how many distinct values a column has, and
# using it gave every column - however small its vocabulary - an embedding as
# wide as the whole training set.
for column in vocabulary_columns:
    vocabulary_size = len(column.vocabulary_list)
    feature_columns.append(
        tf.feature_column.embedding_column(
            column,
            dimension=max(2, round(vocabulary_size ** 0.25))
        )
    )

### Separates numeric columns

In [None]:
list_numeric_columns = ['PassengerId', 'Pclass', 'SibSp', 'Parch']

In [None]:
for c in list_numeric_columns:
    feature_columns.append(tf.feature_column.numeric_column(key=c))

### Separates train and test data

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [None]:
X_train.shape

In [None]:
X_test.shape

### Create the input functions (input_fn)

In [None]:
def train_input_fn(features, labels, batch_size=32):
    """Build the training dataset: shuffled, repeated indefinitely and batched.

    Args:
        features: mapping-like object (e.g. a DataFrame) converted with dict()
            into a column-name -> values dictionary.
        labels: label values aligned with the rows of ``features``.
        batch_size: number of examples per batch (default 32).

    Returns:
        A ``tf.data.Dataset`` of ``(feature_dict, labels)`` batches.
    """
    feature_dict = dict(features)
    return (
        tf.data.Dataset.from_tensor_slices((feature_dict, labels))
        .shuffle(1000)  # shuffle buffer of 1000 elements
        .repeat()       # no count given: the dataset never runs out
        .batch(batch_size)
    )

In [None]:
def eval_input_fn(features, labels, batch_size=32):
    """Build a batched dataset for evaluation or prediction (no shuffling).

    Args:
        features: mapping-like object (e.g. a DataFrame) converted with dict()
            into a column-name -> values dictionary.
        labels: label values aligned with ``features``, or ``None`` when only
            predictions are wanted.
        batch_size: number of examples per batch (default 32).

    Returns:
        A ``tf.data.Dataset`` yielding feature batches when ``labels`` is
        ``None``, otherwise ``(features, labels)`` batches.
    """
    feature_dict = dict(features)
    # Without labels only the features are sliced; otherwise slice the pair
    tensors = feature_dict if labels is None else (feature_dict, labels)
    return tf.data.Dataset.from_tensor_slices(tensors).batch(batch_size)

### Create the classifier and train it

In [None]:
# Two-class DNN classifier over the assembled feature columns:
# four hidden layers of 8 ReLU units each, optimised with Adam
classifier = tf.estimator.DNNClassifier(
    feature_columns=feature_columns,
    hidden_units=[8] * 4,
    activation_fn=tf.nn.relu,
    optimizer='Adam',
    n_classes=2,
)

In [None]:
batch_size = 32
train_steps = 10000

In [None]:
classifier.train(
        input_fn=lambda:train_input_fn(X_train, y_train, batch_size),
        steps=train_steps
    )

### Evaluate model

In [None]:
eval_result = classifier.evaluate(
    input_fn=lambda:eval_input_fn(X_test, y_test, batch_size)
)

In [None]:
eval_result

### Generate predictions

In [None]:
predictions = []
for p in classifier.predict(input_fn=lambda:eval_input_fn(test, labels=None, batch_size=batch_size)):
    predictions.append(p['class_ids'])

In [None]:
predictions

### Generate the submission csv file

In [None]:
passengers = {}
_id = 892
for results in predictions:
    passengers[_id] = int(results[0])
    _id+=1

In [None]:
len(passengers)

In [None]:
csvfile = 'submission.csv'
# newline='' hands line-ending control to the csv module; without it,
# platforms that translate '\n' on write (e.g. Windows) end up with an empty
# line after every row of the submission file.
with open(csvfile, 'w', newline='') as f:
    outcsv = csv.writer(f, delimiter=',')
    # header row followed by one (PassengerId, Survived) row per passenger
    outcsv.writerow(['PassengerId', 'Survived'])
    outcsv.writerows(passengers.items())

In [None]:
submission = pd.read_csv(csvfile)

In [None]:
submission.head()

In [None]:
submission.shape