In [1]:
import random
import pandas
import numpy as np

import tensorflow as tf
from tensorflow.contrib import layers
from tensorflow.contrib import learn

## Read Data

- Read CSV file and print out 5 random examples

In [2]:
random.seed(42)

data = pandas.read_csv('titanic_dataset.csv')
rows = random.sample(list(data), 5)

data.ix[rows]

Unnamed: 0,survived,pclass,name,sex,age,sibsp,parch,ticket,fare
pclass,,,,,,,,,
survived,,,,,,,,,
sibsp,,,,,,,,,
name,,,,,,,,,
fare,,,,,,,,,


## Preprocess Data

- Removed name and ticket ID as they won't help much in prediction

In [14]:
def preprocess(data):
    train = data.drop(['survived', 'name', 'ticket'], axis=1).values
    
    for i in range(len(train)):
        train[i][1] = 1. if train[i][1] == 'female' else 0
   
    return train

In [15]:
#Prepare training data
x_train  = preprocess(data)
y_train = data['survived']

## Define and train the model

In [34]:

classifier = learn.LinearClassifier(n_classes=2, 
                                    feature_columns=learn.infer_real_valued_columns_from_input(x_train), 
                                    optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.05),
                                   model_dir='model')

classifier.fit(x_train, y_train, batch_size=128, steps=500)


Explicitly set `enable_centered_bias` to 'True' if you want to keep existing behaviour.


Estimator(params={'joint_weights': False, 'num_ps_replicas': 0, 'gradient_clip_norm': None, 'feature_columns': [_RealValuedColumn(column_name='', dimension=6, default_value=None, dtype=tf.float64, normalizer=None)], 'enable_centered_bias': True, 'n_classes': 2, 'weight_column_name': None, 'optimizer': <tensorflow.python.training.gradient_descent.GradientDescentOptimizer object at 0x7f9c09d027f0>})

## Let's Predict if the survival chances of our beloved Rose & Jack !!

**Jack** , **19** year old, is **male**, a **3rd** class ticket holder and let's say his ticket fare is **$5**.
No Siblings, spouse nor parents aboard.

**Rose** , **17** year old, is **female**, a 1st class passenger who holds an expensive **$100** ticket.
Rose's parents and fiance are aboard.

Let's predict the survival of Cal too! <br/>
**Cal** , Rose's **fiance**, is **30** year old, **male**, **1st class** passenger and holds **$100** ticket.

In [6]:
#class, gender, age, sibling/spouse, parents, fare
Jack = [3,0.0, 19, 0, 0, 5.0000]
Rose = [1, 1.0, 17, 1, 2, 100.0000]
Cal = [1, 0.0, 30, 1, 0, 100.0]

In [7]:
test = np.array([Jack, Rose, Cal])
pred = classifier.predict(test)
prob = classifier.predict_proba(test)

Instructions for updating:
The default behavior of predict() is changing. The default value for
as_iterable will change to True, and then the flag will be removed
altogether. The behavior of this flag is described below.
Instructions for updating:
The default behavior of predict() is changing. The default value for
as_iterable will change to True, and then the flag will be removed
altogether. The behavior of this flag is described below.


In [8]:
answer = ['No', 'Yes']
print("Will Jack Survive? %s" % answer[pred[0]])
print("Will Rose Survive? %s" % answer[pred[1]])
print("Will Cal Survive? %s" % answer[pred[2]])

print("\nJack's Surviving Chance: %f%%" % (prob[0][1]*100))
print("Rose's Surviving Chance: %f%%" % (prob[1][1]*100))
print("Cal's Surviving Chance: %f%%" % (prob[2][1]*100))

Will Jack Survive? No
Will Rose Survive? Yes
Will Cal Survive? Yes

Jack's Surviving Chance: 0.084907%
Rose's Surviving Chance: 100.000000%
Cal's Surviving Chance: 99.999988%
