In [2]:
!pip install -q sklearn

In [None]:
%tensorflow_version 2.x # this line is not required unless you are in a notebook

from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import clear_output
from six.moves import urllib

import tensorflow.compat.v2.feature_column as fc

import tensorflow as tf

Fetching data

In [34]:
# Titanic dataset (train / eval splits) read from the tf-datasets storage bucket.
df_train = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv') # training data
df_eval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv') # testing data

# pop() removes the 'survived' column from each frame in place and returns it,
# leaving df_train / df_eval with feature columns only.
# The rest of the notebook refers to the labels as y_train / y_eval, so they
# must be bound under those names here (otherwise Restart & Run All fails
# with a NameError).
y_train = df_train.pop('survived')
y_eval = df_eval.pop('survived')

# Previous names for the same label Series, kept for backward compatibility.
label_train = y_train
label_eval = y_eval

# Handy pandas lookups while exploring:
#   df_train.loc[0]    -> row selection
#   df_train["age"]    -> column selection

# Feature names grouped by how they are encoded when building feature columns.
CATEGORICAL_COLUMNS = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck', 'embark_town', 'alone']
NUMERIC_COLUMNS = ['age', 'fare']

Exploring the data: some **pandas** DataFrame methods and basic data visualisation

In [None]:
# Peek at the first rows of the training features (the 'survived' label was popped off earlier).
df_train.head()

In [None]:
# Summary statistics of the training features.
df_train.describe()

In [None]:
# (number of rows, number of columns) of the training features.
df_train.shape

In [None]:
# Distribution of passenger ages, bucketed into 20 bins.
df_train['age'].hist(bins=20)

In [None]:
# Horizontal bar chart of how many passengers fall under each value of 'sex'.
df_train['sex'].value_counts().plot(kind='barh')

In [None]:
# Horizontal bar chart of passenger counts per ticket class.
# ('class' is a Python keyword, hence bracket access instead of attribute access.)
(
    df_train['class']
    .value_counts()
    .plot(kind='barh')
)

In [None]:
# Re-attach the labels to the features, then plot the mean of 'survived' per sex.
(
    pd.concat([df_train, y_train], axis=1)
    .groupby('sex')['survived']
    .mean()
    .plot(kind='barh')
    .set_xlabel('% survive')
)

Creating Feature Columns

In [35]:
# Categorical features: one vocabulary-list column each, where the vocabulary is
# the set of distinct values observed for that feature in the training frame.
feature_columns = [
    tf.feature_column.categorical_column_with_vocabulary_list(name, df_train[name].unique())
    for name in CATEGORICAL_COLUMNS
]

# Numeric features are appended after the categorical ones as float32 columns.
feature_columns += [
    tf.feature_column.numeric_column(name, dtype=tf.float32)
    for name in NUMERIC_COLUMNS
]


Input Function

In [36]:
def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32,
                  shuffle_buffer_size=1000, seed=None):
  """Build a zero-argument input function that yields batched (features, label) data.

  Args:
    data_df: Feature table; converted with ``dict(data_df)`` so each column
      becomes one entry of the features dictionary.
    label_df: Labels, sliced in step with the rows of ``data_df``.
    num_epochs: Number of times the batched dataset is repeated.
    shuffle: Whether to shuffle the examples before batching.
    batch_size: Number of examples per batch.
    shuffle_buffer_size: Size of the buffer handed to ``Dataset.shuffle``.
      Defaults to 1000, the value that used to be hard-coded.
    seed: Optional seed forwarded to ``Dataset.shuffle``; ``None`` keeps the
      previous unseeded behaviour.

  Returns:
    A function taking no arguments that builds and returns the
    ``tf.data.Dataset`` each time it is called.
  """

  def input_function():
    # Pair every feature row with its label.
    ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
    if shuffle:
      ds = ds.shuffle(shuffle_buffer_size, seed=seed)
    # Batch first, then repeat, so each epoch replays the whole batched dataset.
    ds = ds.batch(batch_size).repeat(num_epochs)
    return ds

  return input_function

In [37]:
# Training keeps make_input_fn's defaults; evaluation makes a single unshuffled pass.
train_input_fn = make_input_fn(data_df=df_train, label_df=y_train)
eval_input_fn = make_input_fn(data_df=df_eval, label_df=y_eval, shuffle=False, num_epochs=1)

Creating the Model, Training and Evaluating

In [None]:
# Linear classifier estimator built from the feature columns.
linear_estimate = tf.estimator.LinearClassifier(feature_columns=feature_columns)

# Fit on the training input function, then compute metrics on the evaluation one.
linear_estimate.train(input_fn=train_input_fn)
result = linear_estimate.evaluate(input_fn=eval_input_fn)

# Clear whatever the cell has printed so far, then show only the metrics.
clear_output()
print(result)

Predictions

In [None]:
# Position (0-based) in the evaluation set of the passenger to inspect. A single
# name keeps the three lookups below pointing at the same person.
PERSON_INDEX = 6

# NOTE: this rebinds `result` from the evaluation metrics to the list of
# per-example prediction dictionaries.
result = list(linear_estimate.predict(eval_input_fn))

# The prediction list is positional, so the frame and labels are indexed
# positionally (.iloc) too rather than by index label (.loc).
print(df_eval.iloc[PERSON_INDEX]) # The features for a given person
print(y_eval.iloc[PERSON_INDEX]) # The label for a given person
print(result[PERSON_INDEX]['probabilities'][1]) # The model's prediction of survival for that person