# Loading Dependencies

In [None]:
import tensorflow as tf
print("TensorFlow version:", tf.__version__)

import pandas as pd
print("Pandas version:", pd.__version__)

TensorFlow version: 2.15.0
Pandas version: 1.5.3


# Loading Training Data

## Downloading Training Data File

Source is: https://storage.googleapis.com/tf-datasets/titanic/train.csv

In [None]:
# Fetch the training CSV and keep the local path that get_file returns.
titanic_train_file = tf.keras.utils.get_file(
    fname="train.csv",
    origin="https://storage.googleapis.com/tf-datasets/titanic/train.csv",
)

Downloading data from https://storage.googleapis.com/tf-datasets/titanic/train.csv


## Parsing Training Data File


In [None]:
# Parse the downloaded training CSV and preview its first five rows.
titanic_train_dataFrame = pd.read_csv(filepath_or_buffer=titanic_train_file)
titanic_train_dataFrame.head(n=5)

## Training DataFrame Shape

In [None]:
# (row count, column count) of the training DataFrame.
titanic_train_dataFrame.shape

(627, 10)

# Loading Testing Data

## Downloading Testing Data File

Source is: https://storage.googleapis.com/tf-datasets/titanic/eval.csv

In [None]:
# Fetch the evaluation CSV and keep the local path that get_file returns.
titanic_test_file = tf.keras.utils.get_file(
    fname="eval.csv",
    origin="https://storage.googleapis.com/tf-datasets/titanic/eval.csv",
)

Downloading data from https://storage.googleapis.com/tf-datasets/titanic/eval.csv


## Parsing Testing Data file

In [None]:
# Parse the downloaded evaluation CSV and preview its first five rows.
titanic_test_dataFrame = pd.read_csv(filepath_or_buffer=titanic_test_file)
titanic_test_dataFrame.head(n=5)

Unnamed: 0,survived,sex,age,n_siblings_spouses,parch,fare,class,deck,embark_town,alone
0,0,male,35.0,0,0,8.05,Third,unknown,Southampton,y
1,0,male,54.0,0,0,51.8625,First,E,Southampton,y
2,1,female,58.0,0,0,26.55,First,C,Southampton,y
3,1,female,55.0,0,0,16.0,Second,unknown,Southampton,y
4,1,male,34.0,0,0,13.0,Second,D,Southampton,y


## Testing DataFrame Shape

In [None]:
# (row count, column count) of the testing DataFrame.
titanic_test_dataFrame.shape

(264, 10)

# Creating The Survived Series

This removes (pops) the `survived` column from each DataFrame and stores it in a new Series, which is used as the label for that dataset.

In [None]:
# pop() removes the column from the frame in place, so this cell cannot be
# re-run without first reloading both DataFrames.
training_survived_series, testing_survived_series = (
    frame.pop('survived')
    for frame in (titanic_train_dataFrame, titanic_test_dataFrame)
)

# Confirm that each frame is now one column narrower.
for label, frame in (
    ("titanic_train_dataFrame:", titanic_train_dataFrame),
    ("titanic_test_dataFrame:", titanic_test_dataFrame),
):
    print(label, frame.shape)

titanic_train_dataFrame: (627, 9)
titanic_test_dataFrame: (264, 9)


# Creating Feature Columns

## Defining Columns

We'll use these lists of column names to create our feature columns.




In [None]:
# Columns handled as categorical (vocabulary-based) features.
category_columns = [
    "sex",
    "parch",
    "n_siblings_spouses",
    "class",
    "deck",
    "embark_town",
    "alone",
]
# Columns handled as numeric features.
numeric_columns = ["fare", "age"]

# Accumulates every feature column built in the following cells.
feature_columns = []

## Creating Feature Columns for Category Columns


In [None]:
for feature_name in category_columns:
    # The vocabulary is every distinct value found in the training data.
    vocabulary = titanic_train_dataFrame[feature_name].unique()
    feature_column = tf.feature_column.categorical_column_with_vocabulary_list(
        key=feature_name,
        vocabulary_list=vocabulary,
    )
    print(feature_column)
    feature_columns.append(feature_column)

Instructions for updating:
Use Keras preprocessing layers instead, either directly or via the `tf.keras.utils.FeatureSpace` utility. Each of `tf.feature_column.*` has a functional equivalent in `tf.keras.layers` for feature preprocessing when training a Keras model.


VocabularyListCategoricalColumn(key='sex', vocabulary_list=('male', 'female'), dtype=tf.string, default_value=-1, num_oov_buckets=0)
VocabularyListCategoricalColumn(key='parch', vocabulary_list=(0, 1, 2, 5, 3, 4), dtype=tf.int64, default_value=-1, num_oov_buckets=0)
VocabularyListCategoricalColumn(key='n_siblings_spouses', vocabulary_list=(1, 0, 3, 4, 2, 5, 8), dtype=tf.int64, default_value=-1, num_oov_buckets=0)
VocabularyListCategoricalColumn(key='class', vocabulary_list=('Third', 'First', 'Second'), dtype=tf.string, default_value=-1, num_oov_buckets=0)
VocabularyListCategoricalColumn(key='deck', vocabulary_list=('unknown', 'C', 'G', 'A', 'B', 'D', 'F', 'E'), dtype=tf.string, default_value=-1, num_oov_buckets=0)
VocabularyListCategoricalColumn(key='embark_town', vocabulary_list=('Southampton', 'Cherbourg', 'Queenstown', 'unknown'), dtype=tf.string, default_value=-1, num_oov_buckets=0)
VocabularyListCategoricalColumn(key='alone', vocabulary_list=('n', 'y'), dtype=tf.string, default_va

## Creating Feature Columns for Numeric Columns

In [None]:
for feature_name in numeric_columns:
    # Each numeric column is passed to the model as a float32 value.
    feature_column = tf.feature_column.numeric_column(
        key=feature_name,
        dtype=tf.float32,
    )
    print(feature_column)
    feature_columns.append(feature_column)

Instructions for updating:
Use Keras preprocessing layers instead, either directly or via the `tf.keras.utils.FeatureSpace` utility. Each of `tf.feature_column.*` has a functional equivalent in `tf.keras.layers` for feature preprocessing when training a Keras model.


NumericColumn(key='fare', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)
NumericColumn(key='age', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)


## Creating A Bucketized Column for Age

In [None]:
# Find the numeric "age" column built earlier; yields None when no column matches.
age_feature_column = next(
    (column for column in feature_columns if column.key == "age"),
    None,
)
# Split age into ranges delimited by the listed boundaries.
age_buckets = tf.feature_column.bucketized_column(
    source_column=age_feature_column,
    boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65],
)
feature_columns.append(age_buckets)

Instructions for updating:
Use Keras preprocessing layers instead, either directly or via the `tf.keras.utils.FeatureSpace` utility. Each of `tf.feature_column.*` has a functional equivalent in `tf.keras.layers` for feature preprocessing when training a Keras model.


## Validating Feature Columns

In [None]:
# Report how many entries each list holds.
for label, columns in (
    ("Category Column Count:", category_columns),
    ("Numeric Column Count:", numeric_columns),
    ("Feature Column Count:", feature_columns),
):
    print(label, len(columns))


Category Column Count: 7
Numeric Column Count: 2
Feature Column Count: 10


#  Training the model



## Defining the Input Function


In [None]:
def make_input_fn(data_df, label_series, num_epochs=30, shuffle=True, batch_size=32):
    """Build a zero-argument function that creates a batched ``tf.data.Dataset``.

    Args:
        data_df: Feature table (a pandas DataFrame in this notebook). It is
            converted with ``dict(data_df)`` before being sliced into elements.
        label_series: Labels paired with the features when slicing.
        num_epochs: Number of times the batched dataset is repeated.
        shuffle: When true, shuffle with a buffer of 1000 before batching.
        batch_size: Number of elements per batch.

    Returns:
        A callable taking no arguments that constructs and returns the dataset.
    """
    def input_function():
        features = dict(data_df)
        dataset = tf.data.Dataset.from_tensor_slices((features, label_series))
        if shuffle:
            dataset = dataset.shuffle(1000)
        batched = dataset.batch(batch_size)
        return batched.repeat(num_epochs)

    return input_function

## Creating the Training and Testing Functions

In [None]:
# Training keeps make_input_fn's defaults; evaluation makes a single unshuffled pass.
training_input_fn = make_input_fn(
    data_df=titanic_train_dataFrame,
    label_series=training_survived_series,
)
testing_input_fn = make_input_fn(
    data_df=titanic_test_dataFrame,
    label_series=testing_survived_series,
    num_epochs=1,
    shuffle=False,
)

## Training and Evaluating the Model

We'll be using a linear classifier (`tf.estimator.LinearClassifier`), i.e. logistic regression: a linear model that outputs a probability for each class.

Other estimators are available: https://www.tensorflow.org/guide/estimator

In [None]:
# Creating the model
linear_est = tf.estimator.LinearClassifier(feature_columns)
# Training the model
linear_est.train(training_input_fn)
# Evaluating the model
modelStats = linear_est.evaluate(testing_input_fn)




## Validating the Model

In [None]:
# Headline metrics first; higher is better for both.
for label, metric in (
    ("Accuracy:", "accuracy"),
    ("Area Under the Curve (auc):", "auc"),
):
    print(label, modelStats[metric])
# Then the complete metrics object for reference.
print(modelStats)

Accuracy: 0.7613636
Area Under the Curve (auc): 0.8397919
{'accuracy': 0.7613636, 'accuracy_baseline': 0.625, 'auc': 0.8397919, 'auc_precision_recall': 0.7893599, 'average_loss': 0.46650088, 'label/mean': 0.375, 'loss': 0.45552766, 'precision': 0.6730769, 'prediction/mean': 0.3945349, 'recall': 0.7070707, 'global_step': 600}


# Making predictions using the model

## Creating a list of predictions

In [None]:
# Materialise the predictions into a list so individual records can be indexed.
predictions = list(linear_est.predict(input_fn=testing_input_fn))

def predictionEvaluationFn(recordNum):
    """Print one test record's features, predicted probabilities, and true label.

    Args:
        recordNum: Label used to look up the row in ``titanic_test_dataFrame``
            and the value in ``testing_survived_series``; also the index of
            the matching entry in ``predictions``.
    """
    print(titanic_test_dataFrame.loc[recordNum])
    # Index 1 is reported as the survival probability, index 0 as its complement.
    class_probabilities = predictions[recordNum]["probabilities"]
    print("Chances of surviving:", class_probabilities[1])
    print("Chances of dieing:", class_probabilities[0])
    actual_outcome = testing_survived_series[recordNum]
    print("Did they actually survive:", actual_outcome)

Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.
Instructions for updating:
Use tf.keras instead.


# Looking through the list

## Looking at the stats of a young female who survived

In [None]:
# Print the features, predicted probabilities, and actual outcome for test record 5.
predictionEvaluationFn(5)

sex                       female
age                         15.0
n_siblings_spouses             0
parch                          0
fare                      8.0292
class                      Third
deck                     unknown
embark_town           Queenstown
alone                          y
Name: 5, dtype: object
Chances of surviving: 0.8377103
Chances of dieing: 0.16228968
Did they actually survive: 1


## Looking at the stats of a young male who survived despite a low predicted chance

In [None]:
# Print the features, predicted probabilities, and actual outcome for test record 35.
predictionEvaluationFn(35)

sex                          male
age                          27.0
n_siblings_spouses              0
parch                           0
fare                       7.7958
class                       Third
deck                      unknown
embark_town           Southampton
alone                           y
Name: 35, dtype: object
Chances of surviving: 0.09117461
Chances of dieing: 0.9088254
Did they actually survive: 1
