## Core Learning Algorithms
    

### Setup and Imports

#### Install packages if needed

In [None]:
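# Uncomment any line below if that package is missing from your environment.
# (scikit-learn is published on PyPI as `scikit-learn`; the `sklearn` alias is deprecated.)
#%pip install -q scikit-learn
#%pip install future
#%pip install IPython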


In [None]:
%tensorflow_version 2.x

#### Import packages

In [None]:
from __future__ import (absolute_import, division,
                        print_function, unicode_literals)

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import tensorflow.compat.v2.feature_column as fc
import tensorflow as tf

from IPython.display import clear_output
from six.moves import urllib


### Training and testing Data


In [None]:
# Load the Titanic training and evaluation splits
dftrain = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv')  # training data
dfeval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv')  # testing data

# Split the 'survived' label column off from the features (pop removes it in place)
y_train = dftrain.pop('survived')
y_eval = dfeval.pop('survived')

CATEGORICAL_COLUMNS = (
    'sex', 'n_siblings_spouses', 'parch', 'class', 'deck', 'embark_town', 'alone',
)
NUMERICAL_COLUMNS = ('age', 'fare')

feature_columns = []

# Categorical features: the vocabulary is every distinct value seen in the training data
for feature_name in CATEGORICAL_COLUMNS:
    vocabulary = dftrain[feature_name].unique()
    feature_columns.append(
        tf.feature_column.categorical_column_with_vocabulary_list(
            key=feature_name, vocabulary_list=vocabulary))

# Numerical features are declared as float32 numeric columns
for feature_name in NUMERICAL_COLUMNS:
    feature_columns.append(
        tf.feature_column.numeric_column(key=feature_name, dtype=tf.float32))

print(feature_columns)
    










In [None]:
# Distinct values found in the 'embark_town' column of the training features
dftrain["embark_town"].unique()

##### Compare and print first row

In [None]:
# Print the training example with index label 0 followed by its 'survived' label
print(dftrain.loc[0], y_train.loc[0])

#### Explore data

In [None]:
# Preview the first rows of the training features
dftrain.head()

In [None]:
# Summary statistics of the training features (pandas describes numeric columns by default)
dftrain.describe()

In [None]:
# (rows, columns) of the training features; 'survived' has already been popped off
dftrain.shape

In [None]:
# First few training labels (the 'survived' column popped from dftrain)
y_train.head()

In [None]:
# Histogram of the 'age' column in the training features, using 20 bins
dftrain.age.hist(bins=20)

In [None]:
# Horizontal bar chart of how many training rows fall in each 'sex' category
dftrain.sex.value_counts().plot(kind='barh')

In [None]:
# Horizontal bar chart of row counts per 'class' value.
# Bracket access is required here: `class` is a Python keyword, so `dftrain.class` would not parse.
dftrain['class'].value_counts().plot(kind='barh')

In [None]:
# Survival rate per 'sex' group. The mean of the 'survived' label is a fraction,
# so it is scaled by 100 to agree with the '%' in the axis label.
(
    pd.concat([dftrain, y_train], axis=1)
    .groupby('sex')
    .survived.mean()
    .mul(100)
    .plot(kind='barh')
    .set_xlabel('% survive')
)

In [None]:
# (rows, columns) of the evaluation features
dfeval.shape

## Training Process

In [None]:
def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32,
                  shuffle_buffer_size=1000):
  """Build a zero-argument input function that yields a batched tf.data.Dataset.

  Nothing is constructed until the returned function is called, so it can be
  handed to an estimator and invoked lazily.

  Args:
    data_df: Feature table; it is converted with `dict(data_df)`, so it must be
      something `dict()` accepts (e.g. a pandas DataFrame).
    label_df: Labels sliced in parallel with the features.
    num_epochs: How many times the batched dataset is repeated.
    shuffle: Whether to shuffle the examples before batching.
    shuffle_buffer_size: Buffer size passed to `Dataset.shuffle` when `shuffle`
      is true. Defaults to 1000, the value that used to be hard-coded; per the
      tf.data documentation a buffer smaller than the dataset only shuffles
      approximately, so raise it for larger datasets.
    batch_size: Number of examples per batch.

  Returns:
    A function taking no arguments that returns the configured dataset.
  """
  def input_function():  # inner function, this will be returned
    # Pair every feature column with the labels, one element per example
    ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
    if shuffle:
      ds = ds.shuffle(shuffle_buffer_size)  # randomise the order of the examples
    # Split the dataset into batches, then repeat it for the requested number of epochs
    ds = ds.batch(batch_size).repeat(num_epochs)
    return ds  # the whole (batched, repeated) dataset
  return input_function  # return a function object for use

# make_input_fn returns a function object; it is not called here. The function is
# handed to the estimator, which invokes it to obtain the dataset it consumes.
train_input_fn = make_input_fn(data_df=dftrain, label_df=y_train)
# Evaluation input: a single, unshuffled pass over the evaluation split
eval_input_fn = make_input_fn(data_df=dfeval, label_df=y_eval, num_epochs=1, shuffle=False)





## Creating the Model

In [None]:
# Build a linear classifier over the feature columns and fit it on the training input
linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns)
linear_est.train(input_fn=train_input_fn)

# Score the fitted model on the evaluation input; `result` is a dict of metrics
result = linear_est.evaluate(input_fn=eval_input_fn)

clear_output()  # clear the cell output produced so far
print(result['accuracy'])  # a single entry of the metrics dict
print(result)

In [None]:
# Collect the per-example predictions into a list so they can be indexed.
# eval_input_fn was built with shuffle=False, so predictions stay in row order.
result = list(linear_est.predict(input_fn=eval_input_fn))

# Show one evaluation example, its true label, and the model's output for it
print(dfeval.loc[3])
print(y_eval.loc[3])
# NOTE(review): index 0 of 'probabilities' is the probability of class 0 -
# presumably "did not survive"; index 1 would be the survival probability. Confirm intent.
print(result[3]['probabilities'][0])