# TensorFlow

## Introduction

### Basics

In [1]:
# Import TensorFlow and print the installed version so readers know which release the notebook ran against.
import tensorflow as tf  
print(tf.__version__)  

2.9.2


### Tensors

In [2]:
# Draw a random tensor, then view the same six values under two other shapes.
# NOTE(review): no random seed is set, so the printed values change on every run.
tensor1 = tf.random.normal([1,2,3])
tensor2 = tf.reshape(tensor1, [2,3,1]) 
tensor3 = tf.reshape(tensor2, [3, -1])  # -1 lets TensorFlow infer that dimension (6 values / 3 rows = 2)

# Shapes only: reshaping changes the layout, not the number of elements.
print(f"Tensor 1 shape is {tensor1.shape}, so rank/degree {tensor1.ndim}")
print(f"Tensor 2 shape is {tensor2.shape}")
print(f"Tensor 3 shape is {tensor3.shape}")

# Full contents: the values appear in the same order in all three tensors.
print(tensor1)
print(tensor2)
print(tensor3)

Tensor 1 shape is (1, 2, 3), so rank/degree 3
Tensor 2 shape is (2, 3, 1)
Tensor 3 shape is (3, 2)
tf.Tensor(
[[[-1.0778204  -0.04934228  0.17698595]
  [ 0.8112834  -0.48283577  1.9005011 ]]], shape=(1, 2, 3), dtype=float32)
tf.Tensor(
[[[-1.0778204 ]
  [-0.04934228]
  [ 0.17698595]]

 [[ 0.8112834 ]
  [-0.48283577]
  [ 1.9005011 ]]], shape=(2, 3, 1), dtype=float32)
tf.Tensor(
[[-1.0778204  -0.04934228]
 [ 0.17698595  0.8112834 ]
 [-0.48283577  1.9005011 ]], shape=(3, 2), dtype=float32)


In [3]:
# Subsetting: tensors are indexed and sliced with 0-based positions,
# while the printed labels below count rows/columns from 1.
matrix = [[1,2,3,4,5],
          [6,7,8,9,10],
          [11,12,13,14,15],
          [16,17,18,19,20]]

tensor = tf.Variable(matrix, dtype=tf.int32) 
print(f"rank: {tf.rank(tensor)}, shape: {tensor.shape}")

print(f"3rd element from row 1: {tensor[0,2]}")  # row index 0, column index 2
print(f"row 1: {tensor[0]}")  # a single index selects a whole row
print(f"column 1: {tensor[:, 0]}")  # ':' keeps every row
print(f"row 2 and 4: {tensor[1::2]}")  # start at row index 1, then every 2nd row
print(f"column 1 in row 2 and 3: {tensor[1:3,0]}")  # row indices 1 and 2 (end index 3 is excluded)

rank: 2, shape: (4, 5)
3rd element from row 1: 3
row 1: [1 2 3 4 5]
column 1: [ 1  6 11 16]
row 2 and 4: [[ 6  7  8  9 10]
 [16 17 18 19 20]]
column 1 in row 2 and 3: [ 6 11]


## Basic learning algorithms

Here we use the `tf.estimator` API, which provides production-ready (pre-made) models.

Normally we would prototype with `tf.keras` (using the Sequential API, the Functional API, or model subclassing) and then, if needed, convert the model to an estimator.

### Linear classifier (logistic regression)

In [4]:
import numpy as np
import pandas as pd
from IPython.display import clear_output

import tensorflow as tf

In [5]:
# load the titanic dataset (training and evaluation splits are separate CSV files, fetched over HTTPS)
dftrain = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv') 
dfeval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv') 

# pop() removes the 'survived' column from each frame and returns it,
# leaving dftrain/dfeval with feature columns only and y_* holding the labels.
y_train = dftrain.pop('survived')
y_eval = dfeval.pop('survived')

In [6]:
# Column names, dtypes and non-null counts of the training features.
dftrain.info()
# Other quick looks worth trying: dftrain.head() for the first rows,
# dftrain.describe() for summary statistics of the numeric columns.

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 627 entries, 0 to 626
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   sex                 627 non-null    object 
 1   age                 627 non-null    float64
 2   n_siblings_spouses  627 non-null    int64  
 3   parch               627 non-null    int64  
 4   fare                627 non-null    float64
 5   class               627 non-null    object 
 6   deck                627 non-null    object 
 7   embark_town         627 non-null    object 
 8   alone               627 non-null    object 
dtypes: float64(2), int64(2), object(5)
memory usage: 44.2+ KB


In [7]:
# Columns that will be encoded through a vocabulary of their distinct values.
# n_siblings_spouses and parch are integer counts but are listed here too,
# so they are treated as categories rather than as magnitudes.
CATEGORICAL_COLUMNS = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck',
                       'embark_town', 'alone']
# Columns passed to the model as plain numeric values.
NUMERIC_COLUMNS = ['age', 'fare']

In [8]:
# Feature columns: describe to the estimator how each input column should be interpreted.
# Categorical columns get a vocabulary made of the distinct values seen in the training frame.
feature_columns = [
    tf.feature_column.categorical_column_with_vocabulary_list(
        column_name, dftrain[column_name].unique())
    for column_name in CATEGORICAL_COLUMNS
]

# Numeric columns are passed through as float32 values.
feature_columns += [
    tf.feature_column.numeric_column(column_name, dtype=tf.float32)
    for column_name in NUMERIC_COLUMNS
]

print(feature_columns)

[VocabularyListCategoricalColumn(key='sex', vocabulary_list=('male', 'female'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='n_siblings_spouses', vocabulary_list=(1, 0, 3, 4, 2, 5, 8), dtype=tf.int64, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='parch', vocabulary_list=(0, 1, 2, 5, 3, 4), dtype=tf.int64, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='class', vocabulary_list=('Third', 'First', 'Second'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='deck', vocabulary_list=('unknown', 'C', 'G', 'A', 'B', 'D', 'F', 'E'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='embark_town', vocabulary_list=('Southampton', 'Cherbourg', 'Queenstown', 'unknown'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='alone', vocabulary_list=('n', 'y'), dtype=tf.string, def

In [9]:
# input function: defines how the dataset is shuffled, batched and repeated when the estimator reads it
def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32,
                  shuffle_buffer_size=1000):
  """Build a zero-argument input function for a tf.estimator model.

  Args:
    data_df: feature table; it is converted with dict(data_df), so it must
      map column names to column values (e.g. a pandas DataFrame).
    label_df: labels aligned element-wise with the rows of data_df.
    num_epochs: number of passes over the data the dataset will yield.
    shuffle: whether to shuffle the examples before batching.
    batch_size: number of examples per batch.
    shuffle_buffer_size: size of the shuffle buffer. A buffer at least as
      large as the dataset gives a full uniform shuffle; a smaller one only
      shuffles locally. Defaults to 1000, the value previously hard-coded.

  Returns:
    A function taking no arguments that returns the configured
    tf.data.Dataset of (features dict, label) batches.
  """
  def input_function():  # inner function, this will be returned
    dataset = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
    if shuffle:
      dataset = dataset.shuffle(shuffle_buffer_size)
    # Batch first and repeat afterwards, so no batch mixes examples from two epochs.
    return dataset.batch(batch_size).repeat(num_epochs)  # the whole dataset, not a single batch
  return input_function  # return a function object for use

# Training uses make_input_fn's defaults: shuffled, batches of 32, 10 epochs.
train_input_fn = make_input_fn(dftrain, y_train)
# Evaluation reads the data once, in its original order.
eval_input_fn = make_input_fn(dfeval, y_eval, num_epochs=1, shuffle=False)

In [10]:
# define, train and evaluate the model
linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns)
linear_est.train(train_input_fn)  # no `steps` given, so training runs until the input dataset is exhausted
result = linear_est.evaluate(eval_input_fn)  

clear_output()  # clear whatever this cell has printed so far, so only the metrics below remain
for k, v in result.items():
    # Metric name right-aligned in 20 characters, then the value.
    # NOTE(review): ':4f' means minimum width 4 with the default 6 decimals; '.4f' may have been intended.
    print(f'{k:>20s}:\t{v:4f}')

            accuracy:	0.738636
   accuracy_baseline:	0.625000
                 auc:	0.835751
auc_precision_recall:	0.790810
        average_loss:	0.479295
          label/mean:	0.375000
                loss:	0.471968
           precision:	0.644231
     prediction/mean:	0.409639
              recall:	0.676768
         global_step:	200.000000


### First dense neural network

In [11]:
# Iris dataset: declare the schema, download both CSV splits, then load them.
CSV_COLUMN_NAMES = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth', 'Species']
SPECIES = ['Setosa', 'Versicolor', 'Virginica']

# get_file downloads the file (or reuses a cached copy) and returns its local path.
train_path = tf.keras.utils.get_file(
    fname="iris_training.csv",
    origin="https://storage.googleapis.com/download.tensorflow.org/data/iris_training.csv",
)
test_path = tf.keras.utils.get_file(
    fname="iris_test.csv",
    origin="https://storage.googleapis.com/download.tensorflow.org/data/iris_test.csv",
)

# header=0 treats each file's first row as a header to discard; our own column names replace it.
train = pd.read_csv(train_path, header=0, names=CSV_COLUMN_NAMES)
test = pd.read_csv(test_path, header=0, names=CSV_COLUMN_NAMES)

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/iris_training.csv
Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/iris_test.csv


In [12]:
# Column names, dtypes and non-null counts of the Iris training frame.
train.info()
# Other quick looks worth trying: train.head() for the first rows,
# train.describe() for summary statistics.

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120 entries, 0 to 119
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   SepalLength  120 non-null    float64
 1   SepalWidth   120 non-null    float64
 2   PetalLength  120 non-null    float64
 3   PetalWidth   120 non-null    float64
 4   Species      120 non-null    int64  
dtypes: float64(4), int64(1)
memory usage: 4.8 KB


In [13]:
# Split the label column off from the features. pop() removes 'Species' from the frame in place,
# so re-running this cell without first reloading the data raises a KeyError.
train_y = train.pop('Species')
test_y = test.pop('Species')

In [14]:
# Feature columns: every remaining column of the Iris frame is a numeric input.
feature_columns = [
    tf.feature_column.numeric_column(key=column_name)
    for column_name in train.keys()
]
print(feature_columns)

[NumericColumn(key='SepalLength', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='SepalWidth', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='PetalLength', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='PetalWidth', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]


In [15]:
# Input function used for both training and evaluation of the Iris classifier.
def input_fn(features, labels, training=True, batch_size=256):
    """Turn features and labels into a batched tf.data.Dataset.

    In training mode the examples are shuffled (buffer of 1000) and repeated
    without end before batching; otherwise they are batched once, in order.
    """
    examples = tf.data.Dataset.from_tensor_slices((dict(features), labels))

    # Evaluation / prediction: a single ordered pass.
    if not training:
        return examples.batch(batch_size)

    shuffled = examples.shuffle(1000)
    endless = shuffled.repeat()
    return endless.batch(batch_size)

In [16]:
# build the model
# NOTE(review): `feature_columns` was also used for the Titanic columns earlier in the notebook;
# the cell that rebuilds it from the Iris frame must have run before this one.
classifier = tf.estimator.DNNClassifier(
    feature_columns=feature_columns,
    hidden_units=[30, 10], # 2 hidden layers of 30 and 10 nodes
    n_classes=3)  # one class per entry in SPECIES



In [17]:
classifier.train(
    input_fn = lambda: input_fn(train, train_y, training=True), # we use a lambda so the estimator receives a zero-argument callable
    steps=5000)  # explicit stop condition: with training=True the dataset repeats indefinitely

clear_output()  # clear the output this cell has produced so far

In [18]:
# Evaluate on the test split; training=False makes input_fn skip shuffling and repeating,
# so the data is read once, in order.
eval_result = classifier.evaluate(
    input_fn=lambda: input_fn(test, test_y, training=False))

# eval_result is unpacked into keyword arguments, so {accuracy} picks out its 'accuracy' entry.
print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))


Test set accuracy: 0.733

