# Introduction
The notebook is intended to experiment with the different TensorFlow APIs

In [1]:
# Import Standard Libraries
import os

import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing

from sklearn.model_selection import train_test_split

# Suppress warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

2023-06-14 11:24:50.128793: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Read Data

In [2]:
# Define a dataset
X = tf.constant(range(10), dtype=tf.float32)
Y = 2 * X + 10

## Pet Finder Mini Toy

In [3]:
# Read required data petfinder_mini_toy
petfinder_mini_toy = pd.read_csv('./../../data/petfinder-mini.csv')

In [4]:
petfinder_mini_toy.head()

Unnamed: 0,Type,Age,Breed1,Gender,Color1,Color2,MaturitySize,FurLength,Vaccinated,Sterilized,Health,Fee,Description,PhotoAmt,AdoptionSpeed
0,Cat,3,Tabby,Male,Black,White,Small,Short,No,No,Healthy,100,Nibble is a 3+ month old ball of cuteness. He ...,1,2
1,Cat,1,Domestic Medium Hair,Male,Black,Brown,Medium,Medium,Not Sure,Not Sure,Healthy,0,I just found it alone yesterday near my apartm...,2,0
2,Dog,1,Mixed Breed,Male,Brown,White,Medium,Medium,Yes,No,Healthy,0,Their pregnant mother was dumped by her irresp...,7,3
3,Dog,4,Mixed Breed,Female,Black,Brown,Medium,Short,Yes,No,Healthy,150,"Good guard dog, very alert, active, obedience ...",8,2
4,Dog,1,Mixed Breed,Male,Black,No Color,Medium,Short,No,No,Healthy,0,This handsome yet cute boy is up for adoption....,3,2


In [5]:
# Create target variable
petfinder_mini_toy['target'] = np.where(petfinder_mini_toy['AdoptionSpeed']==4, 0, 1)

# Drop unused columns
petfinder_mini_toy = petfinder_mini_toy.drop(columns=['AdoptionSpeed', 'Description'])

In [6]:
# Split train + validation + test
petfinder_mini_toy_train, petfinder_mini_toy_test = train_test_split(petfinder_mini_toy, test_size=0.2)
petfinder_mini_toy_train, petfinder_mini_toy_validation = train_test_split(petfinder_mini_toy_train, test_size=0.2)

print(len(petfinder_mini_toy_train), 'train examples')
print(len(petfinder_mini_toy_validation), 'validation examples')
print(len(petfinder_mini_toy_test), 'test examples')

7383 train examples
1846 validation examples
2308 test examples


# Dataset API

## Create

In [7]:
# Create a dataset
dataset = tf.data.Dataset.from_tensor_slices((X, Y))

## Fetch

In [8]:
# Retrieve the data samples
for x, y in dataset:
    print(f'{x} - {y}')

0.0 - 10.0
1.0 - 12.0
2.0 - 14.0
3.0 - 16.0
4.0 - 18.0
5.0 - 20.0
6.0 - 22.0
7.0 - 24.0
8.0 - 26.0
9.0 - 28.0


2023-06-14 11:24:52.783269: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype float and shape [10]
	 [[{{node Placeholder/_1}}]]


## Batch and Epochs

In [9]:
# Define batch size and epochs
batch_size = 3
epochs = 6

In [10]:
# Add batches and epochs
dataset = dataset.repeat(epochs).batch(batch_size, drop_remainder=True)

In [11]:
# Reitreve the batches for the epochs
for batch_x, batch_y in dataset:
    print(f'{batch_x} - {batch_y}')

[0. 1. 2.] - [10. 12. 14.]
[3. 4. 5.] - [16. 18. 20.]
[6. 7. 8.] - [22. 24. 26.]
[9. 0. 1.] - [28. 10. 12.]
[2. 3. 4.] - [14. 16. 18.]
[5. 6. 7.] - [20. 22. 24.]
[8. 9. 0.] - [26. 28. 10.]
[1. 2. 3.] - [12. 14. 16.]
[4. 5. 6.] - [18. 20. 22.]
[7. 8. 9.] - [24. 26. 28.]
[0. 1. 2.] - [10. 12. 14.]
[3. 4. 5.] - [16. 18. 20.]
[6. 7. 8.] - [22. 24. 26.]
[9. 0. 1.] - [28. 10. 12.]
[2. 3. 4.] - [14. 16. 18.]
[5. 6. 7.] - [20. 22. 24.]
[8. 9. 0.] - [26. 28. 10.]
[1. 2. 3.] - [12. 14. 16.]
[4. 5. 6.] - [18. 20. 22.]
[7. 8. 9.] - [24. 26. 28.]


2023-06-14 11:24:52.868466: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype float and shape [10]
	 [[{{node Placeholder/_1}}]]


## Read CSV

In [12]:
# Define feature columns
features = ['date', 
            'country', 
            'store', 
            'product']

# Define labels
labels = ['num_sold']

# Read data from csv
dataset_csv = tf.data.experimental.make_csv_dataset(file_pattern='./../../data/books_sold_train.csv', batch_size=4)

In [13]:
# Fetch data
for step, (row) in enumerate(dataset_csv):

    print(row['row_id'])

    if step == 3:
        break

2023-06-14 11:24:52.945750: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_9' with dtype string and shape [1]
	 [[{{node Placeholder/_9}}]]
2023-06-14 11:24:52.946231: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_9' with dtype string and shape [1]
	 [[{{node Placeholder/_9}}]]


tf.Tensor([7170 3036 4382 9042], shape=(4,), dtype=int32)
tf.Tensor([6512 6178 4024 2890], shape=(4,), dtype=int32)
tf.Tensor([8476 6304 2751 1807], shape=(4,), dtype=int32)
tf.Tensor([7045 4519 8534 1873], shape=(4,), dtype=int32)


## Create Dataset from DataFrame

In [14]:
def dataframe_to_dataset(data: pd.DataFrame, 
                         label_column: str, 
                         shuffle:bool = True, 
                         epochs:int = None, 
                         batch_size:int = 32):

    # Ensure to not work on the original dataframe
    data_copied = data.copy()

    # Extract the label
    labels = data_copied.pop(label_column)

    # Create the dataset
    dataset = tf.data.Dataset.from_tensor_slices((dict(data_copied), labels))

    # Shuffle dataset
    if shuffle:
        dataset = dataset.shuffle(buffer_size=len(data_copied))

    # Repeat for epochs
    if epochs:
        dataset = dataset.repeat(epochs)

    # Add batches
    if batch_size:

        # Add batches 
        dataset = dataset.batch(batch_size)
                             
    return dataset          

In [15]:
# Create the Dataset from a DataFrame
petfinder_mini_toy_dataset = dataframe_to_dataset(petfinder_mini_toy, 'target', epochs=4)

## Take from Batch

In [16]:
[(batch_take_features, batch_take_labels)] = petfinder_mini_toy_dataset.take(1)

2023-06-14 11:24:53.077331: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_2' with dtype string and shape [11537]
	 [[{{node Placeholder/_2}}]]
2023-06-14 11:24:53.078059: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype string and shape [11537]
	 [[{{node Placeholder/_1}}]]


In [17]:
batch_take_features['Type']

<tf.Tensor: shape=(32,), dtype=string, numpy=
array([b'Dog', b'Cat', b'Dog', b'Cat', b'Cat', b'Dog', b'Dog', b'Dog',
       b'Cat', b'Cat', b'Cat', b'Dog', b'Dog', b'Dog', b'Cat', b'Dog',
       b'Cat', b'Cat', b'Cat', b'Dog', b'Dog', b'Dog', b'Dog', b'Dog',
       b'Dog', b'Dog', b'Cat', b'Dog', b'Cat', b'Cat', b'Cat', b'Dog'],
      dtype=object)>

# Linear Regression Training

In [18]:
def loss_mse(X, Y, w0, b):
    """
    Compute the Loss as Mean Squared Error
    """
    
    # Predict the y value
    y_predicted = w0 * X + b

    # Compute the error
    errors = (y_predicted - Y)**2

    # Compute the mean
    mse = tf.reduce_mean(errors)
    
    return mse

In [19]:
def compute_derivate_terms(X, Y, w0, b):
    """
    Compute the derivative terms for w0 and b
    """
    
    with tf.GradientTape() as tape:
        loss = loss_mse(X, Y, w0, b)
    return tape.gradient(loss, [w0, b])

In [20]:
# Initialise w0 and b
w0, b = tf.Variable(0.0), tf.Variable(0.0)

# Set learning rate
learning_rate = 0.001

# Create bigger dataset
dataset = tf.data.Dataset.from_tensor_slices((X, Y)).repeat(250).batch(2, drop_remainder=True)

for step, (X_batch, Y_batch) in enumerate(dataset):

    # Compute the derivate terms
    dw0, db = compute_derivate_terms(X_batch, Y_batch, w0, b)

    # Update w0 and b
    w0.assign_sub(dw0 * learning_rate)
    b.assign_sub(db * learning_rate)

    # Print loss every 100 steps
    if step % 100 == 0:
        loss = loss_mse(X_batch, Y_batch, w0, b)
        print(f'Step {step} - Loss {loss}')

2023-06-14 11:24:53.191198: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype float and shape [10]
	 [[{{node Placeholder/_1}}]]


Step 0 - Loss 121.37281799316406
Step 100 - Loss 67.96109008789062
Step 200 - Loss 60.568275451660156
Step 300 - Loss 54.036888122558594
Step 400 - Loss 48.209930419921875
Step 500 - Loss 43.01130676269531
Step 600 - Loss 38.37327575683594
Step 700 - Loss 34.235374450683594
Step 800 - Loss 30.54366683959961
Step 900 - Loss 27.25004768371582
Step 1000 - Loss 24.31158447265625
Step 1100 - Loss 21.689990997314453
Step 1200 - Loss 19.35108184814453
