<a href="https://colab.research.google.com/github/zerotodeeplearning/ztdl-masterclasses/blob/master/solutions_do_not_open/The_Power_of_TensorFlow_solution.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Learn with us: www.zerotodeeplearning.com

Copyright © 2021: Zero to Deep Learning ® Catalit LLC.

In [None]:
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# The Power of TensorFlow

In [None]:
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Download the Australian credit CSV (cached locally by Keras) and keep its path.
file_path = tf.keras.utils.get_file(
    fname="australian_credit.csv",
    origin="https://raw.githubusercontent.com/zerotodeeplearning/ztdl-masterclasses/master/data/australian_credit.csv",
)

In [None]:
# Load the CSV at `file_path` into a DataFrame and preview the first rows.
df = pd.read_csv(file_path)
df.head()

In [None]:
# Number of rows per batch produced by the tf.data pipeline below.
BATCH_SIZE = 5

### tf.data API

In [None]:
def get_dataset(file_path, **kwargs):
  """Builds a batched `tf.data` dataset of (features, label) pairs from a CSV.

  Args:
    file_path: Path of the CSV file to read.
    **kwargs: Extra options forwarded to
      `tf.data.experimental.make_csv_dataset`. They take precedence over
      the defaults defined below (e.g. `batch_size`, `num_epochs`).

  Returns:
    The dataset created by `make_csv_dataset`.
  """
  options = dict(
      batch_size=BATCH_SIZE,
      label_name="class",
      na_value="?",
      num_epochs=1,
      ignore_errors=True)
  # Merge rather than pass both explicitly: repeating a keyword such as
  # `batch_size` in the call itself would raise a TypeError.
  options.update(kwargs)
  return tf.data.experimental.make_csv_dataset(file_path, **options)

In [None]:
# Build the batched dataset from the CSV using the defaults of `get_dataset`.
raw_train_data = get_dataset(file_path)

In [None]:
# Display the dataset object.
raw_train_data

In [None]:
def print_batch(batch):
  """Prints one line per entry of `batch`: the key padded to 20 chars, then its values.

  Args:
    batch: Mapping from string keys to objects exposing a `.numpy()` method.
  """
  row_template = "{:20s}: {}"
  for feature_name, tensor in batch.items():
    values = tensor.numpy()
    print(row_template.format(feature_name, values))

In [None]:
# Pull a single (features, labels) batch to use as a running example below.
example_batch, example_labels = next(iter(raw_train_data))

In [None]:
# Inspect the feature part of the example batch.
example_batch

In [None]:
# Inspect the labels of the example batch.
example_labels

In [None]:
# Print every entry of the example batch, one line per key.
print_batch(example_batch)

### Feature Columns API

In [None]:
from tensorflow.keras.layers import DenseFeatures
from tensorflow.feature_column import numeric_column, bucketized_column
from tensorflow.feature_column import categorical_column_with_vocabulary_list, indicator_column
from tensorflow.feature_column import embedding_column, crossed_column

In [None]:
def demo(feature_column):
    """Prints a feature column and the values it produces for `example_batch`.

    Args:
        feature_column: Feature column (or list of them) accepted by
            `DenseFeatures`.
    """
    feature_layer = DenseFeatures(feature_column)
    # Header, the column itself, then an empty separator line.
    print("Feature column type:", feature_column, "", sep="\n")
    print("Example batch data:")
    transformed = feature_layer(example_batch)
    print(transformed.numpy())

In [None]:
# Use the raw `age` feature as a numeric input and show its values.
age = numeric_column("age")

demo(age)

In [None]:
# Split `age` into ranges delimited by the listed boundaries.
age_buckets = bucketized_column(age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])

demo(age_buckets)

In [None]:
# Distinct values of the `occupation` column, used below as the vocabulary.
occupation_vocab = df['occupation'].unique()

occupation_vocab

In [None]:
# Categorical column restricted to the vocabulary found in the data.
occupation = categorical_column_with_vocabulary_list(
      'occupation', occupation_vocab)

# Wrap it in an indicator column so it can be fed to `demo`.
occupation_one_hot = indicator_column(occupation)

demo(occupation_one_hot)

In [None]:
# Alternative representation of `occupation`: an 8-dimensional embedding.
occupation_embedding = embedding_column(
    occupation, dimension=8)

demo(occupation_embedding)

### Exercise 1: other feature columns

There are several other types of feature columns.

- Create a feature column of type `crossed_column` using the `age_buckets` and `occupation` columns.

    ```python
    crossed_feature = crossed_column( ....
    ```

- Wrap `crossed_feature` into an indicator column and use the `demo` function to check that it works as expected.

- Create a `numeric_column` for each of the following numeric features:

    ```python
    numeric_cols = ['age', 'time_at_addr', 'time_w_empl',
                    'time_w_bank', 'monthly_housing', 'savings_balance']
    ```

- Combine the feature columns created above with these numeric columns into a single list of feature columns:

    ```python
    feature_columns = [....]
    ```
In the next part we will use these to train a model.




In [None]:
# Cross the age buckets with occupation (hashed into 1000 buckets) and wrap
# the result in an indicator column in a single expression.
crossed_feature = indicator_column(
    crossed_column([age_buckets, occupation], hash_bucket_size=1000))

demo(crossed_feature)

In [None]:
# Names of the CSV columns that are used as plain numeric features.
numeric_cols = [
    'age',
    'time_at_addr',
    'time_w_empl',
    'time_w_bank',
    'monthly_housing',
    'savings_balance',
]

In [None]:
# One numeric column per name in `numeric_cols`, in the same order ...
feature_columns = [numeric_column(col_name) for col_name in numeric_cols]

# ... followed by the engineered columns built in the cells above.
feature_columns.extend([
    age_buckets,
    occupation_one_hot,
    occupation_embedding,
    crossed_feature,
])

### Model and Layers API

In [None]:
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model, Sequential

In [None]:
class MyDenseLayer(tf.keras.layers.Layer):
  """Minimal fully connected layer: `activation(inputs @ kernel + bias)`.

  Args:
    num_outputs: Size of the last dimension of the layer output.
    activation: Callable applied to the affine transformation.
    **kwargs: Standard `tf.keras.layers.Layer` arguments (e.g. `name`),
      forwarded to the base class.
  """

  def __init__(self, num_outputs, activation, **kwargs):
    super().__init__(**kwargs)
    self.num_outputs = num_outputs
    self.activation = activation

  def build(self, input_shape):
    """Creates the kernel and bias from the last dimension of `input_shape`."""
    # The weight name is passed by keyword: the positional parameter order
    # of `add_weight` is not the same in every Keras version.
    self.kernel = self.add_weight(
        name="kernel",
        shape=[int(input_shape[-1]), self.num_outputs],
        initializer='normal')

    self.bias = self.add_weight(
        name="bias",
        shape=[self.num_outputs],
        initializer='normal')

  def call(self, inputs):
    """Applies the affine transformation followed by the activation."""
    return self.activation(tf.matmul(inputs, self.kernel) + self.bias)

In [None]:
class MyModel(tf.keras.Model):
  """Feature-column input layer, two 128-unit tanh layers and a 1-unit head."""

  def __init__(self):
    super().__init__()
    self.features = DenseFeatures(feature_columns)
    self.dense1 = MyDenseLayer(128, activation=tf.nn.tanh)
    self.dense2 = MyDenseLayer(128, activation=tf.nn.tanh)
    # No activation is passed to the head layer.
    self.dense3 = Dense(1)

  def call(self, inputs):
    """Runs the inputs through the feature layer and the three dense layers."""
    hidden = self.features(inputs)
    for layer in (self.dense1, self.dense2, self.dense3):
      hidden = layer(hidden)
    return hidden

In [None]:
# Instantiate the subclassed model defined above.
model = MyModel()

In [None]:
# Run one forward pass on the example batch to check the model end to end.
model(example_batch)

### Exercise 2

- Compile and train the model using an optimizer of your choice.

- Pay attention to how you define the loss. The model defined above returns the logits at the output (there is no sigmoid function) so you will have to create an instance of `BinaryCrossentropy` that takes logits instead of probabilities.

In [None]:
from tensorflow.keras.optimizers import RMSprop, Adam, SGD
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import BinaryAccuracy

In [None]:
# `learning_rate` is the supported keyword; the `lr` alias is deprecated.
optimizer = Adam(learning_rate=0.0005)
# The model returns logits (no sigmoid), so the loss must be told to expect them.
loss = BinaryCrossentropy(from_logits=True)
# Predictions are logits as well: the decision boundary is 0, not the default
# 0.5 that applies to probabilities.
accuracy_score = BinaryAccuracy(threshold=0.0)

In [None]:
# Attach the optimizer, the loss and the accuracy metric defined above.
model.compile(optimizer=optimizer,
              loss=loss,
              metrics=[accuracy_score])

In [None]:
# Train for 50 epochs on the batched dataset; `h.history` is plotted in the next cell.
h = model.fit(raw_train_data, epochs=50)

In [None]:
# Plot every series recorded in `h.history` against the epoch index.
# The title and axis label let the figure stand on its own; the trailing
# `;` hides the Axes repr in the cell output.
pd.DataFrame(h.history).plot(title="Training history", xlabel="Epoch");