## TensorBoard
Example with the Adult dataset

In [1]:
# Widen the notebook's main container so wide outputs use (almost) the full browser width.
# `display` and `HTML` come from the public `IPython.display` module; importing them from
# `IPython.core.display` is deprecated in recent IPython releases.
from IPython.display import display, HTML
display(HTML("<style>.container {width:95% !important; }</style>"))

In [2]:
import os
import shutil
import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf
import pandas as pd

### Load DataFrame
Initialize column names

In [3]:
# Column names, in file order, handed to `pd.read_csv(..., names=cols)` below.
# The final entry, "label", is the prediction target.
cols = [
    "age", "workclass", "fnlwgt", "education", "education-num",
    "marital-status", "occupation", "relationship", "race", "sex",
    "capital-gain", "capital-loss", "hours-per-week", "native-country",
    "label",
]

Load Pandas DataFrame

In [4]:
# Both files are read with header=None, so the column names come from `cols`.
train_df = pd.read_csv("../Lesson5/data/adult.data", header=None, index_col=None, names=cols)
test_df = pd.read_csv("../Lesson5/data/adult.test", header=None, index_col=None, names=cols)

# "fnlwgt" is removed from both frames before any feature column is built.
train_df = train_df.drop(columns="fnlwgt")
test_df = test_df.drop(columns="fnlwgt")

# The first row of the test file is discarded — presumably a non-data banner line
# (TODO confirm against the file).
test_df = test_df.drop(test_df.index[0])

# Binary target: 0 for the "<=50K" class, 1 for every other value.
# The raw strings carry a leading space, and the test file appends a trailing period.
# Vectorised comparison replaces the row-wise `.apply(lambda ...)`; the result is the same.
train_df["label"] = (train_df["label"] != " <=50K").astype("int64")
test_df["label"] = (test_df["label"] != " <=50K.").astype("int64")

# Re-cast the test columns to integers (they are not guaranteed to be parsed as ints
# once the file contains the discarded first row).
test_df["age"] = test_df["age"].astype("int64")
test_df["education-num"] = test_df["education-num"].astype("int64")

In [5]:
# Split the column list: every name but the last is an input, the last one is the target.
# NOTE(review): `features` still contains "fnlwgt", which has been deleted from both frames.
*features, label = cols

### DataSet
Convert the `DataFrame` to a `tf.data.Dataset`

In [6]:
def df_to_dataset(dataframe, label_name, shuffle=True, batch_size=32):
    """Turn a pandas DataFrame into a batched ``tf.data.Dataset``.

    Each element of the dataset is a ``(features, label)`` pair, where
    ``features`` is a dict keyed by column name. The caller's DataFrame is
    left unmodified.

    Args:
        dataframe: source DataFrame containing the inputs and the label column.
        label_name: name of the column to use as the label.
        shuffle: when true, shuffle with a buffer as large as the DataFrame.
        batch_size: number of rows per batch.

    Returns:
        The batched (and optionally shuffled) dataset.
    """
    targets = dataframe[label_name]
    inputs = dataframe.drop(columns=label_name)
    dataset = tf.data.Dataset.from_tensor_slices((dict(inputs), targets))
    if not shuffle:
        return dataset.batch(batch_size)
    # Shuffle before batching so individual rows, not whole batches, are shuffled.
    return dataset.shuffle(buffer_size=len(inputs)).batch(batch_size)

Columns with `object` dtype hold strings, so they must be converted to numeric data before they can be turned into a tensor (TensorFlow's array type).

In [7]:
# Example: map the string values of the "sex" column onto a fixed vocabulary.
# The leading spaces are part of the values being matched.
sex_feature_column = tf.feature_column.categorical_column_with_vocabulary_list(
    "sex",
    [" Male", " Female"],
)

Now we can do that with all the different column types ...

In [8]:
# numeric cols
# "age" must stay first: the bucketized column built in the next cell reads feature_columns[0].
feature_columns = [
    tf.feature_column.numeric_column(name)
    for name in ("age", "capital-gain", "capital-loss", "hours-per-week")
]

In [9]:
# bucketized cols
age_buckets = tf.feature_column.bucketized_column(feature_columns[0], 
                                                  boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
feature_columns.append(age_buckets)

In [10]:
# Columns treated as categorical. Note that "education-num" is included here
# even though its values are integers.
cat_features = [
    "workclass", "education", "education-num",
    "marital-status", "occupation", "relationship",
    "race", "sex", "native-country",
]

In [11]:
def generate_cat_feature_columns(cat_feature_name,train_df,emb_dim=None):
    """Build feature columns for one categorical DataFrame column.

    The vocabulary is the list of unique values found in
    ``train_df[cat_feature_name]``.

    Args:
        cat_feature_name: name of the categorical column.
        train_df: DataFrame whose unique values define the vocabulary.
        emb_dim: embedding dimension; ``None`` means no embedding column.

    Returns:
        The one-hot (indicator) column alone when ``emb_dim`` is ``None``;
        otherwise a ``(one_hot, embedding)`` tuple. Callers that iterate over
        the result must therefore pass an ``emb_dim``.
    """
    vocabulary = train_df[cat_feature_name].unique().tolist()
    categorical = tf.feature_column.categorical_column_with_vocabulary_list(
        key=cat_feature_name,
        vocabulary_list=vocabulary)
    # The indicator column is needed in both cases, so build it once.
    one_hot = tf.feature_column.indicator_column(categorical)
    if emb_dim is None:
        return one_hot
    emb_feature = tf.feature_column.embedding_column(categorical, dimension=emb_dim)
    return one_hot, emb_feature

In [12]:
# indicator and embedding cols
for cat in cat_features:
    feature_columns = feature_columns + list(generate_cat_feature_columns(cat,train_df,emb_dim=8))

Create a `Feature Layer`

In [13]:
# Wrap every feature column in a single Keras layer; it is used as the model's first layer.
feature_layer = tf.keras.layers.DenseFeatures(feature_columns=feature_columns)

In [14]:
# Rows per batch; the same value is reused for the test dataset.
batch_size = 1000
# Training data keeps df_to_dataset's default shuffling.
train_ds = df_to_dataset(
    dataframe=train_df,
    label_name="label",
    batch_size=batch_size,
)

In [15]:
# Evaluation data is not shuffled.
test_ds = df_to_dataset(
    dataframe=test_df,
    label_name="label",
    shuffle=False,
    batch_size=batch_size,
)

### Model

In [16]:
# Remove the log directory left by earlier runs before training writes to it again.
# shutil.rmtree works on every platform (the `rm -rf` shell escape is Unix-only);
# ignore_errors=True keeps this a no-op when the directory does not exist yet.
shutil.rmtree("./logs/", ignore_errors=True)

In [17]:
# Binary classifier: the feature layer, then three ReLU layers (128 -> 64 -> 32 units),
# each preceded by Dropout(0.3), and a single sigmoid output unit.
model = tf.keras.Sequential()
model.add(feature_layer)  ## First Layer of the network!!!
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(32, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Directory the TensorBoard callback writes this run's logs to.
log_dir = "logs/fit/base"

tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,  # histograms are computed every epoch
)

# Train for 10 epochs, validating on the test dataset after each one.
model.fit(
    train_ds,
    epochs=10,
    validation_data=test_ds,
    callbacks=[tensorboard_callback],
)

W1114 23:28:11.147489 140339887822656 callbacks.py:1218] TensorBoard Callback will ignore `write_graph=True`when `Model.run_eagerly=True`.`


Epoch 1/10


W1114 23:28:11.388534 140339887822656 deprecation.py:323] From /home/josem/my_envs/fastai/lib/python3.6/site-packages/tensorflow/python/feature_column/feature_column_v2.py:3048: VocabularyListCategoricalColumn._num_buckets (from tensorflow.python.feature_column.feature_column_v2) is deprecated and will be removed in a future version.
Instructions for updating:
The old _FeatureColumn APIs are being deprecated. Please use the new FeatureColumn APIs instead.
W1114 23:28:11.426843 140339887822656 deprecation.py:323] From /home/josem/my_envs/fastai/lib/python3.6/site-packages/tensorflow/python/feature_column/feature_column_v2.py:2758: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.cast` instead.
W1114 23:28:11.436711 140339887822656 deprecation.py:323] From /home/josem/my_envs/fastai/lib/python3.6/site-packages/tensorflow/python/feature_column/feature_column_v2.py:2902: to_int64 (from tensorflow.python

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fa32b72e208>