[google colab](https://colab.research.google.com/drive/1_sAitZMGkBqcDXTMYLs2RIkpo9RRKZdF)

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import feature_column
from tensorflow.keras import layers

In [2]:
# Toy batch of ten students: a numeric score plus two categorical labels.
# Kept as a plain dict of equal-length lists so it can be fed both to
# pandas and directly to a DenseFeatures layer.
data = {
    "marks": [55, 21, 63, 88, 74, 54, 95, 41, 84, 52],
    "grade": [
        "average", "poor", "average", "good", "good",
        "average", "good", "average", "good", "average",
    ],
    "point": ["c", "f", "c+", "b+", "b", "c", "a", "d+", "b+", "c"],
}

In [3]:
# Tabular view of the same batch; the frame is only used for inspection and
# for deriving vocabularies, while the feature layers consume `data` itself.
df = pd.DataFrame(data=data)
df

Unnamed: 0,marks,grade,point
0,55,average,c
1,21,poor,f
2,63,average,c+
3,88,good,b+
4,74,good,b
5,54,average,c
6,95,good,a
7,41,average,d+
8,84,good,b+
9,52,average,c


In [4]:
# Show the dtype of each column: `marks` is an integer column, while `grade`
# and `point` hold Python strings (object dtype).
df.dtypes

marks     int64
grade    object
point    object
dtype: object

In [5]:
def demo(feature_column, features=None):
    """Print the dense array a feature column produces for a batch of features.

    Args:
        feature_column: Feature column(s) passed straight to
            `layers.DenseFeatures`. Inside this function the parameter name
            hides the imported `feature_column` module.
        features: Optional dict mapping feature names to their values. When
            omitted, the notebook-level `data` dict is used, which is what the
            helper always did before this argument existed.
    """
    if features is None:
        # Fall back to the notebook's sample batch for backward compatibility.
        features = data
    feature_layer = layers.DenseFeatures(feature_column)
    print(feature_layer(features).numpy())

In [6]:
# A numeric column forwards the raw "marks" values as a single float column.
marks = feature_column.numeric_column("marks")
feature_layer = layers.DenseFeatures(marks)
# Evaluate the layer once and reuse the result for both printouts.
marks_dense = feature_layer(data)
print(marks_dense)
print(marks_dense.numpy().shape)

tf.Tensor(
[[55.]
 [21.]
 [63.]
 [88.]
 [74.]
 [54.]
 [95.]
 [41.]
 [84.]
 [52.]], shape=(10, 1), dtype=float32)
(10, 1)


In [7]:
# Bucketize the numeric marks: the seven boundaries below split the range
# into eight buckets, and each row is one-hot encoded by the bucket it hits.
marks = feature_column.numeric_column("marks")
marks_buckets = feature_column.bucketized_column(
    marks,
    boundaries=[30, 40, 50, 60, 70, 80, 90],
)
feature_layer = layers.DenseFeatures(marks_buckets)
# Evaluate once; print the tensor and then its NumPy view.
marks_bucketized = feature_layer(data)
print(marks_bucketized)
print(marks_bucketized.numpy())

tf.Tensor(
[[0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0.]], shape=(10, 8), dtype=float32)
[[0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0.]]


## Indicator and embedding columns
Indicator columns and embedding columns never work on features directly, but instead take categorical columns as input.

In [8]:
# Map each grade string to its position in an explicit vocabulary, then wrap
# the categorical column in an indicator column to get a one-hot encoding.
grade = feature_column.categorical_column_with_vocabulary_list(
    key='grade',
    vocabulary_list=['poor', 'average', 'good'],
)
grade_one_hot = feature_column.indicator_column(grade)
feature_layer = layers.DenseFeatures(grade_one_hot)
grade_dense = feature_layer(data)
print(grade_dense)

tf.Tensor(
[[0. 1. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 1. 0.]], shape=(10, 3), dtype=float32)


In [9]:
# Build the vocabulary from the distinct values seen in the data (in order of
# first appearance) instead of listing them by hand, then one-hot encode.
point = feature_column.categorical_column_with_vocabulary_list(
    key='point',
    vocabulary_list=df['point'].unique(),
)
point_one_hot = feature_column.indicator_column(point)
feature_layer = layers.DenseFeatures(point_one_hot)
point_dense = feature_layer(data)
print(point_dense.numpy())

[[1. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]]


In [10]:
# Second toy batch: same marks and grades, but `point1` uses only three
# distinct letters, so its one-hot encoding is narrower.
data1 = {
    "marks1": [55, 21, 63, 88, 74, 54, 95, 41, 84, 52],
    "grade1": [
        "average", "poor", "average", "good", "good",
        "average", "good", "average", "good", "average",
    ],
    "point1": ["c", "c", "c", "b", "b", "c", "a", "c", "b", "c"],
}
df1 = pd.DataFrame(data=data1)

In [11]:
# One-hot encode `point1` using the distinct values found in df1 as the
# vocabulary; with three unique letters the encoding is three columns wide.
point1 = feature_column.categorical_column_with_vocabulary_list(
    key='point1',
    vocabulary_list=df1['point1'].unique(),
)
point1_one_hot = feature_column.indicator_column(point1)
feature_layer1 = layers.DenseFeatures(point1_one_hot)
point1_dense = feature_layer1(data1)
print(point1_dense)

tf.Tensor(
[[1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [0. 1. 0.]
 [1. 0. 0.]], shape=(10, 3), dtype=float32)


In [12]:
# Notice the input to the embedding column is the categorical column
# we previously created
point = feature_column.categorical_column_with_vocabulary_list(
    'point', df['point'].unique())
point_embedding = feature_column.embedding_column(point, dimension=4)
feature_layer = layers.DenseFeatures(point_embedding)
print(feature_layer(data))

tf.Tensor(
[[-0.9210452  -0.24410504  0.05263579  0.00426026]
 [ 0.04256853 -0.12394153 -0.53852063 -0.35018656]
 [-0.12812604  0.48414207 -0.78286046  0.10103692]
 [-0.3610778   0.30405998  0.64818835  0.83606577]
 [ 0.15670884 -0.03258523 -0.67206484 -0.1578501 ]
 [-0.9210452  -0.24410504  0.05263579  0.00426026]
 [-0.9761848   0.8016284  -0.08925328 -0.23772472]
 [-0.38139325 -0.6212604   0.28508696 -0.3042958 ]
 [-0.3610778   0.30405998  0.64818835  0.83606577]
 [-0.9210452  -0.24410504  0.05263579  0.00426026]], shape=(10, 4), dtype=float32)


In [13]:
# Hash each point string into one of 6 buckets instead of using a vocabulary,
# then one-hot encode the bucket id. Different values can land in the same
# bucket, so distinct points may share a column in the output.
point_hashed = feature_column.indicator_column(
    feature_column.categorical_column_with_hash_bucket(
        'point', hash_bucket_size=6)
)
feature_layer = layers.DenseFeatures(point_hashed)
point_hashed_dense = feature_layer(data)
print(point_hashed_dense)

tf.Tensor(
[[0. 0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 1. 0.]], shape=(10, 6), dtype=float32)


In [15]:
# Cross the bucketized marks with the grade vocabulary column, hashing every
# (bucket, grade) combination into one of 10 buckets, then one-hot encode it.
# The indicator column is stored under its own name: assigning it to
# `feature_column` would replace the imported TensorFlow module, and every
# cell that calls `feature_column.<something>` would then fail when re-run.
crossed_feature = feature_column.crossed_column(
    [marks_buckets, grade], hash_bucket_size=10)
crossed_one_hot = feature_column.indicator_column(crossed_feature)
feature_layer = layers.DenseFeatures(crossed_one_hot)
print(feature_layer(data))

tf.Tensor(
[[0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]], shape=(10, 10), dtype=float32)
