In [46]:
import numpy as np
import tensorflow as tf

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

In [3]:
housing = fetch_california_housing()

In [4]:
X, y = housing.data, housing.target

In [5]:
X_train_full, X_test, y_train_full, y_test = train_test_split(X, y, random_state=42, test_size=0.15)

In [6]:
X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full, random_state=42, test_size=0.15)

# 1. Normalization Layer

In [7]:
norm_layer = tf.keras.layers.Normalization()

In [8]:
model = tf.keras.Sequential([
    norm_layer,
    tf.keras.layers.Dense(1)
])

In [9]:
model.compile(loss = 'mse', optimizer = 'adam', metrics=['accuracy'])

In [10]:
norm_layer.adapt(X_train)

In [13]:
model.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=5)

Epoch 1/5
[1m466/466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 838us/step - accuracy: 0.0033 - loss: 0.5223 - val_accuracy: 0.0027 - val_loss: 0.5142
Epoch 2/5
[1m466/466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 766us/step - accuracy: 0.0032 - loss: 0.5211 - val_accuracy: 0.0027 - val_loss: 0.5132
Epoch 3/5
[1m466/466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 804us/step - accuracy: 0.0039 - loss: 0.5380 - val_accuracy: 0.0027 - val_loss: 0.5122
Epoch 4/5
[1m466/466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 796us/step - accuracy: 0.0029 - loss: 0.5286 - val_accuracy: 0.0027 - val_loss: 0.5129
Epoch 5/5
[1m466/466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 752us/step - accuracy: 0.0037 - loss: 0.5227 - val_accuracy: 0.0027 - val_loss: 0.5124


<keras.src.callbacks.history.History at 0x183c74a10>

# Normalization Layer

In [15]:
norm_layer = tf.keras.layers.Normalization()

In [16]:
model = tf.keras.Sequential([
    norm_layer,
    tf.keras.layers.Dense(1)
])

In [17]:
model.compile(loss = 'mse', optimizer = 'adam')

In [18]:
norm_layer.adapt(X_train)

In [19]:
model.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=4)

Epoch 1/4
[1m466/466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 7.0463 - val_loss: 4.7949
Epoch 2/4
[1m466/466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 4.3112 - val_loss: 2.8976
Epoch 3/4
[1m466/466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 2.6425 - val_loss: 1.7562
Epoch 4/4
[1m466/466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 1.5569 - val_loss: 1.1229


<keras.src.callbacks.history.History at 0x180e63950>

# Discretization Layer

## Example 1 - Age categories of 3

In [22]:
age = tf.constant([[10], [93], [54], [67], [34], [16],[29], [55], [64]])

In [23]:
age

<tf.Tensor: shape=(9, 1), dtype=int32, numpy=
array([[10],
       [93],
       [54],
       [67],
       [34],
       [16],
       [29],
       [55],
       [64]], dtype=int32)>

In [24]:
discretization_layer = tf.keras.layers.Discretization(bin_boundaries=[18, 50])

In [25]:
age_categories_1 = discretization_layer(age)

In [26]:
age_categories_1

<tf.Tensor: shape=(9, 1), dtype=int64, numpy=
array([[0],
       [2],
       [2],
       [2],
       [1],
       [0],
       [1],
       [2],
       [2]])>

## Example 2 - Age categories of more than 5

In [27]:
discretization_layer = tf.keras.layers.Discretization(bin_boundaries=[5, 13, 18, 40, 60])

In [28]:
age = tf.constant([[2], [3], [9], [12], [13],[16],[17], [22], [32], [59], [42], [63],[69]])

In [29]:
age_categories_2 = discretization_layer(age)

In [30]:
age_categories_2

<tf.Tensor: shape=(13, 1), dtype=int64, numpy=
array([[0],
       [0],
       [1],
       [1],
       [2],
       [2],
       [2],
       [3],
       [3],
       [4],
       [4],
       [5],
       [5]])>

# Category Encoding

## Example 1 - three categories 

In [31]:
onehot_layer_1 = tf.keras.layers.CategoryEncoding(num_tokens=3)

In [32]:
onehot_layer_1(age_categories_1)

<tf.Tensor: shape=(9, 3), dtype=float32, numpy=
array([[1., 0., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.]], dtype=float32)>

## Example 2 - 6 categories 

In [33]:
onehot_layer_2 = tf.keras.layers.CategoryEncoding(num_tokens=6)

In [34]:
onehot_layer_2(age_categories_2)

<tf.Tensor: shape=(13, 6), dtype=float32, numpy=
array([[1., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 1.]], dtype=float32)>

# StringLookup Layer

In [40]:
cities = ['Dallas', 'Austin', 'Fort-worth', 'Houston', 'Montreal', 'Cuba']

In [41]:
str_lookup_layer = tf.keras.layers.StringLookup()

In [42]:
str_lookup_layer.adapt(cities)

In [43]:
str_lookup_layer([['Austin'], ['Dallas'], ['Montreal'], ['Massachusetts']])

<tf.Tensor: shape=(4, 1), dtype=int64, numpy=
array([[6],
       [4],
       [1],
       [0]])>

# Embeddings

In [44]:
tf.random.set_seed(42)

In [45]:
embedding_layer = tf.keras.layers.Embedding(input_dim=5, output_dim=2)

In [47]:
embedding_layer(np.array([2, 4, 2]))

<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[ 2.6561808e-02, -3.7382316e-02],
       [ 9.2327595e-05,  3.1468842e-02],
       [ 2.6561808e-02, -3.7382316e-02]], dtype=float32)>