# Building Complex Models Using the Functional API.
- Wide & Deep Neural network is an example of a nonsequential NN.
- It connects all or part of the inputs directly to the output layer, making it possible for the NN to learn both deep patterns (using the deep path) and simple rules (through the short path).
- In contrast a regular MLP forces all the data to flow through the full stack of layers (thus simple patterns may end up being distorted by this sequence of transformtions).

In [1]:
# Building the neural net using keras to tackel the california dataset.

import tensorflow as tf
from tensorflow import keras

In [2]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

housing = fetch_california_housing()

X_train_full, X_test, y_train_full, y_test = train_test_split(
    housing.data, housing.target)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)
X_test = scaler.transform(X_test)

In [3]:
input_ = keras.layers.Input(shape=X_train.shape[1:])
hidden1 = keras.layers.Dense(30, activation="relu")(input_)
hidden2 = keras.layers.Dense(30, activation="relu")(hidden1)
concat = keras.layers.Concatenate()([input_, hidden2])
output = keras.layers.Dense(1)(concat)
model = keras.Model(inputs=[input_], outputs=[output])

In [4]:
import numpy as np
import pandas as pd

# Check for NaNs or Infinities
print("NaNs in X_train:", np.any(np.isnan(X_train)))
print("Infinities in X_train:", np.any(np.isinf(X_train)))
print("NaNs in y_train:", np.any(np.isnan(y_train)))
print("Infinities in y_train:", np.any(np.isinf(y_train)))

NaNs in X_train: False
Infinities in X_train: False
NaNs in y_train: False
Infinities in y_train: False


In [5]:
from tensorflow.keras.optimizers import SGD

optimizer = SGD(learning_rate=1e-3, clipnorm=1.0)
model.compile(loss="mean_squared_error", optimizer=optimizer)

history = model.fit(X_train, y_train, epochs=20, validation_data=(X_valid, y_valid))
mse_test = model.evaluate(X_test, y_test)
X_new = X_test[3:]
y_pred = model.predict(X_new)
y_pred

Epoch 1/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - loss: 4.2479 - val_loss: 1.2840
Epoch 2/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.0318 - val_loss: 0.8114
Epoch 3/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.9093 - val_loss: 0.7573
Epoch 4/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.6738 - val_loss: 0.7103
Epoch 5/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.6541 - val_loss: 0.6724
Epoch 6/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.6228 - val_loss: 0.6411
Epoch 7/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.6471 - val_loss: 0.6133
Epoch 8/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.5942 - val_loss: 0.5896
Epoch 9/20
[1m363/363[0m [32m━━━━━━━━

array([[2.3932884],
       [2.257823 ],
       [1.5229313],
       ...,
       [2.2100081],
       [1.6809204],
       [1.0147047]], dtype=float32)

In [6]:
X_train_A, X_train_B = X_train[:, :5], X_train[:, 2:]
X_valid_A, X_valid_B = X_valid[:, :5], X_valid[:, 2:]
X_test_A, X_test_B = X_test[:, :5], X_test[:, 2:]
X_new_A, X_new_B = X_test_A[:3], X_test_B[:3]

In [7]:
# Creating a network with multiple outputs.

from tensorflow.keras.layers import Input, Dense, concatenate
from tensorflow.keras.models import Model

input_A = Input(shape=[5], name="wide_input")
input_B = Input(shape=[6], name="deep_input")
hidden1 = Dense(30, activation="relu")(input_B)
hidden2 = Dense(30, activation="relu")(hidden1)
concat = concatenate([input_A, hidden2])
output = Dense(1, name="main_output")(concat)
aux_output = Dense(1, name="aux_output")(hidden2)
model = Model(inputs=[input_A, input_B], outputs=[output, aux_output])


In [8]:
from tensorflow.keras.losses import MeanSquaredError

# Define separate loss functions for main and auxiliary outputs
main_output_loss = MeanSquaredError()
aux_output_loss = MeanSquaredError()

# Compile the model with separate loss functions
model.compile(optimizer='adam',
              loss=[main_output_loss, aux_output_loss],
              metrics=[['mse'], ['mse']])

In [9]:
history = model.fit(
    [X_train_A, X_train_B], [y_train, y_train], 
    epochs=20,
    validation_data=([X_valid_A, X_valid_B], [y_valid, y_valid])
)

Epoch 1/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - aux_output_mse: 3.3896 - loss: 6.7605 - main_output_mse: 3.3709 - val_aux_output_mse: 1.1026 - val_loss: 2.0220 - val_main_output_mse: 0.9194
Epoch 2/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - aux_output_mse: 0.8756 - loss: 1.5862 - main_output_mse: 0.7106 - val_aux_output_mse: 0.6697 - val_loss: 1.1741 - val_main_output_mse: 0.5044
Epoch 3/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - aux_output_mse: 0.6341 - loss: 1.1121 - main_output_mse: 0.4780 - val_aux_output_mse: 0.6027 - val_loss: 1.0546 - val_main_output_mse: 0.4519
Epoch 4/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - aux_output_mse: 0.5950 - loss: 1.0441 - main_output_mse: 0.4491 - val_aux_output_mse: 0.5597 - val_loss: 0.9832 - val_main_output_mse: 0.4235
Epoch 5/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms

In [10]:
metrics = model.evaluate([X_test_A, X_test_B], [y_test, y_test])
total_loss = metrics[0]
main_loss = metrics[1]
aux_loss = metrics[2]

print("Total loss:", total_loss)
print("Main loss:", main_loss)
print("Aux loss:", aux_loss)

[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - aux_output_mse: 0.4100 - loss: 0.7648 - main_output_mse: 0.3548
Total loss: 0.7299187779426575
Main loss: 0.3904551863670349
Aux loss: 0.33946338295936584


# Using the Subclassing API to Build Dynamic Models

In [11]:
class WideAndDeepModel(keras.Model):
    def __init__(self, units=30, activation="relu", **kwargs):
        super().__init__(**kwargs) # handles standard args (e.g, name)
        self.hidden1 = keras.layers.Dense(units, activation=activation)
        self.hidden2 = keras.layers.Dense(units, activation=activation)
        self.main_output = keras.layers.Dense(1)
        self.aux_output = keras.layers.Dense(1)


    def call(self, inputs):
        input_A, input_B = inputs
        hidden1 = self.hidden1(input_B)
        hidden2 = self.hidden2(hidden1)
        concat = keras.layers.concatenate([input_A, hidden2])
        main_output = sellf.main_output(concat)
        aux_output = self.aux_output(hidden2)
        return main_output, aux_output

model = WideAndDeepModel()