# Regression multi-layer perceptron 

In [2]:
# Difference to the classification model: now we have the output layer with a single neuron.
# and uses no activation function. The loss function is now the mean squared error.

# Creating a dataset
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

housing = fetch_california_housing()
X_train_full, X_test, y_train_full, y_test = train_test_split(
    housing.data, housing.target
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full, y_train_full
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)
print(X_val[:5])

[[-1.18614648  0.74594263 -0.77399406 -0.09363536 -0.02081005 -0.1415088
  -0.75831637  0.61221544]
 [-0.25101064 -1.89193558 -0.08733436 -0.03905665 -0.10288604 -0.08137096
  -0.74419486  1.12834141]
 [ 0.43652248  0.66600693 -0.05992539 -0.27279202 -0.68080258 -0.02761702
  -0.78655938  0.77757619]
 [-0.16384814  0.10645701 -0.26251002 -0.23082044  0.4081851   0.29373632
  -0.89953143  0.8427183 ]
 [-0.17826084 -0.29322151 -0.10595047 -0.14117522 -0.32034512 -0.07436501
   1.37873824 -0.91611874]]


In [3]:
# Fitting the model
# The dataset is noisy - use a single hidden layer with fewer neurons to avoid overfitting
import tensorflow as tf
from tensorflow import keras
model = keras.models.Sequential([
    keras.layers.Dense(30, activation='relu', input_shape=X_train.shape[1:]),
    keras.layers.Dense(1)
])

model.compile(loss="mean_squared_error", optimizer="sgd")
history = model.fit(X_train, y_train, epochs = 5,
                    validation_data = (X_val, y_val))
mse_test = model.evaluate(X_test, y_test)
X_new = X_test[:3] # pretending these are new instances
y_pred = model.predict(X_new)
print(y_pred)

[[1.4036465]
 [0.8963295]
 [0.961169 ]]


## Building complex models using Functional API


In [4]:
# A non-sequential NN: Wide & Deep NN. It connects all(or part) of inputs directly to the output layer.
# It allows NN to learn both deep and simple pattern, whereas in a regular MLP, the data is
# forced to go through the full stack of layers, possibly result in a distortion of simple patterns.

input_ = keras.layers.Input(shape=X_train.shape[1:]) # specifies the kind of input, including the shape and dtype.
hidden1 = keras.layers.Dense(30, activation="relu")(input_) # the 1st hidden layer is connected to the input_ layer
hidden2 = keras.layers.Dense(30, activation="relu")(hidden1) # the 2nd hidden layer is connected to the hidden1 layer
concat= keras.layers.Concatenate()([input_, hidden2]) # concat layer is connected to the input_ and hidden2 layers
output = keras.layers.Dense(1)(concat)
model2 = keras.Model(inputs=[input_], outputs=[output])
model2.compile(loss="mean_squared_error", optimizer="sgd")
history2 = model2.fit(X_train, y_train, epochs=5, validation_data = (X_val, y_val))
mse_test = model.evaluate(X_test, y_test)
X_new = X_test[:3]
y_pred2 = model.predict(X_new)
print(y_pred)
print(y_pred2)

[[1.4036465]
 [0.8963295]
 [0.961169 ]]
[[1.4036465]
 [0.8963295]
 [0.961169 ]]


### When want to send a subset of the features through the wide path and another subset(possibly overlapping) through the deep path:
* e.g. send 5 features through the wide path(0 to 4), and 6 features(2 to 7)

In [5]:
input_A = keras.layers.Input(shape=[5], name="wide_input")
input_B = keras.layers.Input(shape=[6], name="deep_input")
hidden1 = keras.layers.Dense(30, activation="relu")(input_B)
hidden2 = keras.layers.Dense(30, activation="relu")(hidden1)
concat = keras.layers.concatenate([input_A, hidden2])
output = keras.layers.Dense(1, name="output")(concat)
model = keras.Model(inputs=[input_A, input_B], outputs=[output])
model.compile(loss="mse", optimizer=keras.optimizers.SGD(lr=1e-3))

X_train_A, X_train_B = X_train[:, :5], X_train[:, 2:]
X_val_A, X_val_B = X_val[:, :5], X_val[:, 2:]
X_test_A, X_test_B = X_test[:, :5], X_test[:, 2:]
X_new_A, X_new_B = X_test_A[:3], X_test_B[:3]

history = model.fit((X_train_A, X_train_B), y_train, epochs=5, # must pass a pair of matrices (X_train_A, X_train_B)
                    validation_data=((X_val_A, X_val_B), y_val))
mse_test = model.evaluate((X_test_A, X_test_B), y_test)
y_pred = model.predict((X_new_A, X_new_B))   



## When want to add some aux output
* To ensure that the underlying part of the network learns something on its own without relying on the rest of the newwork

In [7]:
# Input to concat layer is the same as above
input_A = keras.layers.Input(shape=[5], name="wide_input")
input_B = keras.layers.Input(shape=[6], name="deep_input")
hidden1 = keras.layers.Dense(30, activation="relu")(input_B)
hidden2 = keras.layers.Dense(30, activation="relu")(hidden1)
concat = keras.layers.concatenate([input_A, hidden2])
output = keras.layers.Dense(1, name="main_output")(concat)
aux_output = keras.layers.Dense(1, name="aux_output")(hidden2) # for regularization
model = keras.Model(inputs=[input_A, input_B], outputs=[output, aux_output])

# Each output needs its own loss function - pass a list of loss functions.
model.compile(loss=["mse", "mse"], loss_weights=[0.9, 0.1], optimizer = "sgd") # the main output gets larger weight.
history = model.fit(
    [X_train_A, X_train_B], [y_train, y_train], epochs=5,
    validation_data=([X_val_A, X_val_B], [y_val, y_val]))
total_loss, main_loss, aux_loss = model.evaluate([X_test_A, X_test_B], [y_test, y_test])

y_pred_main, y_pred_aux = model.predict([X_new_A, X_new_B])


