In [None]:
from sklearn.datasets import fetch_california_housing
# Fetch the California housing dataset and unpack features / target.
housing = fetch_california_housing()
x, y = housing.data, housing.target

# Report the dimensions of the feature matrix and the target vector,
# followed by the names of the feature columns.
print(x.shape, y.shape)
print(housing.feature_names)

(20640, 8) (20640,)
['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']


In [None]:
#Splitting the dataset into training and testing
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; random_state pins the shuffle.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Show how many rows ended up in each split next to the full dataset.
for label, features in (
    ("Original size:", x),
    ("Training size:", x_train),
    ("Testing size:", x_test),
):
    print(label, features.shape)

Original size: (20640, 8)
Training size: (16512, 8)
Testing size: (4128, 8)


In [None]:
from tensorflow import keras
from sklearn.preprocessing import StandardScaler
# Seed Python, NumPy and the Keras backend before any layer is created, so that
# weight initialisation and batch shuffling repeat from run to run. Without this
# the training losses and the test MAE differ every time the cell is executed.
# The value matches the random_state used for the train/test split.
keras.utils.set_random_seed(42)

# Standardizing the data: the scaler is fitted on the training split only and
# the same fitted transform is then applied to the test split.
# NOTE: this overwrites x_train / x_test; the later training and evaluation
# cells rely on the scaled arrays being available under these names.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Baseline network: 8 inputs -> 12 -> 12 -> 1.
nn = keras.Sequential()
nn.add(keras.layers.Input(shape=(8,)))             # Input layer, one value per feature
nn.add(keras.layers.Dense(12, activation="relu"))  # 1st hidden layer, 12 nodes, relu for nonlinearity
nn.add(keras.layers.Dense(12, activation="relu"))  # 2nd hidden layer, 12 nodes, relu for nonlinearity
nn.add(keras.layers.Dense(1))                      # Output layer, no activation
nn.compile(optimizer="adam", loss="mse")           # Compiling the model using Adam and MSE

# Train for 100 epochs in mini-batches of 32; `history` keeps the fit record.
history = nn.fit(x_train, y_train, epochs=100, batch_size=32, verbose=1)
history

Epoch 1/100
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3.7864
Epoch 2/100
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.6298
Epoch 3/100
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.5053
Epoch 4/100
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.4539
Epoch 5/100
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.4325
Epoch 6/100
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.4117
Epoch 7/100
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.4039
Epoch 8/100
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.3936
Epoch 9/100
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.3925
Epoch 10/100
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms

<keras.src.callbacks.history.History at 0x7eafe625f7d0>

**Why the output layer should not have an activation function:**
The output layer should not have an activation function because the output should be any real number that is not restircted by the activation function.

In [None]:
from sklearn.metrics import mean_absolute_error

# Run the trained network on the held-out test features.
predictions = nn.predict(x_test)

# Mean absolute error between the true test targets and the predictions.
mean_absolute_error(y_true=y_test, y_pred=predictions)

[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


0.3635487382464769

In [None]:
# Recreating the network with a smaller, deeper architecture: 8 -> 12 -> 6 -> 3 -> 1.

# Re-seed Python, NumPy and the Keras backend before any layer is created, so the
# weights and batch order of this model repeat from run to run and do not depend
# on how many random numbers earlier cells consumed.
keras.utils.set_random_seed(42)

nn = keras.Sequential()

nn.add(keras.layers.Input(shape=(8,)))
nn.add(keras.layers.Dense(12, activation="relu"))  # 1st hidden layer kept at 12 nodes
nn.add(keras.layers.Dense(6, activation="relu"))   # 2nd hidden layer reduced to 6 nodes
nn.add(keras.layers.Dense(3, activation="relu"))   # Adding a 3rd hidden layer with 3 nodes
nn.add(keras.layers.Dense(1))                      # Output layer, no activation

nn.compile(optimizer="adam", loss="mse")

# Epochs lowered to 75 for this run.
history = nn.fit(x_train, y_train, epochs=75, batch_size=32, verbose=1)
history

Epoch 1/75
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3.3983
Epoch 2/75
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.5465
Epoch 3/75
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.4327
Epoch 4/75
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.4184
Epoch 5/75
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.4091
Epoch 6/75
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.3786
Epoch 7/75
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.3893
Epoch 8/75
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.3691
Epoch 9/75
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.3668
Epoch 10/75
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - lo

<keras.src.callbacks.history.History at 0x7eafe56a2f10>

In [None]:
# Run the retrained network on the held-out test features.
predictions = nn.predict(x_test)

# Mean absolute error between the true test targets and the predictions.
mean_absolute_error(y_true=y_test, y_pred=predictions)

[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


0.3811985001766451

Despite adding a thrid layer, lowering the number of nodes and epochs resulted in a less predictive model with a mean absolute error of 0.3812.

In [None]:
# Recreating the network with a larger, deeper architecture: 8 -> 24 -> 12 -> 6 -> 1.

# Re-seed Python, NumPy and the Keras backend before any layer is created, so the
# weights and batch order of this model repeat from run to run and do not depend
# on how many random numbers earlier cells consumed.
keras.utils.set_random_seed(42)

nn = keras.Sequential()

nn.add(keras.layers.Input(shape=(8,)))
nn.add(keras.layers.Dense(24, activation="relu"))  # Changing 1st hidden layer to 24 nodes
nn.add(keras.layers.Dense(12, activation="relu"))  # Changing 2nd hidden layer to 12 nodes
nn.add(keras.layers.Dense(6, activation="relu"))   # Adding a 3rd hidden layer with 6 nodes
nn.add(keras.layers.Dense(1))                      # Output layer, no activation

nn.compile(optimizer="adam", loss="mse")

# Changing epochs to 200 for this run.
history = nn.fit(x_train, y_train, epochs=200, batch_size=32, verbose=1)
history

Epoch 1/200
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 3.0238
Epoch 2/200
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.5320
Epoch 3/200
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.4014
Epoch 4/200
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.3869
Epoch 5/200
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.3699
Epoch 6/200
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.3634
Epoch 7/200
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.3413
Epoch 8/200
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.3549
Epoch 9/200
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.3267
Epoch 10/200
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms

<keras.src.callbacks.history.History at 0x7eafe5399ed0>

In [None]:
# Run the retrained network on the held-out test features.
predictions = nn.predict(x_test)

# Mean absolute error between the true test targets and the predictions.
mean_absolute_error(y_true=y_test, y_pred=predictions)

[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


0.34537201391950595

By adding another hidden layer, increasing the number of nodes in each layer, and chaning the epochs to 200, the mean absolute error decreased from about 0.3635 in the first model to 0.3454. This reduction in error demonstrates a more predictive model.