# Boston housing price regression dataset
Dataset taken from the StatLib library which is maintained at Carnegie Mellon University.

Samples contain 13 attributes of houses at different locations around the Boston suburbs in the late 1970s. Targets are the median values of the houses at a location (in k$).

### Import TensorFlow


In [29]:
import tensorflow as tf
print(tf.__version__)
tf.random.set_seed(42)

2.16.1


### Import dataset
- This dataset can be imported
- High level API Keras has some datasets available
- You can look at all the datasets available here https://keras.io/datasets/


In [30]:
from tensorflow.keras.datasets import boston_housing
from sklearn.ensemble import RandomForestRegressor
import numpy as np
from sklearn import metrics
# boston_housing.load_data() function returns 2 tuples, one for train data and 
# other for test data. We will take only train data here.
#(features, actual_prices), _ = boston_housing.load_data(test_split=0.1)

(x_train,y_train),(x_test,y_test) = boston_housing.load_data(test_split = 0.1)

### Getting details of dataset
- We will see how many rows are there in the data
- We will check how many features are there

In [31]:
print('Number of examples: ', x_train.shape[0])
print('Number of features for each example: ', x_train.shape[1])
print('Shape of actual prices data: ', y_train.shape)

Number of examples:  455
Number of features for each example:  13
Shape of actual prices data:  (455,)


In [32]:
print('Number of examples: ', x_test.shape[0])
print('Number of features for each example: ', x_test.shape[1])
print('Shape of actual prices data: ', y_test.shape)

Number of examples:  51
Number of features for each example:  13
Shape of actual prices data:  (51,)


In [33]:
x_train[0]

array([  1.23247,   0.     ,   8.14   ,   0.     ,   0.538  ,   6.142  ,
        91.7    ,   3.9769 ,   4.     , 307.     ,  21.     , 396.9    ,
        18.72   ])

In [34]:
y_train[0]

15.2

### Build the model
- The Sequential model is a linear stack of layers.
- The model needs to know what input shape it should expect. For this reason, the first layer in a Sequential model (and only the first, because following layers can do automatic shape inference) needs to receive information about its input shape.
- You can also simply add layers via the .add() method
- You can read more about it here https://keras.io/getting-started/sequential-model-guide/

In [40]:
#Scaling the dataset
from sklearn.preprocessing import StandardScaler 
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

In [41]:
x_train[0]

array([-0.2751689 , -0.48028321, -0.43093498, -0.26091209, -0.14777338,
       -0.17966947,  0.8331275 ,  0.0979132 , -0.63105852, -0.59417767,
        1.16480522,  0.45007505,  0.84399647])

```
INPUT(13) -> 2 HIDDEN LAYERS -> OUTPUT(1)
```

In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [65]:
# Initialize Sequential model
model = Sequential()
model.add(Dense(32,activation = 'relu', input_shape = (13,)))
model.add(Dense(16,activation = 'relu'))
# Add Dense layer for prediction - Keras declares weights and bias automatically
model.add(Dense(1)) # dense =1 meaning only single valued o/p e.g. Regression problem,if classification activation function is softmax|

In [66]:
13 * 32 + 32 # bias is added 

448

In [67]:
32 * 16 + 16

528

In [68]:
16 * 1 + 1

17

In [69]:
model.summary()

### Compile the model
- Here we configure the model for training
- We will specify an optimizer and a loss function
- You can read more about it here https://keras.io/models/sequential/


In [70]:
# Compile the model - add mse as loss and stochastic gradient descent as optimizer
model.compile(optimizer='sgd', loss='mse')

### Fit the model
- .fit() trains the model for a fixed number of epochs (iterations on a dataset)
- An epoch is an iteration over the entire x and y data provided

In [71]:
model.fit(x_train, y_train, epochs=20)

Epoch 1/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 990us/step - loss: 455.1245
Epoch 2/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 396.7732 
Epoch 3/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 923us/step - loss: 222.5031
Epoch 4/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 83.9099  
Epoch 5/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 74.6322 
Epoch 6/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 965us/step - loss: 30.1822
Epoch 7/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 996us/step - loss: 22.1057
Epoch 8/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 948us/step - loss: 21.5744
Epoch 9/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 975us/step - loss: 15.8968
Epoch 10/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 956us

<keras.src.callbacks.history.History at 0x273089430b0>

In [125]:
y_pred  = model.predict(x_train)
print('R^2:',metrics.r2_score(y_train, y_pred))
print('MAE:',metrics.mean_absolute_error(y_train, y_pred))
print('MSE:',metrics.mean_squared_error(y_train, y_pred))
print('RMSE:',np.sqrt(metrics.mean_squared_error(y_train, y_pred)))

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
R^2: 0.8802262136313579
MAE: 2.2812329675863077
MSE: 9.905997995291683
RMSE: 3.1473795442068444


In [73]:
y_pred  = model.predict(x_test)
print('R^2:',metrics.r2_score(y_test, y_pred))
print('MAE:',metrics.mean_absolute_error(y_test, y_pred))
print('MSE:',metrics.mean_squared_error(y_test, y_pred))
print('RMSE:',np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
R^2: 0.6944147793934469
MAE: 3.6184901555379234
MSE: 30.185271037681925
RMSE: 5.494112397620013


In [131]:
import os
from tensorflow.keras.callbacks import ModelCheckpoint
# fname = os.path.sep.join([args["weights"],"weights-{epoch:03d}-{val_loss:.4f}.hdf5"])
checkpoint = ModelCheckpoint("Boston.weights.h5", monitor="val_loss", mode="min",save_best_only=True, verbose=1,save_weights_only=True)
callbacks = [checkpoint]

In [132]:
# Initialize Sequential model
from tensorflow.keras.models import load_model
model_complex = tf.keras.models.Sequential()
model_complex.add(tf.keras.layers.Dense(128,activation  = 'relu',input_shape=(13,)))
model_complex.add(tf.keras.layers.Dense(64,activation  = 'relu'))
model_complex.add(tf.keras.layers.Dense(32,activation  = 'relu'))
model_complex.add(tf.keras.layers.Dense(16,activation  = 'relu'))
# Add Dense layer for prediction - Keras declares weights and bias automatically
model_complex.add(tf.keras.layers.Dense(1))
# Compile the model - add mse as loss and stochastic gradient descent as optimizer
model_complex.compile(optimizer='adam', loss='mse',metrics=['accuracy'])
results= model_complex.fit(x_train, y_train, epochs=150,callbacks=callbacks)
print('Test accuracy: ', results[1])

Epoch 1/150
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 602.5862
Epoch 2/150
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 502.0245 
Epoch 3/150
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 284.6194 
Epoch 4/150
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 86.5818  
Epoch 5/150
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 49.5340 
Epoch 6/150
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 35.9503 
Epoch 7/150
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 26.2306 
Epoch 8/150
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 23.876

TypeError: 'History' object is not subscriptable

In [22]:
model_complex.summary()

In [23]:
y_pred  = model_complex.predict(x_train)
print('R^2:',metrics.r2_score(y_train, y_pred))
print('MAE:',metrics.mean_absolute_error(y_train, y_pred))
print('MSE:',metrics.mean_squared_error(y_train, y_pred))
print('RMSE:',np.sqrt(metrics.mean_squared_error(y_train, y_pred)))

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
R^2: 0.8746463484229118
MAE: 2.3254378362802357
MSE: 10.367485731838157
RMSE: 3.21985802976438


In [24]:
y_pred  = model_complex.predict(x_test)
print('R^2:',metrics.r2_score(y_test, y_pred))
print('MAE:',metrics.mean_absolute_error(y_test, y_pred))
print('MSE:',metrics.mean_squared_error(y_test, y_pred))
print('RMSE:',np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
R^2: 0.6823136231830722
MAE: 3.6051652010749367
MSE: 31.380605940837423
RMSE: 5.601839514020142


In [25]:
# Import Random Forest Regressor

# Create a Random Forest Regressor
reg = RandomForestRegressor()

# Train the model using the training sets 
reg.fit(x_train, y_train)
# Model prediction on train data
y_pred = reg.predict(x_train)
# Model Evaluation
print('R^2:',metrics.r2_score(y_train, y_pred))
print('MAE:',metrics.mean_absolute_error(y_train, y_pred))
print('MSE:',metrics.mean_squared_error(y_train, y_pred))
print('RMSE:',np.sqrt(metrics.mean_squared_error(y_train, y_pred)))

R^2: 0.9853613595755986
MAE: 0.7736659340659339
MSE: 1.2107018329670318
RMSE: 1.1003189687390798


In [26]:
y_pred  = reg.predict(x_test)
print('R^2:',metrics.r2_score(y_test, y_pred))
print('MAE:',metrics.mean_absolute_error(y_test, y_pred))
print('MSE:',metrics.mean_squared_error(y_test, y_pred))
print('RMSE:',np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

R^2: 0.7994052774009466
MAE: 2.8314117647058827
MSE: 19.814459803921558
RMSE: 4.4513435953565255
