# Building a Regression Model in Keras

## Introduction to Deep Learning & Neural Networks with Keras

Set Up Notebook.

In [1]:
!pip install numpy==2.0.2
!pip install pandas==2.2.2
!pip install matplotlib==3.9.2
!pip install tensorflow_cpu==2.18.0

Collecting numpy==2.0.2
  Downloading numpy-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.9/60.9 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (19.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.5/19.5 MB[0m [31m86.7 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: numpy
Successfully installed numpy-2.0.2
Collecting pandas==2.2.2
  Downloading pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)
Collecting tzdata>=2022.7 (from pandas==2.2.2)
  Downloading tzdata-2024.2-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.0/13.0 MB[0m [31m88.4 MB/s[0m eta 

In [20]:
!pip install scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting scipy>=1.6.0 (from scikit-learn)
  Downloading scipy-1.14.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.8/60.8 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting joblib>=1.2.0 (from scikit-learn)
  Downloading joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Downloading threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.5/13.5 MB[0m [31m98.4 MB/s[0m eta [36m0:00:00[0m:00:01[0m0:01[0m
[?25hDownloading joblib-1.4.2-py3-none-any.whl (301 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m301.8/301.8

In [3]:
import numpy as np
import pandas as pd

import keras
from keras.layers import Dense
from keras.layers import Input
from keras.models import Sequential
from keras.utils import to_categorical # loading libraries

2025-01-03 01:34:59.349740: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-03 01:34:59.402057: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Data Overview and Preprocessing

In [8]:
df = pd.read_csv('concrete_data.csv') # reading csv file

In [17]:
df.shape # 1030 observations; 9 columns

(1030, 9)

In [9]:
df.head() # data preview

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [12]:
df.keys() # columns names

Index(['Cement', 'Blast Furnace Slag', 'Fly Ash', 'Water', 'Superplasticizer',
       'Coarse Aggregate', 'Fine Aggregate', 'Age', 'Strength'],
      dtype='object')

In [14]:
df.isna().sum() # missing values

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

### a) Baseline Model

In [35]:
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

In [24]:
X = df.drop('Strength', axis = 1)
X.shape # explanatory variables

(1030, 8)

In [27]:
X.keys()

Index(['Cement', 'Blast Furnace Slag', 'Fly Ash', 'Water', 'Superplasticizer',
       'Coarse Aggregate', 'Fine Aggregate', 'Age'],
      dtype='object')

In [29]:
columns = X.shape[1]
columns # no. of predictors

8

In [25]:
y = df['Strength']
y.shape # outcome

(1030,)

In [26]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.30, random_state = 42) # split data into training and test sets 

Building a model with a fully conected hiddem layer, with 10 hidden units. 

In [32]:
def baseline_model(): 
    
    model = Sequential() # define an empty sequential model
    model.add(Input(shape = (columns,))) # input layer
    model.add(Dense(10, activation = 'relu')) # hidden layer of 10 nodes, and a ReLU activation function
    model.add(Dense(1)) # output layer
    
    model.compile(optimizer = 'adam', loss = 'mean_squared_error',  
                  metrics = ['accuracy']) # compile model
    
    return model

In [33]:
model = baseline_model() # baseline model

In [34]:
model.fit(X_train, y_train, validation_data = (X_test, y_test), epochs = 50, verbose = 2) # fitting the model

Epoch 1/50
26/26 - 1s - 41ms/step - accuracy: 0.0000e+00 - loss: 189718.6562 - val_accuracy: 0.0000e+00 - val_loss: 136615.5000
Epoch 2/50
26/26 - 0s - 7ms/step - accuracy: 0.0000e+00 - loss: 111815.8281 - val_accuracy: 0.0000e+00 - val_loss: 79387.5156
Epoch 3/50
26/26 - 0s - 7ms/step - accuracy: 0.0000e+00 - loss: 66539.4297 - val_accuracy: 0.0000e+00 - val_loss: 47073.5938
Epoch 4/50
26/26 - 0s - 7ms/step - accuracy: 0.0000e+00 - loss: 40376.4883 - val_accuracy: 0.0000e+00 - val_loss: 28091.3535
Epoch 5/50
26/26 - 0s - 6ms/step - accuracy: 0.0000e+00 - loss: 24562.1934 - val_accuracy: 0.0000e+00 - val_loss: 16557.7520
Epoch 6/50
26/26 - 0s - 6ms/step - accuracy: 0.0000e+00 - loss: 14784.1094 - val_accuracy: 0.0000e+00 - val_loss: 9492.8770
Epoch 7/50
26/26 - 0s - 6ms/step - accuracy: 0.0000e+00 - loss: 8749.4990 - val_accuracy: 0.0000e+00 - val_loss: 5432.5703
Epoch 8/50
26/26 - 0s - 6ms/step - accuracy: 0.0000e+00 - loss: 5274.3452 - val_accuracy: 0.0000e+00 - val_loss: 3128.8767
E

<keras.src.callbacks.history.History at 0x7f33933f3a50>

In [37]:
predictions = model.predict(X_test)
mse = mean_squared_error(y_test, predictions)

print('Mean Squared Error on Test Data: {:.4f}'.format(mse))

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
Mean Squared Error on Test Data: 239.0229


Repeat process **50 times**.

In [38]:
mse_values = [] # empty list to store MSE values

for i in range(50):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.30, random_state = 42) # splitting data 
    model = baseline_model() # defining model
    model.fit(X_train, y_train, validation_data = (X_test, y_test), epochs = 50, verbose = 2) # fitting model
    predictions = model.predict(X_test) 
    mse = mean_squared_error(y_test, predictions) # calculate MSE test value

    mse_values.append(mse) # append value to list

Epoch 1/50
23/23 - 1s - 43ms/step - accuracy: 0.0000e+00 - loss: 488835.8750 - val_accuracy: 0.0000e+00 - val_loss: 391104.1875
Epoch 2/50
23/23 - 0s - 8ms/step - accuracy: 0.0000e+00 - loss: 316483.3125 - val_accuracy: 0.0000e+00 - val_loss: 250229.9531
Epoch 3/50
23/23 - 0s - 8ms/step - accuracy: 0.0000e+00 - loss: 200324.0781 - val_accuracy: 0.0000e+00 - val_loss: 157765.3438
Epoch 4/50
23/23 - 0s - 8ms/step - accuracy: 0.0000e+00 - loss: 124955.2656 - val_accuracy: 0.0000e+00 - val_loss: 97979.4453
Epoch 5/50
23/23 - 0s - 8ms/step - accuracy: 0.0000e+00 - loss: 76739.4531 - val_accuracy: 0.0000e+00 - val_loss: 59558.4648
Epoch 6/50
23/23 - 0s - 8ms/step - accuracy: 0.0000e+00 - loss: 46028.3984 - val_accuracy: 0.0000e+00 - val_loss: 35725.4414
Epoch 7/50
23/23 - 0s - 8ms/step - accuracy: 0.0000e+00 - loss: 27122.3164 - val_accuracy: 0.0000e+00 - val_loss: 21017.8965
Epoch 8/50
23/23 - 0s - 7ms/step - accuracy: 0.0000e+00 - loss: 15612.8125 - val_accuracy: 0.0000e+00 - val_loss: 122

In [41]:
len(mse_values)

50

In [39]:
mse_mean = np.mean(mse_values) # mean
mse_std = np.std(mse_values) # standard deviation

print('Mean of MSE Values: {:.4f}'.format(mse_mean))
print('Standard Deviation of MSE Values: {:.4f}'.format(mse_std))

Mean of MSE Values: 291.4860
Standard Deviation of MSE Values: 330.6066


### b) Normalizing Data

In [40]:
X_norm = (X - X.mean()) / X.std()
X_norm.head() # normalizing data

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [42]:
mse_values = [] # empty list to store MSE values

for i in range(50):
    X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size = 0.30, random_state = 42) # splitting data 
    model = baseline_model() # defining model
    model.fit(X_train, y_train, validation_data = (X_test, y_test), epochs = 50, verbose = 2) # fitting model
    predictions = model.predict(X_test) 
    mse = mean_squared_error(y_test, predictions) # calculate MSE test value

    mse_values.append(mse) # append value to list

Epoch 1/50
23/23 - 1s - 42ms/step - accuracy: 0.0000e+00 - loss: 1573.6317 - val_accuracy: 0.0000e+00 - val_loss: 1470.6350
Epoch 2/50
23/23 - 0s - 8ms/step - accuracy: 0.0000e+00 - loss: 1555.8057 - val_accuracy: 0.0000e+00 - val_loss: 1453.2401
Epoch 3/50
23/23 - 0s - 8ms/step - accuracy: 0.0000e+00 - loss: 1537.2920 - val_accuracy: 0.0000e+00 - val_loss: 1435.8401
Epoch 4/50
23/23 - 0s - 8ms/step - accuracy: 0.0000e+00 - loss: 1518.5302 - val_accuracy: 0.0000e+00 - val_loss: 1418.2073
Epoch 5/50
23/23 - 0s - 8ms/step - accuracy: 0.0000e+00 - loss: 1499.1295 - val_accuracy: 0.0000e+00 - val_loss: 1399.7548
Epoch 6/50
23/23 - 0s - 7ms/step - accuracy: 0.0000e+00 - loss: 1478.6241 - val_accuracy: 0.0000e+00 - val_loss: 1380.9928
Epoch 7/50
23/23 - 0s - 7ms/step - accuracy: 0.0000e+00 - loss: 1457.5579 - val_accuracy: 0.0000e+00 - val_loss: 1360.8680
Epoch 8/50
23/23 - 0s - 7ms/step - accuracy: 0.0000e+00 - loss: 1435.1761 - val_accuracy: 0.0000e+00 - val_loss: 1339.7528
Epoch 9/50
23/2

In [43]:
mse_mean = np.mean(mse_values) # mean
mse_std = np.std(mse_values) # standard deviation

print('Mean of MSE Values: {:.4f}'.format(mse_mean))
print('Standard Deviation of MSE Values: {:.4f}'.format(mse_std))

Mean of MSE Values: 332.4023
Standard Deviation of MSE Values: 82.4326


The mean MSE value increased, but the estimate became more robust due to a significant decrease in the standard deviation.

### c) Increasing Number of Epochs

In [44]:
mse_values = [] # empty list to store MSE values

for i in range(50):
    X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size = 0.30, random_state = 42) # splitting data 
    model = baseline_model() # defining model
    model.fit(X_train, y_train, validation_data = (X_test, y_test), epochs = 100, verbose = 2) # fitting model
    predictions = model.predict(X_test) 
    mse = mean_squared_error(y_test, predictions) # calculate MSE test value

    mse_values.append(mse) # append value to list

Epoch 1/100
23/23 - 1s - 43ms/step - accuracy: 0.0000e+00 - loss: 1584.2384 - val_accuracy: 0.0000e+00 - val_loss: 1487.3573
Epoch 2/100
23/23 - 0s - 8ms/step - accuracy: 0.0000e+00 - loss: 1567.4622 - val_accuracy: 0.0000e+00 - val_loss: 1472.0466
Epoch 3/100
23/23 - 0s - 8ms/step - accuracy: 0.0000e+00 - loss: 1550.6494 - val_accuracy: 0.0000e+00 - val_loss: 1456.3029
Epoch 4/100
23/23 - 0s - 8ms/step - accuracy: 0.0000e+00 - loss: 1533.4856 - val_accuracy: 0.0000e+00 - val_loss: 1439.9124
Epoch 5/100
23/23 - 0s - 7ms/step - accuracy: 0.0000e+00 - loss: 1515.1448 - val_accuracy: 0.0000e+00 - val_loss: 1423.5259
Epoch 6/100
23/23 - 0s - 7ms/step - accuracy: 0.0000e+00 - loss: 1496.7124 - val_accuracy: 0.0000e+00 - val_loss: 1405.7423
Epoch 7/100
23/23 - 0s - 7ms/step - accuracy: 0.0000e+00 - loss: 1476.7843 - val_accuracy: 0.0000e+00 - val_loss: 1387.2369
Epoch 8/100
23/23 - 0s - 7ms/step - accuracy: 0.0000e+00 - loss: 1455.7301 - val_accuracy: 0.0000e+00 - val_loss: 1367.7344
Epoch 9

In [45]:
mse_mean = np.mean(mse_values) # mean
mse_std = np.std(mse_values) # standard deviation

print('Mean of MSE Values: {:.4f}'.format(mse_mean))
print('Standard Deviation of MSE Values: {:.4f}'.format(mse_std))

Mean of MSE Values: 162.0541
Standard Deviation of MSE Values: 38.0426


Both the mean and standard deviation decreased significantly compared to the previous results. However, the computational time increased considerably.

### d) Increasing Number of Hidden Layers

Updating model.

In [46]:
def model_updated(): 
    
    model = Sequential() # define an empty sequential model
    model.add(Input(shape = (columns,))) # input layer
    model.add(Dense(10, activation = 'relu')) # 1st hidden layer
    model.add(Dense(10, activation = 'relu')) # 2nd hidden layer
    model.add(Dense(10, activation = 'relu')) # 3rd hidden layer
    model.add(Dense(1)) # output layer
    
    model.compile(optimizer = 'adam', loss = 'mean_squared_error',  
                  metrics = ['accuracy']) # compile model
    
    return model

In [47]:
mse_values = [] # empty list to store MSE values

for i in range(50):
    X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size = 0.30, random_state = 42) # splitting data 
    model =  model_updated() # defining model
    model.fit(X_train, y_train, validation_data = (X_test, y_test), epochs = 50, verbose = 2) # fitting model
    predictions = model.predict(X_test) 
    mse = mean_squared_error(y_test, predictions) # calculate MSE test value

    mse_values.append(mse) # append value to list

Epoch 1/50
23/23 - 2s - 72ms/step - accuracy: 0.0000e+00 - loss: 1594.7684 - val_accuracy: 0.0000e+00 - val_loss: 1492.8827
Epoch 2/50
23/23 - 0s - 9ms/step - accuracy: 0.0000e+00 - loss: 1573.5817 - val_accuracy: 0.0000e+00 - val_loss: 1473.7461
Epoch 3/50
23/23 - 0s - 10ms/step - accuracy: 0.0000e+00 - loss: 1552.8466 - val_accuracy: 0.0000e+00 - val_loss: 1451.6702
Epoch 4/50
23/23 - 0s - 9ms/step - accuracy: 0.0000e+00 - loss: 1526.1885 - val_accuracy: 0.0000e+00 - val_loss: 1420.6757
Epoch 5/50
23/23 - 0s - 9ms/step - accuracy: 0.0000e+00 - loss: 1485.8258 - val_accuracy: 0.0000e+00 - val_loss: 1373.8204
Epoch 6/50
23/23 - 0s - 9ms/step - accuracy: 0.0000e+00 - loss: 1424.7759 - val_accuracy: 0.0000e+00 - val_loss: 1299.7179
Epoch 7/50
23/23 - 0s - 9ms/step - accuracy: 0.0000e+00 - loss: 1325.5609 - val_accuracy: 0.0000e+00 - val_loss: 1178.6713
Epoch 8/50
23/23 - 0s - 8ms/step - accuracy: 0.0000e+00 - loss: 1169.9537 - val_accuracy: 0.0000e+00 - val_loss: 1002.8954
Epoch 9/50
23/

In [48]:
mse_mean = np.mean(mse_values) # mean
mse_std = np.std(mse_values) # standard deviation

print('Mean of MSE Values: {:.4f}'.format(mse_mean))
print('Standard Deviation of MSE Values: {:.4f}'.format(mse_std))

Mean of MSE Values: 124.9242
Standard Deviation of MSE Values: 12.7226


Both values decreased significantly. Increasing the number of hidden layers and the number of epochs improved the accuracy.

***

Diego Godinez Bravo\
January 2025