# Final Project


## Step 1: Download and Clean the  Data Set


Import libraries needed

In [1]:
!pip install numpy==2.0.2
!pip install pandas==2.2.2
!pip install tensorflow_cpu==2.18.0



In [2]:
import pandas as pd
import numpy as np
import keras

import warnings
warnings.simplefilter('ignore', FutureWarning)

2024-12-22 09:49:29.797531: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-12-22 09:49:29.852416: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Download data and load using pandas


In [3]:
filepath='https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv'
concrete_data = pd.read_csv(filepath)

concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


#### We already know that there are no missing values so we can skip the analysis of the data


#### Split data into predictors and target


In [4]:
concrete_data_columns = concrete_data.columns

In [5]:
predictors = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']] # all columns except Strength
target = concrete_data['Strength'] # Strength column

Normalize data substracting mean and dividing by deviation.

In [7]:
n_cols = predictors.shape[1] # number of predictors necessary for the input layer

##  Model creation with Keras

In [8]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Input

Define a function that creates the model

In [9]:
def regression_model():
    '''
    This function uses the Sequential class of Keras to build
    the model for the first task of the project
    Steps:
        1. Create a sequential NN stored in model
        2. Define input with n_cols shape
        3. Add a hidden layer of 10 neurons and relu activation
        4. Output layer with one neuron
        5. Compile model with adam optimizer and MSE as loss
    '''
    # create model
    model = Sequential()
    model.add(Input(shape=(n_cols,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

## Train and Test the Network


In order to evaluate the performance of the model, we will run 50 loops in which we will divide
the data into train and test. Testing will be 30% of the data and we will compute in each loop the
MSE.

In [10]:
!pip install scikit-learn



In [11]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [13]:
mse_list = [] # container for mse
for fold in range(50):
    x_train, x_test, y_train, y_test = train_test_split(predictors, target, test_size = 0.3)
    model = regression_model() # create model
    model.fit(x_train, y_train, validation_split = 0.2, epochs = 50, verbose = 0) # train model
    y_pred = model.predict(x_test, verbose = 0)
    mse = mean_squared_error(y_test, y_pred)
    mse_list.append(mse)

# Report results
mse_results = np.array(mse_list)
mean_mse = f"The mean of the MSE is: {mse_results.mean():.2f}" # Create string for report
print(mean_mse)
std_mse = f"The std of the MSE is: {mse_results.std():.2f}"
print(std_mse)

The mean of the MSE is: 442.74
The std of the MSE is: 548.72


## Task B: Repeat but normalizing data

Normalize data substracting mean and dividing by std

In [15]:
predictors_norm = (predictors - predictors.mean()) / predictors.std()

In [16]:
mse_list = [] # container for mse
for fold in range(50):
    x_train, x_test, y_train, y_test = train_test_split(predictors_norm, target, test_size = 0.3)
    model = regression_model() # create model
    model.fit(x_train, y_train, validation_split = 0.2, epochs = 50, verbose = 0) # train model
    y_pred = model.predict(x_test, verbose = 0)
    mse = mean_squared_error(y_test, y_pred)
    mse_list.append(mse)

# Report results
mse_results = np.array(mse_list)
mean_mse = f"The mean of the MSE is: {mse_results.mean():.2f}" # Create string for report
print(mean_mse)
std_mse = f"The std of the MSE is: {mse_results.std():.2f}"
print(std_mse)

The mean of the MSE is: 570.41
The std of the MSE is: 135.57


#### The squared error increased from step A

## Task C: Increase epoch to 100

In [20]:
mse_list = [] # container for mse
for fold in range(50):
    x_train, x_test, y_train, y_test = train_test_split(predictors_norm, target, test_size = 0.3)
    model = regression_model() # create model
    model.fit(x_train, y_train, validation_split = 0.2, epochs = 100, verbose = 0) # train model with 100 epochs
    y_pred = model.predict(x_test, verbose = 0)
    mse = mean_squared_error(y_test, y_pred)
    mse_list.append(mse)

# Report results
mse_results = np.array(mse_list)
mean_mse = f"The mean of the MSE is: {mse_results.mean():.2f}" # Create string for report
print(mean_mse)
std_mse = f"The std of the MSE is: {mse_results.std():.2f}"
print(std_mse)

The mean of the MSE is: 204.71
The std of the MSE is: 53.54


#### The error decreased compared to task B

# Task D: Increase number of hidden layers

### Redefine model

In [21]:
def regression_model2():
    '''
    This function uses the Sequential class of Keras to build
    the model for the first task of the project
    Steps:
        1. Create a sequential NN stored in model
        2. Define input with n_cols shape
        3. Add 3 hidden layers of 10 neurons and relu activation
        4. Output layer with one neuron
        5. Compile model with adam optimizer and MSE as loss
    '''
    # create model
    model = Sequential()
    model.add(Input(shape=(n_cols,)))
    model.add(Dense(10, activation='relu')) # First hidden layer
    model.add(Dense(10, activation='relu')) # Second hidden layer
    model.add(Dense(10, activation='relu')) # Third hidden layer
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [22]:
mse_list = [] # container for mse
for fold in range(50):
    x_train, x_test, y_train, y_test = train_test_split(predictors_norm, target, test_size = 0.3)
    model = regression_model2() # create model
    model.fit(x_train, y_train, validation_split = 0.2, epochs = 50, verbose = 0) # train model
    y_pred = model.predict(x_test, verbose = 0)
    mse = mean_squared_error(y_test, y_pred)
    mse_list.append(mse)

# Report results
mse_results = np.array(mse_list)
mean_mse = f"The mean of the MSE is: {mse_results.mean():.2f}" # Create string for report
print(mean_mse)
std_mse = f"The std of the MSE is: {mse_results.std():.2f}"
print(std_mse)

The mean of the MSE is: 146.39
The std of the MSE is: 11.59


#### The error decresed compared to task B