## A. Build a baseline model 

In [1]:
# - One hidden layer of 10 nodes, and a ReLU activation function
# - Use the adam optimizer and the mean squared error  as the loss function.
# 1. Randomly split the data into a training and test sets by holding 30% of the data for testing.
# 2. Train the model on the training data using 50 epochs.
# 3. Evaluate the model on the test data and compute the mean squared error between the predicted concrete strength and the actual concrete strength.
# 4. Repeat steps 1 - 3, 50 times, i.e., create a list of 50 mean squared errors.
# 5. Report the mean and the standard deviation of the mean squared errors.

In [2]:
#importing required libraries
import keras
from keras.models import Sequential
from keras.layers import Dense
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [6]:
#loading the dataset
concrete_data = pd.read_csv('concrete_data.csv')
concrete_data.head()

#creating target variable and labels
X = concrete_data.drop('Strength', axis=1)
y = concrete_data['Strength']

In [7]:
#building a baseline model 
def baseline_model():
    
    model = Sequential()
    
    #1 hidden layer of 10 nodes, and a ReLU activation function
    model.add(Dense(10, activation='relu', input_shape=(X.shape[1],))) 
    model.add(Dense(1))
    
    #adam optimizer and the mean squared error  as the loss function.
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [8]:
mse_list = [] #create a list of 50 mean squared errors.

#4. Repeat steps 1 - 3, 50 times
for _ in range(50):
    
    # 1. Randomly split the data into a training and test sets by holding 30% of the data for testing.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    
    model = baseline_model() #model building 
    
    #2. Train the model on the training data using 50 epochs.
    model.fit(X_train, y_train, epochs=50, verbose=0)
    
    #3. Evaluate the model on the test data and compute the mean squared error between the predicted concrete strength and the actual concrete strength.
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mse_list.append(mse) #list of 50 mean squared errors.

#5. Report the mean and the standard deviation of the mean squared errors.
mean_mse = sum(mse_list)/len(mse_list)
sd_mse = pd.Series(mse_list).std()





In [9]:
# 5. Report the mean and the standard deviation of the mean squared errors.
print(f'Mean Squared Error: {mean_mse}')
print(f'Standard Deviation of MSE: {sd_mse}')

Mean Squared Error: 338.9050385495959
Standard Deviation of MSE: 388.5546070809698


## B. Normalize the data 


In [10]:
#normalize the data by subtracting the mean and dividing by SD

X_train_norm = (X_train - X_train.mean() / X_train.std())
X_train.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
196,194.7,0.0,100.5,165.6,7.5,1006.4,905.9,28
631,325.0,0.0,0.0,184.0,0.0,1063.0,783.0,7
81,318.8,212.5,0.0,155.7,14.3,852.1,880.4,3
526,359.0,19.0,141.0,154.0,10.9,942.0,801.0,3
830,162.0,190.0,148.0,179.0,19.0,838.0,741.0,28


In [11]:
#repeating part A with normalised data
mse_list = [] #create a list of 50 mean squared errors.

#4. Repeat steps 1 - 3, 50 times
for _ in range(50):
    
    # 1. Randomly split the data into a training and test sets by holding 30% of the data for testing.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    
    #normalising in the loop
    X_train_norm = (X_train - X_train.mean() / X_train.std())
    X_train.head()
    
    model = baseline_model() #model building 
    
    #2. Train the model on the training data using 50 epochs.
    model.fit(X_train, y_train, epochs=50, verbose=0)
    
    #3. Evaluate the model on the test data and compute the mean squared error between the predicted concrete strength and the actual concrete strength.
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mse_list.append(mse) #list of 50 mean squared errors.

#5. Report the mean and the standard deviation of the mean squared errors.
mean_mse = sum(mse_list)/len(mse_list)
sd_mse = pd.Series(mse_list).std()

print(f'Mean Squared Error: {mean_mse}')
print(f'Standard Deviation of MSE: {sd_mse}')

Mean Squared Error: 341.1659642644952
Standard Deviation of MSE: 605.0608481204813


#### How does the mean of the mean squared errors compare to that from Step A?
* Standard Deviation of MSE After Normalisation : 605.0608481204813

    * * Mean Squared Error After Normalisation : 341.1659642644952

* Mean Squared Error (MSE) Before Normalisation : 338.9050385495959

    * * Standard Deviation of MSE Before Normalisation : 388.5546070809698

## C. Increase the number of epochs


In [12]:
#repeating part A and B 
mse_list = [] #create a list of 50 mean squared errors.

#4. Repeat steps 1 - 3, 50 times
for _ in range(50):
    
    # 1. Randomly split the data into a training and test sets by holding 30% of the data for testing.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    
    #normalising in the loop
    X_train_norm = (X_train - X_train.mean() / X_train.std())
    X_train.head()
    
    model = baseline_model() #model building 
    
   # Train the model on the training data using 100 epochs.
    model.fit(X_train, y_train, epochs=100, verbose=0)
    
    #3. Evaluate the model on the test data and compute the mean squared error between the predicted concrete strength and the actual concrete strength.
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mse_list.append(mse) #list of 50 mean squared errors.

#. Report the mean and the standard deviation of the mean squared errors.
mean_mse = sum(mse_list)/len(mse_list)
sd_mse = pd.Series(mse_list).std()

print(f'Mean Squared Error after 100 epochs: {mean_mse}')
print(f'Standard Deviation of MSE after 100 epochs: {sd_mse}')

Mean Squared Error after 100 epochs: 154.3079255584409
Standard Deviation of MSE after 100 epochs: 144.21627896969625


#### How does the mean of the mean squared errors compare to that from Step B?
* Standard Deviation of MSE After Normalisation PART B: 605.0608481204813

    * * Mean Squared Error After Normalisation PART B: 341.1659642644952

* Standard Deviation of MSE After Normalisation after 100 epochs: 154.3079255584409
    * *Standard Deviation of MSE after 100 epochs: 144.21627896969625

# D. Increase the number of hidden layers

In [13]:
#- Three hidden layers, each of 10 nodes and ReLU activation function.

def baseline_model_new():
        
        model = Sequential()
        
        #Three hidden layers, each of 10 nodes and ReLU activation function.
        model.add(Dense(10, activation='relu', input_shape=(X.shape[1],))) 
        model.add(Dense(10, activation='relu'))
        model.add(Dense(10, activation='relu'))
        model.add(Dense(1))
        
        #adam optimizer and the mean squared error  as the loss function.
        model.compile(optimizer='adam', loss='mean_squared_error')
        return model

In [14]:
#repeating part B
mse_list = [] #create a list of 50 mean squared errors.

#4. Repeat steps 1 - 3, 50 times
for _ in range(50):
    
    # 1. Randomly split the data into a training and test sets by holding 30% of the data for testing.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    
    #normalising in the loop
    X_train_norm = (X_train - X_train.mean() / X_train.std())
    X_train.head()
    
    model = baseline_model_new() #model with Three hidden layers, each of 10 nodes and ReLU activation function.
    
    #2. Train the model on the training data using 50 epochs.
    model.fit(X_train, y_train, epochs=50, verbose=0)
    
    #3. Evaluate the model on the test data and compute the mean squared error between the predicted concrete strength and the actual concrete strength.
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mse_list.append(mse) #list of 50 mean squared errors.

#5. Report the mean and the standard deviation of the mean squared errors.
mean_mse = sum(mse_list)/len(mse_list)
sd_mse = pd.Series(mse_list).std()

print(f'Mean Squared Error of New Model: {mean_mse}')
print(f'Standard Deviation of MSE New Model: {sd_mse}')

Mean Squared Error of New Model: 152.4764182016884
Standard Deviation of MSE New Model: 192.22245006393513


#### How does the mean of the mean squared errors compare to that from Step B?
* Standard Deviation of MSE After Normalisation PART B: 605.0608481204813

    * * Mean Squared Error After Normalisation PART B: 341.1659642644952

* Standard Deviation of MSE New Model: 192.22245006393513
    * * Mean Squared Error of New Model: 152.4764182016884