In [None]:
import pandas as pd
import time
import numpy as np
from sklearn.linear_model import SGDRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from itertools import chain

In [None]:
# Read the concrete dataset and replace the CSV header with short column names.
raw_data = pd.read_csv('Concrete_Data.csv')
raw_data.columns = [
    'Cement',
    'Blast Furnace Slag',
    'Fly Ash',
    'Water',
    'Superplasticizer',
    'Coarse Aggregate',
    'Fine Aggregate',
    'Age',
    'Concrete compressive strength',
]

# Min-max scaler instance; it is fitted on raw_data in a later cell.
scaler = MinMaxScaler()

print(raw_data)

      Cement  Blast Furnace Slag  Fly Ash  Water  Superplasticizer  \
0      540.0                 0.0      0.0  162.0               2.5   
1      540.0                 0.0      0.0  162.0               2.5   
2      332.5               142.5      0.0  228.0               0.0   
3      332.5               142.5      0.0  228.0               0.0   
4      198.6               132.4      0.0  192.0               0.0   
...      ...                 ...      ...    ...               ...   
1025   276.4               116.0     90.3  179.6               8.9   
1026   322.2                 0.0    115.6  196.0              10.4   
1027   148.5               139.4    108.6  192.7               6.1   
1028   159.1               186.7      0.0  175.6              11.3   
1029   260.9               100.5     78.3  200.6               8.6   

      Coarse Aggregate  Fine Aggregate  Age  Concrete compressive strength  
0               1040.0           676.0   28                          79.99  
1    

In [None]:
# fit_transform() learns each column's min/max and rescales in a single call,
# so a separate scaler.fit() beforehand is redundant work.
# NOTE(review): the scaler is fitted on every row (and on the target column)
# before the train/test split, so test-set statistics influence the scaling.
# Consider fitting on the training portion only -- confirm whether this matters here.
scaled = scaler.fit_transform(raw_data)

# Wrap the scaled array back into a DataFrame carrying the original column names.
scaled_df = pd.DataFrame(scaled, columns=raw_data.columns)

print(scaled_df)

        Cement  Blast Furnace Slag   Fly Ash     Water  Superplasticizer  \
0     1.000000            0.000000  0.000000  0.321086          0.077640   
1     1.000000            0.000000  0.000000  0.321086          0.077640   
2     0.526256            0.396494  0.000000  0.848243          0.000000   
3     0.526256            0.396494  0.000000  0.848243          0.000000   
4     0.220548            0.368392  0.000000  0.560703          0.000000   
...        ...                 ...       ...       ...               ...   
1025  0.398174            0.322760  0.451274  0.461661          0.276398   
1026  0.502740            0.000000  0.577711  0.592652          0.322981   
1027  0.106164            0.387869  0.542729  0.566294          0.189441   
1028  0.130365            0.519477  0.000000  0.429712          0.350932   
1029  0.362785            0.279633  0.391304  0.629393          0.267081   

      Coarse Aggregate  Fine Aggregate       Age  \
0             0.694767        0.205

In [None]:
# Separate the scaled frame into the target series (rounded to 5 decimals)
# and the remaining feature columns.
true_value = scaled_df['Concrete compressive strength'].round(5)
scaled_df = scaled_df.drop(columns=['Concrete compressive strength'])

print(true_value.head(5))

0    0.96748
1    0.74200
2    0.47265
3    0.48237
4    0.52286
Name: Concrete compressive strength, dtype: float64


In [None]:
# Convert features and targets to plain Python lists (rows of floats / floats).
data_list = scaled_df.values.tolist()
true_list = true_value.values.tolist()

# Show only a short preview: printing every row floods the notebook output
# and bloats the saved file without adding information.
print(data_list[:5])
print(true_list[:5])

[[1.0, 0.0, 0.0, 0.3210862619808308, 0.07763975155279502, 0.6947674418604652, 0.2057200200702458, 0.07417582417582418], [1.0, 0.0, 0.0, 0.3210862619808308, 0.07763975155279502, 0.7383720930232558, 0.2057200200702458, 0.07417582417582418], [0.5262557077625571, 0.3964941569282137, 0.0, 0.8482428115015974, 0.0, 0.3808139534883721, 0.0, 0.739010989010989], [0.5262557077625571, 0.3964941569282137, 0.0, 0.8482428115015974, 0.0, 0.3808139534883721, 0.0, 1.0], [0.22054794520547943, 0.3683917640511965, 0.0, 0.560702875399361, 0.0, 0.5156976744186048, 0.5807827395885601, 0.9862637362637363], [0.37442922374429216, 0.31719532554257096, 0.0, 0.8482428115015974, 0.0, 0.3808139534883721, 0.19066733567486205, 0.24450549450549453], [0.634703196347032, 0.2643294379521425, 0.0, 0.8482428115015974, 0.0, 0.3808139534883721, 0.0, 1.0], [0.634703196347032, 0.2643294379521425, 0.0, 0.8482428115015974, 0.0, 0.3808139534883721, 0.0, 0.07417582417582418], [0.37442922374429216, 0.31719532554257096, 0.0, 0.8482428

In [None]:
# Hold out 10% of the samples for testing; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    data_list,
    true_list,
    test_size=0.1,
    random_state=33,
)

for label, size in (
    ('Total', len(data_list)),
    ('Train', len(y_train)),
    ('Test', len(y_test)),
):
    print(label + ' size:', size)

Total size: 1030
Train size: 927
Test size: 103


**Normal Gradient Descent**

In [None]:
def normal_gradient_descent(feature_array, target_array, to_predict, learning_rate, num_iters):
    """Fit a linear model with full-batch gradient descent and predict new samples.

    An intercept column of ones is prepended to the features, the weights start
    at zero, and every iteration moves the weights against the mean gradient
    ``X.T @ (X @ w - y) / n`` computed over all training rows.

    :param feature_array: 2-D array-like of training feature vectors (one row per sample)
    :param target_array: 1-D array-like of training targets, one per row of feature_array
    :param to_predict: iterable of feature vectors to predict (same width as the training rows)
    :param learning_rate: constant step size applied to the gradient
    :param num_iters: number of gradient-descent iterations to run
    :return: tuple ``(predictions, duration)`` -- a list of Python floats rounded
             to 5 decimals, and the training time in seconds (prediction excluded)
    """
    start_time = time.time()

    X = np.array(feature_array)
    y = np.array(target_array)

    # Prepend a column of ones so the first weight acts as the intercept.
    X = np.insert(X, 0, 1, axis=1)
    weights = np.zeros(X.shape[1])
    n_samples = len(y)

    for _ in range(num_iters):
        errors = np.dot(X, weights) - y
        gradient = np.dot(X.T, errors) / n_samples
        weights -= learning_rate * gradient

        # A diverging run (learning rate too large) ends up at NaN/inf; stop early.
        if not np.isfinite(weights).all():
            print("Error: Weights have become NaN or infinite!")
            break

    duration = time.time() - start_time

    samples = list(to_predict)
    if not samples:
        # Nothing to predict; np.insert below needs a 2-D array.
        return [], duration

    # Predict all samples with one matrix product instead of a per-row loop.
    X_new = np.insert(np.array(samples), 0, 1, axis=1)
    predictions = np.dot(X_new, weights)
    value_list = [round(p, 5) for p in predictions.tolist()]

    return value_list, duration

**Stochastic Gradient Descent**

In [None]:
def stochastic_gradient_descent(feature_array, target_array, to_predict, learning_rate, num_iters, random_state=None):
    """Fit a linear regression with scikit-learn's SGDRegressor and predict new samples.

    The features are standardised with StandardScaler and then fed to an
    SGDRegressor inside one pipeline (the estimator is the last pipeline step).
    The fitted coefficients are printed after training.

    :param feature_array: array with all feature vectors used to train the model
    :param target_array: array with all target values used to train the model
    :param to_predict: iterable of feature vectors to make predictions for
    :param learning_rate: passed to SGDRegressor as ``eta0``
    :param num_iters: passed to SGDRegressor as ``max_iter``
    :param random_state: optional seed passed to SGDRegressor so runs can be
                         reproduced; the default ``None`` leaves it unseeded
    :return: tuple ``(predictions, duration)`` -- a list of predictions rounded
             to 5 decimals, and the fit time in seconds (prediction excluded)
    """
    start_time = time.time()
    linear_regression_pipeline = make_pipeline(
        StandardScaler(),
        SGDRegressor(eta0=learning_rate, max_iter=num_iters, random_state=random_state),
    )
    linear_regression_pipeline.fit(feature_array, target_array)
    duration = time.time() - start_time

    print("Model Coeffiecients: " + str(linear_regression_pipeline[1].coef_))

    samples = list(to_predict)
    if not samples:
        # predict() rejects an empty batch, so short-circuit here.
        return [], duration

    # One batched predict() call instead of one pipeline call per sample.
    predictions = linear_regression_pipeline.predict(samples)
    value_array = [round(p, 5) for p in predictions]

    return value_array, duration

**Adaptive Learning Rate Method**

In [None]:
def adaptive_learning_rate_GD(feature_array, target_array, to_predict, learning_rate, epsilon, max_iters):
    """Fit a linear model by gradient descent with a gradient-norm-scaled step.

    Each iteration uses the step size ``learning_rate / (||grad|| + epsilon)``,
    where ``grad = X.T @ (X @ theta - y)`` and X carries a leading column of
    ones for the intercept. Training stops after ``max_iters`` iterations, or
    earlier once the mean squared error changes by less than ``epsilon``
    between two consecutive iterations.

    :param feature_array: 2-D array-like of training feature vectors
    :param target_array: array-like of training targets (reshaped to a column)
    :param to_predict: iterable of feature vectors to predict
    :param learning_rate: numerator of the per-iteration step size
    :param epsilon: added to the gradient norm and used as the stopping tolerance
    :param max_iters: maximum number of iterations
    :return: tuple ``(predictions, duration)`` -- predictions rounded to 5
             decimals, and the training time in seconds (prediction excluded)
    """
    started = time.time()

    # Design matrix with an intercept column, column-vector targets, zero start.
    intercept_col = np.ones((len(feature_array), 1))
    design = np.hstack((intercept_col, feature_array))
    targets = np.array(target_array).reshape(-1, 1)
    theta = np.zeros((design.shape[1], 1))

    last_cost = float('inf')
    for _ in range(max_iters):
        residual = np.dot(design, theta) - targets
        grad = np.dot(design.T, residual)

        # Scale the step by the inverse gradient norm.
        step = learning_rate / (np.linalg.norm(grad) + epsilon)
        theta -= step * grad

        cost = np.mean((np.dot(design, theta) - targets)**2)
        converged = abs(last_cost - cost) < epsilon
        if converged:
            break
        last_cost = cost

    finished = time.time()

    # Prepend the intercept term to every sample before applying theta.
    value_list = [
        round(np.dot(np.hstack(([1], x)), theta)[0], 5)
        for x in to_predict
    ]

    return value_list, finished - started

**Test Run**

In [None]:
# Hyper-parameters shared by the three optimisers.
learning_rate = 0.01
num_iters = 100000
epsilon = 1e-8

# Train each method on the training split and predict the held-out test rows.
GD_predicted_list, GD_duration = normal_gradient_descent(X_train, y_train, X_test, learning_rate, num_iters)
SGD_predicted_list, SGD_duration = stochastic_gradient_descent(X_train, y_train, X_test, learning_rate, num_iters)
ALR_predicted_list, ALR_duration = adaptive_learning_rate_GD(X_train, y_train, X_test, learning_rate, epsilon, num_iters)


def _report_method(method_name, predicted, duration):
    """Print predictions, true targets, timing and test MSE for one method.

    :param method_name: display name of the optimisation method
    :param predicted: predictions for the rows of X_test
    :param duration: training time in seconds
    :return: mean squared error of ``predicted`` against ``y_test``
    """
    heading = 'Predicted ouput of ' + method_name + ':'
    print('\n' + heading, predicted)
    # Pad the label so the true values line up under the predictions.
    print('True output:'.ljust(len(heading)), y_test)
    print('Time taken by ' + method_name + ':', '{:.5f}'.format(duration), 's')
    # scikit-learn's signature is mean_squared_error(y_true, y_pred).
    mse = mean_squared_error(y_test, predicted)
    print('Mean Square Error: ' + '{:.6f}'.format(mse))
    return mse


mse_GD = _report_method('Normal Gradient Descent', GD_predicted_list, GD_duration)
mse_SGD = _report_method('Stochastic Gradient Descent', SGD_predicted_list, SGD_duration)
mse_ALR = _report_method('Adaptive Learning Rate Method', ALR_predicted_list, ALR_duration)

Model Coeffiecients: [ 0.1026626   0.06081903  0.02573417 -0.07255033  0.03108046 -0.01342816
 -0.02442244  0.08578926]

Predicted ouput of Normal Gradient Descent: [0.36121, 0.25398, 0.2689, 0.452, 0.61734, 0.28094, 0.72808, 0.6512, 0.70275, 0.2167, 0.18358, 0.62896, 0.29032, 0.59807, 0.26848, 0.41519, 0.3954, 0.35042, 0.27678, 0.43492, 0.50865, 0.18538, 0.32621, 0.28454, 0.55849, 0.36582, 0.19842, 0.52605, 0.37296, 0.47914, 0.54483, 0.31827, 0.58886, 0.36976, 0.49828, 0.24778, 0.1904, 0.38418, 0.50071, 0.70353, 0.40483, 0.22854, 0.56425, 0.28042, 0.31278, 0.17159, 0.46173, 0.60179, 0.12341, 0.30891, 0.29671, 0.30081, 0.23709, 0.37123, 0.63653, 0.23674, 0.35809, 0.57434, 0.52922, 0.43192, 0.26447, 0.38225, 0.27233, 0.30309, 0.31305, 0.66966, 0.47269, 0.27885, 0.52672, 0.42621, 0.3131, 0.6307, 0.2325, 0.32674, 0.24176, 0.67868, 0.41078, 0.44007, 0.65449, 0.36857, 0.47948, 0.74429, 0.1291, 0.30559, 0.28138, 0.68435, 0.36706, 0.24537, 0.44242, 0.25143, 0.71074, 0.39131, 0.4963, 0.55659, 