In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler

In [3]:
df = pd.read_csv("concrete_data.csv")
df.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [4]:
df.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1030 entries, 0 to 1029
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Cement              1030 non-null   float64
 1   Blast Furnace Slag  1030 non-null   float64
 2   Fly Ash             1030 non-null   float64
 3   Water               1030 non-null   float64
 4   Superplasticizer    1030 non-null   float64
 5   Coarse Aggregate    1030 non-null   float64
 6   Fine Aggregate      1030 non-null   float64
 7   Age                 1030 non-null   int64  
 8   Strength            1030 non-null   float64
dtypes: float64(8), int64(1)
memory usage: 72.6 KB


In [6]:
df.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

In [7]:
df.shape

(1030, 9)

In [8]:
x_train, x_test, y_train, y_test = train_test_split(df.drop('Strength', axis = 1), df['Strength'], test_size=0.2)
x_train.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
104,318.8,212.5,0.0,155.7,14.3,852.1,880.4,7
107,323.7,282.8,0.0,183.8,10.3,942.7,659.9,7
117,313.3,262.2,0.0,175.5,8.6,1046.9,611.8,28
589,339.2,0.0,0.0,185.7,0.0,1069.2,754.3,28
553,239.6,359.4,0.0,185.7,0.0,941.6,664.3,7


In [9]:
scaler = StandardScaler()

x_train_scaled = pd.DataFrame(
    scaler.fit_transform(x_train),
    columns = x_train.columns,
    index = x_train.index
)
x_test_scaled = pd.DataFrame(
    scaler.transform(x_test),
    columns = x_test.columns,
    index = x_test.index
)

In [10]:
x_train_scaled.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
104,0.367745,1.577176,-0.851369,-1.206928,1.342979,-1.522731,1.324773,-0.620419
107,0.414232,2.38518,-0.851369,0.114513,0.674107,-0.36544,-1.41133,-0.620419
117,0.315566,2.148411,-0.851369,-0.275806,0.389837,0.965571,-2.008185,-0.287518
589,0.561282,-0.865223,-0.851369,0.203863,-1.048237,1.250423,-0.239955,-0.287518
553,-0.383632,3.265593,-0.851369,0.203863,-1.048237,-0.379491,-1.356732,-0.620419


In [11]:
x_test_scaled.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
376,0.910406,-0.865223,-0.851369,1.562924,-1.048237,0.411196,-0.139445,-0.287518
364,-0.617962,-0.246865,1.044808,-1.211631,0.557055,0.549151,0.086392,-0.683829
114,0.783279,1.307076,-0.851369,-0.774286,0.891491,-0.339893,-0.221342,-0.620419
406,-1.091368,-0.865223,1.382355,-0.826015,-1.048237,0.438021,1.57915,-0.683829
1029,-0.181557,0.289888,0.366602,0.904555,0.389837,-1.364338,-0.150613,-0.287518


In [12]:
x_test_scaled.shape

(206, 8)

In [13]:
y_test.shape

(206,)

In [14]:
# MLR from scratch

class multi_reg_imp:

    def __init__(self):
        self.coef_ = None
        self.intercept_ = None

    def fit(self, x_train, y_train):
        x_train_scaled = np.insert(x_train, 0, 1, axis=1)

        # evaluating coeffs

        beta = np.linalg.inv(np.dot(x_train_scaled.T, x_train_scaled)).dot(x_train_scaled.T).dot(y_train)
        self.intercept_ = beta[0]
        self.coef_ = beta[1:]

    def predict(self, x_test):
        y_pred = self.intercept_ + np.dot(x_test, self.coef_)
        return y_pred

In [15]:
multi_imp = multi_reg_imp()

In [16]:
multi_imp.fit(x_train_scaled, y_train)

In [17]:
y_pred = multi_imp.predict(x_test)

In [18]:
x_test_scaled.shape

(206, 8)

In [19]:
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mse)
adj_r2 = 1 - ((1 - r2) * (206 - 1)) / (206 - 1 - 8)

In [20]:
print(r2)
print(mse)
print(mae)
print(rmse)
print(adj_r2)

-355077.77019928914
88428740.93440348
9328.206762853597
9403.65572181391
-369497.2126439303


In [21]:
multi_imp.coef_

array([14.00757616, 10.14375272,  6.8809011 , -2.40150285,  1.97879279,
        2.23118799,  2.858418  ,  6.93766484])

In [22]:
multi_imp.intercept_

np.float64(36.19859223300971)

In [23]:
lr = LinearRegression()

In [24]:
lr.fit(x_train_scaled, y_train)

In [25]:
y_pred2 = lr.predict(x_test)
y_pred2

array([ 9400.1943617 ,  8591.45307865, 10976.22041826,  7781.82430128,
        9084.07500074,  7970.99811485,  9041.14051939,  8867.05575242,
        9007.65089761,  8120.8769309 , 10842.7958183 , 10097.61810435,
        8793.84464372,  9618.57069348,  8269.9898958 ,  8448.6562693 ,
        9049.5799121 ,  7930.93198109,  7276.1341342 ,  8399.42807545,
        7947.43530998,  8263.47891162,  9761.27123764, 11083.55201251,
       10092.50885177,  8411.69759052,  8321.17719279,  8425.32641429,
        9084.46361997,  8093.48327676, 10508.40003451,  8898.9399437 ,
        9146.01931279, 10052.66452853, 11189.42965327,  9158.06114293,
        7624.39775189, 11247.15241288,  8640.32963112, 11558.98426455,
       10948.46975892,  9089.10765648,  9430.53198375,  8492.46848052,
       11287.80749564,  9469.73645532,  9202.80053988,  8769.13048087,
       11515.12912223, 10385.66107295,  9152.25651059,  7887.84147618,
       10379.84446242,  9538.339644  ,  9472.87973191, 11757.94739152,
      

In [26]:
r2_lr = r2_score(y_test, y_pred2)
mse_lr = mean_squared_error(y_test, y_pred2)
mae_lr = mean_absolute_error(y_test, y_pred2)
rmse_lr = np.sqrt(mse_lr)
adj_r2_lr = 1 - ((1 - r2_lr) * (206 - 1)) / (206 - 1 - 8)

In [27]:
print(r2_lr)
print(mse_lr)
print(mae_lr)
print(rmse_lr)
print(adj_r2_lr)

-355077.77019929845
88428740.9344058
9328.206762853722
9403.655721814033
-369497.21264394


In [28]:
# applying gradient descent

# for univariate implementation

# class grad_desc:

#     def __init__(self, learning_rate=0.01, epochs=100):
#         self.m = 100
#         self.b = -120
#         self.learning_rate = learning_rate
#         self.epochs = epochs

#     def fit(self, x_train_scaled, y_train):
#         for i in range(self.epochs):
#             loss_slope_b = 2 * np.sum(y_train.values.ravel() - self.m * x_train_scaled.values.ravel() - self.b)
#             loss_slope_m = -2 * np.sum(y_train.values.ravel() - self.m * x_train_scaled - self.b) * x_train_scaled

#             self.b = self.b - self.learning_rate * loss_slope_b
#             self.m = self.m - self.learning_rate * loss_slope_m

#             print(self.m)
#             print(self.b)

#     def predict(self, x_test):
#         return self.m * x_train_scaled + self.b

In [29]:
x_train_scaled.shape[1]

8

In [30]:
# Batch GD

class batch_gd:

    def __init__(self, learning_rate=0.1, epochs=100):
        self.coef_ = None
        self.intercept_ = None
        self.learning_rate = learning_rate
        self.epochs = epochs

    def fit(self, x_train, y_train):

        # init coef and bias 

        self.intercept_ = 0
        self.coef_ = np.ones(x_test.shape[1])

        for i in range(self.epochs):
            
            # updates all coef and intercept

            y_hat = self.intercept_ + (x_train @ self.coef_)
            
            # finding intercept

            intercept_der = -2 * np.mean(y_train - y_hat)
            self.intercept_ = self.intercept_ - (self.learning_rate * intercept_der)
            
            # finding coeff

            coef_der = -2 * np.dot((y_train - y_hat), x_train) / x_train.shape[0]
            self.coef_ = self.coef_ - (self.learning_rate * coef_der)

        print(self.intercept_)
        print(self.coef_)

    def predict(self, x_test):
        return self.intercept_ + (x_test @ self.coef_)

In [31]:
bgd = batch_gd()

In [32]:
bgd.fit(x_train_scaled, y_train)

36.19859222563592
[10.88025916  7.12872615  4.18510091 -4.85832171  1.82713156 -0.07366771
 -0.07858946  6.87500052]


In [33]:
y_pred_bgd = bgd.predict(x_test)

In [34]:
y_test.shape, y_pred_bgd.shape

((206,), (206,))

In [35]:
x_test_scaled.shape

(206, 8)

In [36]:
r2_bgd = r2_score(y_test, y_pred_bgd)
mse_bgd = mean_squared_error(y_test, y_pred_bgd)
mae_bgd = mean_absolute_error(y_test, y_pred_bgd)
rmse_bgd = np.sqrt(mse_bgd)
adj_r2_bgd = 1 - ((1 - r2_bgd) * (206 - 1) / (206 - 1 - 8))

In [37]:
print(r2_lr, r2_bgd)
print(mse_lr, mse_bgd)
print(mae_lr, mae_bgd)
print(rmse_bgd, rmse_bgd)
print(adj_r2_lr, adj_r2_bgd)

-355077.77019929845 -43463.349081718676
88428740.9344058 10824352.192817956
9328.206762853722 3108.0912486641087
3290.0383269527356 3290.0383269527356
-369497.21264394 -45228.39879062096


In [38]:
# Mini Batch GD

# from scratch

import random

class mini_bgd:

    def __init__(self, batch_size, learning_rate=0.01, epochs=100):
        self.coef_ = None
        self.intercept_ = None
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size

    def fit(self, x_train, y_train):
        self.intercept_ = 0
        self.coef_ = np.ones(x_train.shape[1])


        for i in range(self.epochs):
            for j in range(int(x_train.shape[0]/self.batch_size)):
                
                # creates random sample
                idx = random.sample(range(x_train.shape[0]), self.batch_size)
                
                x_batch = x_train.values[idx]
                y_batch = y_train.values.ravel()[idx]


                y_hat = self.intercept_ + (x_batch @ self.coef_)

                # intercept
                intercept_der = -2 * np.mean(y_batch - y_hat)
                self.intercept_ = self.intercept_ - (self.learning_rate * intercept_der)

                # coeff
                coef_der = -2 * (x_batch.T @ (y_batch - y_hat)) / self.batch_size
                self.coef_ = self.coef_ - (self.learning_rate * coef_der)

        print(self.intercept_)
        print(self.coef_)

    def predict(self, x_test):
            return self.intercept_ + (x_test @ self.coef_)

In [39]:
mbgd = mini_bgd(batch_size = 100)

In [40]:
mbgd.fit(x_train_scaled, y_train)

36.44749546344644
[10.60093488  6.76313601  3.72814744 -5.08225734  2.0225673  -0.24351175
 -0.5822075   6.95407048]


In [41]:
y_pred_mbgd = mbgd.predict(x_test)

In [42]:
x_test_scaled.shape

(206, 8)

In [43]:
r2_mbgd = r2_score(y_test, y_pred_mbgd)
mse_mbgd = mean_squared_error(y_test, y_pred_mbgd)
mae_mbgd = mean_absolute_error(y_test, y_pred_mbgd)
rmse_mbgd = np.sqrt(mse_mbgd)
adj_r2_mbgd = 1 - ((1 - r2_mbgd) * (206 - 1) / (206 - 1 - 8))

In [44]:
print(r2_bgd, r2_mbgd)
print(mse_bgd, mae_mbgd)
print(mae_bgd, mae_mbgd)
print(rmse_bgd, rmse_mbgd)
print(adj_r2_bgd, adj_r2_mbgd)

-43463.349081718676 -27609.36188232682
10824352.192817956 2387.748775558782
3108.0912486641087 2387.748775558782
3290.0383269527356 2622.227795948348
-45228.39879062096 -28730.5948521675


In [45]:
# mini batch using sklearn

from sklearn.linear_model import SGDRegressor

sgd = SGDRegressor(learning_rate='constant', eta0=0.1)

In [46]:
batch_size = 70

for i in range(100):
    idx = random.sample(range(x_train_scaled.shape[0]), batch_size)
    
    x_batch = x_train.values[idx]
    y_batch = y_train.values.ravel()[idx]

    sgd.partial_fit(x_batch, y_batch)

In [47]:
sgd.coef_

array([-7.75573110e+13, -8.69935702e+13,  1.25249890e+14,  2.26089900e+13,
       -3.47641337e+13, -6.16592325e+13, -9.93208717e+13,  1.36138943e+14])

In [48]:
sgd.intercept_

array([-4.00000911e+11])

In [49]:
y_pred_sgd = sgd.predict(x_test)



In [50]:
r2_sgd = r2_score(y_test, y_pred_sgd)
mse_sgd = mean_squared_error(y_test, y_pred_sgd)
mae_sgd = mean_absolute_error(y_test, y_pred_sgd)
rmse_sgd = np.sqrt(mse_sgd)
adj_r2_sgd = 1 - ((1 - r2_sgd) * (206 - 1) / (206 - 1 - 8))

In [51]:
print(r2_mbgd, r2_sgd)
print(mse_mbgd, mae_sgd)
print(mae_mbgd, mae_sgd)
print(rmse_mbgd, rmse_sgd)
print(adj_r2_mbgd, adj_r2_sgd)

-27609.36188232682 -9.024916074437756e+31
6876078.613844132 1.4891265773905184e+17
2387.748775558782 1.4891265773905184e+17
2622.227795948348 1.4991875024868806e+17
-28730.5948521675 -9.39141012822203e+31


In [52]:
# stochastic GD

# from scratch

class stocahstic_gd:

    def __init__(self, learning_rate=0.01, epochs=100):
        self.coef_ = None
        self.intercept_ = None
        self.learning_rate = learning_rate
        self.epochs = epochs

    def fit(self, x_train, y_train):

        self.intercept_ = 0
        self.coef_ = np.ones(x_train.shape[1])


        for i in range(self.epochs):
            for j in range(x_train.shape[0]):

                # create random index
                idx = np.random.randint(0, x_train.shape[0])
        
                x_train_sgd = x_train.values[idx]
                y_train_sgd = y_train.values.ravel()[idx]

                y_hat = self.intercept_ + (x_train_sgd @ self.coef_)

                intercept_der = -2 * (y_train_sgd - y_hat)
                self.intercept_ = self.intercept_ - (self.learning_rate * intercept_der)

                coef_der = -2 * ((y_train_sgd - y_hat) * x_train_sgd)
                self.coef_ = self.coef_ - (self.learning_rate * coef_der)

        print(self.intercept_)
        print(self.coef_)

    def predict(self, x_test):

        return self.intercept_ + (x_test @ self.coef_)

In [53]:
stgd = stocahstic_gd()

In [54]:
stgd.fit(x_train_scaled, y_train)

34.4735291961227
[14.54013833 11.07688538  7.21809165 -2.75280662  1.23359191  2.62542328
  1.94196096  7.60422896]


In [55]:
y_pred_stgd = stgd.predict(x_test)

In [56]:
r2_stgd = r2_score(y_test, y_pred_stgd)
mse_stgd = mean_squared_error(y_test, y_pred_stgd)
mae_stgd = mean_absolute_error(y_test, y_pred_stgd)
rmse_stgd = np.sqrt(mse_stgd)
adj_r2_stgd = 1 - ((1 -r2_stgd) * (206 - 1) / (206 - 1 -8))

In [57]:
print(r2_sgd, r2_stgd)
print(mae_sgd, mse_stgd)
print(mae_sgd, mae_stgd)
print(rmse_sgd, rmse_stgd)
print(adj_r2_sgd, adj_r2_stgd)

-9.024916074437756e+31 -346238.6568149051
1.4891265773905184e+17 86227450.02895503
1.4891265773905184e+17 9197.984164119127
1.4991875024868806e+17 9285.873681509727
-9.39141012822203e+31 -360299.1504926678


In [58]:
# stochastic gd using sklearn

sgd_imp = SGDRegressor(max_iter=100, learning_rate='constant', eta0=0.01)

In [59]:
sgd_imp.fit(x_train_scaled, y_train)

In [60]:
y_pred_sgd_imp = sgd_imp.predict(x_test)

In [61]:
r2_sgd_imp = r2_score(y_test, y_pred_sgd_imp)
mse_sgd_imp = mean_squared_error(y_test, y_pred_sgd_imp)
mae_sgd_imp = mean_absolute_error(y_test, y_pred_sgd_imp)
rmse_sgd_imp = np.sqrt(mse_sgd_imp)
adj_r2_sgd_imp = 1 - ((1 - r2_sgd_imp) * (206 - 1) / (206 - 1 - 8))

In [63]:
print(r2_stgd, r2_sgd_imp)
print(mae_stgd, mse_sgd_imp)
print(mae_stgd, mae_sgd_imp)
print(rmse_stgd, rmse_sgd_imp)
print(adj_r2_stgd, adj_r2_sgd_imp)

-346238.6568149051 -332678.37519868166
9197.984164119127 82850400.39750125
9197.984164119127 9026.81816615195
9285.873681509727 9102.21953138361
-360299.1504926678 -346188.19754177536
