In [1]:
import numpy as np
#import implemented model
from LinearRegression import Linear_Regression
from sklearn.datasets import make_regression


# Generate Dummy Dataset
- Here we will generate a dummy dataset using the `make_regression` function from `sklearn`.

In [2]:
X,y = make_regression(n_samples=1500, n_features=10, n_informative=7,
                      n_targets=1, bias=0.0, noise=1.0, shuffle=True, random_state=32)

#split dataset into train and test sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=23,shuffle=True)


In [8]:
X_train, y_train

(array([[ 0.13792668, -0.40585928,  0.04189068, ...,  0.15302829,
          0.55092677, -0.56761983],
        [-0.57508072,  0.39142555,  1.39990135, ..., -1.1645281 ,
         -0.08069095,  2.08162623],
        [-1.99026198,  1.77698245,  0.1203932 , ..., -0.30601227,
          0.36804993, -0.949436  ],
        ...,
        [-1.10691659,  0.36865197,  2.29064396, ...,  0.93009293,
          0.23391897, -0.09136163],
        [ 1.58586844,  1.24773385,  0.73549134, ..., -0.33410119,
         -1.33061374,  0.90498398],
        [ 0.29660699,  0.67185486,  0.19660167, ...,  1.44854214,
         -0.4253674 , -1.0458597 ]]),
 array([-120.53489545, -232.36145651,   62.22982777, ...,  196.26423693,
         -34.32024192,  -75.16201357]))

## Batch Gradient Descent Implementation
- Now I will test the Batch Gradient Descent implementation on our dummy dataset

In [4]:
#Test implemented model
bgd_estimator = Linear_Regression(optimizer='bgd')
bgd_estimator.fit(X_train, y_train,)

preds= bgd_estimator.predict(X_test)
preds

array([[ 464.89954957],
       [  95.32369383],
       [ -90.13555069],
       [ -48.7770355 ],
       [ -47.54384934],
       [  32.68614392],
       [-195.18770146],
       [  52.58207581],
       [ 217.21766959],
       [  94.74085066],
       [ -67.83106614],
       [-141.51705823],
       [-106.30962748],
       [ 186.07911325],
       [-169.59046816],
       [  43.28102749],
       [ -29.24088075],
       [ -95.43195106],
       [  -2.57391473],
       [ -44.88306377],
       [  12.93099989],
       [ 292.70346331],
       [ -55.02062019],
       [   8.30025275],
       [ 359.24046548],
       [  12.15398764],
       [  67.7506446 ],
       [-267.27274534],
       [-134.58610578],
       [ -43.38137433],
       [-279.85334144],
       [   5.36189433],
       [  23.78238435],
       [ 181.1074966 ],
       [  40.75856299],
       [-100.02799671],
       [  95.74385835],
       [-227.86579533],
       [ -99.59134969],
       [ 154.60011165],
       [-148.00935555],
       [ 206.647

In [5]:
print(f"Model MSE performance: {bgd_estimator.score(y_test,preds)}")

Model MSE performance: 46653.22338921421


In [6]:
print(f"Model learned coefficients {bgd_estimator.coef_}")
print(f"Model learned intercept {bgd_estimator.intercept_}")

Model learned coefficients [[ 3.19284820e-02]
 [ 5.02208860e+01]
 [ 2.98365314e+01]
 [-4.48147125e-03]
 [ 9.60745885e+01]
 [ 2.79322525e+01]
 [-2.69217971e-02]
 [ 7.32115701e+01]
 [ 3.82622148e+01]
 [ 5.27043896e+01]]
Model learned intercept [-0.00956265]


## Ordinary Least Square Implementation
- Now I will test the ordinary least square implementation on our dummy dataset

In [9]:
#Test implemented model
ols_estimator = Linear_Regression(optimizer='ols')
ols_estimator.fit(X_train, y_train)

preds= ols_estimator.predict(X_test)
preds

array([[ 464.89959132],
       [  95.32368384],
       [ -90.13562781],
       [ -48.77710187],
       [ -47.5438869 ],
       [  32.68613853],
       [-195.18775999],
       [  52.5820187 ],
       [ 217.21769053],
       [  94.74087302],
       [ -67.83115119],
       [-141.51707273],
       [-106.30960523],
       [ 186.07910992],
       [-169.59047517],
       [  43.28101166],
       [ -29.24091452],
       [ -95.43194404],
       [  -2.57396926],
       [ -44.88312812],
       [  12.93101876],
       [ 292.70348929],
       [ -55.02068143],
       [   8.30021252],
       [ 359.24047495],
       [  12.15395856],
       [  67.7505916 ],
       [-267.27274049],
       [-134.58612673],
       [ -43.38141385],
       [-279.8534229 ],
       [   5.36190881],
       [  23.78238348],
       [ 181.10748393],
       [  40.75853785],
       [-100.02802755],
       [  95.74387433],
       [-227.86589405],
       [ -99.59136917],
       [ 154.60014969],
       [-148.00939487],
       [ 206.647

In [10]:
print(f"Model MSE performance: {ols_estimator.score(y_test,preds)}")

Model MSE performance: 46653.23009193502


In [11]:
print(f"Model learned coefficients {ols_estimator.coef_}")
print(f"Model learned intercept {ols_estimator.intercept_}")

Model learned coefficients [[ 3.19093762e-02]
 [ 5.02208939e+01]
 [ 2.98365296e+01]
 [-4.47069316e-03]
 [ 9.60745916e+01]
 [ 2.79322527e+01]
 [-2.69159264e-02]
 [ 7.32115959e+01]
 [ 3.82622159e+01]
 [ 5.27044007e+01]]
Model learned intercept [-0.00953915]


#### The weights recorded will using both optimization methods are approximately the same with error boundary of |0.00001|.