In [434]:
import numpy as np
import pandas as pd

## Implementation
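The cell below implements linear regression trained with batch gradient descent: a `LinearRegression` class with `train` and `predict` methods, plus the mean-squared-error cost function `mse` that `train` uses by default.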

In [472]:
class LinearRegression:
    """Linear regression model fitted with batch gradient descent.

    A model can be trained once; a second call to ``train`` on the same
    instance is rejected with a message (see ``train``).
    """

    def __init__(self):
        self.w = None      # weight vector, one entry per feature column (set by train)
        self.b = None      # intercept (set by train)
        self.alpha = None  # learning rate of the completed training run (set by train)
        self.iter = 0      # number of gradient-descent iterations performed so far

    def train(self, X, y, costf=None, alpha=0.0001, n_iter=10000, epsilon=0, verbose=True):
        """Fit weights and intercept by gradient descent on the squared error.

        Parameters
        ----------
        X : 2-D ndarray, shape (n_points, n_features)
            Feature data.
        y : 1-D ndarray with ``n_points`` entries
            Outcome data.
        costf : callable ``costf(y, y_hat) -> number``, optional
            Cost used only to monitor convergence. ``None`` (the default)
            selects the module-level ``mse`` function.
        alpha : float
            Learning rate.
        n_iter : int
            Maximum number of iterations to run.
        epsilon : float
            Threshold on the cost improvement; descent stops as soon as one
            iteration improves the cost by ``epsilon`` or less.
        verbose : bool
            When true (the default), print the cost improvement after every
            iteration. Pass ``False`` to train silently.

        Returns
        -------
        str
            ``"Done"`` on success, otherwise a message describing why the
            input was rejected. Problems are reported through the return
            value, not through exceptions.
        """
        # Edge cases
        if len(X.shape) != 2:
            return "Please enter valid training data. X must be 2 dimensional ndarray."
        elif y.size < 2:
            return "Need at least 2 data points to do linear regression."
        elif X.shape[0] != y.size:
            return "Number of data points in X and y do not match."
        elif self.iter > 0:
            return "This model has already been trained. Please instantiate another model."

        if costf is None:
            # Looked up at call time. A signature default of `costf=mse` is
            # evaluated while the class body executes, which fails with a
            # NameError when `mse` is defined after the class.
            costf = mse

        point_n, feat_n = X.shape

        # Random w, b initialization
        w = np.random.rand(feat_n)
        b = np.random.rand()

        # Training model
        y_hat = np.dot(X, w) + b
        cost = costf(y, y_hat)
        cost_change = cost  # guarantees the first iteration runs for any epsilon below the initial cost
        grad_scale = -2.0 / point_n  # constant factor of the MSE gradient

        while self.iter < n_iter and cost_change > epsilon:
            # Gradient of the mean squared error w.r.t. w and b, computed for
            # all features at once instead of one column at a time.
            residual = y - y_hat
            dw = grad_scale * np.dot(X.T, residual)
            db = grad_scale * np.sum(residual)
            w -= alpha * dw
            b -= alpha * db

            y_hat = np.dot(X, w) + b
            new_cost = costf(y, y_hat)
            cost_change = cost - new_cost  # positive when the cost went down
            cost = new_cost

            self.iter += 1
            if verbose:
                print("Iteration %d: Cost has improved by %7.5f" % (self.iter, cost_change))

        # Set attributes
        self.w = w
        self.b = b
        self.alpha = alpha

        return "Done"

    def predict(self, xp):
        """Return predictions ``w . xp^T + b`` for the feature data ``xp``.

        Uses the ``w`` and ``b`` stored by ``train``; both are ``None`` on an
        untrained model.
        """
        return np.dot(self.w, xp.T) + self.b

    
# Given 2 1D ndarrays, gives Mean Squared Error of the two
def mse(x, y):
    """Return the mean squared error between two equal-length sequences.

    Parameters
    ----------
    x, y : array-like
        Values to compare element by element along the first axis.

    Returns
    -------
    number or str
        The mean of the squared differences, or a message naming both lengths
        when they differ. The mismatch is reported through the return value,
        not through an exception.
    """
    if len(x) != len(y):
        # Lengths are formatted into the message; concatenating them to a str
        # directly raises TypeError because len() returns an int.
        return "ndarrays are length %d and %d. Please provide 2 same-length ndarrays." % (len(x), len(y))
    diff = np.asarray(x) - np.asarray(y)
    # Summing along axis 0 mirrors an element-by-element sum over the first axis.
    # Python-level 1/len(x) keeps raising ZeroDivisionError for empty input.
    return (1 / len(x)) * np.sum(diff ** 2, axis=0)

In [473]:
# Create an untrained model: w, b and alpha are None and iter is 0 until train() runs.
model = LinearRegression()

In [474]:
# Synthetic data set with a fixed seed. The random draws are made in a fixed
# order (row count, column count, feature values, feature scale, target values,
# target scale) because every draw advances the shared global generator.
np.random.seed(1000)
n_rows = np.random.randint(100)
n_cols = np.random.randint(100)
unit_features = np.random.rand(n_rows, n_cols)
feature_scale = np.random.randint(100)
X = unit_features * feature_scale
unit_targets = np.random.rand(X.shape[0])
target_scale = np.random.randint(100)
y = unit_targets * target_scale
X, y

(array([[ 2.30013886, 19.00565729,  9.64382803, ..., 14.51054212,
          4.0698887 , 10.75023647],
        [ 4.73886007,  8.7298432 , 15.96049027, ...,  9.29096388,
          0.80733636,  7.47802319],
        [17.33836895,  2.06411054,  2.25022239, ..., 13.47591857,
          6.90809789, 10.17156583],
        ...,
        [ 0.27680951, 11.30334076,  5.2286387 , ..., 16.37635447,
         12.28129155,  2.3853471 ],
        [ 5.95153566, 14.9461481 ,  9.00838359, ...,  5.81430142,
         18.77535204,  7.41888007],
        [ 0.62270724, 17.83030094, 12.64149648, ..., 11.22010039,
          4.77262329,  9.27022144]]),
 array([18.33835223, 13.67572998, 10.11322881, 18.79927078,  4.87533284,
        12.4439719 ,  4.36763832, 22.93899565, 28.80684406, 20.43564227,
        21.03424004,  4.41576656, 17.85348113, 14.80939102, 11.29835073,
        22.46840714, 17.98523362,  8.84060745,  8.4362363 , 23.87509521,
        20.08257096, 19.47011733, 31.39846984,  5.11574971, 24.20866728,
        

In [475]:
# Show the dimensions of the generated feature matrix and outcome vector.
X.shape, y.shape

((51, 87), (51,))

In [476]:
# Fit the model; descent stops once an iteration improves the cost by 0.0001 or less
# (or after the default n_iter iterations). train() prints one line per iteration.
model.train(X, y, alpha=0.0001, epsilon=0.0001)

Iteration 1: Cost has improved by 74453.31626
Iteration 2: Cost has improved by 43965.96047
Iteration 3: Cost has improved by 25963.92563
Iteration 4: Cost has improved by 15334.11382
Iteration 5: Cost has improved by 9057.39015
Iteration 6: Cost has improved by 5351.04361
Iteration 7: Cost has improved by 3162.43525
Iteration 8: Cost has improved by 1870.01235
Iteration 9: Cost has improved by 1106.76592
Iteration 10: Cost has improved by 655.98769
Iteration 11: Cost has improved by 389.71751
Iteration 12: Cost has improved by 232.39842
Iteration 13: Cost has improved by 139.41605
Iteration 14: Cost has improved by 84.42670
Iteration 15: Cost has improved by 51.87498
Iteration 16: Cost has improved by 32.57572
Iteration 17: Cost has improved by 21.10516
Iteration 18: Cost has improved by 14.26055
Iteration 19: Cost has improved by 10.15061
Iteration 20: Cost has improved by 7.65844
Iteration 21: Cost has improved by 6.12440
Iteration 22: Cost has improved by 5.15885
Iteration 23: Cost

Iteration 528: Cost has improved by 0.01679
Iteration 529: Cost has improved by 0.01672
Iteration 530: Cost has improved by 0.01664
Iteration 531: Cost has improved by 0.01657
Iteration 532: Cost has improved by 0.01650
Iteration 533: Cost has improved by 0.01643
Iteration 534: Cost has improved by 0.01636
Iteration 535: Cost has improved by 0.01628
Iteration 536: Cost has improved by 0.01621
Iteration 537: Cost has improved by 0.01614
Iteration 538: Cost has improved by 0.01607
Iteration 539: Cost has improved by 0.01600
Iteration 540: Cost has improved by 0.01593
Iteration 541: Cost has improved by 0.01586
Iteration 542: Cost has improved by 0.01580
Iteration 543: Cost has improved by 0.01573
Iteration 544: Cost has improved by 0.01566
Iteration 545: Cost has improved by 0.01559
Iteration 546: Cost has improved by 0.01553
Iteration 547: Cost has improved by 0.01546
Iteration 548: Cost has improved by 0.01539
Iteration 549: Cost has improved by 0.01533
Iteration 550: Cost has improved

Iteration 1017: Cost has improved by 0.00251
Iteration 1018: Cost has improved by 0.00250
Iteration 1019: Cost has improved by 0.00249
Iteration 1020: Cost has improved by 0.00248
Iteration 1021: Cost has improved by 0.00247
Iteration 1022: Cost has improved by 0.00246
Iteration 1023: Cost has improved by 0.00245
Iteration 1024: Cost has improved by 0.00244
Iteration 1025: Cost has improved by 0.00244
Iteration 1026: Cost has improved by 0.00243
Iteration 1027: Cost has improved by 0.00242
Iteration 1028: Cost has improved by 0.00241
Iteration 1029: Cost has improved by 0.00240
Iteration 1030: Cost has improved by 0.00239
Iteration 1031: Cost has improved by 0.00238
Iteration 1032: Cost has improved by 0.00238
Iteration 1033: Cost has improved by 0.00237
Iteration 1034: Cost has improved by 0.00236
Iteration 1035: Cost has improved by 0.00235
Iteration 1036: Cost has improved by 0.00234
Iteration 1037: Cost has improved by 0.00233
Iteration 1038: Cost has improved by 0.00233
Iteration 

Iteration 1469: Cost has improved by 0.00054
Iteration 1470: Cost has improved by 0.00054
Iteration 1471: Cost has improved by 0.00054
Iteration 1472: Cost has improved by 0.00054
Iteration 1473: Cost has improved by 0.00053
Iteration 1474: Cost has improved by 0.00053
Iteration 1475: Cost has improved by 0.00053
Iteration 1476: Cost has improved by 0.00053
Iteration 1477: Cost has improved by 0.00053
Iteration 1478: Cost has improved by 0.00053
Iteration 1479: Cost has improved by 0.00052
Iteration 1480: Cost has improved by 0.00052
Iteration 1481: Cost has improved by 0.00052
Iteration 1482: Cost has improved by 0.00052
Iteration 1483: Cost has improved by 0.00052
Iteration 1484: Cost has improved by 0.00052
Iteration 1485: Cost has improved by 0.00051
Iteration 1486: Cost has improved by 0.00051
Iteration 1487: Cost has improved by 0.00051
Iteration 1488: Cost has improved by 0.00051
Iteration 1489: Cost has improved by 0.00051
Iteration 1490: Cost has improved by 0.00051
Iteration 

Iteration 1895: Cost has improved by 0.00015
Iteration 1896: Cost has improved by 0.00015
Iteration 1897: Cost has improved by 0.00015
Iteration 1898: Cost has improved by 0.00015
Iteration 1899: Cost has improved by 0.00015
Iteration 1900: Cost has improved by 0.00015
Iteration 1901: Cost has improved by 0.00015
Iteration 1902: Cost has improved by 0.00015
Iteration 1903: Cost has improved by 0.00015
Iteration 1904: Cost has improved by 0.00015
Iteration 1905: Cost has improved by 0.00015
Iteration 1906: Cost has improved by 0.00015
Iteration 1907: Cost has improved by 0.00015
Iteration 1908: Cost has improved by 0.00014
Iteration 1909: Cost has improved by 0.00014
Iteration 1910: Cost has improved by 0.00014
Iteration 1911: Cost has improved by 0.00014
Iteration 1912: Cost has improved by 0.00014
Iteration 1913: Cost has improved by 0.00014
Iteration 1914: Cost has improved by 0.00014
Iteration 1915: Cost has improved by 0.00014
Iteration 1916: Cost has improved by 0.00014
Iteration 

'Done'

In [477]:
# Inspect the fitted weight vector and intercept stored by train().
model.w, model.b

(array([-0.09883024,  0.318615  ,  0.69754353,  0.33786732,  0.15542034,
        -0.34930187, -0.00755835, -0.46600147, -0.11458905, -0.4202987 ,
         0.1741452 , -0.47236962, -0.15884616, -0.27119821,  0.13397253,
         0.30103711,  0.09370771,  0.0736861 ,  0.47576745,  0.29157071,
        -0.0733318 ,  0.13701191,  0.21669333,  0.12912002, -0.17590759,
        -0.3213477 , -0.67347347,  0.06780475, -0.02102902, -0.28385443,
         0.0073444 ,  0.12332363,  0.09393605,  0.27616947, -0.09051411,
         0.34355142,  0.22276518,  0.28216644, -0.21094582, -0.02024709,
        -0.34152947, -0.01186676, -0.03977493,  0.17013794, -0.6351028 ,
        -0.32767079, -0.5572174 , -0.23930109,  0.34382213, -0.18577427,
         0.1716506 ,  0.05418946,  0.63499739,  0.08973772, -0.2740825 ,
         0.05853937, -0.29137945,  0.00991001,  0.19463311,  0.27526177,
         0.21851492, -0.10418908,  0.19596161, -0.41048448, -0.24652567,
        -0.16427476, -0.27267669, -0.10308016,  0.3

In [478]:
# A second train() call on the same instance is rejected: model.iter is already > 0,
# so train() returns a message instead of training again.
model.train(X,y)

'This model has already been trained. Please instantiate another model.'