In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

#### using the formula and acquiring the weights
$\beta = (X^{T}X)^{-1}X^{T}Y$

In [2]:
def LinearRegression1(X, y):
    """Fit ordinary least squares with an intercept (closed-form solution).

    A column of ones is appended as the LAST column of ``X``, so the last
    entry of the returned coefficients is the intercept.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features) or (n_samples,)
        Feature matrix. A 1-D input is treated as a single feature column.
    y : array-like of shape (n_samples,) or (n_samples, k)
        Target values.

    Returns
    -------
    beta : ndarray of shape (n_features + 1,) or (n_features + 1, k)
        Least-squares coefficients; the last row is the intercept.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim == 1:
        X = X.reshape(-1, 1)
    y = np.asarray(y)
    X = np.concatenate((X, np.ones((X.shape[0], 1))), axis=1)
    # lstsq minimises ||X @ beta - y|| directly. For full-rank X this equals
    # (X^T X)^{-1} X^T y, but it avoids forming an explicit inverse and still
    # returns a (minimum-norm) solution when columns are collinear, where
    # np.linalg.inv would raise or produce numerically meaningless values.
    beta, *_ = np.linalg.lstsq(X, y, rcond=None)
    return beta

#### Using gradient descent: updating the weights gradually along the negative gradient of the mean squared error
$\hat{w} = w - \frac{2}{n}\,lr\,X^{T}(\hat{Y}-Y)$, where $n$ is the number of samples

In [3]:
class LinearRegression2():
    """Linear regression with an intercept, fitted by batch gradient descent.

    Attributes
    ----------
    w : ndarray of shape (n_features + 1, 1) or None
        Learned weights. The bias column of ones is appended after the
        features, so the last entry is the intercept. ``None`` until ``fit``
        has been called.
    cost_ : list of float
        One entry per completed update: ``(2 / n) * sum(residual ** 2)``
        (twice the mean squared error), measured with the weights that were
        in effect before that update.
    """

    def __init__(self):
        # Define the fitted attributes up front so they always exist.
        self.w = None
        self.cost_ = []
        print("Obj created")

    @staticmethod
    def _as_2d(X):
        """Return X as a float 2-D array; a 1-D input becomes one feature column."""
        X = np.asarray(X, dtype=float)
        return X.reshape(-1, 1) if X.ndim == 1 else X

    @staticmethod
    def _add_bias(X):
        """Append a trailing column of ones so the last weight acts as the intercept."""
        return np.concatenate((X, np.ones((X.shape[0], 1))), axis=1)

    def fit(self, X, y, lr=0.005, iters=5, random_state=None):
        """Run batch gradient descent on the squared error.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features) or (n_samples,)
            Features WITHOUT a bias column; one is always appended here.
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Targets. Always reshaped to a column vector.
        lr : float, default 0.005
            Learning rate.
        iters : int, default 5
            Maximum number of updates.
        random_state : int or None, default None
            Seed for the weight initialisation. ``None`` draws from NumPy's
            global random state (the previous behaviour).

        Raises
        ------
        ValueError
            If ``X`` is empty or ``X`` and ``y`` disagree on the sample count.
        """
        X = self._add_bias(self._as_2d(X))
        # Force a column vector: with a 1-D y, (n, 1) - (n,) would broadcast
        # to an (n, n) residual and silently corrupt the weights.
        y = np.asarray(y, dtype=float).reshape(-1, 1)
        n_samples = X.shape[0]
        if n_samples == 0:
            raise ValueError("X must contain at least one sample")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} values"
            )

        self.cost_ = []
        # Initialise the weights (including the intercept) with random values.
        if random_state is None:
            self.w = np.random.randn(X.shape[1], 1)
        else:
            rng = np.random.default_rng(random_state)
            self.w = rng.standard_normal((X.shape[1], 1))

        for i in range(iters):
            residual = X.dot(self.w) - y
            new = self.w - (2 / n_samples) * lr * X.T.dot(residual)  # gradient step
            # Stop once the total absolute change of the weights is negligible.
            if np.sum(np.abs(new - self.w)) < 1e-5:
                print(f"Aquired the best possible parameters required for fit at {i}")
                break
            self.w = new
            self.cost_.append(np.sum(residual ** 2) * (2 / n_samples))

    def predict(self, x):
        """Return predictions of shape (n_samples, 1) for ``x``.

        ``x`` may be given with or without the trailing bias column; the
        column of ones is appended only when the column count does not
        already match the number of weights.

        Raises
        ------
        AttributeError
            If the model has not been fitted yet.
        """
        if getattr(self, "w", None) is None:
            raise AttributeError("model is not fitted yet; call fit() before predict()")
        x = self._as_2d(x)
        if x.shape[1] != self.w.shape[0]:
            x = self._add_bias(x)
        return x.dot(self.w)

#### Checking the above regression classes
* with some random data for X
* with a proper dataset

<b>Using Formula</b>

In [4]:
# Seed NumPy's global generator so the synthetic data below is the same on
# every "Restart & Run All".
np.random.seed(42)
# Ground truth: slope 3, intercept 2, plus standard-normal noise.
X = np.random.randn(100)
y = 2 + 3 * X + np.random.randn(100)

In [5]:
# Turn both sample vectors into (n, 1) columns, then solve for the
# coefficients with the closed-form function.
X, y = (arr.reshape(-1, 1) for arr in (X, y))
beta = LinearRegression1(X, y)

In [6]:
beta

array([[3.0016774 ],
       [2.00217049]])

<b>Using Gradient Descent</b>

In [7]:
# Instantiate the gradient-descent model; the constructor prints a confirmation message.
lin = LinearRegression2()

Obj created


As we increase the learning rate, the number of training epochs needed to converge goes down<br>
which means that, roughly, $epochs \propto \frac{1}{lr}$

In [8]:
# Smallest learning rate of the sweep; fit() prints the iteration index at
# which the weight update dropped below its stopping tolerance.
lin.fit(X, y, lr=0.001, iters=10000)

Aquired the best possible parameters required for fit at 4196


In [9]:
lin.w

array([[2.99687948],
       [2.00075977]])

In [10]:
# Same data, 10x larger learning rate; fit() re-initialises the weights on every call.
lin.fit(X, y, lr=0.01, iters=10000)

Aquired the best possible parameters required for fit at 563


In [11]:
lin.w

array([[3.00117787],
       [2.00204943]])

In [12]:
# Largest learning rate of the sweep, same iteration cap as the two runs above.
lin.fit(X, y, lr=0.1, iters=10000)

Aquired the best possible parameters required for fit at 62


In [13]:
lin.w

array([[3.00162745],
       [2.00215821]])

As we can see, the number of iterations needed to converge drops drastically as the learning rate increases.

In [14]:
lin.cost_[-1]

2.244159696142243

#### Now on the real dataset

In [15]:
# Load the salary dataset; the path is relative to the notebook's working directory.
data = pd.read_csv("data/Salary_Data.csv")

In [16]:
data.head()

Unnamed: 0,YearsExperience,Salary
0,1.1,39343.0
1,1.3,46205.0
2,1.5,37731.0
3,2.0,43525.0
4,2.2,39891.0


In [17]:
# Column 0 becomes the feature X and column 1 the target y, each as an (n, 1) column vector.
X, y = (np.array(data.iloc[:, col]).reshape(-1, 1) for col in (0, 1))

<b>Using Formula</b>

In [18]:
# Closed-form fit on the salary data; the intercept is the last coefficient
# because LinearRegression1 appends the ones column after the features.
betaD = LinearRegression1(X, y)

In [19]:
betaD

array([[ 9449.96232146],
       [25792.20019867]])

<b>Using the gradient Descent method</b>

In [20]:
# Separate gradient-descent model instance for the salary data.
linD = LinearRegression2()

Obj created


In [22]:
# Gradient-descent fit on the salary data; fit() stops early and prints the
# iteration index once the weight update falls below its tolerance.
linD.fit(X, y, lr=0.005, iters=10000)

Aquired the best possible parameters required for fit at 7355


In [23]:
linD.w

array([[ 9449.96293162],
       [25792.1960869 ]])

In [24]:
linD.cost_[-1]

62541903.44456923