# Linear Regression

***Y = wX + b***

Y -> Dependent Variable

X -> Independent Variable

w -> weight (Slope)

b -> bias (Intercept)

# Gradient Descent

***w = w - a * dw***

***b = b - a * db***

a -> learning rate (step size)

dw, db -> gradients of the loss with respect to w and b

In [16]:
import numpy as np

## Building Linear Regression

In [17]:
class Linear_Regression:
    """Linear regression (Y = wX + b) trained with batch gradient descent.

    Every iteration computes the gradient of the mean squared error with
    respect to the weights and the bias over the whole training set, then
    moves both parameters one step of size ``learning_rate`` against it.

    Attributes set by ``fit``:
        m, n: number of training examples and number of features.
        w: weight vector of shape (n,).
        b: bias (intercept).
        X, Y: training data as float NumPy arrays of shape (m, n) and (m,).
    """

    def __init__(self, learning_rate, no_of_iterations):
        """Store the hyperparameters; no training happens here.

        Args:
            learning_rate: step size applied to each gradient update.
            no_of_iterations: number of gradient-descent updates run by ``fit``.
        """
        self.learning_rate = learning_rate
        self.no_of_iterations = no_of_iterations

    def fit(self, X, Y):
        """Learn ``w`` and ``b`` from the training data.

        Args:
            X: array-like of shape (m, n) holding the features.
            Y: array-like of shape (m,) holding the targets. A single-column
                (m, 1) target is accepted and flattened.

        Raises:
            ValueError: if X is not 2-D, Y is not 1-D (or a single column),
                the training set is empty, or X and Y disagree on the number
                of examples.
        """
        X = np.asarray(X, dtype=float)
        Y = np.asarray(Y, dtype=float)

        if X.ndim != 2:
            raise ValueError(
                "X must be 2-D with shape (n_samples, n_features); got shape %s"
                % (X.shape,)
            )

        # An (m, 1) target would broadcast against the (m,) predictions into an
        # (m, m) matrix and silently corrupt the gradients, so flatten it.
        if Y.ndim == 2 and Y.shape[1] == 1:
            Y = Y.ravel()
        if Y.ndim != 1:
            raise ValueError(
                "Y must be 1-D with shape (n_samples,); got shape %s" % (Y.shape,)
            )

        if X.shape[0] == 0:
            raise ValueError("cannot fit on an empty training set")
        if Y.shape[0] != X.shape[0]:
            raise ValueError(
                "X and Y have different numbers of examples: %d vs %d"
                % (X.shape[0], Y.shape[0])
            )

        # number of training examples & number of features
        self.m, self.n = X.shape

        # initiating the weight and bias
        self.w = np.zeros(self.n)
        self.b = 0
        self.X = X
        self.Y = Y

        # implementing Gradient Descent (optimization algorithm)
        for _ in range(self.no_of_iterations):
            self.update_weights()

    def update_weights(self):
        """Run one gradient-descent step on ``w`` and ``b`` using the stored training data."""
        # Residuals are shared by both gradients, so compute them once.
        residual = self.Y - self.predict(self.X)

        # calculating the Gradients of the mean squared error
        dw = -2 * self.X.T.dot(residual) / self.m
        db = -2 * np.sum(residual) / self.m

        # updating the weights
        self.w = self.w - self.learning_rate * dw
        self.b = self.b - self.learning_rate * db

    def predict(self, X):
        """Return the model output ``X.dot(w) + b`` for the given features.

        Args:
            X: features whose last dimension has ``n`` entries; must support
                ``.dot`` (for example a NumPy array).
        """
        return X.dot(self.w) + self.b

    

***Convergence*** - the point at which further training no longer improves the model (the loss stops decreasing)

## Implementing Linear Regression

In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [19]:
# Read the salary dataset from a CSV file located relative to the working directory
salary_data = pd.read_csv(filepath_or_buffer='salary_data.csv')

# Preview the first five rows
salary_data.head(n=5)

Unnamed: 0,YearsExperience,Salary
0,1.1,39343
1,1.3,46205
2,1.5,37731
3,2.0,43525
4,2.2,39891


In [20]:
# Preview the last five rows
salary_data.tail(n=5)

Unnamed: 0,YearsExperience,Salary
25,9.0,105582
26,9.5,116969
27,9.6,112635
28,10.3,122391
29,10.5,121872


In [21]:
# (number of rows, number of columns) of the loaded frame
salary_data.shape

(30, 2)

In [22]:
# Count the missing values in each column
salary_data.isna().sum()

YearsExperience    0
Salary             0
dtype: int64

In [23]:
# Features: every column except the last one (Salary); the slice keeps X two-dimensional
X = salary_data.iloc[:, :-1].to_numpy()
# Target: the column at position 1 (Salary) as a one-dimensional array
Y = salary_data.iloc[:, 1].to_numpy()

print(X, Y)

[[ 1.1]
 [ 1.3]
 [ 1.5]
 [ 2. ]
 [ 2.2]
 [ 2.9]
 [ 3. ]
 [ 3.2]
 [ 3.2]
 [ 3.7]
 [ 3.9]
 [ 4. ]
 [ 4. ]
 [ 4.1]
 [ 4.5]
 [ 4.9]
 [ 5.1]
 [ 5.3]
 [ 5.9]
 [ 6. ]
 [ 6.8]
 [ 7.1]
 [ 7.9]
 [ 8.2]
 [ 8.7]
 [ 9. ]
 [ 9.5]
 [ 9.6]
 [10.3]
 [10.5]] [ 39343  46205  37731  43525  39891  56642  60150  54445  64445  57189
  63218  55794  56957  57081  61111  67938  66029  83088  81363  93940
  91738  98273 101302 113812 109431 105582 116969 112635 122391 121872]


In [24]:
# Hold out 33% of the rows for testing; the fixed random_state makes the split reproducible
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.33,
    random_state=2,
)

## Training the Linear Regression Model

In [26]:
# Hyperparameters: gradient-descent step size and number of update iterations
model = Linear_Regression(
    learning_rate=0.02,
    no_of_iterations=1000,
)