# **Linear Regression From Scratch Using Matrices**

# Code Implementation

In [40]:
# importing libraries for reading data and matrix calculation

import pandas as pd
import numpy as np


In [41]:
# read dataset using pandas

salary = pd.read_csv('sample_data/Salary_Data.csv')

In [42]:
salary.head(10)

Unnamed: 0,YearsExperience,Salary
0,1.1,39343.0
1,1.3,46205.0
2,1.5,37731.0
3,2.0,43525.0
4,2.2,39891.0
5,2.9,56642.0
6,3.0,60150.0
7,3.2,54445.0
8,3.2,64445.0
9,3.7,57189.0


In [43]:
# check row and column size of data
salary.shape

(30, 2)

In [44]:
# set independent variable X and target/dependent variable y

X = salary[["YearsExperience"]]
y = salary[["Salary"]]

In [45]:
X.head()

Unnamed: 0,YearsExperience
0,1.1
1,1.3
2,1.5
3,2.0
4,2.2


In [46]:
y.head()

Unnamed: 0,Salary
0,39343.0
1,46205.0
2,37731.0
3,43525.0
4,39891.0


In [47]:
# set in intercept row of 1s for matrix multiplication as explained in theory
X["intercept"] = 1

In [48]:
X.head()

Unnamed: 0,YearsExperience,intercept
0,1.1,1
1,1.3,1
2,1.5,1
3,2.0,1
4,2.2,1


In [49]:
# Transpose X

X_T = X.T

In [50]:
X_T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
YearsExperience,1.1,1.3,1.5,2.0,2.2,2.9,3.0,3.2,3.2,3.7,...,6.8,7.1,7.9,8.2,8.7,9.0,9.5,9.6,10.3,10.5
intercept,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [51]:
# calculate using the formula B = (XT * X)^ -1 * XT * y using numpy
B = np.linalg.inv(X_T @ X) @ X_T @ y

In [52]:
B

Unnamed: 0,Salary
0,9449.962321
1,25792.200199


In [53]:
# index B columns with X columns

B.index = X.columns

In [54]:
B

Unnamed: 0,Salary
YearsExperience,9449.962321
intercept,25792.200199


In [55]:
# calculate predictions from X using B

predictions = X @ B

In [56]:
predictions.head()

Unnamed: 0,Salary
0,36187.158752
1,38077.151217
2,39967.143681
3,44692.124842
4,46582.117306


# Evaluation

In [57]:
#Calculate predictions using Sum of Squares from Regression

SSR = ((y - predictions) ** 2).sum()

In [58]:
SSR

Salary    9.381286e+08
dtype: float64

In [59]:
#Calculate predictions using Sum of Squares tota

SST = (( y - y.mean()) ** 2).sum()

In [60]:

SST

Salary    2.179498e+10
dtype: float64

In [61]:
# Calculate R Square
R2 = 1 - (SSR / SST)

In [62]:
# The closer R2 is to 1 the more data points in the dataset fall on the regression line.
R2

Salary    0.956957
dtype: float64

In [63]:
# Comparing with linear regression from the Scikit library
from sklearn.linear_model import LinearRegression

In [64]:
lr = LinearRegression()

In [65]:
lr.fit(X,y)

In [66]:
# The intercept is equal to the value derived from the matrix calculation for the salary dataset
lr.intercept_

array([25792.20019867])

In [67]:
# The coefficient is equal to the value derived from the matrix calculation for the salary dataset
lr.coef_

array([[9449.96232146,    0.        ]])