In [104]:
import pandas as pd
import numpy as np

In [105]:
# Read the salary dataset from the working directory and preview its first rows.
data = pd.read_csv(filepath_or_buffer="Salary_Data.csv")
data.head(n=5)

Unnamed: 0,YearsExperience,Salary
0,1.1,39343.0
1,1.3,46205.0
2,1.5,37731.0
3,2.0,43525.0
4,2.2,39891.0


The transpose of a matrix multiplied by the matrix itself is a square matrix: $X^{T}X = \text{Square Matrix}$

The inverse of a matrix multiplied by the matrix itself is the identity matrix: $(X^{T}X)^{-1}(X^{T}X) = \text{Identity Matrix}$

Solve for B which is a matrix of Coefficients

$y = XB + e$

$y = XB$ (set $e$ to zero to minimize it)

$X^{T}y = X^{T}XB$

$((X^{T}X)^{-1})X^{T}y = (X^{T}X)^{-1}(X^{T}X)B $

$B = (X^{T}X)^{-1}X^{T}y$

In [106]:
# Split the dataset into a one-column feature frame and a one-column target frame.
# Both are independent copies, so later edits to X or Y do not touch `data`.
X = data.loc[:, ["YearsExperience"]].copy()
Y = data.loc[:, ["Salary"]].copy()

In [107]:
# Preview the first five rows of the feature frame.
X.head(n=5)

Unnamed: 0,YearsExperience
0,1.1
1,1.3
2,1.5
3,2.0
4,2.2


In [108]:
# Preview the first five rows of the target frame.
Y.head(n=5)

Unnamed: 0,Salary
0,39343.0
1,46205.0
2,37731.0
3,43525.0
4,39891.0


In [109]:
# Add a constant column of ones so the fitted model includes an intercept term,
# and place it ahead of the feature column.
X = X.assign(Intercept=1)[["Intercept", "YearsExperience"]]

In [110]:
# Preview the design matrix (intercept column first) instead of rendering every row.
X.head(10)

Unnamed: 0,Intercept,YearsExperience
0,1,1.1
1,1,1.3
2,1,1.5
3,1,2.0
4,1,2.2
5,1,2.9
6,1,3.0
7,1,3.2
8,1,3.2
9,1,3.7


In [111]:
# Transposed design matrix: one row per column of X, one column per observation.
X_T = X.transpose()
X_T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
Intercept,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
YearsExperience,1.1,1.3,1.5,2.0,2.2,2.9,3.0,3.2,3.2,3.7,...,6.8,7.1,7.9,8.2,8.7,9.0,9.5,9.6,10.3,10.5


In [112]:
# Solve the normal equations (X^T X) B = X^T y for B directly instead of
# forming the explicit inverse of X^T X: a linear solve is cheaper and
# numerically more stable than inv(...) followed by matrix products.
# .to_numpy() strips the pandas labels so the solver receives plain arrays.
# The result is wrapped back into a DataFrame with a default integer row
# index and Y's column name(s).
B = pd.DataFrame(
    np.linalg.solve((X_T @ X).to_numpy(), (X_T @ Y).to_numpy()),
    columns=Y.columns,
)
B

Unnamed: 0,Salary
0,25792.200199
1,9449.962321


In [113]:
# Label each coefficient row with the design-matrix column it belongs to.
B = B.rename(index=dict(zip(B.index, X.columns)))
B

Unnamed: 0,Salary
Intercept,25792.200199
YearsExperience,9449.962321


In [114]:
# Fitted values: design matrix times the coefficient column.
# pandas aligns X's columns with B's index labels for the product.
predictions = X.dot(B)

In [115]:
# Preview the fitted salaries instead of rendering the whole frame.
predictions.head(10)

Unnamed: 0,Salary
0,36187.158752
1,38077.151217
2,39967.143681
3,44692.124842
4,46582.117306
5,53197.090931
6,54142.087163
7,56032.079627
8,56032.079627
9,60757.060788


$SSR = \sum (y - \hat{y})^2$ (sum of squared residuals)

$SST = \sum (y - \bar{y})^2$ (total sum of squares)

$r^2 = 1 - (SSR / SST)$

In [116]:
# Sum of squared differences between observed and fitted values, per column.
SSR = Y.sub(predictions).pow(2).sum()

In [117]:
# Show the residual sum of squares (a Series with one entry per target column).
SSR

Salary    9.381286e+08
dtype: float64

In [118]:
# Sum of squared deviations of the observed values from their column mean.
SST = Y.sub(Y.mean()).pow(2).sum()

In [119]:
# Show the total sum of squares (a Series with one entry per target column).
SST

Salary    2.179498e+10
dtype: float64

In [120]:
# Coefficient of determination: one minus the ratio of residual to total sum of squares.
R2 = 1 - SSR.div(SST)

In [121]:
# Show the R-squared value (a Series with one entry per target column).
R2

Salary    0.956957
dtype: float64

In [122]:
# Final report of the fit statistics and the coefficient table.
# SSR, SST and R2 are one-element Series (indexed by the target column), so
# interpolating them directly prints a multi-line repr with a dtype footer.
# Pull out the scalar with .iloc[0] so each metric prints on a single line.
print(f"SSR: {SSR.iloc[0]:,.2f}")

print(f"SST: {SST.iloc[0]:,.2f}")

print(f"R2: {R2.iloc[0]:.6f}")

print(B)


SSR: Salary    9.381286e+08
dtype: float64
SST: Salary    2.179498e+10
dtype: float64
R2: Salary    0.956957
dtype: float64
                       Salary
Intercept        25792.200199
YearsExperience   9449.962321
