In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [2]:
class ThreeD_LinReg:
    """Least-squares regression of a target on exactly two features.

    Fits ``y = B0 + B1*x1 + B2*x2`` in closed form by solving the 2x2 normal
    equations of the mean-centred data. Only the first two columns of the
    feature input are used.

    Inputs may be pandas objects or array-likes. All fitting and scoring
    arithmetic is done on NumPy arrays, so rows are matched by position and
    never by pandas index label.
    """

    def __init__(self):
        # Intercept and the two slopes; they stay None until ``fit`` has run.
        self.B0 = None
        self.B1 = None
        self.B2 = None

    @staticmethod
    def _first_two_columns(x):
        """Return the first two feature columns of ``x``.

        pandas objects are sliced positionally with ``iloc`` (so they keep
        their index); anything else is converted to a NumPy array first.
        """
        if hasattr(x, "iloc"):
            return x.iloc[:, 0], x.iloc[:, 1]
        arr = np.asarray(x)
        return arr[:, 0], arr[:, 1]

    def fit(self, x_train, y_train):
        """Estimate the intercept and both slopes from training data.

        Parameters
        ----------
        x_train : DataFrame or 2-D array-like
            Feature matrix; column 0 is x1 and column 1 is x2.
        y_train : Series or 1-D array-like
            Target values, one per row of ``x_train``.

        Returns
        -------
        tuple
            ``(B0, B1, B2)``; the same values are stored on the instance.

        Raises
        ------
        ValueError
            If the inputs are empty, have different lengths, or the design
            is degenerate (first feature constant, or the denominator of
            the B2 solution is exactly zero, e.g. collinear features).
        """
        col1, col2 = self._first_two_columns(x_train)
        # Plain float arrays: pandas would otherwise align operands on index
        # labels and fill non-matching rows with NaN.
        x1 = np.asarray(col1, dtype=float)
        x2 = np.asarray(col2, dtype=float)
        y = np.asarray(y_train, dtype=float).ravel()

        if x1.size == 0:
            raise ValueError("x_train must contain at least one row")
        if y.shape[0] != x1.shape[0]:
            raise ValueError(
                f"x_train has {x1.shape[0]} rows but y_train has {y.shape[0]} values"
            )

        x1_mean = x1.mean()
        x2_mean = x2.mean()
        y_mean = y.mean()

        # Centre every variable so the intercept drops out of the normal equations.
        x1_c = x1 - x1_mean
        x2_c = x2 - x2_mean
        y_c = y - y_mean

        # Sums of squares and cross-products of the centred data.
        s11 = np.dot(x1_c, x1_c)
        s12 = np.dot(x1_c, x2_c)
        s22 = np.dot(x2_c, x2_c)
        s1y = np.dot(x1_c, y_c)
        s2y = np.dot(x2_c, y_c)

        if s11 == 0:
            raise ValueError("first feature is constant; coefficients are not identifiable")

        # Eliminate B1 from the second normal equation using the first one.
        p = s1y / s11
        q = s12 / s11
        denominator = s22 - q * s12
        if denominator == 0:
            raise ValueError("features are collinear; coefficients are not identifiable")

        self.B2 = (s2y - p * s12) / denominator
        self.B1 = p - self.B2 * q
        self.B0 = y_mean - self.B1 * x1_mean - self.B2 * x2_mean
        return self.B0, self.B1, self.B2

    def predict(self, x_test):
        """Evaluate the fitted plane on new feature rows.

        Parameters
        ----------
        x_test : DataFrame or 2-D array-like
            Feature matrix with the same column order used in ``fit``.

        Returns
        -------
        Series or ndarray
            One prediction per row. For DataFrame input the result is a
            Series carrying the index of ``x_test``.
        """
        col1, col2 = self._first_two_columns(x_test)
        return self.B0 + (self.B1 * col1) + (self.B2 * col2)

    def r2_score(self, y_test, y_pred):
        """Coefficient of determination, ``1 - SS_res / SS_tot``.

        Values are compared by position, so ``y_test`` and ``y_pred`` may be
        pandas objects with different indexes or plain arrays.

        Parameters
        ----------
        y_test : Series or 1-D array-like
            Observed target values.
        y_pred : Series or 1-D array-like
            Predicted values, in the same order as ``y_test``.

        Returns
        -------
        float
            The R^2 score. If ``y_test`` is constant, ``SS_tot`` is zero and
            the result is not finite.

        Raises
        ------
        ValueError
            If the two inputs do not have the same number of values.
        """
        actual = np.asarray(y_test, dtype=float).ravel()
        predicted = np.asarray(y_pred, dtype=float).ravel()
        if actual.shape != predicted.shape:
            raise ValueError(
                f"y_test has {actual.shape[0]} values but y_pred has {predicted.shape[0]}"
            )

        ss_res = np.sum((actual - predicted) ** 2)
        ss_tot = np.sum((actual - actual.mean()) ** 2)
        return 1 - ss_res / ss_tot

In [3]:
# Load the dataset; the relative path is resolved against the current working directory.
df = pd.read_csv("LinRegTest.csv")

In [4]:
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0,cgpa,iq,salary
0,3.55,63,5.24
1,8.98,115,10.96
2,1.19,61,2.77
3,7.42,103,11.2
4,5.47,63,8.52


In [5]:
# Every column except the last is a feature; the last column is the target.
x = df.iloc[:, :-1]
y = df.iloc[:, -1]

In [6]:
# 80% of the rows go to training; the fixed random_state keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.8,
    random_state=5,
)

In [7]:
# Instantiate the hand-written two-feature regressor.
my_LR = ThreeD_LinReg()

# fit returns the intercept and the two slopes; they are also kept on the instance.
B0, B1, B2 = my_LR.fit(x_train, y_train)

In [8]:
# Raw fitted coefficients.
print(B0)
print(B1)
print(B2)

# Fitted model: first symbolically, then with coefficients rounded to two decimals.
# The first line has no placeholders, so it is a plain string rather than an f-string.
print("Linear Regression Equation:    Y = B0      + B1.X1      + B2.X2")
print(f"Linear Regression Equation:    Y = ({round(B0,2)}) + ({round(B1,2)})cgpa + ({round(B2,2)})iq")

-5.222991911538888
0.7653961826941007
0.12215441259061671
Linear Regression Equation:    Y = B0      + B1.X1      + B2.X2
Linear Regression Equation:    Y = (-5.22) + (0.77)cgpa + (0.12)iq


In [9]:
# Check the size of the held-out set: (rows, feature columns).
x_test.shape

(20, 2)

In [10]:
# Predictions of the hand-written model on the held-out rows.
y_pred = my_LR.predict(x_test)

In [11]:
# Score the held-out predictions with the class's own R^2 implementation.
r2 = my_LR.r2_score(y_test, y_pred)
print(r2)

0.8582873008573078


In [12]:
# Reference model: scikit-learn's LinearRegression, fitted on the same training split for comparison.

In [13]:
# Fit scikit-learn's estimator on the same training split to serve as the reference.
LR = LinearRegression()
LR.fit(x_train, y_train)

In [14]:
# Predictions of the scikit-learn model on the same held-out rows.
y_pred1 = LR.predict(x_test)

In [15]:
# Score both sets of predictions with scikit-learn's metric so the two numbers are directly comparable.
r2_myLR = r2_score(y_test, y_pred)
r2_ = r2_score(y_test, y_pred1)
print(f"R2 Score using my LR: {r2_myLR}")
print(f"R2 Score using my orig LR: {r2_}")

R2 Score using my LR: 0.8582873008573079
R2 Score using my orig LR: 0.8582873008573078


In [16]:
# Empty frame that the next cell fills with the two models' predictions.
df1 = pd.DataFrame()

In [17]:
# Side-by-side predictions: 'diff' is the absolute gap and 'scale' the ratio between the two models.
df1["myLR"] = y_pred
df1["origLR"] = y_pred1
df1["diff"] = df1["myLR"].sub(df1["origLR"])
df1["scale"] = df1["myLR"].div(df1["origLR"])
df1

Unnamed: 0,myLR,origLR,diff,scale
66,4.800776,4.800776,8.881784e-16,1.0
32,11.10479,11.10479,1.776357e-15,1.0
46,0.925161,0.925161,-1.776357e-15,1.0
28,7.382563,7.382563,-8.881784e-16,1.0
74,7.052825,7.052825,-1.776357e-15,1.0
23,8.722006,8.722006,3.552714e-15,1.0
10,6.480322,6.480322,-1.776357e-15,1.0
20,5.509814,5.509814,-8.881784e-16,1.0
17,8.008334,8.008334,-1.776357e-15,1.0
35,6.555935,6.555935,-3.552714e-15,1.0
