In [243]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [244]:
# Read the placement data from placement.csv (path is relative to the
# notebook's working directory) into a DataFrame.
df = pd.read_csv('placement.csv')

In [245]:
df.head()

Unnamed: 0,cgpa,package
0,6.89,3.26
1,5.12,1.98
2,7.82,3.25
3,7.42,3.67
4,6.94,3.57


In [246]:
# Feature is the first column and target is the last column, both chosen
# by position rather than by name.
X = df.iloc[:, 0]
y = df.iloc[:, -1]

In [247]:
X.shape

(200,)

In [248]:
y.shape

(200,)

In [249]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; random_state is fixed so the
# split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

In [250]:
X_train.shape

(160,)

In [251]:
X_train

137    7.14
163    8.93
111    5.42
123    5.10
109    7.77
       ... 
43     7.66
22     6.14
72     7.78
15     7.25
168    8.65
Name: cgpa, Length: 160, dtype: float64

In [252]:
y_train

137    3.13
163    3.91
111    2.25
123    2.30
109    4.06
       ... 
43     2.76
22     2.30
72     3.62
15     3.23
168    3.52
Name: package, Length: 160, dtype: float64

In [253]:
y_train.shape

(160,)

In [254]:
class MeraLR:
    """Single-feature linear regression fitted with closed-form least squares.

    The fitted line is ``y = slope * x + intercept``. Both attributes are
    ``None`` until :meth:`fit` has been called.
    """

    def __init__(self):
        self.slope = None
        self.intercept = None

    def fit(self, X, y):
        """Estimate ``slope`` and ``intercept`` from training data.

        Parameters
        ----------
        X : array-like
            Feature values (for example a pandas Series or 1-D NumPy array).
        y : array-like
            Target values, elementwise compatible with ``X``.

        Raises
        ------
        ValueError
            If ``X`` is empty or every value of ``X`` is identical, because
            the slope would require a division by zero.
        """
        mean_x = np.mean(X)
        mean_y = np.mean(y)

        x_centered = X - mean_x
        numerator = np.sum(x_centered * (y - mean_y))
        denominator = np.sum(x_centered ** 2)
        # A zero spread in X means the slope is undefined; fail loudly
        # instead of silently storing NaN/inf coefficients.
        if denominator == 0:
            raise ValueError(
                "Cannot fit a line: X is empty or has zero variance."
            )

        self.slope = numerator / denominator
        self.intercept = mean_y - self.slope * mean_x

    def predict(self, X):
        """Return predictions from the line equation ``y = slope * X + intercept``.

        The result has the same container type and shape as ``X``.

        Raises
        ------
        RuntimeError
            If the model has not been fitted yet.
        """
        if self.slope is None or self.intercept is None:
            raise RuntimeError(
                "MeraLR is not fitted yet; call fit(X, y) before predict()."
            )
        return self.slope * X + self.intercept

    def calculate_error(self, X_test, y_true=None):
        """Compute the mean squared error and its square root.

        Parameters
        ----------
        X_test : array-like
            Feature values to predict on; 1-D or a single-column 2-D array.
        y_true : array-like, optional
            Ground-truth targets for ``X_test``, compared position by
            position. When omitted, the notebook-level variable ``y_test``
            is used so that existing ``calculate_error(X_test)`` calls keep
            working.

        Returns
        -------
        tuple
            ``(mse, rmse)``.

        Raises
        ------
        ValueError
            If predictions and targets do not have the same number of
            elements.
        """
        if y_true is None:
            # Backward-compatible fallback to the notebook global.
            y_true = y_test  # noqa: F821

        # Flatten both sides so an (n, 1) prediction is never broadcast
        # against an (n,) target into an (n, n) matrix of residuals.
        y_pred = np.asarray(self.predict(X_test), dtype=float).ravel()
        y_true = np.asarray(y_true, dtype=float).ravel()
        if y_true.shape != y_pred.shape:
            raise ValueError(
                "y_true has %d elements but predictions have %d."
                % (y_true.size, y_pred.size)
            )

        mse = np.mean((y_true - y_pred) ** 2)
        rmse = np.sqrt(mse)
        return mse, rmse
        
        
        
        

In [255]:
np.sqrt(0.12129235313495527)

np.float64(0.34827051717731616)

In [256]:
mlr = MeraLR()

In [257]:
mlr.fit(X_train,y_train)

In [258]:
mlr.calculate_error(X_test)

(np.float64(0.12129235313495527), np.float64(0.34827051717731616))

In [259]:
mlr.predict(X_test)

112    3.891116
29     3.093245
182    2.384646
199    2.574349
193    1.653729
85     1.776478
10     2.072193
54     2.931439
115    3.762787
35     2.937018
12     4.091979
92     3.511709
13     2.970495
126    2.401384
174    3.188097
2      3.467073
44     1.943864
3      3.243892
113    2.976075
14     3.416857
23     2.557611
25     3.165778
6      2.858905
134    3.121142
165    3.684674
173    2.870064
45     3.494970
65     3.344323
48     3.919014
122    1.960602
178    3.651197
64     3.210415
9      3.740469
57     2.786371
78     2.780792
71     3.271789
128    3.528447
176    2.613406
131    2.658042
53     2.713837
Name: cgpa, dtype: float64

In [260]:
from sklearn.linear_model import LinearRegression

In [262]:
lr = LinearRegression()

In [263]:
# scikit-learn expects a 2-D feature matrix, so turn the 1-D feature into a
# single column. np.asarray accepts both a Series and an already-reshaped
# array, so re-running this cell does not fail on a missing `.values`.
X_train = np.asarray(X_train).reshape(-1, 1)

In [264]:
lr.fit(X_train,y_train)

In [265]:
# Same single-column reshape for the test features. np.asarray accepts both
# a Series and an already-reshaped array, so this cell is safe to re-run.
X_test = np.asarray(X_test).reshape(-1, 1)

In [266]:
lr.predict(X_test)

array([3.89111601, 3.09324469, 2.38464568, 2.57434935, 1.6537286 ,
       1.77647803, 2.07219258, 2.93143862, 3.76278706, 2.93701814,
       4.09197872, 3.51170867, 2.97049525, 2.40138424, 3.18809652,
       3.46707251, 1.94386362, 3.24389172, 2.97607477, 3.41685683,
       2.55761079, 3.16577844, 2.85890486, 3.12114229, 3.68467378,
       2.8700639 , 3.49497011, 3.34432308, 3.91901361, 1.96060218,
       3.65119666, 3.2104146 , 3.74046898, 2.7863711 , 2.78079158,
       3.27178932, 3.52844723, 2.61340599, 2.65804215, 2.71383735])

In [267]:
mlr.predict(X_test).shape

(40, 1)

In [268]:
mlr.predict(X_test).T[0]

array([3.89111601, 3.09324469, 2.38464568, 2.57434935, 1.6537286 ,
       1.77647803, 2.07219258, 2.93143862, 3.76278706, 2.93701814,
       4.09197872, 3.51170867, 2.97049525, 2.40138424, 3.18809652,
       3.46707251, 1.94386362, 3.24389172, 2.97607477, 3.41685683,
       2.55761079, 3.16577844, 2.85890486, 3.12114229, 3.68467378,
       2.8700639 , 3.49497011, 3.34432308, 3.91901361, 1.96060218,
       3.65119666, 3.2104146 , 3.74046898, 2.7863711 , 2.78079158,
       3.27178932, 3.52844723, 2.61340599, 2.65804215, 2.71383735])

In [269]:
lr.predict(X_test)

array([3.89111601, 3.09324469, 2.38464568, 2.57434935, 1.6537286 ,
       1.77647803, 2.07219258, 2.93143862, 3.76278706, 2.93701814,
       4.09197872, 3.51170867, 2.97049525, 2.40138424, 3.18809652,
       3.46707251, 1.94386362, 3.24389172, 2.97607477, 3.41685683,
       2.55761079, 3.16577844, 2.85890486, 3.12114229, 3.68467378,
       2.8700639 , 3.49497011, 3.34432308, 3.91901361, 1.96060218,
       3.65119666, 3.2104146 , 3.74046898, 2.7863711 , 2.78079158,
       3.27178932, 3.52844723, 2.61340599, 2.65804215, 2.71383735])

In [270]:


# Side-by-side view of the hand-written model's predictions and
# scikit-learn's. The custom model returns a column vector here, so its
# single column is selected to get a 1-D array.
pd.DataFrame(
    {
        "my_pred": mlr.predict(X_test)[:, 0],
        "lr_pred": lr.predict(X_test),
    }
)


Unnamed: 0,my_pred,lr_pred
0,3.891116,3.891116
1,3.093245,3.093245
2,2.384646,2.384646
3,2.574349,2.574349
4,1.653729,1.653729
5,1.776478,1.776478
6,2.072193,2.072193
7,2.931439,2.931439
8,3.762787,3.762787
9,2.937018,2.937018
