In [1]:
import numpy as np
import pandas as pd

In [2]:
data = pd.read_csv('Student_Performance.csv')
data

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced,Performance Index
0,7,99,Yes,9,1,91.0
1,4,82,No,4,2,65.0
2,8,51,Yes,7,2,45.0
3,5,52,Yes,5,2,36.0
4,7,75,No,8,5,66.0
...,...,...,...,...,...,...
9995,1,49,Yes,4,2,23.0
9996,7,64,Yes,8,5,58.0
9997,6,83,Yes,8,5,74.0
9998,9,97,Yes,7,0,95.0


In [3]:
data = data.dropna()
data['Extracurricular Activities'] = data['Extracurricular Activities'].map({'Yes': 1, 'No': 0})
data

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced,Performance Index
0,7,99,1,9,1,91.0
1,4,82,0,4,2,65.0
2,8,51,1,7,2,45.0
3,5,52,1,5,2,36.0
4,7,75,0,8,5,66.0
...,...,...,...,...,...,...
9995,1,49,1,4,2,23.0
9996,7,64,1,8,5,58.0
9997,6,83,1,8,5,74.0
9998,9,97,1,7,0,95.0


In [4]:
train_data = data.sample(frac=0.9,random_state=42)
test_data  = data.drop(train_data.index)
print(train_data.shape,test_data.shape)


(9000, 6) (1000, 6)


In [5]:
X_train = train_data.iloc[:,0:5].to_numpy()
Y_train = train_data.iloc[:,5].to_numpy()
X_test = test_data.iloc[:,0:5].to_numpy()
Y_test = test_data.iloc[:,5].to_numpy()
print(X_train.shape,Y_train.shape,X_test.shape,Y_test.shape)

(9000, 5) (9000,) (1000, 5) (1000,)


In [6]:
class Linear_Regression:
    def __init__(self,X):
        self.w = np.random.rand(X.shape[1])
        self.b = np.random.rand()
    
    def cost(self,X,Y):
        return np.mean((X.dot(self.w)+self.b-Y)**2)/2
    def gradient(self,X,Y):
        
        dw = np.matmul(X.T,X.dot(self.w)+self.b-Y)/X.shape[0]
        
        db = np.mean((X.dot(self.w)+self.b-Y))
        return dw,db
    def update_weights(self,dw,db,lr):
        self.w = self.w - lr*dw
        self.b = self.b - lr*db
    def z_score(self,X):
        X_mean = X.mean()
        X_std = X.std()
        return (X-X.mean())/X.std(),X_mean,X_std
    def fit(self,X_train,Y_train,lr,epochs):
        X_train_scaled,_,__ = self.z_score(X_train)
        for i in range(epochs):
            dw,db = self.gradient(X_train_scaled,Y_train)
            self.update_weights(dw,db,lr)
            if ((i+1) %100 ==0): print('Epoch:',i+1,'Cost:',self.cost(X_train_scaled,Y_train),"weights:",self.w,"bias:",self.b)
    def predict(self,X_test,X_train):
        _,X_mean,X_std = self.z_score(X_train)
        X_test_scaled = (X_test-X_mean)/X_std
        return X_test_scaled.dot(self.w)+self.b
    def rsme_loss(self,X_test,Y_test):
        Y_pred = self.predict(X_test,X_train)
        return np.sqrt(np.mean((Y_test-Y_pred)**2))


    

In [10]:
model = Linear_Regression(X_train)
model.fit(X_train,Y_train,0.335,2500)
print('RSME Loss:',model.rsme_loss(X_test,Y_test))

Epoch: 100 Cost: 18.933334861449225 weights: [18.85799951 28.18947566 -2.80056474 -0.31092677  0.12396841] bias: 7.192195926551384
Epoch: 200 Cost: 12.153900595394099 weights: [32.51606559 28.41965232 -5.47018628 -0.46689239  0.19998069] bias: 11.947175114098828
Epoch: 300 Cost: 8.30424383452508 weights: [42.96635411 28.33080447 -7.5521651  -0.267584    0.5463501 ] bias: 15.778910675330724
Epoch: 400 Cost: 6.001894913025488 weights: [50.96464478 28.25503018 -9.1823584   0.16736342  1.01567169] bias: 18.901821371103008
Epoch: 500 Cost: 4.613001416157959 weights: [ 57.09425814  28.19331898 -10.4521797    0.75698142   1.52505676] bias: 21.459026949384423
Epoch: 600 Cost: 3.7665130360701182 weights: [ 61.79827483  28.14309282 -11.43410285   1.44098454   2.02612019] bias: 23.56396985758049
Epoch: 700 Cost: 3.2442148668185657 weights: [ 65.4134633   28.10218591 -12.18581769   2.17488979   2.49249246] bias: 25.306933357654405
Epoch: 800 Cost: 2.9171578743820676 weights: [ 68.19607363  28.0688