In [46]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [47]:
# Load the student-performance dataset; the path is relative, so the CSV must
# sit in the notebook's working directory.
df = pd.read_csv('student-performance.csv')

In [48]:
# Preview the first rows to confirm the columns were parsed as expected.
df.head()

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced,Performance Index
0,7,99,1,9,1,91
1,4,82,0,4,2,65
2,8,51,1,7,2,45
3,5,52,1,5,2,36
4,7,75,0,8,5,66


In [49]:
# Per-column summary statistics (count, mean, std, min, quartiles, max).
df.describe()

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced,Performance Index
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,4.9929,69.4457,0.4948,6.5306,4.5833,55.2248
std,2.589309,17.343152,0.499998,1.695863,2.867348,19.212558
min,1.0,40.0,0.0,4.0,0.0,10.0
25%,3.0,54.0,0.0,5.0,2.0,40.0
50%,5.0,69.0,0.0,7.0,5.0,55.0
75%,7.0,85.0,1.0,8.0,7.0,71.0
max,9.0,99.0,1.0,9.0,9.0,100.0


In [50]:
# Positional split: every column except the last is a feature, and the last
# column is the regression target. `.to_numpy()` is the accessor the pandas
# documentation recommends over `.values` for getting a NumPy array.
x = df.iloc[:, :-1].to_numpy()
y = df.iloc[:, -1].to_numpy()

In [51]:
def normalize_features(x):
    """Min-max scale each feature column using that column's own min and max.

    Parameters
    ----------
    x : array-like of shape (n_samples, n_features)
        Must support ``.min(axis=0)`` / ``.max(axis=0)`` (e.g. a NumPy array).

    Returns
    -------
    norm_x : scaled features; each non-constant column spans [0, 1].
    min_vals : per-column minimum, needed to scale new samples identically.
    max_vals : per-column maximum, needed to scale new samples identically.
    """
    min_vals = x.min(axis=0)
    max_vals = x.max(axis=0)
    span = max_vals - min_vals
    # A constant column has zero range; dividing by it would produce NaN/inf
    # that silently poisons training. Divide by 1 instead so the column
    # becomes all zeros (x - min is already 0 everywhere in that column).
    safe_span = np.where(span == 0, 1, span)
    norm_x = (x - min_vals) / safe_span
    return norm_x, min_vals, max_vals

def normalize_target(y):
    """Min-max scale the target vector using its own min and max.

    Parameters
    ----------
    y : array-like of target values.

    Returns
    -------
    norm_y : scaled target; spans [0, 1] unless ``y`` is constant.
    y_min : minimum of ``y``, needed to map predictions back.
    y_max : maximum of ``y``, needed to map predictions back.
    """
    y_min = np.min(y)
    y_max = np.max(y)
    span = y_max - y_min
    # A constant target has zero range; fall back to 1 so the result is all
    # zeros instead of NaN. De-normalizing with (y_max - y_min) and y_min
    # still recovers the original constant.
    if span == 0:
        span = 1
    norm_y = (y - y_min) / span
    return norm_y, y_min, y_max

# Scale features and target. The min/max values are kept so that new samples
# can be scaled the same way and predictions mapped back to the original units.
x_norm, x_min, x_max = normalize_features(x)
y_norm, y_min, y_max = normalize_target(y)

In [52]:
# Prepend a column of ones so the first weight acts as the intercept term.
x_b = np.hstack((np.ones((len(x_norm), 1)), x_norm))

In [53]:
class MultiLinearRegressionGD:
    """Multiple linear regression trained with full-batch gradient descent.

    The model is ``y_pred = x . theta``. No intercept is added internally, so
    callers that want a bias term must include a column of ones in ``x``.

    Parameters
    ----------
    learning_rate : step size applied to the gradient on every epoch.
    max_epochs : upper bound on the number of gradient steps.
    tolerance : training stops early once the loss changes by less than this
        amount between two consecutive epochs.

    Attributes
    ----------
    theta : 1-D weight vector of length ``n_features``; created by ``fit``.
    """

    def __init__(self, learning_rate=0.01, max_epochs=10000, tolerance=1e-6):
        self.learning_rate = learning_rate
        self.max_epochs = max_epochs
        self.tolerance = tolerance

    def fit(self, x, y):
        """Fit ``theta`` by gradient descent, starting from all zeros.

        Parameters
        ----------
        x : array-like of shape (n_samples, n_features).
        y : array-like with n_samples values; a column vector of shape
            (n_samples, 1) is accepted and flattened.

        Raises
        ------
        ValueError
            If ``x`` is not 2-D or ``y`` does not hold one value per row of ``x``.
        """
        x = np.asarray(x, dtype=float)
        # Flatten y: a (m, 1) target would otherwise broadcast against the
        # (m,) prediction into an (m, m) error matrix and break the update.
        y = np.asarray(y, dtype=float).ravel()
        if x.ndim != 2:
            raise ValueError(f"x must be 2-D (n_samples, n_features), got shape {x.shape}")
        m, n = x.shape
        if y.shape[0] != m:
            raise ValueError(f"y has {y.shape[0]} values but x has {m} rows")

        self.theta = np.zeros(n)
        prev_mse = float('inf')

        for epoch in range(self.max_epochs):
            y_pred = x.dot(self.theta)
            error = y_pred - y
            gradient = (1/m) * x.T.dot(error)
            self.theta -= self.learning_rate * gradient

            # Loss of the weights *before* this epoch's update. Note this is
            # half the mean squared error (the 1/(2m) convention), although
            # it is logged under the label "MSE".
            mse = (1/(2*m)) * np.sum(error**2)

            if epoch % 100 == 0:
                print(f"Epoch {epoch}, MSE: {mse:.6f}")

            if abs(prev_mse - mse) < self.tolerance:
                print(f"Early stopping at epoch {epoch}")
                break

            prev_mse = mse

    def predict(self, x):
        """Return ``x . theta`` for a single sample (1-D) or a batch (2-D).

        ``x`` must be laid out exactly like the training matrix, including any
        bias column.

        Raises
        ------
        AttributeError
            If called before ``fit``.
        """
        if not hasattr(self, "theta"):
            raise AttributeError("model is not fitted yet; call fit() before predict()")
        return x.dot(self.theta)

In [54]:
# Train with the default hyperparameters (learning_rate=0.01, max_epochs=10000,
# tolerance=1e-6) on the bias-augmented normalized features and normalized target.
model = MultiLinearRegressionGD()
model.fit(x_b, y_norm)

Epoch 0, MSE: 0.149035
Epoch 100, MSE: 0.015052
Epoch 200, MSE: 0.011551
Epoch 300, MSE: 0.009673
Epoch 400, MSE: 0.008164
Epoch 500, MSE: 0.006932
Epoch 600, MSE: 0.005918
Epoch 700, MSE: 0.005077
Epoch 800, MSE: 0.004376
Epoch 900, MSE: 0.003788
Epoch 1000, MSE: 0.003293
Epoch 1100, MSE: 0.002874
Epoch 1200, MSE: 0.002519
Epoch 1300, MSE: 0.002217
Epoch 1400, MSE: 0.001960
Epoch 1500, MSE: 0.001740
Epoch 1600, MSE: 0.001551
Epoch 1700, MSE: 0.001389
Epoch 1800, MSE: 0.001249
Epoch 1900, MSE: 0.001128
Early stopping at epoch 1981


In [55]:
# theta[0] is the bias weight (x_b has a leading column of ones); the remaining
# weights follow the feature column order. All weights are on the normalized scale.
print("Final theta : ", model.theta)

Final theta :  [ 0.11534573  0.21262401  0.54261867 -0.00097958  0.01696971  0.01257947]


In [None]:
# Score a single student. Dedicated names are used so the training arrays
# (x, y, x_norm, x_b) are not overwritten and earlier cells stay safe to re-run.
# Feature order must match the training columns: Hours Studied, Previous Scores,
# Extracurricular Activities, Sleep Hours, Sample Question Papers Practiced.
sample = np.array([7, 99, 1, 9, 1])

# Apply the same min-max scaling that was computed from the training features.
sample_norm = (sample - x_min) / (x_max - x_min)
sample_b = np.hstack(([1], sample_norm))  # prepend the bias term

# The model predicts on the normalized scale; map the result back to the
# original target units before reporting it.
prediction = model.predict(sample_b) * (y_max - y_min) + y_min

print("Predicted Performance Index:", prediction)

Predicted normalized target value: 85.1338229907398
