In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from utils import train_test_split

# Load the student-performance dataset; the path is relative to the
# directory the notebook is run from.
df = pd.read_csv(
    filepath_or_buffer="./datasets/LinearRegression/Student_Performance.csv",
)
# Bare last expression so the notebook renders the loaded frame.
df

In [None]:
# NOTE: this cell modifies `df` in place, so it is not idempotent -- running
# it a second time without re-loading the CSV would apply the mapping and the
# rescaling again. Re-run the loading cell first if this cell must be re-run.

# Encode the Yes/No flag as 1/0 so it can be used as a numeric feature.
df["Extracurricular Activities"] = df["Extracurricular Activities"].map({"Yes": 1, "No": 0})

# Divide the two score columns by 10. The result is assigned back as whole
# columns instead of using `df.loc[:, cols] /= 10`: writing float results into
# an existing column through `.loc` relies on silent dtype upcasting if that
# column was read as an integer dtype, which recent pandas versions deprecate.
scaled_columns = ["Previous Scores", "Performance Index"]
df[scaled_columns] = df[scaled_columns] / 10

df.head()

In [None]:
# Every column except the last is a feature; the last column is the target.
X, y = df.iloc[:, :-1].values, df.iloc[:, -1].values

# Project-local splitter imported from `utils`.
# NOTE(review): 0.7 and True are presumably the train fraction and a shuffle
# flag -- confirm against utils.train_test_split.
(X_train, X_test, y_train, y_test) = train_test_split(X, y, 0.7, True)

In [None]:
class LinearRegression:
    """Linear regression with an intercept, fitted by mini-batch gradient descent.

    Attributes set by ``fit``:
        coeff:  1-D weight vector; ``coeff[0]`` is the intercept and the
                remaining entries follow the column order of ``X``.
        losses: mean of the per-batch MSE values, one entry per epoch.
        accs:   mean of the per-batch R² scores, one entry per epoch.
        lrs:    reset to an empty list by ``fit``; not filled by this class.
    """

    def fit(self, X: np.ndarray, y: np.ndarray, epochs: int, batch_size: int, print_every: int,
            learning_rate: float = 0.01):
        """Fit the coefficients on ``X``/``y``.

        Batches are taken in row order (no shuffling); the final batch of an
        epoch may be smaller than ``batch_size``. A summary line is printed
        after every epoch.

        Args:
            X: 2-D feature matrix, one row per sample (no intercept column).
            y: 1-D target vector with one entry per row of ``X``.
            epochs: number of full passes over the data.
            batch_size: number of rows per gradient step; must be positive.
            print_every: when positive, a progress line is printed for every
                step whose index is a multiple of this value; otherwise no
                per-step output is produced.
            learning_rate: step size applied to the gradient (default 0.01,
                the value that used to be hard-coded).

        Raises:
            ValueError: if ``batch_size`` is not positive, ``X`` has no rows,
                or ``X`` and ``y`` disagree on the number of samples.
        """
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")
        if X.shape[0] == 0:
            raise ValueError("X must contain at least one sample")
        if len(y) != X.shape[0]:
            raise ValueError("X and y must contain the same number of samples")

        # Prepend a column of ones so coeff[0] acts as the intercept.
        X = np.hstack((np.ones((X.shape[0], 1)), X))
        self.coeff = np.zeros(X.shape[1])
        self.losses = []
        self.accs = []
        self.lrs = []

        # Ceiling division: a trailing partial batch counts as one more step.
        steps = -(-X.shape[0] // batch_size)

        for epoch in range(epochs):

            total_loss = 0
            total_accuracy = 0

            for step in range(steps):
                # Slicing past the end is clipped, so the last batch simply
                # contains whatever rows remain.
                start = step * batch_size
                X_batch = X[start:start + batch_size]
                y_batch = y[start:start + batch_size]

                result = np.dot(self.coeff, X_batch.T)
                errors = result - y_batch
                loss = np.mean(errors ** 2)
                accuracy = self.r_squared(result, y_batch)

                if print_every > 0 and not step % print_every:
                    print(f"------ step: {step} acc: {accuracy} loss: {loss}")

                # Gradient of half the batch MSE; the constant factor 2 of the
                # full MSE gradient is left to the learning rate.
                grad = np.mean(X_batch * errors.reshape(-1, 1), axis=0)

                self.coeff -= learning_rate * grad

                total_loss += loss
                total_accuracy += accuracy

            print(f"epoch: {epoch} acc: {total_accuracy / steps} loss: {total_loss / steps}")

            self.losses.append(total_loss / steps)
            self.accs.append(total_accuracy / steps)

    def predict(self, X: np.ndarray):
        """Return the predictions for the rows of ``X`` (no intercept column).

        Requires ``fit`` to have been called so that ``coeff`` exists.
        """
        X = np.hstack((np.ones((X.shape[0], 1)), X))
        return np.dot(self.coeff, X.T)

    def r_squared(self, pred: np.ndarray, y: np.ndarray):
        """Coefficient of determination: ``1 - SS_res / SS_tot``.

        1.0 means perfect predictions, 0.0 is what always predicting the mean
        of ``y`` achieves, and worse predictions give negative values.

        When ``y`` is constant the ratio is undefined; in that case 1.0 is
        returned for perfect predictions and 0.0 otherwise (the same
        convention scikit-learn's ``r2_score`` uses by default), so that a
        degenerate batch does not inject NaN/inf into the epoch averages.
        """
        ss_res = np.sum((y - pred) ** 2)
        ss_tot = np.sum((y - np.mean(y)) ** 2)
        if ss_tot == 0:
            return 1.0 if ss_res == 0 else 0.0
        return float(1 - ss_res / ss_tot)
    
# Train on the training split.
regressor = LinearRegression()
regressor.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=64,
    print_every=-1,  # non-positive value: no per-step progress lines
)

# Score the model on the held-out rows; the bare last expression makes the
# notebook display the value returned by r_squared.
pred = regressor.predict(X_test)
regressor.r_squared(pred, y_test)

In [None]:
# Two stacked panels, one per recorded per-epoch series. The grid has exactly
# as many rows as there are curves, so no empty axes is left in the figure.
fig, (loss_ax, acc_ax) = plt.subplots(2, 1)

loss_ax.plot(regressor.losses)
loss_ax.set_title("Loss")
loss_ax.set_xlabel("Epoch")
loss_ax.set_ylabel("MSE")

acc_ax.plot(regressor.accs)
acc_ax.set_title("Accuracy")
acc_ax.set_xlabel("Epoch")
acc_ax.set_ylabel("R²")

fig.tight_layout()
plt.show()