In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pickle


### Model 1 - Linear Regression ###

In [2]:
#load train and test set from regression_x_test.pkl, regression_x_train.pkl, regression_y_test.pkl, regression_y_train.pkl
#Paths use forward slashes: they open the same files on Windows and, unlike '\\', also resolve on Linux/macOS.
#NOTE: allow_pickle=True unpickles the file contents, which can execute arbitrary code - only load files you trust.
cm_x_train = np.load('pkl_collection/regression_x_train.pkl', allow_pickle=True)
cm_x_test = np.load('pkl_collection/regression_x_test.pkl', allow_pickle=True)
cm_y_train = np.load('pkl_collection/regression_y_train.pkl', allow_pickle=True)
cm_y_test = np.load('pkl_collection/regression_y_test.pkl', allow_pickle=True)


In [3]:
#Loss function
def mean_squared_error(x : np.ndarray, y : np.ndarray, theta : np.ndarray) -> float:
    """Return the mean squared error of the linear model ``x @ theta`` against ``y``.

    Parameters
    ----------
    x : np.ndarray
        Input matrix that is matrix-multiplied with ``theta`` to form predictions.
    y : np.ndarray
        Target values. ``len(y)`` is used as the normalising sample count.
    theta : np.ndarray
        Model coefficients.

    Returns
    -------
    float
        Sum of squared residuals divided by ``len(y)`` (a NumPy scalar).

    Raises
    ------
    ZeroDivisionError
        If ``y`` is empty.
    """
    # Captured before any reshaping so the normalisation matches the caller's y.
    n_samples = len(y)
    yhat = x @ theta
    # A column-vector y of shape (n, 1) subtracted from 1-D predictions of
    # shape (n,) would silently broadcast to an (n, n) matrix and inflate the
    # loss. When both hold the same number of elements, align y to yhat.
    if np.shape(y) != np.shape(yhat) and np.size(y) == np.size(yhat):
        y = np.reshape(y, np.shape(yhat))
    error = yhat - y
    loss = (1 / n_samples) * np.sum(error ** 2)
    return loss

#Gradient descent
def calculate_gradient_and_update(x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float) -> "tuple[float, np.ndarray]":
    """Perform one gradient-descent step for the linear model ``x @ theta``.

    Parameters
    ----------
    x : np.ndarray
        Input matrix that is matrix-multiplied with ``theta`` to form predictions.
    y : np.ndarray
        Target values. ``len(y)`` is used as the normalising sample count.
    theta : np.ndarray
        Current model coefficients (not modified in place).
    alpha : float
        Learning rate that scales the gradient step.

    Returns
    -------
    tuple[float, np.ndarray]
        ``(loss, theta_new)`` where ``loss`` is ``mean_squared_error`` evaluated
        at the updated coefficients ``theta_new``.

    Raises
    ------
    ZeroDivisionError
        If ``y`` is empty.
    """
    # Captured before any reshaping so the normalisation matches the caller's y.
    n_samples = len(y)
    predictions = x @ theta
    # A column-vector y of shape (n, 1) against 1-D predictions of shape (n,)
    # would silently broadcast the residual to (n, n) and turn theta into a
    # matrix. When both hold the same number of elements, align y first.
    if np.shape(y) != np.shape(predictions) and np.size(y) == np.size(predictions):
        y = np.reshape(y, np.shape(predictions))
    # Uses a 1/n factor (not 2/n): this is half the exact derivative of the
    # reported mean squared error, so the factor 2 is absorbed into alpha.
    gradient = (1 / n_samples) * x.T @ (predictions - y)
    theta_new = theta - (alpha * gradient)
    loss = mean_squared_error(x, y, theta_new)
    return loss, theta_new


In [4]:
num_features = cm_x_train.shape[1]

loss_history = []
num_epochs = 10000

alpha = 0.1

#seed a dedicated generator so the random initialisation (and therefore the whole run) is reproducible
seed = 42
rng = np.random.default_rng(seed)

#initialize theta to random values between -2 and 2
theta = rng.uniform(-2, 2, num_features)

#Training Loop: one gradient-descent step per epoch, recording the loss after each update
for t in range(num_epochs):
    loss, theta = calculate_gradient_and_update(cm_x_train, cm_y_train, theta, alpha)
    loss_history.append(loss)

#Report the loss reached after the last epoch (the history itself is kept in loss_history)
print('Final loss: ', loss)

Final loss:  16.01500347093344


In [5]:
#save theta and loss history for analysis notebook
#`with` guarantees each file is flushed and closed, even if pickling raises.
#Paths use forward slashes: they name the same files on Windows and, unlike '\\', also resolve on Linux/macOS.
with open('pkl_collection/regression_theta.pkl', 'wb') as theta_file:
    pickle.dump(theta, theta_file)
with open('pkl_collection/regression_loss.pkl', 'wb') as loss_file:
    pickle.dump(loss_history, loss_file)