# Linear Regression

In [37]:
import numpy as np
class LogisticRegression:
    """Least-squares linear regression, fitted analytically or by gradient descent.

    NOTE: despite its name this class implements *linear* regression; the
    name is kept so that existing callers continue to work.

    The model is ``y ~ X @ w``. No intercept column is added here, so callers
    that want a bias term must append a constant column to ``X`` themselves.

    Attributes:
        w: Current weight vector (one entry per column of ``X``).
        score_history: Scores recorded by ``fit_gradient`` (the initial score
            followed by one entry per iteration).
        loss_history: Empty list; not updated by any method in this class.
        prev_loss: Initialised to ``np.inf``; not updated by any method in
            this class.
    """

    # Seed used for the initial weights so that runs are reproducible.
    _SEED = 123456

    def __init__(self):
        self.w = self._initial_weights(2)
        self.loss_history = []
        self.score_history = []
        self.prev_loss = np.inf

    @classmethod
    def _initial_weights(cls, n_features):
        """Draw reproducible uniform [0, 1) starting weights of the given length."""
        # A private RandomState yields the same stream as
        # ``np.random.seed(seed); np.random.rand(...)`` but leaves NumPy's
        # global generator untouched, so building a model no longer changes
        # the random numbers the rest of the program will see.
        return np.random.RandomState(cls._SEED).rand(n_features)

    def fit_analytic(self, X, y):
        """Set ``self.w`` to the least-squares solution of ``X @ w ~ y``.

        Args:
            X: 2-D feature matrix with one row per observation.
            y: 1-D target vector with one entry per row of ``X``.
        """
        # lstsq solves the same normal equations as inv(X.T @ X) @ X.T @ y
        # for full-rank X, but is better conditioned and, for rank-deficient
        # X, returns the minimum-norm solution instead of raising.
        self.w = np.linalg.lstsq(X, y, rcond=None)[0]

    def fit_gradient(self, X, y, alpha, max_iter):
        """Minimise the squared error ``||X @ w - y||^2`` by gradient descent.

        Starts from the current ``self.w`` and appends the score before the
        first step and after every step to ``self.score_history``.

        Args:
            X: 2-D feature matrix with one row per observation.
            y: 1-D target vector with one entry per row of ``X``.
            alpha: Step size.
            max_iter: Number of gradient steps to take.
        """
        # The constructor always creates two weights; re-draw them when the
        # data has a different number of features so the products below are
        # well defined.
        if np.shape(self.w) != (X.shape[1],):
            self.w = self._initial_weights(X.shape[1])

        # Both products are independent of w, so compute them once.
        P = X.T @ X
        q = X.T @ y

        self.score_history.append(self.score(X, y))

        for _ in range(max_iter):
            # Gradient of ||X @ w - y||^2 with respect to w.
            gradient = 2 * (P @ self.w - q)
            self.w -= alpha * gradient

            self.score_history.append(self.score(X, y))

    def predict(self, X):
        """Return the model predictions ``X @ w``."""
        return np.dot(X, self.w)

    def score(self, X, y):
        """Return the residual sum of squares divided by the total sum of squares.

        The value is ``sum((X @ w - y)^2) / sum((mean(y) - y)^2)``, so 0 means
        a perfect fit and smaller is better.

        NOTE(review): this is ``1 - R^2`` rather than the conventional
        coefficient of determination; confirm that this is what callers
        expect before changing it.
        """
        y = np.asarray(y)
        residual_ss = np.sum((self.predict(X) - y) ** 2)
        total_ss = np.sum((np.mean(y) - y) ** 2)
        return residual_ss / total_ss


In [38]:
import numpy as np
from matplotlib import pyplot as plt

def pad(X):
    """Return a copy of the 2-D array ``X`` with a trailing column of ones."""
    ones_column = np.ones((X.shape[0], 1))
    return np.concatenate((X, ones_column), axis=1)

def LR_data(n_train = 100, n_val = 100, p_features = 1, noise = .1, w = None):
    """Generate synthetic training and validation data from a noisy linear model.

    Features are drawn uniformly from [0, 1). Targets are ``pad(X) @ w`` plus
    Gaussian noise scaled by ``noise``; because ``pad`` appends the column of
    ones last, the final entry of ``w`` acts as the intercept.

    Args:
        n_train: Number of training rows.
        n_val: Number of validation rows.
        p_features: Number of feature columns.
        noise: Multiplier applied to the standard-normal noise.
        w: Weight vector of length ``p_features + 1``. When ``None``, it is
            drawn uniformly from [0.2, 1.2).

    Returns:
        The tuple ``(X_train, y_train, X_val, y_val)``.
    """
    if w is None:
        w = np.random.rand(p_features + 1) + .2

    def draw(n_rows):
        # Features first, then noise: the same order of random draws for
        # each split as the straight-line version of this code.
        features = np.random.rand(n_rows, p_features)
        targets = pad(features) @ w + noise * np.random.randn(n_rows)
        return features, targets

    X_train, y_train = draw(n_train)
    X_val, y_val = draw(n_val)

    return X_train, y_train, X_val, y_val

In [39]:
# Experiment configuration: split sizes, feature count and noise level.
n_train, n_val = 100, 100
p_features = 2
noise = 0.2

# Draw one synthetic training/validation split with those settings.
X_train, y_train, X_val, y_val = LR_data(
    n_train=n_train,
    n_val=n_val,
    p_features=p_features,
    noise=noise,
)

In [40]:
# First model: closed-form (analytic) fit, used as the default fit method.
LR = LogisticRegression()
LR.fit_analytic(X_train, y_train)
print(LR.w)
print(f"Training score = {np.round(LR.score(X_train, y_train), 4)}")
print(f"Validation score = {np.round(LR.score(X_val, y_val), 4)}")

# Second model: a fresh instance trained by gradient descent
# (step size 0.01, 1000 iterations) for comparison with the analytic fit.
LR = LogisticRegression()
LR.fit_gradient(X_train, y_train, 0.01, 1000)
print(LR.w)
print(f"Training score = {np.round(LR.score(X_train, y_train), 4)}")
print(f"Validation score = {np.round(LR.score(X_val, y_val), 4)}")


[1.61670494 1.02074476]
Training score = 0.4913
Validation score = 0.523
[1.61670494 1.02074476]
Training score = 0.4913
Validation score = 0.523
