In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import r2_score
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler

In [2]:
# Read the training split and discard the 'Unnamed: 0' column in a single
# chained expression, so re-running the cell always starts from the CSV.
train = (
    pd.read_csv('../data/regression/Real estate train.csv')
    .drop(columns='Unnamed: 0')
)

In [3]:
class LinearReg:
    """Ordinary least-squares linear regression with an intercept term.

    After ``fit`` the coefficient vector is stored in ``self.beta``; its first
    entry is the intercept and the remaining entries follow the column order
    of the training matrix.
    """

    @staticmethod
    def _with_intercept(X):
        """Return ``X`` as a float array with a leading column of ones."""
        X = np.asarray(X, dtype=float)
        return np.insert(X, 0, np.ones(X.shape[0]), axis=1)

    def fit(self, X, y):
        """Estimate the coefficients from the training data.

        Parameters
        ----------
        X : array-like, two-dimensional
            Feature matrix, one row per sample.
        y : array-like
            Target values, one per row of ``X``.
        """
        design = self._with_intercept(X)
        # Solve the least-squares problem directly rather than forming
        # inv(X^T X): lstsq is better conditioned and still returns the
        # minimum-norm solution when columns are linearly dependent.
        solution = np.linalg.lstsq(design, np.asarray(y, dtype=float), rcond=None)
        self.beta = solution[0]

    def predict(self, X_test):
        """Return the fitted model's predictions for each row of ``X_test``."""
        return np.matmul(self._with_intercept(X_test), self.beta)

In [4]:
class RidgeReg:
    """Ridge (L2-penalised) linear regression solved in closed form.

    The intercept is handled by centring: ``fit`` subtracts the column means
    of ``X`` and the mean of ``y`` before solving, and ``predict`` applies the
    same shifts in reverse. The intercept itself is therefore not penalised.

    Parameters
    ----------
    l : float
        Regularisation strength multiplying the identity matrix that is added
        to the Gram matrix.
    """

    def __init__(self, l):
        self.l = l

    def fit(self, X, y):
        """Estimate the ridge coefficients from the training data.

        Parameters
        ----------
        X : array-like, two-dimensional
            Feature matrix, one row per sample.
        y : array-like
            Target values, one per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)

        # Centre both sides so that adding y_mean back in predict() acts as an
        # intercept even when the features do not already have zero mean.
        self.x_mean = X.mean(axis=0)
        self.y_mean = y.mean()
        X_centered = X - self.x_mean
        y_centered = y - self.y_mean

        penalised_gram = np.matmul(X_centered.transpose(), X_centered) \
            + self.l * np.identity(X.shape[1])
        # Solve the linear system instead of forming an explicit inverse.
        self.beta_ridge = np.linalg.solve(
            penalised_gram, np.matmul(X_centered.transpose(), y_centered)
        )
        # Keep the original (misspelled) attribute name available for any
        # code that already reads it.
        self.beta_rdige = self.beta_ridge

    def predict(self, X_test):
        """Return the fitted model's predictions for each row of ``X_test``."""
        X_test = np.asarray(X_test, dtype=float)
        return np.matmul(X_test - self.x_mean, self.beta_rdige) + self.y_mean

In [5]:
# Separate the 'target' column from the remaining feature columns.
y_train = train['target']
X_train = train.drop(columns='target')

# Read the held-out split the same way as the training split, then separate
# its 'target' column as well.
test = (
    pd.read_csv('../data/regression/Real estate test.csv')
    .drop(columns='Unnamed: 0')
)
y_test = test['target']
X_test = test.drop(columns='target')

In [6]:
# Reference model: scikit-learn's LinearRegression fitted on the DataFrames.
sk_linreg = LinearRegression().fit(X_train, y_train)
y_pred = sk_linreg.predict(X_test)

# Hand-written model: fitted on the underlying numpy arrays.
linreg = LinearReg()
linreg.fit(X_train.values, y_train.values)
y_pred_manual = linreg.predict(X_test.values)

# Score both sets of predictions against the held-out targets.
r2_linreg_sklearn = r2_score(y_test, y_pred)
r2_linreg_manual = r2_score(y_test, y_pred_manual)

print("Sklearn's Linear Regression R2: ", r2_linreg_sklearn)
print("---------------------")
print("Manual Implementation of Linear Regression R2: ", r2_linreg_manual)

Sklearn's Linear Regression R2:  0.6974280661583803
---------------------
Manual Implementation of Linear Regression R2:  0.6974280661583803


In [7]:
# Reference model: scikit-learn's Ridge with alpha=1.
sk_ridge = Ridge(alpha=1).fit(X_train.values, y_train.values)
y_pred_ridge = sk_ridge.predict(X_test.values)

# Hand-written model with the same penalty strength.
ridge_manual = RidgeReg(l=1)
ridge_manual.fit(X_train.values, y_train.values)
y_pred_ridge_manual = ridge_manual.predict(X_test.values)

# Score both sets of predictions against the held-out targets.
r2_ridge_sklearn = r2_score(y_test, y_pred_ridge)
r2_ridge_manual = r2_score(y_test, y_pred_ridge_manual)

print("Sklearn's Ridg R2: ", r2_ridge_sklearn)
print("---------------------")
print("Manual Implementation of Ridge Regression R2: ", r2_ridge_manual)

Sklearn's Ridg R2:  0.6974584605943199
---------------------
Manual Implementation of Ridge Regression R2:  0.6974584605943212
