In [1]:
import mglearn
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# Reference result: ordinary least squares as implemented by scikit-learn

# Fetch the dataset object returned by mglearn's load_boston and unpack
# its feature matrix and target vector
t = mglearn.datasets.load_boston()
X = t.data
Y = t.target

# Split into train and test parts; random_state=0 pins the shuffle
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=0)

# Fit the scikit-learn estimator on the training part only
model = LinearRegression()
model.fit(X_train, Y_train)

# Score both parts. For regressors, score() is the coefficient of
# determination (R^2), which the printed label calls "accuracy".
train_acc, test_acc = model.score(X_train, Y_train), model.score(X_test, Y_test)
print('Train accuracy of scikit-learn: {:.2f}\r\nTest accuracy of scikit-learn: {:.2f}'.format(train_acc, test_acc))

Train accuracy of scikit-learn: 0.77
Test accuracy of scikit-learn: 0.64


In [3]:
# Linear Regression w/ Matrix fun

class MatrixLinearRegression:
    """Ordinary least-squares linear regression implemented with NumPy.

    A column of ones is prepended to the feature matrix, so after fitting
    ``betas[0]`` is the intercept and ``betas[1:]`` are the feature weights.
    """

    def __init__(self):
        # No hyper-parameters. ``betas`` is created by fit(), so using the
        # model before fitting raises AttributeError.
        pass

    @staticmethod
    def _add_intercept(X):
        """Return ``X`` as an array with a leading column of ones."""
        X = np.asarray(X)
        return np.append(np.ones((X.shape[0], 1)), X, axis=1)

    def fit(self, X, Y):
        """Estimate the coefficients that minimise the squared residuals.

        Parameters
        ----------
        X : array-like, 2-D (n_samples, n_features)
            Feature matrix without an intercept column.
        Y : array-like with n_samples entries along the first axis
            Target values.

        The result is stored in ``self.betas``.
        """
        X_ = self._add_intercept(X)
        # Solve the least-squares problem directly instead of forming
        # inv(X^T X): the explicit inverse breaks down when X^T X is singular
        # (fewer samples than coefficients, or collinear columns). lstsq
        # returns the same solution in the full-rank case and the
        # minimum-norm solution otherwise.
        self.betas = np.linalg.lstsq(X_, np.asarray(Y), rcond=None)[0]

    def predict(self, X):
        """Return the fitted model's predictions for the rows of ``X``."""
        return np.dot(self._add_intercept(X), self.betas)

    def score(self, X, Y):
        """Return the coefficient of determination R^2 on ``(X, Y)``.

        Computed as ``1 - SS_res / SS_tot`` where ``SS_res`` is the sum of
        squared residuals and ``SS_tot`` the sum of squared deviations of
        ``Y`` from its mean.
        """
        Y = np.asarray(Y)
        prediction = self.predict(X)
        ss_res = np.sum((prediction - Y) ** 2)
        ss_tot = np.sum((Y - np.mean(Y)) ** 2)
        return 1 - ss_res / ss_tot

# Train the hand-written model on the same split and report both scores
model = MatrixLinearRegression()
model.fit(X_train, Y_train)
own_scores = (model.score(X_train, Y_train), model.score(X_test, Y_test))
print('Train accuracy of own implementation: {:.2f}\r\nTest accuracy of own implementation: {:.2f}'.format(*own_scores))

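#Notice:
#A regression that solves the normal equations by explicitly inverting X^T X (np.linalg.inv) fails when that
#matrix is singular, i.e. when the problem is underdetermined or the features are collinear,
#e.g. with the extended Boston dataset from mglearn (mglearn.datasets.load_extended_boston)!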

Train accuracy of own implementation: 0.77
Test accuracy of own implementation: 0.64
