In [2]:
import mglearn
import numpy as np
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [8]:
# Baseline: ordinary least squares using scikit-learn's LinearRegression

# Boston housing data: split the loaded bunch into features and target
t = mglearn.datasets.load_boston()
X = t.data
Y = t.target

# Train/test split with a fixed seed so the numbers below are reproducible
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=0)

# Build the estimator and fit it on the training portion only
model = LinearRegression().fit(X_train, Y_train)

# Score the fitted model on both portions (train first, then test)
train_acc, test_acc = (
    model.score(features, target)
    for features, target in ((X_train, Y_train), (X_test, Y_test))
)
print('Train accuracy of Linear Regression: {:.2f}\r\nTest accuracy of Linear Regression: {:.2f}'.format(train_acc, test_acc))

Train accuracy of Linear Regression: 0.77
Test accuracy of Linear Regression: 0.64


In [9]:
# Linear Regression w/ Matrix fun

class MatrixLinearRegression:
    """Ordinary least squares regression implemented with plain NumPy.

    A column of ones is prepended to the feature matrix, so ``betas[0]`` is
    the intercept and ``betas[1:]`` are the per-feature weights.
    """

    def __init__(self):
        # Fitted coefficients (intercept first); set by fit().
        self.betas = None

    @staticmethod
    def _design_matrix(X):
        """Return ``X`` with a leading column of ones for the intercept."""
        X = np.asarray(X, dtype=float)
        return np.append(np.ones((X.shape[0], 1)), X, axis=1)

    def fit(self, X, Y):
        """Estimate the coefficients from features ``X`` and targets ``Y``.

        Parameters:
            X: 2-D array-like of shape (n_samples, n_features).
            Y: array-like of targets with n_samples entries.

        The result is stored in ``self.betas``; nothing is returned.
        """
        X_ = self._design_matrix(X)
        # Solve the least-squares problem directly instead of forming
        # inv(X^T X): the explicit inverse raises (or is numerically
        # unreliable) when X^T X is singular, e.g. with collinear columns or
        # more features than samples. lstsq returns the minimum-norm solution
        # in that case and matches the normal-equation result otherwise.
        self.betas = np.linalg.lstsq(X_, np.asarray(Y, dtype=float), rcond=None)[0]

    def predict(self, X):
        """Return the model's predictions for the feature matrix ``X``.

        Raises:
            AttributeError: if called before ``fit``.
        """
        if self.betas is None:
            raise AttributeError('MatrixLinearRegression has not been fitted yet')
        return np.dot(self._design_matrix(X), self.betas)

    def score(self, X, Y):
        """Return the coefficient of determination R^2 on ``X`` and ``Y``.

        Computed as ``1 - SS_res / SS_tot`` where ``SS_res`` is the sum of
        squared residuals and ``SS_tot`` the sum of squared deviations of
        ``Y`` from its mean.
        """
        Y = np.asarray(Y, dtype=float)
        residual_ss = np.sum((self.predict(X) - Y) ** 2)
        total_ss = np.sum((Y - np.mean(Y)) ** 2)
        return 1 - residual_ss / total_ss

# Fit the hand-written regressor on the current train split and report its scores
model = MatrixLinearRegression()
model.fit(X_train, Y_train)
own_train_score = model.score(X_train, Y_train)
own_test_score = model.score(X_test, Y_test)
print('Train accuracy of own implementation: {:.2f}\r\nTest accuracy of own implementation: {:.2f}'.format(own_train_score, own_test_score))

# Notice:
# A regression that explicitly inverts X^T X fails if the system is underdetermined,
# i.e. with the extended Boston example from mglearn!

Train accuracy of own implementation: 0.77
Test accuracy of own implementation: 0.64


# Regularizations
Regularization is a way of preventing overfitting by adding a penalty. There are three main ways of regularization.

## L1-Regularization
The $L_1$-Regularization penalty term is $\lambda\sum_{j=1}^{p}|\beta_j|$, with $\beta$ being the weights and $p$ the number of weights. $L_1$-Regularization performs a kind of automatic feature selection, since some weights will become $0$, indicating that the corresponding feature will not be used.

## L2-Regularization
The $L_2$-Regularization works by adding the penalty term $\lambda\sum_{j=1}^{p}{\beta_j}^2$. This does not result in feature selection; instead, all features are still considered, but their weights are shrunk toward zero.

## Elastic Net-Regularization
The Elastic Net-Regularization combines the previous two regularization methods. This time the penalty is $\lambda\sum_{j=1}^{p}|\beta_j| + \lambda\sum_{j=1}^{p}{\beta_j}^2$, i.e. the sum of the $L_1$ and $L_2$ penalty terms.

## $\lambda$-Choice
The $\lambda$-value determines how much regularization is applied. If the value is very large, underfitting is likely, since the weights can shrink so much that they no longer have any real influence on the result. If $\lambda$ is too low, the regularization has little to no effect. In practice, cross-validation can be used to find the best value.

In [39]:
# Load a new test dataset to see the impact of regularization.
# NOTE: this rebinds X, Y and the train/test split used by the earlier cells.
X, Y = mglearn.datasets.load_extended_boston()
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=0)

# Unregularized baseline
lmmodel = LinearRegression()
lmmodel.fit(X_train, Y_train)

# L1-Regularization: Lasso
lassomodel = Lasso(alpha=0.01, max_iter=10000)
lassomodel.fit(X_train, Y_train)

# L2-Regularization: Ridge
ridgemodel = Ridge(alpha=0.15)
ridgemodel.fit(X_train, Y_train)

# Elastic Net: combined L1/L2 penalty
netmodel = ElasticNet(alpha=0.001, max_iter=10000)
netmodel.fit(X_train, Y_train)

# Report train/test scores for every model through one loop instead of
# repeating the same pair of print statements per model.
fitted_models = [
    ('LM', lmmodel),
    ('Lasso', lassomodel),
    ('Ridge', ridgemodel),
    ('ElasticNet', netmodel),
]
for position, (label, estimator) in enumerate(fitted_models):
    if position > 0:
        print()  # blank line between consecutive model reports
    print('Train-Accuracy of {}: {:.2f}'.format(label, estimator.score(X_train, Y_train)))
    print('Test-Accuracy of {}: {:.2f}'.format(label, estimator.score(X_test, Y_test)))

Train-Accuracy of LM: 0.95
Test-Accuracy of LM: 0.61

Train-Accuracy of Lasso: 0.90
Test-Accuracy of Lasso: 0.77

Train-Accuracy of Ridge: 0.92
Test-Accuracy of Ridge: 0.77

Train-Accuracy of ElasticNet: 0.92
Test-Accuracy of ElasticNet: 0.78
