A different way to regularize iterative linear models is to compute the validation error at each epoch. When the validation error starts going up, it is a sign that the model has started to overfit the training data, so training is stopped at that point (early stopping).

In [36]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy
%matplotlib inline

from sklearn.linear_model import SGDRegressor
from sklearn.base import clone
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Reproducible synthetic dataset: a noisy quadratic, y = 2 + x + 0.5 * x^2 + noise.
np.random.seed(42)  # fixed seed so every run draws the same samples
m = 100  # number of samples
# One feature column, drawn uniformly from [-3, 3).
X = np.random.rand(m, 1) * 6 - 3
# Targets follow the quadratic with standard-normal noise added per sample.
y = 2 + X + X**2 * 0.5 + np.random.randn(m, 1)

In [None]:
# Basic implementation of early stopping.
#
# The model is trained one epoch at a time on the training split. After each
# epoch it is scored on the held-out validation split, and a snapshot of the
# model is kept whenever the validation error reaches a new minimum. The loop
# runs for all nEpochs; bestModel / bestEpoch identify where training should
# have stopped.
from copy import deepcopy

# max_iter=1 with warm_start=True: every fit() call runs exactly one more epoch,
# continuing from the weights learned so far instead of re-initialising them.
# tol=None turns off SGDRegressor's own stopping criterion so each call really
# runs its single epoch and no ConvergenceWarning is raised per call.
SGDReg = SGDRegressor(max_iter=1, tol=None, learning_rate="constant", warm_start=True, penalty=None, eta0=0.0005)

xTrain, xVal, yTrain, yVal = train_test_split(X[:50], y[:50].ravel(), test_size=0.5, random_state=10)

# Fit the feature expansion on the training split only, then apply that same
# fitted transformer to the validation split.
PolyTrain = PolynomialFeatures(degree=2, include_bias=False)
xTrainPoly = PolyTrain.fit_transform(xTrain)
xValPoly = PolyTrain.transform(xVal)

minValidationError = np.inf
bestEpoch, bestModel = None, None
nEpochs = 100

for epoch in range(nEpochs):
    # Learn from the training split; the validation split is only used for scoring.
    SGDReg.fit(xTrainPoly, yTrain)
    yValPreds = SGDReg.predict(xValPoly)
    yTrainPreds = SGDReg.predict(xTrainPoly)
    yValError = mean_squared_error(yVal, yValPreds)
    yTrainError = mean_squared_error(yTrain, yTrainPreds)

    # Strict '<' keeps the earliest epoch that achieves the minimum.
    if yValError < minValidationError:
        minValidationError = yValError
        bestEpoch = epoch
        # deepcopy keeps the learned weights; sklearn's clone() would return an
        # unfitted estimator carrying only the hyper-parameters.
        bestModel = deepcopy(SGDReg)