<a href="https://colab.research.google.com/github/davidwhogg/GenerativeVsDiscriminative/blob/master/ipynb/double_descent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import pylab as plt
#matplotlib inline

In [0]:
# Polynomial order used by the fits below; the design matrix has ORDER + 1 columns
# (powers 0 through ORDER).
ORDER = 8

In [0]:
def make_fake_data(N, sigma=0.1):
  """Draw a noisy y = x training set of size N plus a single test point.

  Training x values come from a standard normal; the test x comes from a
  uniform distribution on [0, 1). Every y is its x plus Gaussian noise
  scaled by `sigma`. All draws use NumPy's global random state.

  Parameters
  ----------
  N : int
    Number of training points.
  sigma : float, optional
    Scale of the Gaussian noise added to each y value.

  Returns
  -------
  xtrain, ytrain : ndarray, shape (N,)
    Training inputs and noisy targets.
  xtest, ytest : float
    The single test input and its noisy target.
  """
  # Training set: inputs first, then their noise (draw order matters for seeding).
  xtrain = np.random.normal(size=N)
  train_noise = np.random.normal(size=N)
  ytrain = xtrain + sigma * train_noise

  # One held-out test point.
  xtest = np.random.uniform()
  test_noise = np.random.normal()
  ytest = xtest + sigma * test_noise

  return xtrain, ytrain, xtest, ytest

In [0]:
def design_matrix(xs, order=None):
  """Build the polynomial design matrix whose k-th column is `xs ** k`.

  Parameters
  ----------
  xs : scalar or 1-D array
    Input location(s) at which to evaluate the monomials.
  order : int, optional
    Highest power to include. When omitted, the module-level ORDER is
    used (looked up at call time, so editing ORDER and re-running works).

  Returns
  -------
  A : ndarray
    One row per input value and `order + 1` columns (powers 0..order).
    A scalar `xs` yields a single-row matrix.

  Raises
  ------
  ValueError
    If `order` is negative.
  """
  if order is None:
    order = ORDER
  if order < 0:
    raise ValueError("polynomial order must be non-negative, got {}".format(order))
  # Stack one row per power, then transpose so that powers run along columns.
  A = np.vstack([xs ** k for k in range(order + 1)]).T
  return A

def predict(xtrain, ytrain, xtest):
  """Fit a polynomial to the training data by least squares and evaluate it.

  The coefficients come from `np.linalg.lstsq` applied to the training
  design matrix; singular values below 1e-12 times the largest one are
  treated as zero. When several coefficient vectors minimize the residual,
  lstsq returns the one with the smallest 2-norm.

  Parameters
  ----------
  xtrain, ytrain : array
    Training inputs and targets.
  xtest : scalar or array
    Location(s) at which to predict.

  Returns
  -------
  prediction : ndarray
    Test design matrix times the fitted coefficients (one value per row
    of that matrix).
  pars : ndarray
    Fitted polynomial coefficients.
  """
  A_train = design_matrix(xtrain)
  # lstsq returns (solution, residuals, rank, singular values); keep the solution.
  pars, _, _, _ = np.linalg.lstsq(A_train, ytrain, rcond=1e-12)
  A_test = design_matrix(xtest)
  prediction = A_test @ pars
  return prediction, pars

def resid(xtrain, ytrain, xtest, ytest):
  """Return the test residual: true `ytest` minus the fitted prediction at `xtest`.

  The argument order matches the tuple returned by `make_fake_data`, so the
  two can be chained with `resid(*make_fake_data(N))`.
  """
  # Only the prediction is needed here; the fitted coefficients are discarded.
  prediction, _ = predict(xtrain, ytrain, xtest)
  return ytest - prediction

In [0]:
# Seed NumPy's global random state so this example is reproducible.
np.random.seed(42)
# Five training points plus one test point; with ORDER = 8 the fit has more
# coefficients (ORDER + 1) than training points.
xtr, ytr, xte, yte = make_fake_data(5)
# Fit the polynomial to the training set and predict at the test location.
ypr, pars = predict(xtr, ytr, xte)

In [0]:
# Show the training points (solid black), the held-out test point (faded black),
# and the model's prediction at the test location (red).
# predict() returns a length-1 array for a scalar test input; pull out the scalar
# so the text position is a plain number (implicit float() conversion of arrays
# with ndim > 0 is deprecated in recent NumPy).
ypr_value = np.ravel(ypr)[0]
plt.plot(xtr, ytr, "ko")
plt.plot(xte, yte, "ko", alpha=0.5)
plt.text(xte, yte, " test object")
plt.plot(xte, ypr_value, "ro")
plt.text(xte, ypr_value, " prediction at polynomial order {:d}".format(ORDER))
plt.xlabel("x")
plt.ylabel("y");

In [0]:
def estimate_mse_with_trials(Ntrain, Ntrial):
  """Estimate the typical squared test error over repeated random trials.

  Each trial draws a fresh fake data set with `Ntrain` training points,
  fits the polynomial, and records the residual at the test point.

  Note: despite the "mse" in the name, the value returned is the *median*
  of the squared residuals, not the mean.

  Parameters
  ----------
  Ntrain : int
    Number of training points per trial.
  Ntrial : int
    Number of independent trials.

  Returns
  -------
  float
    Median of the squared test residuals across all trials.
  """
  residuals = []
  for _ in range(Ntrial):
    trial_data = make_fake_data(Ntrain)
    residuals.append(resid(*trial_data))
  dys = np.array(residuals)
  return np.median(dys ** 2)

In [0]:
# Quick check: median squared test error from 128 trials with 3 training points each.
print(estimate_mse_with_trials(3, 128))

In [0]:
# Sweep the training-set size from ORDER // 2 up to and including 2 * ORDER,
# estimating the median squared test error with 8192 trials at each size.
Ns = np.arange(ORDER // 2, 2 * ORDER + 1)
mses = [estimate_mse_with_trials(n_train, 8192) for n_train in Ns]

In [0]:
# Vertical line at N = ORDER + 1, where the number of training points equals
# the number of polynomial coefficients (columns of the design matrix).
plt.axvline(ORDER + 1)
# Median squared test error as a function of training-set size.
plt.plot(Ns, mses, "ko")
plt.ylabel("median squared error polynomial prediction")
plt.xlabel("size of training set N")
plt.title("polynomial order {:d}".format(ORDER))