In [1]:
import csv
import numpy as np
inv = np.linalg.inv

In [2]:
def prepend_1(x):
  """Return a copy of the 2-D array `x` with a leading column of ones.

  The ones column is created with NumPy's default float dtype, so the
  result dtype is whatever NumPy promotes that and `x.dtype` to.
  """
  n_rows = x.shape[0]
  ones_column = np.ones((n_rows, 1))
  return np.concatenate([ones_column, x], axis=1)

# Exact regression
def solve_regression(x, y):
  """Return the ordinary least-squares weights for design matrix `x`.

  Solves the normal equations (x.T @ x) w = x.T @ y with a linear solver
  rather than forming the explicit inverse of x.T @ x, which is both
  cheaper and numerically more accurate.

  Args:
    x: 2-D array with one row per sample.
    y: targets with one entry (1-D) or one row (2-D) per row of `x`.

  Returns:
    The weights w, with x.shape[1] entries/rows.

  Raises:
    np.linalg.LinAlgError: if x.T @ x is singular.
  """
  return np.linalg.solve(x.T @ x, x.T @ y)

# Exact ridge regression
def solve_ridge(x, y, l):
  """Return the ridge-regression weights for design matrix `x`.

  Solves (x.T @ x + l*I) w = x.T @ y with a linear solver rather than
  forming the explicit inverse, which is both cheaper and numerically
  more accurate. Every coefficient is penalised, including the one for
  any constant column present in `x`.

  Args:
    x: 2-D array with one row per sample.
    y: targets with one entry (1-D) or one row (2-D) per row of `x`.
    l: ridge penalty (lambda) added to the diagonal of x.T @ x.

  Returns:
    The weights w, with x.shape[1] entries/rows.

  Raises:
    np.linalg.LinAlgError: if x.T @ x + l*I is singular.
  """
  gram = x.T @ x
  penalty = l * np.identity(gram.shape[0])
  return np.linalg.solve(gram + penalty, x.T @ y)

In [4]:
# Load the ';'-separated data file: one header row followed by numeric rows.
# newline='' is how the csv module documentation says to open csv files.
with open('../datasets/winequality/winequality-white.csv', newline='') as f:
  reader = csv.reader(f, delimiter=';')
  header = next(reader)
  body = np.asarray(list(reader)).astype(np.float32)
# Inputs are every column but the last, with a leading column of ones;
# the target is the last column, kept 2-D.
x = prepend_1(body[:, :-1])
y = body[:, -1:]
N = 1000  # x.shape[0] // 2
# First N rows are the training split, the remainder the test split.
train = (x[:N], y[:N])
test = (x[N:], y[N:])

lmbda = 0.1
beta_ridge = solve_ridge(*train, lmbda)
print('Beta_ridge\n', beta_ridge, '\n')

# NOTE: from here on x and y refer to the training split only.
x, y = train
# np.linalg.svd returns the right singular vectors transposed; transpose so
# that x == U @ diag(d) @ V.T.
U, d, V = np.linalg.svd(x, full_matrices=False)
V = V.T
# The thin SVD reconstructs x.
assert np.allclose(
  x,
  U @ np.diag(d) @ V.T)

# Least-squares fitted values are the projection of y onto the columns of U.
assert np.allclose(
  U @ U.T @ y,
  x @ inv(x.T @ x) @ x.T @ y)

# Ridge fitted values shrink each U-coordinate of y by d^2 / (d^2 + lambda).
assert np.allclose(
  U @ (np.diag(d**2 / (lmbda + d**2))) @ U.T @ y,
  x @ beta_ridge)

# Eigen-decomposition of x.T @ x in terms of the SVD factors.
assert np.allclose(
  x.T @ x,
  V @ np.diag(d*d) @ V.T)

print('Variances of principal components\n', d**2 / x.shape[0], '\n')


def df(l):
  # Effective degrees of freedom of the ridge fit with penalty l:
  # the sum over singular values of d_j^2 / (d_j^2 + l).
  return (d**2 / (l + d**2)).sum()

table = [('lambda', 'df(lambda)')] + [(l, df(l)) for l in range(10)]
print('Effective degrees of freedom')
for row in table:
  print('{:10}{}'.format(*map(str, row)))


Beta_ridge
 [[ 8.68493816e-02]
 [ 3.56194200e-02]
 [-1.72941122e+00]
 [-1.74230009e-02]
 [ 1.49995133e-03]
 [-1.51712525e+00]
 [ 7.18651432e-03]
 [-1.02196415e-03]
 [-4.57011602e-01]
 [ 7.89902941e-01]
 [ 9.74775134e-01]
 [ 3.36553134e-01]] 

Variances of principal components
 [2.46934910e+04 1.51765773e+02 2.22283944e+01 1.58534056e+01
 6.50798516e-01 6.76585895e-02 1.48978637e-02 1.03797213e-02
 8.82704848e-03 1.49544281e-03 4.54683392e-04 8.97424994e-08] 

Effective degrees of freedom
lambda    df(lambda)
0         12.0
1         9.64317361577285
2         9.116462475809234
3         8.771417226556155
4         8.510152874401195
5         8.298594646653447
6         8.1206077682837
7         7.96711540041211
8         7.8324171148702515
9         7.712653300184957
