In [None]:
from numpy.core.numeric import identity
import numpy as np

def import_data(filepath):
  """Load a whitespace-delimited data set of 2-D points with targets.

  Every non-blank line must contain at least three numeric fields: the two
  input coordinates followed by the target value. Extra fields on a line are
  ignored. Blank (or whitespace-only) lines are skipped.

  Args:
    filepath: Path of the text file to read.

  Returns:
    A tuple (X, y): X is an array with one (x1, x2) row per data line and y
    is the array of the matching third-column values.

  Raises:
    OSError: If the file cannot be opened.
    ValueError: If one of the first three fields is not a valid float.
    IndexError: If a non-blank line has fewer than three fields.
  """
  X = []
  y = []
  # The context manager closes the handle even if parsing raises midway.
  with open(filepath) as f:
    for raw_line in f:
      fields = raw_line.split()
      if not fields:
        # A blank line (e.g. trailing newline at end of file) carries no data.
        continue
      X.append((float(fields[0]), float(fields[1])))
      y.append(float(fields[2]))
  return (np.array(X), np.array(y))

def phi(x1, x2):
  """Expand a 2-D point into the 8-term nonlinear feature tuple.

  The features are, in order:
  1, x1, x2, x1^2, x2^2, x1*x2, |x1 - x2|, |x1 + x2|.

  Args:
    x1: First coordinate of the point.
    x2: Second coordinate of the point.

  Returns:
    A tuple of the eight feature values.
  """
  bias = 1
  squares = (x1**2, x2**2)
  cross = x1*x2
  abs_diff = abs(x1 - x2)
  abs_sum = abs(x1 + x2)
  return (bias, x1, x2) + squares + (cross, abs_diff, abs_sum)

def transformation(X):
  """Apply the feature map `phi` to every point of a data set.

  Args:
    X: Iterable of points, each unpackable into exactly two coordinates.

  Returns:
    An array with one row of `phi` features per input point.
  """
  return np.array([phi(x1, x2) for x1, x2 in X])

def w_lin(Z, y):
  """Compute the least-squares weight vector for linear regression.

  Solves min_w ||Z w - y||^2. For a full-column-rank Z this equals
  (Z^T Z)^-1 Z^T y, but it is obtained from a least-squares solver instead
  of explicitly inverting Z^T Z: forming the inverse amplifies rounding error
  and fails outright when Z^T Z is singular. When Z is rank deficient the
  minimum-norm solution is returned.

  Args:
    Z: Design matrix, one row of features per sample.
    y: Target values, one per row of Z.

  Returns:
    The weight array w minimizing the squared error.
  """
  # rcond=None selects NumPy's machine-precision-based cutoff for small
  # singular values.
  weights, _residuals, _rank, _singular_values = np.linalg.lstsq(Z, y, rcond=None)
  return weights

def predict(w, x):
  """Classify a feature vector by the sign of its linear score.

  Args:
    w: Weight vector.
    x: Feature vector of the point to classify.

  Returns:
    np.sign of the dot product of w and x (-1, 0 or +1).
  """
  signal = np.dot(w, x)
  return np.sign(signal)

def err(w, X, y):
  """Return the fraction of points whose prediction differs from its label.

  Args:
    w: Weight vector passed to `predict`.
    X: Sequence of feature vectors.
    y: Sequence of labels, indexed in step with X.

  Returns:
    Number of mismatches divided by len(X), as a float.
  """
  n_points = len(X)
  n_wrong = sum(1 for i in range(n_points) if y[i] != predict(w, X[i]))
  return float(n_wrong) / n_points


def sim():
  """Fit unregularized linear regression on in.txt and report both errors.

  Reads the training set from "in.txt" and the test set from "out.txt",
  maps both through `transformation`, fits weights with `w_lin` on the
  training features, and prints the in-sample and out-of-sample
  classification error.
  """
  train_X, train_y = import_data("in.txt")
  test_X, test_y = import_data("out.txt")

  train_Z = transformation(train_X)
  weights = w_lin(train_Z, train_y)
  test_Z = transformation(test_X)

  e_in = err(weights, train_Z, train_y)
  e_out = err(weights, test_Z, test_y)
  print(f"The in sample error is {e_in}")
  print(f"The out of sample error is {e_out}")


# Baseline run: linear regression without regularization; prints the
# in-sample and out-of-sample classification error.
sim()

The in sample error is 0.02857142857142857
The out of sample error is 0.084


In [None]:
def w_reg(Z, y, lam):
  """Compute the weight-decay (ridge) regression weight vector.

  Solves (Z^T Z + lam * I) w = Z^T y. The Gram matrix Z^T Z is formed once,
  and the system is handed to a linear solver rather than multiplying by an
  explicit inverse, which is both cheaper and numerically more accurate.

  Args:
    Z: Design matrix, one row of features per sample.
    y: Target values, one per row of Z.
    lam: Regularization strength multiplying the identity matrix.

  Returns:
    The regularized weight array w.

  Raises:
    numpy.linalg.LinAlgError: If Z^T Z + lam * I is singular.
  """
  gram = np.dot(Z.T, Z)
  regularized = gram + lam * np.identity(gram.shape[0])
  return np.linalg.solve(regularized, np.dot(Z.T, y))

def sim_weight_decay(k):
  """Fit weight-decay regression with lambda = 10**k and report both errors.

  Reads the training set from "in.txt" and the test set from "out.txt",
  maps both through `transformation`, fits weights with `w_reg` on the
  training features, and prints the in-sample and out-of-sample
  classification error.

  Args:
    k: Base-10 exponent of the regularization strength.

  Returns:
    The out-of-sample classification error, so a cell can display it or a
    caller can compare different values of k.
  """
  # A float base keeps negative exponents valid for NumPy integer k as well;
  # for plain Python ints the value of lambda is unchanged.
  lam = 10.0 ** k
  X_in, y_in = import_data("in.txt")
  X_out, y_out = import_data("out.txt")
  Z_in = transformation(X_in)
  w = w_reg(Z_in, y_in, lam)
  Z_out = transformation(X_out)
  err_in = err(w, Z_in, y_in)
  err_out = err(w, Z_out, y_out)
  print("The in sample error with weight decay k=" + str(k) + " is " + str(err_in))
  print("The out of sample error with weight decay k=" + str(k) + " is " + str(err_out))
  return err_out

# Weight decay with lambda = 10**-3.
sim_weight_decay(-3)

The in sample error with weight decay k=-3 is 0.02857142857142857
The out of sample error with weight decay k=-3 is 0.08


0.08

In [None]:
# Weight decay with lambda = 10**3.
sim_weight_decay(3)

The in sample error with weight decay k=3 is 0.37142857142857144
The out of sample error with weight decay k=3 is 0.436


0.436

In [None]:
# Report the errors for k = 2, 1, 0, -1, -2, separating the reports with a
# blank-line print; the final call stays the cell's last expression.
for k in (2, 1, 0, -1):
  sim_weight_decay(k)
  print("\n")
sim_weight_decay(-2)

The in sample error with weight decay k=2 is 0.2
The out of sample error with weight decay k=2 is 0.228


The in sample error with weight decay k=1 is 0.05714285714285714
The out of sample error with weight decay k=1 is 0.124


The in sample error with weight decay k=0 is 0.0
The out of sample error with weight decay k=0 is 0.092


The in sample error with weight decay k=-1 is 0.02857142857142857
The out of sample error with weight decay k=-1 is 0.056


The in sample error with weight decay k=-2 is 0.02857142857142857
The out of sample error with weight decay k=-2 is 0.084


0.084

In [None]:
def sim_weight_decay_test(k):
  """Return the out-of-sample error of weight decay with lambda = 10**k.

  Reads "in.txt" and "out.txt", fits `w_reg` on the transformed training
  set, and evaluates the weights on the transformed test set. Nothing is
  printed.

  Args:
    k: Base-10 exponent of the regularization strength.

  Returns:
    The out-of-sample classification error as computed by `err`.
  """
  decay = 10**k
  train_X, train_y = import_data("in.txt")
  test_X, test_y = import_data("out.txt")
  weights = w_reg(transformation(train_X), train_y, decay)
  test_Z = transformation(test_X)
  return err(weights, test_Z, test_y)

# Smallest out-of-sample error over the integer exponents k = -10, ..., 8.
lowest = min(sim_weight_decay_test(k) for k in range(-10, 9))
print(lowest)

0.056
