In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.mlab as mlab
import math

In [2]:
# Fix the seed of NumPy's global random generator so that re-running the
# notebook from a fresh kernel draws the same random numbers.
np.random.seed(900)

In [3]:
# Dimension of the input points (number of columns in each sampled dataset).
d = 2
# Number of training points drawn per experiment.
N_train = 100
# Number of test points drawn per experiment.
N_test = 1000

In [4]:
def generate_fline():
    """Draw a random target line f(x) = m * x + b in the plane.

    Two points are sampled uniformly from [-1, 1) x [-1, 1) and the
    returned function is the line passing through both of them.

    Returns:
        A callable mapping an x coordinate (scalar or NumPy array) to the
        y coordinate of the line at that x.
    """
    # A line in the plane is determined by exactly two 2-D points, so the
    # sample has shape (2, 2): one row per point, columns are (x, y).
    (x0, y0), (x1, y1) = np.random.uniform(-1, 1, (2, 2))
    # Slope is rise over run (dy / dx); dividing the other way round would
    # give a line that does not pass through the second point.
    m = (y0 - y1) / (x0 - x1)
    # Intercept chosen so that the line passes through (x0, y0).
    b = y0 - m * x0
    return lambda x: m * x + b

In [5]:
def label_data(D, fline):
    """Label each point of D by which side of the line it lies on.

    Args:
        D: 2-D array; column 0 is read as x and column 1 as y.
        fline: callable giving the line's y value for a scalar x.

    Returns:
        Array holding np.sign(y - fline(x)) for every row: +1 above the
        line, -1 below it, 0 exactly on it.
    """
    xs, ys = D[:, 0], D[:, 1]
    # np.vectorize lets fline be applied element by element even if it only
    # accepts scalars.
    line_heights = np.vectorize(fline)(xs)
    return np.sign(ys - line_heights)

In [6]:
def linear_regression(X, y):
    """Fit least-squares weights for y ~ [1, X] via the pseudo-inverse.

    Args:
        X: 2-D array with one sample per row.
        y: target values, one per row of X.

    Returns:
        Weight vector; element 0 is the intercept, followed by one weight
        per column of X.
    """
    n_samples = X.shape[0]
    # Prepend a column of ones so the first weight acts as the bias term.
    bias_column = np.ones((n_samples, 1))
    design = np.concatenate((bias_column, X), axis=1)
    return np.dot(np.linalg.pinv(design), y)

In [7]:
def lr_output(X, w):
    """Evaluate the linear model w on every row of X.

    Args:
        X: 2-D array with one sample per row.
        w: weight vector whose first element is the intercept.

    Returns:
        The raw (unthresholded) linear output [1, x] . w for each row.
    """
    n_samples = X.shape[0]
    # Same bias-augmented layout that the weights were fitted on.
    design = np.concatenate((np.ones((n_samples, 1)), X), axis=1)
    return np.dot(design, w)

In [8]:
def plot_dataset(D, y, fline):
    """Plot the line over x in [-1, 1] together with the labelled points.

    Args:
        D: 2-D array; column 0 is plotted as x and column 1 as y.
        y: labels, one per row of D.
        fline: callable giving the line's y value for a given x.

    Points whose label is <= 0 are drawn in red, points with a positive
    label in green.
    """
    x_ends = [-1, 1]
    plt.plot(x_ends, [fline(x) for x in x_ends])
    negative = y <= 0
    positive = y > 0
    plt.scatter(D[negative, 0], D[negative, 1], c='r')
    plt.scatter(D[positive, 0], D[positive, 1], c='g')
    plt.show()

In [9]:
def err(D, w, y_real):
    """Return the fraction of points in D that the model w misclassifies.

    Args:
        D: 2-D array with one sample per row.
        w: weight vector (intercept first) passed on to lr_output.
        y_real: true labels, one per row of D.

    Returns:
        Classification error in [0, 1]: the share of rows where
        sign(lr_output(D, w)) differs from y_real.
    """
    predicted = np.sign(lr_output(D, w))
    # Average the *mismatch* mask. Taking the dot product of a boolean mask
    # with itself would not count anything: np.dot on boolean arrays yields a
    # single boolean, not the number of True entries.
    return np.mean(predicted != y_real)

In [10]:
def experiment():
    """Run one trial: sample data and a target line, fit, and measure errors.

    Training and test points are drawn uniformly from [-1, 1) in each of the
    d coordinates, labelled by a freshly generated target line, and a linear
    regression is fitted on the training set.

    Returns:
        np.array([E_in, E_out]): the error on the training set and on the
        test set, as computed by err.
    """
    # Generate train and test data. The order of the random draws (train,
    # test, then the target line) is kept fixed so that a given seed always
    # produces the same sequence of experiments.
    D_train = np.random.uniform(-1, 1, (N_train, d))
    D_test = np.random.uniform(-1, 1, (N_test, d))
    # Generate f
    fline = generate_fline()
    # Label the data
    y_train = label_data(D_train, fline)
    y_test = label_data(D_test, fline)
    # Find weights. The fitted classifier is sign(w[0] + w[1]*x1 + w[2]*x2),
    # so its decision boundary is the line x2 = -(w[0] + w[1] * x1) / w[2].
    w = linear_regression(D_train, y_train)
    # Calculate E_in and E_out
    E_in = err(D_train, w, y_train)
    E_out = err(D_test, w, y_test)
    return np.array([E_in, E_out])

In [11]:
# Repeat the experiment 1000 times; each row of E is one run's [E_in, E_out].
E = np.array([experiment() for _ in range(1000)])

In [12]:
# Average over all runs column-wise: column 0 is E_in, column 1 is E_out.
E_in_avg, E_out_avg = E.mean(axis=0)
print("E_in =", E_in_avg)
print("E_out =", E_out_avg)