In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from tqdm import tqdm_notebook

In [2]:
def init(d, N):
#     np.random.seed(2)
    line_points = np.random.uniform(-1, 1, size=[2, 2])
    lr = LinearRegression(fit_intercept=True)
    lr.fit(line_points[:,0].reshape(-1, 1), line_points[:,1])
    line_coef = np.array([lr.intercept_, lr.coef_[0]])
    X = np.random.uniform(-1, 1, size=[N, d])

    y = X[:, 1] - X[:, 0] * line_coef[1] - line_coef[0] >= 0
    y = np.where(y, 1, -1)
    
    return X, y, line_coef

In [3]:
def LR(d, N_train, N_test=None, n_experiments=1000, print_w=False, PLA=False, noise=False, func=None, nonlinear_transform=None):
    Ein = []
    Eout = []
    loops_list = []
    w_list = []
    print('Running Linear Regression')
    for i in tqdm_notebook(range(n_experiments)):
        # Linear Regression
        # in-sample error
        if func:
            X = np.random.uniform(-1, 1, size=[N_train, d])
            y = func(X)
        else:
            X, y, w_fx = init(d, N_train)
            
        if nonlinear_transform:
            X = nonlinear_transform(X)
            
        X_bar = np.hstack((np.ones((X.shape[0],1)), X))
        if noise:
            idx = np.random.randint(0, N_train, size=int(N_train*noise))
            y[idx] *= -1
        
        w_gx = np.linalg.inv(X_bar.T.dot(X_bar)).dot(X_bar.T).dot(y)
        pred = X_bar.dot(w_gx)
        pred[pred < 0] = -1
        pred[pred > 0] = 1
        Ein.append(np.mean(pred!=y))
        w_list.append(w_gx)

        # out-sample error
        if N_test:
            if func:
                X_test = np.random.uniform(-1, 1, size=[N_test, d])
                y_test = func(X_test)
            else:
                X_test = np.random.uniform(-1, 1, size=[N_test, d])
                y_test = X_test[:, 1] - X_test[:, 0] * w_fx[1] - w_fx[0] >= 0
                y_test = np.where(y_test, 1, -1)
                
            if noise:
                idx = np.random.randint(0, N_test, size=int(N_test*noise))
                y_test[idx] *= -1
                
            if nonlinear_transform:
                X_test = nonlinear_transform(X_test)
            X_test_bar = np.hstack((np.ones((X_test.shape[0],1)), X_test))
            pred_out = X_test_bar.dot(w_gx)
            pred_out[pred_out < 0] = -1
            pred_out[pred_out > 0] = 1
            Eout.append(np.mean(pred_out!=y_test))

        # use initial weights for PLA
        if PLA:
            converged = False
            w_pla = w_gx.copy()
            loops = 0
            while not converged:
                converged = True
                for i in range(X_bar.shape[0]):
                    xi = X_bar[i]
                    yi = y[i]
                    if np.sign(xi.dot(w_pla)) != np.sign(yi):
                        loops += 1
                        w_pla += yi*xi
                        converged = False
            loops_list.append(loops)

    print('In-sample error: {}'.format(np.mean(Ein)))
    if N_test:
        print('Out-sample error: {}'.format(np.mean(Eout)))
    if PLA:
        print('Average loops for PLA: {}'.format(np.mean(loops_list)))
    if print_w:
        w_list = np.array(w_list)
        print('Weights: ', np.mean(w_list, axis=0))

In [4]:
# question 5, 6
LR(2, 100, 1000, 1000)

Running Linear Regression


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


In-sample error: 0.03866000000000001
Out-sample error: 0.04824600000000001


In [5]:
# question 7
LR(2, 10, 1000, 1000, PLA=True)

Running Linear Regression


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


In-sample error: 0.026800000000000004
Out-sample error: 0.109616
Average loops for PLA: 6.547


In [6]:
def myfunc(X):
    return np.sign(X[:,0]**2 + X[:,1]**2 - .6)

In [7]:
# question 8
LR(2, 1000, None, 1000, noise=.1, func=myfunc)

Running Linear Regression


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


In-sample error: 0.508009


In [8]:
def mytransform(X):
    X = np.hstack((X, (X[:,0]*X[:,1]).reshape(-1,1), (X[:,0]**2).reshape(-1,1), (X[:,1]**2).reshape(-1,1)))
    return X

In [9]:
# question 9
LR(2, 1000, False, 1000, print_w=True, noise=.1, func=myfunc, nonlinear_transform=mytransform)

Running Linear Regression


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


In-sample error: 0.11947900000000002
Weights:  [-1.00539261e+00 -9.38429538e-04 -1.18301654e-03 -2.24970688e-04
  1.57839652e+00  1.57756050e+00]


In [10]:
# question 10
LR(2, 1000, 1000, 1000, True, False, .1, myfunc, mytransform)

Running Linear Regression


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


In-sample error: 0.11937
Out-sample error: 0.12144999999999999
Weights:  [-1.00390918e+00 -6.13668466e-04  1.04065876e-03 -3.73659121e-03
  1.57671585e+00  1.57457246e+00]
