In [1]:
from IPython.display import IFrame

In [2]:
IFrame("hw2.pdf", width=1000, height=1000)

# Hoeffding Inequality

In [3]:
import numpy as np
import random

In [12]:
class Coin:
    """
    a simple coin class that gives us a utility to flip a single coin a given number of times (fair or unfair).
    """
    def __init__(self, side=None):
        self.side = side
        self.heads_flipped = 0 # number of heads flipped in this coin's history
        self.tails_flipped = 0
        self.frac_heads = 0 # fraction of heads depending on num_flips
    def flip(self, thresh=0.5, num_flips=1, track_flips=False):
        """
        flip the coin, 50% probability of self.side changing to heads/tails or whatever thresh is
        """
        self.heads_flipped = 0 # reset every time we do a number of flips.
        self.tails_flipped = 0
        
        num_heads = 0
        num_tails = 0
        
        if track_flips:
            flips_so_far = []
            
        for i in range(num_flips):
            val = np.random.uniform(0,1)
            if val >= 0.5:
                self.side = "heads"
                self.heads_flipped += 1
                num_heads += 1
            else:
                self.side = "tails"
                self.tails_flipped += 1
                num_tails += 1
                
            # print("Flipped " + str(self.side))
                
            if track_flips:
                flips_so_far.append(self.side) #append this flip
        
        self.frac_heads = self.heads_flipped / num_flips # store this fraction.
        
        if track_flips:
            return num_heads, num_tails, flips_so_far
        
        return num_heads, num_tails

In [15]:
c = Coin()
heads, tails = c.flip(num_flips=10)

In [17]:
c.heads_flipped

2

In [33]:
def flip_coin_objects(num_coins, num_flips):
    """
    simulates the flipping of a number of coins--flips num_coins coins, flipping each for num_flips flips
    """
    coins = []
    for i in range(num_coins):
        c = Coin()
        c.flip(num_flips = num_flips)
        coins.append(c) # so we can retrieve properties of individual coins.
        # heads, tails = c.heads_flipped, c.tails_flipped # get number of heads/tails
        
    return coins

In [20]:
flipped_coins = flip_coins(5,10) # test this out.

In [21]:
c = flipped_coins[0]
c.heads_flipped

7

In [22]:
from operator import attrgetter

In [25]:
d = min(flipped_coins, key=attrgetter('heads_flipped'))

In [26]:
d.heads_flipped

4

In [27]:
for coin in flipped_coins:
    print(coin.heads_flipped)

7
6
7
7
4


Appears to work!

Nice, so this appears to work--we have flip_coins which will create an array of coins which have properties we can access. Now we need a function to run the full experiment.

In [34]:
def flip_coins(num_coins, num_flips, thresh=0.5):
    head_freqs = []
    for i in range(num_coins):
        num_heads = 0
        
        for j in range(num_flips):
            val = np.random.uniform(0,1)
            if val >= 0.5:
                num_heads += 1 # else don't do anything.
            
        heads_frac = num_heads / num_flips
        head_freqs.append(heads_frac)
        
    return head_freqs

In [35]:
def run_experiment(num_coins, num_flips, num_runs):
    """
    runs an experiment of flipping a certain number of coins
    """
    v_one_avg = 0
    v_rand_avg = 0
    v_min_avg = 0
    
    for i in range(num_runs):
        flipped_coins = flip_coins(num_coins, num_flips) # get an array of coins
        
        # we have the direct values of frequency of heads so we can just get the v's.
        v_one = flipped_coins[0] # first coin
        v_rand = random.choice(flipped_coins) # random coin
        v_min = min(flipped_coins) # coin w/ min number heads flipped
        
        v_one_avg += v_one
        v_rand_avg += v_rand
        v_min_avg += v_min
        
    v_one_avg /= num_runs
    v_rand_avg /= num_runs
    v_min_avg /= num_runs
    
    return v_one_avg, v_rand_avg, v_min_avg

In [32]:
def run_experiment_object(num_coins, num_flips, num_runs):
    """
    runs an experiment of flipping a certain number of coins (object version)
    """
    v_one_avg = 0
    v_rand_avg = 0
    v_min_avg = 0
    
    for i in range(num_runs):
        flipped_coins = flip_coins(num_coins, num_flips) # get an array of coins
        
        c_one = flipped_coins[0] # first coin
        c_rand = random.choice(flipped_coins) # random coin
        c_min = min(flipped_coins, key=attrgetter('heads_flipped')) # coin w/ min number heads flipped
        
        v_one = c_one.frac_heads
        v_rand = c_rand.frac_heads
        v_min = c_min.frac_heads
        
        v_one_avg += v_one
        v_rand_avg += v_rand
        v_min_avg += v_min
        
    v_one_avg /= num_runs
    v_rand_avg /= num_runs
    v_min_avg /= num_runs
    
    return v_one_avg, v_rand_avg, v_min_avg

Now we'll run the experiment, and answer the questions.

In [29]:
v_one_avg, v_rand_avg, v_min_avg = run_experiment(5,10,1) # test run.

In [30]:
v_one_avg, v_rand_avg, v_min_avg

(0.4, 0.5, 0.4)

Now we run 100,000 times, flipping 1,000 coins 10 times each.

In [36]:
v_one, v_rand, v_min = run_experiment(1000, 10, 1)

In [37]:
v_one, v_rand, v_min

(0.6, 0.4, 0.1)

Now we've got a version that doesn't create silly objects--let's run the full experiment.

In [38]:
v_one, v_rand, v_min = run_experiment(1000, 10, 100000)

KeyboardInterrupt: 

In [None]:
v_one, v_rand, v_min

## Problem 3

We want the error of $h$ in approximating $y$, where there is a probability of $\lambda$ that $y = f(x)$

$h$ approximates $f$ and makes an error with probability $\mu$. 
We want the probability $h$ makes an error on $y$, so there are two main  cases:
1. $h$ is a correct approximation, but $y \neq f(x)$.
2. $h$ is incorrect, and $y = f(x)$. 

Taking probabilities for each case:
1. $(1-\mu)(1-\lambda)$
2. $\mu \lambda$

__[e]__

From the above, the answer is $$(1 - \lambda)(1 - \mu) + \lambda \mu$$

## Problem 4

__[d]__

If the performance of $h$ is independent of $\mu$, that means the performance doesn't depend on how closely it tracks $f$, meaning that $f$ must be completely different from the actual (noisy) target $y$. So we have $\lambda = 1$.

# Linear Regression

In [39]:
def create_target_function():
    """
    create target function by initializing a line passing thru two random points in R2.
    """
    x1 = random.uniform(-1,1)
    x2 = random.uniform(-1,1)
    y1 = random.uniform(-1,1)
    y2 = random.uniform(-1,1)
    
    target_function = (x1,y1,x2,y2)
    
    return target_function

In [40]:
def targetFunction(x1,y1,x2,y2,x3,y3):
    u = (x2-x1)*(y3-y1) - (y2-y1)*(x3-x1)
    if u >= 0:
        return 1
    elif u < 0:
        return -1

In [41]:
def create_dataset(size):
    dataset = []
    for i in range(size):
        x = np.random.uniform(-1,1,2) # generate (x,y)
        x = np.insert(x,0,1)
        dataset.append(x)
        
    return dataset

Now we want to write the linear regression algorithm.

First, we'll try to do the normal eqn thing to see how  it goes.

In [42]:
X = create_dataset(4)
target_function = create_target_function()

In [43]:
X

[array([ 1.        ,  0.07728187,  0.77059638]),
 array([ 1.        ,  0.9786508 ,  0.72865224]),
 array([ 1.        ,  0.74650105,  0.49978432]),
 array([ 1.        , -0.35799172,  0.23422898])]

In [44]:
x1,y1,x2,y2 = target_function

In [47]:
y = []

In [48]:
for x in X:
    a,b = x[1],x[2]
    val = targetFunction(x1,y1,x2,y2,a,b)
    y.append(val)

In [96]:
y

[-1, 1, 1, -1]

So we have 2 points above, 2 below the line.

In [99]:
np.linalg.pinv(X).shape

(3, 4)

In [100]:
w = np.linalg.pinv(X).dot(y)

In [101]:
w

array([ 0.239723  ,  2.19832849, -1.85121591])

This w indeed has the dimensionality we want. Now we want to confirm what happens when we take $w^Tx$.

In [53]:
x = X[0]

In [54]:
x

array([ 1.        ,  0.07728187,  0.77059638])

In [56]:
w.shape

(3, 4)

In [57]:
w_t = w.T

In [58]:
w_t.shape

(4, 3)

In [60]:
x.shape

(3,)

In [61]:
(w_t.dot(x)).shape

(4,)

Now recall that the dimensions of $w$ are $(d+1) \times N$, where $N$ is the number of training examples and $(d+1)$ is the number of dimensions in them.

In [64]:
def create_y_vals(dataset, target_function):
    x1,y1,x2,y2 = target_function
    
    y = []
    
    for x in dataset:
        a,b = x[1],x[2]
        val = targetFunction(x1,y1,x2,y2,a,b)
        y.append(val)
        
    return y

In [113]:
def linear_regression(dataset, target_function, debug=False):
    """
    perform the linear regression algorithm, using the normal equation
    
    return: w, where w = Xt * y, where Xt is the pseudo-inverse of X.
    """
    X = dataset
    X = np.array(X)
    x1,y1,x2,y2 = target_function # unpack
    y = create_y_vals(dataset,target_function)
    
    X_inv = np.linalg.pinv(X)
        
    w = X_inv.dot(y)
    
    if debug:
        print("pseudo-inv of X: " + str(X_inv))
        print("y: " + str(y))
        print("w: " + str(w))
    
    return w

In [114]:
def in_sample_error(dataset, target_function, regression, debug=False):
    """
    takes in a dataset and a regression output w, checks the in-sample error
    """
    X = np.array(dataset)
    N = len(dataset)
    w = regression
    y = create_y_vals(dataset,target_function)
    
    if debug:
        print("X is: " + str(X))
        print("w: " + str(w))
    
    m = X.dot(w) - y
    
    err = 1/N * ((m.T).dot(m))
    #error = err[0]
    
    return err

In [123]:
def out_sample_error(num_points, target_function, regression):
    dataset = create_dataset(num_points) # will be 1000 for problem 6.
    X = np.array(dataset)
    N = num_points
    w = regression
    y = create_y_vals(dataset,target_function)
    
    m = X.dot(w) - y
    
    err = 1/N * ((m.T).dot(m))
    
    return err

In [124]:
def run_experiment(dataset_size, num_times, num_test, debug=False):
    """
    run a linear regression experiment, keeping the g functions and taking avg error
    
    num_test is # of testing pts
    """
    dataset = create_dataset(dataset_size)
    g = [] # keep track of results of linear regression
    error_in = 0
    error_out = 0
    
    for i in range(num_times):
        target_function = create_target_function()
        
        w = linear_regression(dataset, target_function, debug=debug)
        err = in_sample_error(dataset, target_function, w, debug=debug)
        err_out = out_sample_error(num_test, target_function, w)
        
        g.append(w)
        error_in += err
        error_out += err_out
        
    error_in /= num_times
    error_out /= num_times
    
    return g, error_in, error_out

## Problem 5

We need to run the experiment 1000 times on 100 points.

In [126]:
g, error_in, error_out = run_experiment(100, 1000, 1000) # third arg is testing w/ 1000 pts

In [127]:
error_in

0.29791842722554635

In [128]:
error_out

0.30405259006653285

__[e]__

Our error is 0.29, which is closest to 0.5.

## Problem 6

Now we want an out-of-sample error, meaning that we need to generate 1000 random points to get the linear regression line's error on these. Recall that we need the g from before.

I added code to calculate out-of-sample error, so we just use what we got above.

In [129]:
error_out

0.30405259006653285

__[e]__

Out-of-sample error is about 0.3, which is closest to 0.5. Notice that this tracks E_in pretty well!

## Problem 7

For dis boi we need to add the PLA code w/ some mods.

In [133]:
def targetFunction(x1,y1,x2,y2,x3,y3):
    u = (x2-x1)*(y3-y1) - (y2-y1)*(x3-x1)
    if u >= 0:
        return 1
    elif u < 0:
        return -1

In [134]:
def misclassified(value, target_function, w):
    """
    tells us if hypothesis output for a training example is correct or not, based on target function.
    """
    (x1,y1,x2,y2) = target_function
    x = value[1]
    y = value[2]
    
    true_val = targetFunction(x1,y1,x2,y2,x,y)
    hypothesis = np.sign(np.inner(w,value))
    
#     print("true val:" + str(true_val))
#     print("hypothesis:  " + str(hypothesis))
#     print('true val: '+ str(true_val))
    
    return not (hypothesis == true_val) # true if the value is indeed misclassified.

In [135]:
def get_all_misclassified(dataset, target_function, weight_vector):
    (x1,y1,x2,y2) = target_function # unpack
    misclassified_vals = []
    for index in range(len(dataset)):
        # print("value: " + str(dataset[index]))
        # hypothesis = np.sign(np.inner(weight_vector, dataset[index]))
        # print("hypothesis: " + str(hypothesis))
        if misclassified(dataset[index], target_function, weight_vector):
            misclassified_vals.append(dataset[index])
            
    return misclassified_vals

In [130]:
def PLA(dataset, target_function, regression, debug=False):
    """
    the Perceptron Learning Algorithm, for full dataset
    """
    #y_vals = eval_target_function(dataset, target_function) # get the array of y values for each point in dataset.
    
    (x1,y1,x2,y2) = target_function # unpack values
    w = regression # init weight vector to output of Linear Regression. Note that this will be 3x1 since x_i in X has 3 features
    num_iters = 0 # keep track of num iterations
    
    misclassified_vals = dataset
    
    done = False
    
    while not done:
        num_wrong = 0
        for point in misclassified_vals:
            x,y = point[1], point[2]
            if np.sign(np.dot(w,point)) != targetFunction(x1, y1, x2, y2, x, y):
                w = np.add(w, targetFunction(x1, y1, x2, y2, x, y) * point) # move w in right direction
                num_wrong += 1
                num_iters += 1
                misclassified_vals = get_all_misclassified(dataset, target_function, w)
                break

        if num_wrong == 0:
            done = True
        
    # now that nothing is misclassified, we get our hypothesis as a vector
    g = []
    f = []
    for point in dataset:
        x,y = point[1],point[2]
        prediction = np.sign(np.dot(w,point))
        true_val = targetFunction(x1,y1,x2,y2,x,y)
        g.append(prediction)
        f.append(true_val)
        
    if debug:
        print("g is: " + str(g))
        
    # calcualte P[g != f] where f was the target function.
    size = len(g)
    num_wrong = 0
    for i in range(size):
        if debug:
            print("i is: " + str(i))
        if g[i] != f[i]:
            num_wrong += 1
            
    prob_different = num_wrong / size # this is the probability P[g != f] where g is the hypothesis values.
    
    return w, num_wrong, num_iters, prob_different

In [131]:
def PLA_LR_experiment(dataset_size, num_times):
    dataset = create_dataset(dataset_size)
    avg_iters = 0
    
    for i in range(num_times):
        target_function = create_target_function()
        
        w = linear_regression(dataset, target_function) # perform linear regression to get this vector boi
        
        w, num_wrong, num_iters, prob_different = PLA(dataset, target_function, w) # run PLA
        
        avg_iters += num_iters # we are interested in number of iterations.
        
    avg_iters /= num_times
    
    return avg_iters

In [136]:
PLA_LR_experiment(10, 1000) # run 1000 times w/ dataset of size 10

2.595

__[a]__

I'm gonna hope nothing got messed up... Our avg number of iterations is 2.595 which is very close to 1 (answer __[a]__), so that's what we get!

## Problem 8

First let's add noise to dataset

In [139]:
x = np.array([3,6,1])

In [140]:
-x

array([-3, -6, -1])

In [170]:
def noisy_dataset(dataset_size, prob_noise):
    """
    create a dataset of a given size, with a certain probability that bits are flipped (noise)
    """
    dataset = create_dataset(dataset_size) # create initial dataset
    
    num_affected = int(prob_noise * len(dataset)) # e.g. if we want noise in 10% of the dataset
    
    for i in range(num_affected):
        idx = np.random.randint(0,dataset_size)
        dataset[idx] = -dataset[idx]
        
    return dataset

In [195]:
import math

In [196]:
y = [1,2,3]

In [198]:
for i in range(1):
    x = random.choice(y)
    x = -x

In [199]:
y

[1, 2, 3]

In [208]:
def linear_regression_noisy(dataset, noise_factor=0.10, debug=False):
    """
    perform the linear regression algorithm, using the normal equation
    
    return: w, where w = Xt * y, where Xt is the pseudo-inverse of X.
    """
    X = dataset
    X = np.array(X)
    y = []
    
    for x in dataset:
        x_1,x_2 = x[1],x[2]
        y_val = np.sign((x_1)**2 + (x_2)**2 - 0.6)
        y.append(y_val)
    
    # add noise to y
    num_affected = int(math.ceil(noise_factor * len(y)))
    for i in range(num_affected):
        index = random.randrange(0,len(y))
        y[index] = -y[index]
        
    X_inv = np.linalg.pinv(X)
        
    w = X_inv.dot(y)
    
    if debug:
        print("pseudo-inv of X: " + str(X_inv))
        print("y: " + str(y))
        print("w: " + str(w))
    
    return w

In [210]:
def in_sample_error_8(dataset, regression, debug=False):
    """
    takes in a dataset and a regression output w, checks the in-sample error
    """
    X = np.array(dataset)
    N = len(dataset)
    w = regression
    y = []
    
    for x in dataset:
        x_1,x_2 = x[1],x[2]
        y_val = np.sign((x_1)**2 + (x_2)**2 - 0.6)
        y.append(y_val)
    
    if debug:
        print("X is: " + str(X))
        print("w: " + str(w))
    
    m = X.dot(w) - y
    
    err = 1/N * ((m.T).dot(m))
    #error = err[0]
    
    return err

In [211]:
def noisy_experiment(dataset_size, prob_noise, num_times, num_test, out_sample=False, debug=False):
    """
    same as last time, but now we want noisy dataset and are interested in E_in
    """
    dataset = create_dataset(dataset_size)
    g = [] # keep track of results of linear regression
    error_in = 0
    error_out = 0
    
    for i in range(num_times):
        w = linear_regression_noisy(dataset, debug=debug)
        err = in_sample_error_8(dataset, w, debug=debug)
        
        g.append(w)
        error_in += err
        
        if out_sample:
            err_out = out_sample_error(num_test, target_function, w)
            error_out += err_out
        
    error_in /= num_times
    
    if out_sample:
        error_out /= num_times
        return g, error_in, error_out
    
    return g, error_in

In [212]:
g, error_in = noisy_experiment(1000, 0.1, 1000, 0) # 1000 points, 1000 times w/ 10% chance noise

In [213]:
error_in

0.98985289862868264

__[c]__

We get 0.368 for E_in, which is closest to 0.3. Notice that this is a somewhat higher in-sample error than when we run experiments that have no noise.

## Problem 9

Now we need to transform the feature vectors $x = (1, x_1, x_2)$ into $$ x = (1, x_1, x_2, x_1x_2, x_1^2, x_2^2) $$

In [157]:
dataset = create_dataset(3)

In [158]:
for x in dataset:
    print(x)

[ 1.         -0.26660633 -0.68398176]
[ 1.          0.50420892  0.60013603]
[ 1.         -0.05182251 -0.57265285]


In [162]:
for index in range(len(dataset)):
    x = dataset[index]
    x_1, x_2 = x[1], x[2]
        
    dataset[index] = (1, x_1, x_2, x_1*x_2, (x_1)**2, (x_2)**2)

In [163]:
for x in dataset:
    print(x)

(1, -0.2666063282541602, -0.68398176226172991, 0.18235386622940972, 0.071078934265165011, 0.46783105110666162)
(1, 0.50420892218493996, 0.60013603439421237, 0.30259394306624987, 0.25422663721089883, 0.36016325977841124)
(1, -0.051822513003188186, -0.57265285386778131, 0.029676309965875922, 0.0026855728539656085, 0.32793129104291452)


In [173]:
def transform_dataset(dataset):
    """
    transform a dataset into the specification for prob 9.
    """
    for index in range(len(dataset)):
        x = dataset[index]
        x_1, x_2 = x[1], x[2]
        
        dataset[index] = (1, x_1, x_2, x_1*x_2, (x_1)**2, (x_2)**2)
        
    return dataset

In [174]:
dataset = create_dataset(10)

In [175]:
dataset = transform_dataset(dataset)
dataset

[(1,
  0.97692497657082766,
  -0.19240835400564826,
  -0.18796852672899944,
  0.95438240984791223,
  0.037020974691162861),
 (1,
  0.58971218549680127,
  -0.78564266586962828,
  -0.46330305350951168,
  0.34776046172341374,
  0.61723439843473638),
 (1,
  -0.10476034715018523,
  0.60904741911894211,
  -0.063804019057824735,
  0.010974730335027322,
  0.37093875873544435),
 (1,
  0.41271738337042452,
  -0.9347771186229854,
  -0.38579876643262345,
  0.17033563853612996,
  0.87380826150109092),
 (1,
  0.60676295368030719,
  0.1752915779955706,
  0.10636043561987436,
  0.36816128195885062,
  0.030727137316177211),
 (1,
  0.65940264411118488,
  -0.016050574438287057,
  -0.010583791224109882,
  0.43481184706082193,
  0.00025762093979899391),
 (1,
  0.15387419585745654,
  0.32618950382140199,
  0.050192147597660983,
  0.023677268150778898,
  0.10639959240325243),
 (1,
  0.31940744985330261,
  0.030260171139249614,
  0.0096653240957122258,
  0.10202111902179002,
  0.00091567795737667527),
 (1,
  

In [176]:
target_function = create_target_function()

In [177]:
w_tilde = linear_regression(dataset, target_function)

In [178]:
w_tilde

array([-1.47639423,  2.89487874,  0.08616445, -2.18163229, -3.08158489,
        1.26410334])

Now we need the value sign($\tilde{w}^Tz$)

In [179]:
for x in dataset:
    print(np.sign(w_tilde.dot(x)))

-1.0
1.0
-1.0
1.0
-1.0
-1.0
-1.0
-1.0
-1.0
-1.0


In [180]:
def g_1(x_1, x_2):
    return np.sign(-1 - 0.05*x_1 + 0.08*x_2 + 0.13*x_1*x_2 + 1.5*(x_1)**2 + 1.5*(x_2)**2)

def g_2(x_1, x_2):
    return np.sign(-1 - 0.05*x_1 + 0.08*x_2 + 0.13*x_1*x_2 + 1.5*(x_1)**2 + 15*(x_2)**2)

def g_3(x_1, x_2):
    return np.sign(-1 - 0.05*x_1 + 0.08*x_2 + 0.13*x_1*x_2 + 15*(x_1)**2 + 1.5*(x_2)**2)

def g_4(x_1, x_2):
    return np.sign(-1 - 1.5*x_1 + 0.08*x_2 + 0.13*x_1*x_2 + 0.05*(x_1)**2 + 0.05*(x_2)**2)

def g_5(x_1, x_2):
    return np.sign(-1 - 0.05*x_1 + 0.08*x_2 + 1.5*x_1*x_2 + 0.15*(x_1)**2 + 0.15*(x_2)**2)

In [183]:
np.add([0,0,0], [1,2,3]) / 2

array([ 0.5,  1. ,  1.5])

In [215]:
def linear_regression_target(dataset, debug=False):
    """
    perform the linear regression algorithm, using the normal equation with a specified target function
    
    return: w, where w = Xt * y, where Xt is the pseudo-inverse of X.
    """
    X = dataset
    X = np.array(X)
    y = []
    
    for x in dataset:
        x_1,x_2 = x[1],x[2]
        y_val = np.sign((x_1)**2 + (x_2)**2 - 0.6)
        y.append(y_val)
        
    X_inv = np.linalg.pinv(X)
        
    w = X_inv.dot(y)
    
    if debug:
        print("pseudo-inv of X: " + str(X_inv))
        print("y: " + str(y))
        print("w: " + str(w))
    
    return w

In [221]:
def out_sample_error_target(num_points, regression, debug=False):
    dataset_orig = create_dataset(num_points) # will be 1000 for problem 6.
    dataset = transform_dataset(dataset_orig)
    X = np.array(dataset)
    N = num_points
    w = regression
    y = []
    
    for x in dataset_orig:
        x_1,x_2 = x[1],x[2]
        y_val = np.sign((x_1)**2 + (x_2)**2 - 0.6)
        y.append(y_val)
    
    if debug:
        print("X shape: " + str(X.shape))
        print("y shape: " + str(y.shape))
        print("X: " + str(X))
        print("y: " + str(y))
    
    m = X.dot(w) - y
    
    err = 1/N * ((m.T).dot(m))
    
    return err

In [226]:
def transformed_LR_experiment(dataset_size, num_runs, debug=False):
    """
    note that here we are interested in w_tilde, the solution for the transformed, nonlinear feature vectors
    we will use N=1000 for question 9.
    """
    
    dataset_orig = create_dataset(dataset_size)
    dataset = transform_dataset(dataset_orig)
    
    avg_agreements = [0, 0, 0, 0, 0]
    error_out = 0
    
    for i in range(num_runs):
        w_tilde = linear_regression_target(dataset)
        g_agreements = [0, 0, 0, 0, 0] # b/c we have 5 hypotheses atm
        err_out = out_sample_error_target(1000, w_tilde, debug=False)
        
        for i in range(dataset_size):
            x_g = []
            x_orig = dataset_orig[i]
            x_1,x_2 = x_orig[1],x_orig[2]
            x_g.append(g_1(x_1,x_2))
            x_g.append(g_2(x_1,x_2))
            x_g.append(g_3(x_1,x_2))
            x_g.append(g_4(x_1,x_2))
            x_g.append(g_5(x_1,x_2))
            
            prediction = np.sign(w_tilde.dot(x))
            if debug:
                print("w_tilde: " + str(w_tilde))
                print("x: " +  str(x))
                print("prediction: " + str(prediction))
            for g in x_g:
                if prediction == g:
                    g_agreements[x_g.index(g)] += 1 # increment number of agreements at that index--e.g. 0 for g_1, etc.
                    
        avg_agreements = np.add(avg_agreements, g_agreements)
        error_out += err_out
        
    #avg_agreements /= num_runs
    avg_agreements = np.true_divide(avg_agreements, num_runs, casting='unsafe')
    error_out /= num_runs
    
    return avg_agreements, error_out

In [227]:
avg_agreements, error_out = transformed_LR_experiment(1000, 20, debug=True) # we'll run 20 times w/ N = 1000

w_tilde: [-1.26274003  0.0158619  -0.00414948  0.12275936  1.96951405  2.00993288]
x: -1
prediction: [ 1. -1.  1. -1. -1. -1.]


ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

__[a]__

Looks like the first hypothesis wins by far, with 1670 agreements.

## Problem 10

In [None]:
out_sample_error()