## Problem Set 2

**Q1-2**

The average value of v_min is closest to 0.01 and both v_1 and v_rand satisfy the single-bin Hoeffding Inequality (i.e. v is close to u).

In [1]:
import random as rd
import numpy as np

class CoinFlip:
    def __init__(self, N_coins=1000, N_flips=10):
        self.N_coins = N_coins
        self.N_flips = N_flips
        self.flips = np.random.randint(2, size=(N_coins, N_flips))
        self.heads = np.sum(self.flips, axis=1)

    def v_1(self):
        '''
        returns the fraction of heads for the first coin
        '''

        return float(self.heads[0]) / self.N_flips

    def v_rand(self):
        '''
        returns the fraction of heads for a random coin
        '''

        coin_index = rd.randint(0, self.N_coins - 1)
        return float(self.heads[coin_index]) / self.N_flips

    def v_min(self):
        '''
        returns the lowest fraction of heads across all coins
        '''

        return float(np.amin(self.heads, axis=0)) / self.N_flips

    
N_simulations = 1000
v_1, v_rand, v_min = 0.0, 0.0, 0.0

for _ in range(N_simulations):
    flip = CoinFlip(N_coins=1000, N_flips=10)
    v_1 += flip.v_1()
    v_rand += flip.v_rand()
    v_min += flip.v_min()

print('v_1: {}'.format(v_1 / N_simulations))
print('v_rand: {}'.format(v_rand / N_simulations))
print('v_min: {}'.format(v_min / N_simulations))

v_1: 0.5016000000000006
v_rand: 0.4994000000000002
v_min: 0.03740000000000026


**Q3**

You have the case where h incorrectly models f and f correctly determines y, as well as the case where h correctly models f, but f incorrectly determines y. That means the probability error that h makes in approximating y is:

$$(1 - \lambda) \cdot ( 1 - \mu) + \lambda \cdot \mu$$



**Q4**

The performance of h is independent of $\lambda$ when:

$$\frac{\partial}{\partial \mu} (1 - \lambda) \cdot ( 1 - \mu) + \lambda \cdot \mu = 0 \\
(\lambda - 1) + \lambda = 0\\
\lambda = \frac{1}{2}$$

**Q5-6**

In [2]:
from lfd import error_rate, classify, Data, OLS, Line

N_simulations = 1000
N_train = 100
N_test = 1000

E_in, E_out = 0.0, 0.0

for _ in range(N_simulations):
    line = Line()

    training_set = Data([N_train, 3], intercept=True)
    training_set.Y = classify(training_set.X, line.weights)

    test_set = Data([N_test, 3], intercept=True)
    test_set.Y = classify(test_set.X, line.weights)

    linreg = OLS()
    linreg.run(training_set.X, training_set.Y)
    weights = linreg.weights

    prediction_in = classify(training_set.X, weights)
    E_in += error_rate(prediction_in, training_set.Y)

    prediction_out = classify(test_set.X, weights)
    E_out += error_rate(prediction_out, test_set.Y)

print('Avg. Ein: {}'.format(E_in / N_simulations))
print('Avg. Eout: {}'.format(E_out / N_simulations))

Avg. Ein: 0.03861000000000003
Avg. Eout: 0.04788600000000003


**Q7**

Initializing the Perceptron weights with the least squares coefficients reduces the number of iterations required for convergence.

In [5]:
from lfd import PLA

N_simulations = 1000
N_train = 10
iterations = 0.0

for _ in range(N_simulations):
    line = Line()
    training_set = Data([N_train, 3], intercept=True)
    training_set.Y = classify(training_set.X, line.weights)

    linreg = OLS()
    linreg.run(training_set.X, training_set.Y)
    weights = linreg.weights
    perceptron = PLA(weights=weights)
    iterations += perceptron.run(training_set.X, training_set.Y)

print('Avg. PLA iterations: {}'.format(iterations / N_simulations))

Avg. PLA iterations: 4.145


**Q8**

In [6]:
N_simulations = 1000
N_train = 100
N_test = 1000

E_in, E_out = 0.0, 0.0

for _ in range(N_simulations):
    line = Line()
    training_set = Data([N_train, 3], intercept=True)

    training_set.Y = np.sign(
            np.add(
                np.multiply(training_set.X[:, 1], training_set.X[:, 1]),
                np.multiply(training_set.X[:, 2], training_set.X[:, 2]),
            ) - 0.6
        )

    training_set.add_noise()

    linreg = OLS()
    linreg.run(training_set.X, training_set.Y)

    weights = linreg.weights
    prediction = classify(training_set.X, weights)
    E_in += error_rate(prediction, training_set.Y)

print('Avg. Ein: {}'.format(E_in / N_simulations))

Avg. Ein: 0.4424599999999999


**Q9**

The weights found are close to [-1, -0.05, 0.08, 0.13, 1.5, 1.5].

In [7]:
N_simulations = 1000
N_train = 1000

E_in, E_out = 0.0, 0.0
weights = []

for _ in range(N_simulations):
    training_set = Data([N_train, 3], intercept=True)
    training_set.add_columns([
            np.multiply(training_set.X[:, 1], training_set.X[:, 2]),
            np.multiply(training_set.X[:, 1], training_set.X[:, 1]),
            np.multiply(training_set.X[:, 2], training_set.X[:, 2])
    ])

    training_set.Y = classify(training_set.X, weights=[-0.6, 0, 0, 0, 1, 1])
    training_set.add_noise()

    linreg = OLS()
    linreg.run(training_set.X, training_set.Y)
    weights.append(linreg.weights)

print('Avg. hypothesis weights: {}'.format(np.mean(weights, axis=0)))        

Avg. hypothesis weights: [ -9.91887999e-01   1.96334774e-03   3.60558872e-04  -6.36692467e-04
   1.55761458e+00   1.55646690e+00]


**Q10**

In [8]:
N_simulations = 1000
N_test = 1000

E_in, E_out = 0.0, 0.0

for _ in range(N_simulations):
    test_set = Data([N_test, 3], intercept=True)
    test_set.add_columns([
            np.multiply(test_set.X[:, 1], test_set.X[:, 2]),
            np.multiply(test_set.X[:, 1], test_set.X[:, 1]),
            np.multiply(test_set.X[:, 2], test_set.X[:, 2])
    ])

    test_set.Y = classify(test_set.X, weights=[-0.6, 0, 0, 0, 1, 1])
    prediction = classify(test_set.X, 
                          weights=[-1, -0.05, 0.08, 0.13, 1.5, 1.5])
    E_out += error_rate(prediction, test_set.Y)

print('Avg. Eout: {}'.format(E_out / N_simulations))

Avg. Eout: 0.053940999999999795
