Benjamin Ye  
CS/CNE/EE 156a: Learning Systems (Fall 2023)  
October 9, 2023

## Homework 2

In [1]:
import sys

import numpy as np
import pandas as pd

sys.path.insert(0, str(globals()['_dh'][0].resolve().parents[1]))
from cs156a import (Perceptron, LinearRegression,
                    coin_flip, hoeffding_inequality, 
                    target_function_random_line, target_function_homework_2,
                    generate_data, validate_binary)

rng = np.random.default_rng()

#### Problems 1–2

In [2]:
N_trials = 100_000
N_coins = 1_000
N_flips = 10
nus = coin_flip(N_trials, N_coins, N_flips, rng=rng)
coins = ("first coin", "random coin", 
         "coin with minimum frequency of heads")
df = pd.DataFrame({"coin": coins, "fraction of heads": nus.mean(axis=1)})
display(df.style.hide(axis="index")
                .set_caption(f"{N_trials:,} trials, {N_coins:,} coins, {N_flips:,} flips"))

epsilons = np.linspace(0, 0.5, 6)
histograms = np.apply_along_axis(
    lambda x: np.histogram(x, bins=np.linspace(-0.05, 1.05, 12))[0], 1, nus
) # requires at least 8 GB RAM
probabilities = np.hstack((
    histograms[:, (5,)], 
    histograms[:, 4::-1] + histograms[:, 6:]
)) / N_trials
bounds = hoeffding_inequality(N_flips, epsilons)
satisfies = probabilities < bounds
data = {"epsilon": epsilons, "bound": bounds}
for i in range(nus.shape[0]):
    data[coins[i]] = probabilities[i]
    data[i * " "] = satisfies[i]
df = pd.DataFrame(data)
(df.style.hide(axis="index")
         .format("{:.1f}", subset=["epsilon"])
         .set_caption("Hoeffding inequality"))

coin,fraction of heads
first coin,0.500752
random coin,0.500357
coin with minimum frequency of heads,0.037771


epsilon,bound,first coin,Unnamed: 3,random coin,Unnamed: 5,coin with minimum frequency of heads,Unnamed: 7
0.0,2.0,0.2473,True,0.24699,True,0.0,True
0.1,1.637462,0.40842,True,0.40866,True,0.0,True
0.2,0.898658,0.23571,True,0.23368,True,0.0,True
0.3,0.330598,0.08761,True,0.08825,True,5e-05,True
0.4,0.081524,0.01905,True,0.02028,True,0.37761,False
0.5,0.013476,0.00191,True,0.00214,True,0.62234,False


#### Problems 5–7

In [3]:
N_train = 100
N_test = 9 * N_train
N_runs = 1_000
f = target_function_random_line(rng=rng)
reg = LinearRegression(vf=validate_binary, rng=rng)
errors = np.zeros(2, dtype=float)
for _ in range(N_runs):
    E_in = reg.train(*generate_data(N_train, f, bias=True, rng=rng))
    errors += (
        E_in, 
        reg.get_error(*generate_data(N_test, f, bias=True, rng=rng))
    )
errors /= N_runs
print("For the linear regression model, the average in-sample and "
      f"out-of-sample errors over {N_runs:,} runs are "
      f"{errors[0]:.6f} and {errors[1]:.6f}, respectively.")

N_train = 10
pla = Perceptron(vf=validate_binary)
iters = 0
for _ in range(N_runs):
    f = target_function_random_line(rng=rng)
    x_train, y_train = generate_data(N_train, f, bias=True, rng=rng)
    reg.train(x_train, y_train)
    pla.set_parameters(w=reg.w, update=True)
    pla.train(x_train, y_train)
    iters += pla.iters
print("With initial weights from linear regression, the perceptron "
      f"takes an average of {iters / N_runs:.0f} iterations to "
      "converge.")

For the linear regression model, the average in-sample and out-of-sample errors over 1,000 runs are 0.026790 and 0.038431, respectively.
With initial weights from linear regression, the perceptron takes an average of 3 iterations to converge.


#### Problems 8–10

In [4]:
N_train = N_runs = 1_000
N_test = 9 * N_train
noise = (0.1, lambda y: -y)
reg = LinearRegression(vf=validate_binary, noise=noise, rng=rng)
E_in = 0
for _ in range(N_runs):
    E_in += reg.train(*generate_data(N_train, target_function_homework_2,
                                     bias=True, rng=rng))
print(f"For the linear regression model with {noise[0]:.0%} noise, "
      f"the average in-sample error over {N_runs:,} runs is "
      f"{E_in / N_runs:.6f}.")

transform = lambda x: np.hstack((x, x[:, 1:2] * x[:, 2:], x[:, 1:2] ** 2, 
                                 x[:, 2:] ** 2))
gs = np.array(((-1, -0.05, 0.08, 0.13, 1.5, 1.5), 
               (-1, -0.05, 0.08, 0.13, 1.5, 15),
               (-1, -0.05, 0.08, 0.13, 15, 1.5),
               (-1, -1.5, 0.08, 0.13, 0.05, 0.05),
               (-1, -0.05, 0.08, 1.5, 0.15, 0.15)))
w = np.zeros_like(gs[0])
reg.set_parameters(vf=validate_binary, transform=transform, noise=noise, 
                   update=True)
for _ in range(N_runs):
    reg.train(*generate_data(N_train, target_function_homework_2,
                             bias=True, rng=rng))
    w += reg.w
w /= N_runs
print(f"The average weight vector over {N_runs:,} runs is "
      "w=[", ", ".join(f"{v:.6f}" for v in w), "].", sep="")

counters = np.zeros(6, dtype=float)
for _ in range(N_runs):
    x_test, y_test = generate_data(N_test, target_function_homework_2,
                                   bias=True, rng=rng)
    x_test = transform(x_test)
    y_test[rng.choice(N_test, round(noise[0] * N_test), False)] *= -1
    h_test = np.sign(x_test @ w)
    counters += (*validate_binary(gs.T, x_test, h_test[:, None]),
                 np.count_nonzero(h_test != y_test) / N_test)
counters /= N_runs
df = pd.DataFrame({
    "choice": [f"[{chr(97 + i)}]" for i in range(5)],
    "g": [f"[{', '.join(f'{c:.2g}' for c in g)}]" for g in gs],
    "probability": 1 - counters[:5]
})
display(df.style.hide(axis="index")
                .set_caption(f"Closest hypothesis"))
print(f"The average out-of-sample error over {N_runs:,} runs is {counters[5]:.6f}.")

For the linear regression model with 10% noise, the average in-sample error over 1,000 runs is 0.505258.
The average weight vector over 1,000 runs is w=[-0.993461, -0.000059, -0.002343, -0.001125, 1.557845, 1.558107].


choice,g,probability
[a],"[-1, -0.05, 0.08, 0.13, 1.5, 1.5]",0.971422
[b],"[-1, -0.05, 0.08, 0.13, 1.5, 15]",0.663071
[c],"[-1, -0.05, 0.08, 0.13, 15, 1.5]",0.662427
[d],"[-1, -1.5, 0.08, 0.13, 0.05, 0.05]",0.632946
[e],"[-1, -0.05, 0.08, 1.5, 0.15, 0.15]",0.561405


The average out-of-sample error over 1,000 runs is 0.123710.
