In [None]:
%matplotlib inline
import math
import random
import numpy as np
import matplotlib.pyplot as plt

In [None]:
"""FIGURE 2.6. (right panel) The unit hypercube example"""
# Sample the volume fraction r on the closed interval [0, 1].  linspace is used
# instead of arange with a float step so that the endpoint r = 1 is included
# (every curve reaches edge length 1) and the number of samples is exact.
fraction_of_volume = np.linspace(0, 1, 1001)
# Edge length of the sub-cube that captures a fraction r of the unit cube's
# volume in p dimensions: e_p(r) = r**(1/p).
edge_length_p1 = fraction_of_volume
edge_length_p2 = fraction_of_volume**.5
edge_length_p3 = fraction_of_volume**(1/3)
edge_length_p10 = fraction_of_volume**.1

fig1 = plt.figure(1)
ax11 = fig1.add_subplot(1, 1, 1)
# Plotted from the highest dimension down, so the legend order follows the
# curves from top to bottom (r**(1/p) grows with p for r in [0, 1]).
ax11.plot(fraction_of_volume, edge_length_p10, label='p=10')
ax11.plot(fraction_of_volume, edge_length_p3, label='p=3')
ax11.plot(fraction_of_volume, edge_length_p2, label='p=2')
ax11.plot(fraction_of_volume, edge_length_p1, label='p=1')
ax11.set_xlabel('Fraction of Volume')
ax11.set_ylabel('Distance')
ax11.legend()
# Vertical guide lines at r = 0.1 and r = 0.3; they carry no label and are
# therefore absent from the legend.
ax11.plot([.1, .1], [0, 1], '--', color='C0', alpha=.5)
ax11.plot([.3, .3], [0, 1], '--', color='C0', alpha=.5)
plt.show()


In [None]:
"""FIGURE 2.7. (bottom panels) Bias-variance decomposition example.
Given the dimension p, 100 simulations are done and the following steps are
taken for each simulation.
1. Generate data of size 1000 from [-1, 1]^p
2. Grab the nearest neighbor x of the origin and compute its distance to the
origin, i.e., its Euclidean norm
3. Compute y=f(x); the variance and the squared bias of y are then estimated
across the 100 simulations.
"""
def generate_data(p: int, n: int) ->np.ndarray:
    """Draw ``n`` points uniformly at random from the cube [-1, 1]^p.

    Every coordinate comes from one ``random.uniform(-1, 1)`` call of the
    standard-library ``random`` module, drawn point by point.

    Args:
        p: Dimension of each point.
        n: Number of points to draw.

    Returns:
        For ``p == 1`` a 1-D array holding ``n`` scalars; otherwise an array
        built from ``n`` rows of ``p`` coordinates each.
    """
    def draw() -> float:
        # Single coordinate, uniform on [-1, 1].
        return random.uniform(-1, 1)

    if p != 1:
        points = []
        for _ in range(n):
            points.append([draw() for _ in range(p)])
        return np.array(points)

    # One-dimensional case: keep the result flat rather than (n, 1).
    return np.array([draw() for _ in range(n)])


def f(p: int, x: np.ndarray) ->float:
    """Evaluate the target function exp(-8 * ||x||^2) at ``x``.

    Args:
        p: Dimension of ``x``. When ``p == 1``, ``x`` is treated as a scalar;
            otherwise it is iterated coordinate by coordinate.
        x: The point at which to evaluate the function.

    Returns:
        ``math.exp`` of minus eight times the squared Euclidean norm of ``x``.
    """
    if p == 1:
        squared_norm = x**2
    else:
        # Accumulate left to right, starting from 0, one coordinate at a time.
        squared_norm = 0
        for coordinate in x:
            squared_norm += coordinate*coordinate
    return math.exp(-8*squared_norm)


def simulate(p: int, nsample:int, nsim: int) ->dict:
    """Monte-Carlo study of the 1-nearest-neighbour estimate at the origin.

    Each of the ``nsim`` repetitions draws ``nsample`` points with
    ``generate_data(p, nsample)``, picks the point closest to the origin
    (Euclidean norm) and evaluates ``f`` there.

    Args:
        p: Dimension of the input space.
        nsample: Number of points drawn per repetition.
        nsim: Number of repetitions; must be at least 1.

    Returns:
        A dict with the keys
        ``'average_distance'``: mean distance from the origin to its nearest
        neighbour over the repetitions,
        ``'variance'``: population variance (ddof=0) of the nearest-neighbour
        responses,
        ``'squared_bias'``: ``(1 - mean response)**2``, where 1 is the value
        ``f`` takes at the origin.

    Raises:
        ValueError: If ``nsim`` is smaller than 1.
    """
    if nsim < 1:
        # A non-positive nsim would otherwise divide by zero or silently
        # return meaningless statistics.
        raise ValueError('nsim must be a positive integer')

    nearest_ys = np.empty(nsim)
    nearest_distances = np.empty(nsim)
    for i in range(nsim):
        data = generate_data(p, nsample)
        if p == 1:
            # generate_data returns a flat array for p == 1, so the norm of
            # each point is just its absolute value.
            data_norm = np.abs(data)
        else:
            data_norm = np.linalg.norm(data, ord=2, axis=1)
        nearest_index = data_norm.argmin()
        nearest_distances[i] = data_norm[nearest_index]
        nearest_ys[i] = f(p, data[nearest_index])

    average_y = nearest_ys.mean()
    return {
        'average_distance': nearest_distances.mean(),
        # np.var works on deviations from the mean, which avoids the
        # cancellation of E[y^2] - E[y]^2 (that form can even turn negative
        # when the responses are nearly identical).
        'variance': float(nearest_ys.var()),
        'squared_bias': float((1 - average_y)*(1 - average_y)),
    }


In [None]:
# Seed the standard-library RNG that generate_data draws from, so the figure
# below is reproducible under Restart Kernel -> Run All.
random.seed(0)

nsim = 100
# One simulation summary per dimension p = 1..10, each with 1000 points per run.
data = {p: simulate(p, 1000, nsim) for p in range(1, 11)}
dimension = list(data.keys())
average_distance = [d['average_distance'] for d in data.values()]
variance = np.array([d['variance'] for d in data.values()])
squared_bias = np.array([d['squared_bias'] for d in data.values()])
# Bias-variance decomposition: MSE = variance + squared bias.
mse = variance + squared_bias

fig2 = plt.figure(2, figsize=(10, 5))
# Left panel: how far away the nearest neighbour of the origin is.
ax21 = fig2.add_subplot(1, 2, 1)
ax21.set_title('Distance to 1-NN vs. Dimension')
ax21.plot(dimension, average_distance, 'ro--')
ax21.set_xlabel('Dimension')
ax21.set_ylabel('Average Distance to Nearest Neighbor')

# Right panel: MSE together with its two components.
ax22 = fig2.add_subplot(1, 2, 2)
ax22.set_title('MSE vs. Dimension')
ax22.plot(dimension, mse, 'o-', label='MSE')
ax22.plot(dimension, variance, 'o-', label='Variance')
ax22.plot(dimension, squared_bias, 'o-', label='Squared Bias')
ax22.set_xlabel('Dimension')
ax22.set_ylabel('MSE')
ax22.legend()
plt.show()

In [None]:
"""FIGURE 2.8. The variance-dominating example."""
# TODO: placeholder cell -- the Figure 2.8 reproduction is not implemented yet;
# for now this only prints a reminder.
print('Please check this later ...')

In [None]:
"""FIGURE 2.8. The variance-dominating example."""
# TODO: placeholder cell -- repeats the preceding Figure 2.8 stub verbatim and
# only prints a reminder; no figure is produced yet.
print('Please check this later ...')