In [None]:
%matplotlib inline
import math
import random
import numpy as np
import matplotlib.pyplot as plt

In [None]:
"""FIGURE 2.6. (right panel) The unit hypercube example"""
# Edge length e_p(r) = r**(1/p) of a sub-cube that holds a fraction r of the
# unit hypercube's volume in p dimensions, evaluated on a grid over [0, 1)
# with step 0.001.
fraction_of_volume = np.arange(0, 1, 0.001)
edge_length_p1 = fraction_of_volume
edge_length_p2 = fraction_of_volume**(1/2)
edge_length_p3 = fraction_of_volume**(1/3)
edge_length_p10 = fraction_of_volume**(1/10)

fig1 = plt.figure(1)
ax11 = fig1.add_subplot(1, 1, 1)

# Curves are drawn from the highest to the lowest dimension, which is also
# the order in which they are listed in the legend.
curves_by_label = (
    ('p=10', edge_length_p10),
    ('p=3', edge_length_p3),
    ('p=2', edge_length_p2),
    ('p=1', edge_length_p1),
)
for curve_label, edge_length in curves_by_label:
    ax11.plot(fraction_of_volume, edge_length, label=curve_label)

ax11.set_xlabel('Fraction of Volume')
ax11.set_ylabel('Distance')
ax11.legend()

# Vertical dashed guides at volume fractions 0.1 and 0.3; they carry no
# label, so they do not show up as legend entries.
for guide_x in (.1, .3):
    ax11.plot([guide_x, guide_x], [0, 1], '--', color='C0', alpha=.5)
plt.show()

In [None]:
"""FIGURE 2.8. The variance-dominating example."""
# TODO: Figure 2.8 is not reproduced yet; this cell only prints a reminder.
print('Please check this later ...')

In [None]:
"""FIGURE 2.9. Relative EPE (at x_0 = 0) ratio for 1NN vs. least squares"""
size_simulation = 10000
size_train = 500
p = 10
# Fixed seed so that Restart & Run All reproduces the same figure.
random_seed = 0


def simulate_sq_errors_at_origin(n_sim, n_train, n_dim, rng):
    """Simulate squared prediction errors at x_0 = 0 for OLS and 1NN.

    Every simulation draws ``n_train`` inputs uniformly from [-1, 1]^n_dim
    and two noisy responses that share the same standard-normal noise:
    a linear target f(x) = x_1 and a cubic target f(x) = (x_1 + 1)^3 / 2.
    For each dimension d = 1..n_dim only the first d input columns are used
    to fit least squares (with intercept) and to locate the training point
    nearest to the origin.  Each prediction at x_0 = 0 is compared with a
    freshly drawn noisy observation f(0) + N(0, 1).

    Parameters
    ----------
    n_sim : int
        Number of independent simulations.
    n_train : int
        Training-set size per simulation.
    n_dim : int
        Largest input dimension considered (must be at least 1).
    rng : numpy.random.Generator
        Source of randomness.

    Returns
    -------
    tuple of four numpy.ndarray, each of shape (n_sim, n_dim)
        ``(ols_linear, nn_linear, ols_cubic, nn_cubic)`` squared errors;
        column ``d - 1`` belongs to dimension ``d``.
    """
    sq_ols_linear = np.empty((n_sim, n_dim))
    sq_1nn_linear = np.empty((n_sim, n_dim))
    sq_ols_cubic = np.empty((n_sim, n_dim))
    sq_1nn_cubic = np.empty((n_sim, n_dim))

    for sim in range(n_sim):
        # Generate data
        train_x = rng.uniform(-1, 1, size=(n_train, n_dim))
        train_error = rng.standard_normal(n_train)
        train_ye_linear = train_x[:, 0] + train_error
        train_ye_cubic = (train_x[:, 0] + 1)**3 / 2 + train_error

        # One fresh test observation at x_0 = 0 per dimension:
        # f(0) = 0 for the linear target and f(0) = 0.5 for the cubic one.
        obs_y_linear = rng.standard_normal(n_dim)
        obs_y_cubic = .5 + rng.standard_normal(n_dim)

        # 1NN: the squared distance to the origin using the first d
        # coordinates is the running sum of squared coordinates, so a single
        # cumulative sum yields the nearest neighbour for every d at once.
        nn = np.cumsum(train_x**2, axis=1).argmin(axis=0)
        sq_1nn_linear[sim] = (train_ye_linear[nn] - obs_y_linear)**2
        sq_1nn_cubic[sim] = (train_ye_cubic[nn] - obs_y_cubic)**2

        # Least squares: the normal equations for the first d inputs are the
        # leading (d+1) x (d+1) block of the full Gram matrix, so X'X and
        # X'y are formed once per simulation instead of once per dimension.
        design = np.hstack((np.ones((n_train, 1)), train_x))
        gram = design.T @ design
        moments = design.T @ np.column_stack((train_ye_linear,
                                              train_ye_cubic))
        for dim in range(1, n_dim + 1):
            k = dim + 1
            beta = np.linalg.solve(gram[:k, :k], moments[:k])
            # The prediction at x_0 = 0 is just the fitted intercept.
            hat_linear, hat_cubic = beta[0]
            sq_ols_linear[sim, dim-1] = (hat_linear - obs_y_linear[dim-1])**2
            sq_ols_cubic[sim, dim-1] = (hat_cubic - obs_y_cubic[dim-1])**2

    return sq_ols_linear, sq_1nn_linear, sq_ols_cubic, sq_1nn_cubic


(arr_epe_ols_linear, arr_epe_1nn_linear,
 arr_epe_ols_cubic, arr_epe_1nn_cubic) = simulate_sq_errors_at_origin(
    size_simulation, size_train, p, np.random.default_rng(random_seed))

# Compute EPE, finally: average the squared errors over the simulations.
epe_ols_linear = arr_epe_ols_linear.mean(axis=0)
epe_1nn_linear = arr_epe_1nn_linear.mean(axis=0)
epe_ols_cubic = arr_epe_ols_cubic.mean(axis=0)
epe_1nn_cubic = arr_epe_1nn_cubic.mean(axis=0)

# Plot
plot_x = list(range(1, p+1))
fig4 = plt.figure(4)
ax41 = fig4.add_subplot(1, 1, 1)
ax41.plot(plot_x, epe_1nn_linear/epe_ols_linear, '-o',
          color='C1', label='Linear')
ax41.plot(plot_x, epe_1nn_cubic/epe_ols_cubic, '-o',
          color='C0', label='Cubic')
ax41.legend()
ax41.set_xlabel('Dimension')
ax41.set_ylabel('EPE Ratio')
ax41.set_title('Expected Prediction Error of 1NN vs. OLS')
plt.show()