In [None]:
from opt_util import *
import matplotlib.pyplot as plt
import random
import torch
import numpy as np

# Implement GD for an (approximate) RKHS model

In [None]:
# generate some data: a noisy 1-D sine regression problem
# Seed every RNG the notebook imports so that Restart & Run All reproduces
# the same dataset (and, downstream, the same mini-batches and fits).
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

sgd = 1  # NOTE(review): not read by any visible cell; kept in case other code uses it
n_x = 100  # num samples
dim_x = 1

# Uniform draws in [-1, 1); only column 0 is used below as the input feature.
data = np.random.uniform(-1, 1, size=[n_x, dim_x+1])
# Inputs scaled to [-2*pi, 2*pi), as a float32 tensor of shape (n_x,).
X = 2 * np.pi * torch.from_numpy(data[:, 0]).float()
# Targets: sin(X) plus uniform noise in [0, 0.4), reshaped to a column (n_x, 1).
Y = (torch.sin(X) + 0.4 * torch.rand_like(X, requires_grad=False)).reshape(-1, 1)

In [None]:
## define the model
# 'rf' presumably selects the random-feature (approximate RKHS) model in
# opt_util -- confirm there. The training loss is mean-squared error.
f_rf_1 = ml_model(
    model_class='rf',
    dim_x=dim_x,
    loss=torch.nn.MSELoss(),
)
# torch's SGD optimizer over whatever `f_rf_1.parameters` exposes
# (accessed as an attribute here, not called).
optim_1 = torch.optim.SGD(params=f_rf_1.parameters, lr=0.1)

In [None]:
## carry out GD steps
for iter_gd in range(1000):
    # Full-batch update: the whole dataset (X, Y) is passed on every step.
    f_rf_1.train_step(x_th=X, y_th=Y, optimization=optim_1)

    # Only every 100th iteration produces a progress plot.
    if iter_gd % 100 != 0:
        continue

    print('iteration of GD:', iter_gd)
    plt.figure()
    # plot_fitting comes from opt_util; presumably it draws the current fit.
    plot_fitting(f_rf_1, X)
    plt.scatter(x=X, y=Y)
    plt.title(f"itertion:{iter_gd}")
    plt.show()

# Implement SGD for the same model

In [None]:
## define the model
# A fresh model with the same settings as the GD run ('rf' model class, MSE
# loss), so the SGD experiment does not reuse weights from an earlier model.
f_rf_sgd = ml_model(
    model_class='rf',
    dim_x=dim_x,
    loss=torch.nn.MSELoss(),
)
# Same optimizer class and learning rate as the GD run.
optim_sgd = torch.optim.SGD(params=f_rf_sgd.parameters, lr=0.1)

In [None]:
## carry out SGD steps
batch_size = 10  # mini-batch size per step; a tunable choice, not prescribed by the exercise
for iter_gd in range(1000):
    # opt step: draw a fresh mini-batch of distinct indices (sampling without
    # replacement). Slicing a permutation stays valid even if batch_size
    # exceeds the dataset size -- it then simply selects every sample.
    id_rnd = torch.randperm(X.shape[0])[:batch_size]
    f_rf_sgd.train_step(x_th=X[id_rnd], y_th=Y[id_rnd].reshape(-1,1), optimization=optim_sgd)

    # for plotting: show the fit against the full dataset every 100 steps
    if iter_gd % 100 == 0:
        print('iteration of GD:', iter_gd)
        plt.figure()
        plot_fitting(f_rf_sgd, X)
        plt.scatter(x=X, y=Y)
        plt.title("itertion:" + str(iter_gd))
        plt.show()

# Implement an NN model

In [None]:
## define the model
# 'nn' presumably selects the neural-network model in opt_util -- confirm
# there. The loss is mean-squared error, as for the 'rf' models.
f_nn = ml_model(
    model_class='nn',
    dim_x=dim_x,
    loss=torch.nn.MSELoss(),
)
# Learning rate is 0.005 here, versus 0.1 for the 'rf' models.
optim_nn = torch.optim.SGD(params=f_nn.parameters, lr=0.005)

In [None]:
## carry out SGD steps
batch_size = 10  # mini-batch size per step; a tunable choice, not prescribed by the exercise
for iter_gd in range(10000):
    # opt step: draw a fresh mini-batch of distinct indices (sampling without
    # replacement). Slicing a permutation stays valid even if batch_size
    # exceeds the dataset size -- it then simply selects every sample.
    id_rnd = torch.randperm(X.shape[0])[:batch_size]
    f_nn.train_step(x_th=X[id_rnd], y_th=Y[id_rnd].reshape(-1,1), optimization=optim_nn)

    # for plotting: show the fit against the full dataset every 100 steps
    # NOTE(review): with 10000 steps this produces 100 figures -- confirm the
    # interval is intended rather than carried over from the 1000-step loops.
    if iter_gd % 100 == 0:
        print('iteration of GD:', iter_gd)
        plt.figure()
        plot_fitting(f_nn, X)
        plt.scatter(x=X, y=Y)
        plt.title("itertion:" + str(iter_gd))
        plt.show()

## Questions
- [ ] Are there any benefits that SGD offers over GD?
- [ ] Any downsides?
- [ ] RKHS or NN? Why? Can an RKHS (shallow) model behave badly?
- [ ] Can you tune better features for RKHS functions?

# Bonus
- Implement weight averaging: https://pytorch.org/docs/stable/optim.html#stochastic-weight-averaging
- Play with the step size (learning rate). What are the best (better) values?
- Does overfitting occur?
- Can you load another dataset? 
