In [None]:
import numpy as np

%matplotlib notebook
import matplotlib.pyplot as plt
from matplotlib.widgets import Slider, Button, RadioButtons, TextBox

# Linear Regression in 1D
This exercise illustrates linear regression, its solution in closed form and by means of gradient descent.
Edit only the following cell (i.e. the class `LinearRegressor`) for this task; the connection to the GUI is already set up for you.

In [None]:
from sklearn import linear_model

class LinearRegressor:
  """
  One-dimensional linear regression model

      y_pred = w0 + w1 * x

  The instance keeps references to a 1D training set (train_X) and the
  associated targets (train_t).

  For instance:
  train_X   |    train_t
  ----------------------
   0.6      |        7.9
   1.9      |        4.2
  -0.3      |       12.9
  ----------------------
  train_X and train_t are of shape (n_training_examples,)

  The parameters (w0, w1) can be fitted in closed form
  (solve_normal_equation) or iteratively (solve_gradient_descent); both
  minimise the mean squared error on the training set.
  """

  def __init__(self, w0, w1, train_X, train_t):
    """
    The constructor sets some important values
    :param w0: initial offset (bias)
    :param w1: initial coefficient for x
    :param train_X: training samples inputs
    :param train_t: training samples targets
    """
    self.w0 = w0
    self.w1 = w1
    self.train_X = train_X
    self.train_t = train_t
    # gradient descent stops once a step changes both parameters by less than eps ...
    self.eps = 0.01
    # ... or after this many steps, whichever comes first
    self.max_iterations = 500

  def predict(self, x, w0, w1):
    """
    Evaluates y_pred = w0 + w1 * x with the given parameters
    :param x: input
    :param w0: offset (bias)
    :param w1: weight for x
    :return: the model's output
    """
    return w0 + w1 * x

  def get_predictions(self, x):
    """
    Evaluates the model with the parameters currently stored on the instance
    :param x: input
    :return: the model's output for self.w0 and self.w1
    """
    return self.predict(x, self.w0, self.w1)

  def get_loss(self):
    """
    Calculates the loss with the current parameter settings (w0, w1) and the training set
    :return: mean squared error (MSE)
    """
    # Task a) mean squared error between the predictions and the targets train_t
    diff = self.get_predictions(self.train_X) - self.train_t
    # np.dot(diff, diff) is the sum of squared residuals
    return np.dot(diff, diff) / len(self.train_t)

  def solve_normal_equation(self):
    """
    Calculates the optimal solution in terms of mean squared error
    according to the training set using the closed form solution (= normal equations)
    and stores it in self.w0 / self.w1
    :return: a tuple (w0, w1) for the optimal weights, as plain floats
    """
    # Task d) closed-form least squares on the instance's own training data
    inputs = np.asarray(self.train_X, dtype=float).ravel()
    targets = np.asarray(self.train_t, dtype=float).ravel()

    # design matrix with a column of ones for the bias: row i is [1, x_i]
    design = np.column_stack((np.ones_like(inputs), inputs))

    # lstsq returns the minimiser of ||design @ w - targets||^2, i.e. the
    # solution of the normal equations (design^T design) w = design^T targets,
    # without forming or inverting design^T design explicitly.
    solution = np.linalg.lstsq(design, targets, rcond=None)[0]

    # store scalars so that w0 and w1 keep the same type as after __init__
    self.w0, self.w1 = float(solution[0]), float(solution[1])
    return self.w0, self.w1

  def gradient(self):
    """
    calculates the derivatives d Loss / d w0 and d Loss / d w1
    :return: the analytical loss gradient as a tuple (for w0 and w1)
    """
    # Task b) analytical gradient of the MSE:
    #   d Loss / d w0 = 2/n * sum(y_pred - t)
    #   d Loss / d w1 = 2/n * sum((y_pred - t) * x)
    # The formulas can be verified with a numerical gradient check, where the
    # local gradient is estimated in a small region around the desired
    # parameter values, e.g. (L(w0 + eps, w1) - L(w0, w1)) / eps for a small eps.
    y_pred = self.predict(self.train_X, self.w0, self.w1)
    diff = y_pred - self.train_t
    n_samples = len(y_pred)

    w0_grad = 2. * np.sum(diff) / n_samples
    w1_grad = 2. * np.dot(diff, self.train_X) / n_samples
    return w0_grad, w1_grad

  def step_gradient(self, learning_rate):
    """
    Performs a parameter update according to the gradient calculated in self.gradient()
    :param learning_rate: step size the gradient is scaled with
    :return: None, self.w0 and self.w1 are updated in place
    """
    g_w0, g_w1 = self.gradient()
    self.w0, self.w1 = self.w0 - learning_rate * g_w0, self.w1 - learning_rate * g_w1

  def solve_gradient_descent(self, learning_rate):
    """
    Calculates the optimal solution in terms of mean squared error
    according to the training set using gradient descent
    :param learning_rate: step size passed to self.step_gradient
    :return: a tuple w0, w1 for the optimal settings
    """
    # Task c) repeated gradient steps, at most self.max_iterations of them
    for _ in range(self.max_iterations):
      old_w0, old_w1 = self.w0, self.w1
      self.step_gradient(learning_rate)
      # convergence criterion, if we do not need all iterations:
      # the last step moved both parameters by less than eps
      if np.abs(old_w0 - self.w0) < self.eps and np.abs(old_w1 - self.w1) < self.eps:
        break

    return self.w0, self.w1


In [None]:
# DO NOT TOUCH THIS CODE!

# some mild formatting
# 'sans-serif' is a generic family matplotlib can resolve; 'normal' is not a
# font family and only produces "findfont: Font family ... not found" warnings.
font = {'family' : 'sans-serif',
        'size'   : 12}
plt.rc('font', **font)
orange = (1.0, 0.57647, 0.039216)

# main figure; `axes` is a type-annotated alias of `ax`
fig, ax = plt.subplots()
axes : plt.Axes = ax

# leave room on the left and at the bottom for the widgets created further down
fig.set_size_inches(10,4.5)
plt.subplots_adjust(left=0.25, bottom=0.25)

# our input domain
x_min = 0.0
x_max = 10.0

# sample positions at which the prediction line is drawn
x = np.arange(x_min, x_max, 0.01)
# the parameters that can be tuned by users
w0 = -4.
w1 = 1.2
# whether recreate_training_set() adds Gaussian noise to the targets
use_noise = True
# while True, the slider callback update() ignores slider changes
lock_update = False

def my_func(w0, w1, x):
    """Evaluate the line w0 + w1 * x for the offset w0, slope w1 and input x."""
    slope_term = w1 * x
    return w0 + slope_term


# prediction line for the initial parameters; `l` is updated later via set_ydata
f = my_func(w0, w1, x)
l, = plt.plot(x, f, lw=2, color=orange, label = "Prediction" )
axes.set_xlabel("x")
axes.set_ylabel("y = w0 + w1 * x")

# ground truth parameters used to generate the training samples
# NOTE: recreate_training_set() reseeds numpy with 1337 before it draws any
# random numbers, so the seed set here does not influence the training set.
np.random.seed(137)
n_train = 8
w0_true = 6.2
w1_true = -1.78

# placeholders, overwritten by recreate_training_set()
train_x, train_t = (0,0)


def recreate_training_set():
    """
    Rebuild the global training set (train_x, train_t).

    The random generator is reseeded on every call, so the same n_train
    inputs in [0, x_max) are drawn each time. The targets follow the ground
    truth line (w0_true, w1_true); standard normal noise is added to them
    only while use_noise is set.
    """
    global train_x, train_t

    np.random.seed(1337)
    inputs = np.random.rand(n_train) * x_max
    targets = my_func(w0_true, w1_true, inputs)

    if use_noise:
        targets = targets + np.random.normal(0, 1, n_train)

    train_x, train_t = inputs, targets


# build the initial training set and the model that the GUI drives
recreate_training_set()

regressor = LinearRegressor(w0, w1, train_x, train_t)

# training samples and one vertical line per sample from prediction to target
train_plot, = plt.plot(train_x, train_t, 'xb', label="Train set")
vlines = ax.vlines(train_x, my_func(w0, w1, train_x), train_t)

plt.axis([0, x_max, -11, 11])


# widget axes: loss display at the top, the two parameter sliders at the bottom
# (axamp hosts the w0 slider, axfreq hosts the w1 slider)
axcolor = 'lightgoldenrodyellow'
axloss = plt.axes([0.25, 0.9, 0.65, 0.06])
axfreq = plt.axes([0.25, 0.1, 0.65, 0.03], facecolor=axcolor)
axamp = plt.axes([0.25, 0.15, 0.65, 0.03], facecolor=axcolor)

lossLabel = TextBox(axloss,"Loss (MSE)", "0.0")
sw0 = Slider(axamp, 'w0', -15.0, 15.0, valinit=w0)
sw1 = Slider(axfreq, 'w1', -50.0, 50.0, valinit=w1)

def update_with_params(w0, w1):
    """
    Redraw the figure for the parameters (w0, w1).

    Updates the prediction line, the training markers, the vertical
    residual segments and the loss / gradient text boxes. Loss and gradient
    are read from `regressor`, i.e. they are computed for regressor.w0 and
    regressor.w1, which every caller sets to the same values it passes here.

    :param w0: offset used for the drawn prediction line
    :param w1: slope used for the drawn prediction line
    """
    l.set_ydata(regressor.predict(x, w0, w1))
    train_plot.set_ydata(train_t)

    train_y_pred = regressor.predict(train_x, w0, w1)

    # one segment per training sample: (x_i, t_i) -> (x_i, y_pred_i)
    train_t_points = np.vstack((train_x, train_t)).T
    train_y_points = np.vstack((train_x, train_y_pred)).T
    segs_ = np.hstack((train_t_points, train_y_points))
    segs = segs_.reshape(len(train_t), 2, 2)
    vlines.set_segments(segs)

    # calculate new loss
    loss = regressor.get_loss()
    lossLabel.set_val(str(loss))

    # calculate and show gradients
    grad_w0, grad_w1 = regressor.gradient()
    loss_label_grad_w0.set_val(np.around(grad_w0,2))
    loss_label_grad_w1.set_val(np.around(grad_w1,2))


def update(val):
    """
    Slider callback: copy the slider values into the regressor and redraw.

    Does nothing while lock_update is set. The argument is not used; the
    values are read from the sliders directly.
    """
    if lock_update:
        return

    new_w0, new_w1 = sw0.val, sw1.val
    regressor.w0, regressor.w1 = new_w0, new_w1
    update_with_params(new_w0, new_w1)


# both sliders share the same callback
sw0.on_changed(update)
sw1.on_changed(update)

# button row at the bottom of the figure, laid out from right to left;
# curr_width is the left edge of the button that is placed next
curr_width = 0.82
resetax = plt.axes([curr_width , 0.01, 0.08, 0.06])
curr_width -= 0.16
toggle_gt_ax = plt.axes([curr_width, 0.01, 0.13, 0.06])
curr_width -= 0.28
solve_NE_ax = plt.axes([curr_width, 0.01, 0.25, 0.06])
curr_width -= 0.28
solve_GD_ax = plt.axes([curr_width, 0.01, 0.25, 0.06])

# buttons
reset_button = Button(resetax, 'Reset', color=axcolor, hovercolor='0.975')
toggle_secret_button = Button(toggle_gt_ax, 'Show secret', color=axcolor, hovercolor='0.975')
solve_NE_button = Button(solve_NE_ax, 'Solve by normal equations', color=axcolor, hovercolor='0.975')
solve_GD_button = Button(solve_GD_ax, 'Solve by gradient descent', color=axcolor, hovercolor='0.975')

def reset(event):
    """Button callback: reset the w0 slider, then the w1 slider."""
    for slider in (sw0, sw1):
        slider.reset()

def toggle_gt(event):
    """Button callback: flip the visibility of the two ground truth boxes."""
    for secret_ax in (ground_truth_ax_w0, ground_truth_ax_w1):
        currently_shown = secret_ax.get_visible()
        secret_ax.set_visible(not currently_shown)

def solve_normal_equation(event):
    """
    Button callback: fit the regressor in closed form and show the result.

    The sliders are moved to the fitted weights while lock_update is set, so
    that update() ignores these programmatic changes; the figure is then
    redrawn once with the fitted weights.
    """
    global lock_update

    best_w0, best_w1 = regressor.solve_normal_equation()

    lock_update = True
    for slider, value in ((sw0, best_w0), (sw1, best_w1)):
        slider.set_val(value)
    lock_update = False

    update_with_params(best_w0, best_w1)

# connect the buttons of the bottom row to their callbacks
# (the gradient descent button is connected further down, after its callback)
reset_button.on_clicked(reset)
solve_NE_button.on_clicked(solve_normal_equation)
toggle_secret_button.on_clicked(toggle_gt)

# radio buttons that switch the noise on the training targets on and off
noise_switch_ax = plt.axes([0.00, 0.7, 0.15, 0.15], facecolor=axcolor)
noise_switch = RadioButtons(noise_switch_ax, ('Noise on', 'Noise off'), active=0)


# text box whose content is parsed as the learning rate by the gradient callbacks
ax_learning_rate = plt.axes([0.12, 0.57, 0.05, 0.05])
input_learning_rate = TextBox(ax_learning_rate, "Learning rate ", "0.01", color=axcolor)

# text boxes for grad w0 and grad w1
# current_line is the bottom edge of the widget that is placed next
current_line = 0.45
ax_loss_grad_w0_label = plt.axes([0.12, current_line, 0.05, 0.05])
current_line -= 0.05
ax_loss_grad_w1_label = plt.axes([0.12, current_line, 0.05, 0.05])
current_line -= 0.21
grad_step_button_ax = plt.axes([0.00, current_line, 0.15, 0.06])

# filled by update_with_params() with the rounded gradient values
loss_label_grad_w0 = TextBox(ax_loss_grad_w0_label, "d Loss / d w0 ", "0.0")
loss_label_grad_w1 = TextBox(ax_loss_grad_w1_label, "d Loss / d w1 ", "0.0")


grad_button = Button(grad_step_button_ax, 'Gradient step', color=axcolor, hovercolor='0.975')

# the "secret", i.e., the true parameters used to generate the training data
# (hidden initially; toggle_gt() flips the visibility of both boxes)
ground_truth_ax_w0 = plt.axes([0.07, 0.33, 0.05, 0.05], facecolor=axcolor)
ground_truth_label_w0 = TextBox(ground_truth_ax_w0, "w0 ", str(w0_true))
ground_truth_ax_w0.set_visible(False)

ground_truth_ax_w1 = plt.axes([0.07, 0.28, 0.05, 0.05], facecolor=axcolor)
ground_truth_label_w1 = TextBox(ground_truth_ax_w1, "w1 ", str(w1_true))
ground_truth_ax_w1.set_visible(False)

def take_gradient_step(event):
  """
  Button callback: perform a single gradient step and show the result.

  The learning rate is parsed from the learning rate text box. After the
  step the sliders are moved to the new weights, so that a later slider
  change starts from the stepped weights instead of overwriting them with
  the slider positions from before the step.
  """
  global lock_update
  learning_rate = float(input_learning_rate.text)
  regressor.step_gradient(learning_rate)

  # lock_update makes update() ignore the programmatic slider changes
  lock_update = True
  sw0.set_val(regressor.w0)
  sw1.set_val(regressor.w1)
  lock_update = False

  update_with_params(regressor.w0, regressor.w1)
# perform one gradient step per click on the 'Gradient step' button
grad_button.on_clicked(take_gradient_step)

def solve_gradient_descent(event):
  """
  Button callback: fit the regressor by gradient descent and show the result.

  The learning rate is parsed from the learning rate text box. The sliders
  are moved to the fitted weights while lock_update is set, so that update()
  ignores these programmatic changes; the figure is then redrawn once.
  """
  global lock_update

  step_size = float(input_learning_rate.text)
  fitted_w0, fitted_w1 = regressor.solve_gradient_descent(step_size)

  lock_update = True
  for slider, value in ((sw0, fitted_w0), (sw1, fitted_w1)):
    slider.set_val(value)
  lock_update = False

  update_with_params(fitted_w0, fitted_w1)

# run the full gradient descent on a click on 'Solve by gradient descent'
solve_GD_button.on_clicked(solve_gradient_descent)

def noise_switch_func(label):
    """
    Radio button callback: switch the target noise on or off.

    Noise is enabled exactly when the selected label is 'Noise on'. The
    training set is rebuilt, handed to the regressor, and the figure is
    refreshed through update().
    """
    global use_noise
    use_noise = label == 'Noise on'

    recreate_training_set()
    regressor.train_X, regressor.train_t = train_x, train_t

    update(None)

# rebuild the training set whenever the noise radio buttons change
noise_switch.on_clicked(noise_switch_func)
# legend location code 2 is 'upper left'
ax.legend(loc=2)
# initial draw, so that loss and gradient boxes show values for the start parameters
update_with_params(w0, w1)
plt.show()

