<a href="https://colab.research.google.com/github/asia281/dnn2022/blob/main/Asia_of_Bootcamp_ML_test_task_3_students_version.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Generating the data
Note that data / ml problem definded here is quite hard - we will overfit for sure



In [1]:
import numpy as np
from sklearn.datasets import load_diabetes
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
from collections import defaultdict
from sklearn.model_selection import KFold
from typing import Dict, List

SEED = 1337
np.random.seed(SEED)

solution = np.random.randn(701)
x_all = np.random.randn(1000, len(solution) - 1)
y_all = np.dot(x_all, solution[:-1]) + solution[-1]
y_all *= 10
x_all += np.random.randn(*x_all.shape) * 2


print(x_all.shape, y_all.shape)

(1000, 700) (1000,)


## Helper plotting function
No need to read

In [2]:
def plot_losses(all_losses):
    if len(all_losses.shape) == 2:
        all_losses = all_losses[np.newaxis]
    epochs = list(range(all_losses.shape[1]))

    loss = np.mean(all_losses, axis=0)
    loss_min = np.amax(all_losses, axis=0)
    loss_max = np.amin(all_losses, axis=0)

    def get_go(name, color, i):
        return [
            go.Scatter(
                x=epochs,
                y=list(loss[:, i]),
                line=dict(color=f'rgb({color})'),
                mode='lines',
                name=name,
            ),
            go.Scatter(
                x=epochs + epochs[::-1], # x, then x reversed
                y=list(loss_max[:, i]) + list(loss_min[:, i])[::-1], # upper, then lower reversed
                fill='toself',
                fillcolor=f'rgba({color},0.2)',
                line=dict(color='rgba(255,255,255,0)'),
                hoverinfo="skip",
                showlegend=False
            )
        ]
    fig = go.Figure([
        *get_go("train loss", "0,100,80", 0),
        *get_go("val loss", "100,0,80", 1),
    ], layout=go.Layout(xaxis_title="Epoch", yaxis_title='Loss'))

    fig.show()

## Solution

In [3]:
def h(x, weights):
	return x @ weights

def l2_term(alpha, weights):
	# L2 regularization term, do not regularize bias!
	return 0.5 * alpha * np.sum(weights[:-1]**2)


def l1_term(alpha, weights):
	# L2 regularization term, do not regularize bias!
	return 0.5 * alpha * np.sum(np.abs(weights[:-1]))

def MSE_loss(h_x, y):
	return ((h_x - y).T @ (h_x -y)) / (2*y.shape[0])

def MSELoss(p, y):
  return np.mean((p - y) ** 2)

def compute_loss_and_gradients(weights, x, y, alpha):
	x = np.c_[x, np.ones(x.shape[0])]
	h_x = h(x, weights)
	gradient = (2/x.shape[0]) * (x.T @ (h_x - y)) + alpha * weights
	gradient[-1] -= alpha * weights[-1]
	loss = MSE_loss(h_x, y) + l2_term(alpha, weights)
	return loss, gradient

In [4]:
def train(x, y, x_val, y_val, epochs, lr, alpha):
    weights = np.random.randn(x.shape[1] + 1) # the last weight is the bias
    losses = np.zeros((epochs, 2))
    # losses[:, 0] - train losses
    # losses[:, 1] - val losses

    for i in range(epochs):
        losses[i, 0], grad = compute_loss_and_gradients(weights, x, y, alpha)
        losses[i, 1], _ = compute_loss_and_gradients(weights, x_val, y_val, alpha)
        print(f"Loss for epoch {i}: {losses[i, 0]}")
        weights -= lr * grad
    return weights, losses

In [5]:
# TODO
# Here score is not important, only for your convenience. The mean cross validation score (later) is important.
lr = 0.01
epochs = 50
alpha = 0.1

weights, losses = train(x_all[:500], y_all[:500], x_all[500:], y_all[500:], epochs=epochs, lr=lr, alpha=alpha)
plot_losses(losses)

Loss for epoch 0: 31664.461390752404
Loss for epoch 1: 22326.087736133522
Loss for epoch 2: 16930.563383618086
Loss for epoch 3: 13545.493657648605
Loss for epoch 4: 11269.360058417555
Loss for epoch 5: 9649.588663094311
Loss for epoch 6: 8442.898750002518
Loss for epoch 7: 7510.214707572538
Loss for epoch 8: 6767.600259957704
Loss for epoch 9: 6161.938215783572
Loss for epoch 10: 5658.194373673339
Loss for epoch 11: 5232.409545941507
Loss for epoch 12: 4867.6696928478705
Loss for epoch 13: 4551.695985447914
Loss for epoch 14: 4275.352385431496
Loss for epoch 15: 4031.6920393763075
Loss for epoch 16: 3815.330524844082
Loss for epoch 17: 3622.023249025747
Loss for epoch 18: 3448.373783057279
Loss for epoch 19: 3291.6282248317398
Loss for epoch 20: 3149.527349798712
Loss for epoch 21: 3020.198379990112
Loss for epoch 22: 2902.074432842005
Loss for epoch 23: 2793.8336517673224
Loss for epoch 24: 2694.3525624920403
Loss for epoch 25: 2602.669869788452
Loss for epoch 26: 2517.958026276354
L

In [6]:
def train_cross_validation(x, y, epochs, lr, alpha):
    n_splits=5
    np.random.seed(SEED)
    all_losses = np.zeros((n_splits, epochs, 2))

    for i, (train_ids, val_ids) in enumerate(KFold(n_splits=n_splits, shuffle=True, random_state=SEED).split(x, y)):
        weights, losses = train(x[train_ids], y[train_ids], x[val_ids], y[val_ids], epochs=epochs, lr=lr, alpha=alpha)
        all_losses[i] = losses

    plot_losses(all_losses)

    mean_train_loss = np.mean(losses[:, 0])
    mean_val_loss = np.mean(losses[:, 1])

    return mean_train_loss, mean_val_loss

In [7]:
mean_train_loss, mean_val_loss = train_cross_validation(x_all, y_all, epochs=epochs, lr=lr, alpha=alpha)
mean_train_loss, mean_val_loss

Loss for epoch 0: 25413.21432892204
Loss for epoch 1: 21161.095668368966
Loss for epoch 2: 18364.633068918585
Loss for epoch 3: 16417.443544616723
Loss for epoch 4: 14991.296305655502
Loss for epoch 5: 13900.993782956168
Loss for epoch 6: 13037.438772166744
Loss for epoch 7: 12333.532297028683
Loss for epoch 8: 11746.254976501654
Loss for epoch 9: 11246.935914072803
Loss for epoch 10: 10815.782632292956
Loss for epoch 11: 10438.696582087887
Loss for epoch 12: 10105.353909250196
Loss for epoch 13: 9808.009570684122
Loss for epoch 14: 9540.728471325245
Loss for epoch 15: 9298.87667597582
Loss for epoch 16: 9078.775790073192
Loss for epoch 17: 8877.462586404177
Loss for epoch 18: 8692.518266732195
Loss for epoch 19: 8521.944874646446
Loss for epoch 20: 8364.074308801379
Loss for epoch 21: 8217.500303371544
Loss for epoch 22: 8081.026864988338
Loss for epoch 23: 7953.628682595478
Loss for epoch 24: 7834.420369938044
Loss for epoch 25: 7722.63230727858
Loss for epoch 26: 7617.591471672069
L

(10091.374440854155, 38419.91784918172)

In [8]:
assert mean_val_loss < 58500