In [None]:
# If you're running it in Google Colab, please run this first
# !wget https://raw.githubusercontent.com/dvgodoy/PyTorch101_AI_Plus/main/v4.py -O v4.py

# Exercise #1

In [None]:
import pandas as pd
import torch
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
plt.style.use('fivethirtyeight')

In [None]:
# Auto MPG data hosted by the UCI repository (the commented-out URL is an alternative mirror).
url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'
# url = 'https://raw.githubusercontent.com/dvgodoy/PyTorch101_AI_Plus/main/mpg/auto-mpg.data'

# Column labels are supplied explicitly through `names`.
column_names = ['mpg', 'cyl', 'disp', 'hp', 'weight', 'acc', 'year', 'origin']

df = pd.read_csv(
    url,
    names=column_names,
    na_values='?',          # '?' entries are read as missing values (NaN)
    comment='\t',           # everything from the first tab on a line is ignored
    sep=' ',                # fields are separated by spaces...
    skipinitialspace=True,  # ...and spaces right after a delimiter are skipped
)

## Dataset

In [None]:
df_two = ...

In [None]:
x = ...
y = ...

## Train-Validation Split

In [None]:
x_train, x_val, y_train, y_val = ...

## Tensors

### Data

In [None]:
# TODO(exercise): placeholder - the device the tensors below should be sent to
device = ...

# Our data was in Numpy arrays, but we need to transform them into PyTorch's Tensors
# TODO(exercise): placeholders - presumably built from x_train / y_train (confirm)
x_train_tensor = ...
y_train_tensor = ...

# TODO(exercise): placeholders - presumably built from x_val / y_val (confirm)
x_val_tensor = ...
y_val_tensor = ...

### Parameters

In [None]:
torch.manual_seed(13)
b = ...
w = ...

In [None]:
b, w

# Exercise #2.1

In [None]:
# Downloads a script into Colab
#!curl https://raw.githubusercontent.com/dvgodoy/PyTorch101_AI_Plus/main/gradient_descent.py --output gradient_descent.py

In [None]:
from plotly.offline import iplot, init_notebook_mode
from ipywidgets import VBox, IntSlider, FloatSlider, Dropdown
from gradient_descent import *
import ipywidgets as widgets

init_notebook_mode(connected=False)

In [None]:
w0 = FloatSlider(description='Start', value=-1.5, min=-2, max=2, step=.05)
functype = Dropdown(description='Function', options=['Convex', 'Non-Convex'], value='Convex')
lrate = FloatSlider(description='Learning Rate', value=.05, min=.05, max=1.1, step=.05)
n_steps = IntSlider(description='# updates', value=10, min=10, max=20, step=1)

def f(functype, lrate, w0, n_steps):
    """Rebuild and render the gradient-descent figure for the current widget values.

    Used as the callback of `widgets.interactive_output`, which calls it with
    the current value of each control.

    Args:
        functype: value of the 'Function' dropdown ('Convex' or 'Non-Convex').
        lrate: value of the 'Learning Rate' slider.
        w0: value of the 'Start' slider.
        n_steps: value of the '# updates' slider.

    Returns:
        None. The figure is rendered as a side effect.
    """
    # build_fig is not defined in this notebook; presumably it comes from the
    # `from gradient_descent import *` star import - confirm.
    fig = build_fig(functype, lrate, w0, n_steps)
    # iplot() renders the figure itself and returns None, so wrapping it in
    # display() would additionally display a stray "None" under the plot.
    iplot(fig)

configure_plotly_browser_state()
out = widgets.interactive_output(f, {'functype': functype, 'lrate': lrate, 'w0': w0, 'n_steps': n_steps})

### Playing with Learning Rates

Let's work through **an interactive example**!

We start at a (not so) **random initial value** of our **weight**, say, -1.5. It has a corresponding **loss** of 2.25.

You can choose between **two functions**:
- **convex**, meaning, its **loss is well-behaved** and **gradient descent is guaranteed to converge**
- **non-convex**, meaning, **all bets are off**!

Every time you **take a step**, the plot gets updated:

- The **red vector** is our update to the **weight**, that is, **learning rate times gradient**.

- The **gray vector** shows **how much the cost changes** given our update.

- If you divide their lengths, **gray over red**, it will give you the **approximate gradient**.

In [None]:
configure_plotly_browser_state()
VBox((w0, functype, lrate, n_steps, out))

#### Questions:

1. Choose a different learning rate, reset the plot and follow some steps. Observe the path it traces and check if it hits the minimum. Try different learning rates, see what happens if you choose a really big value for it.


2. Then, change the function to ***Non-Convex*** and set the learning rate to its minimum before following some steps. Where does it converge to? Try resetting and observing its path. Does it reach the global minimum? Try different learning rates and see what happens then.

# Exercise #2.2

In [None]:
#!curl https://raw.githubusercontent.com/dvgodoy/PyTorch101_AI_Plus/main/scaling.py --output scaling.py

In [None]:
from plotly.offline import iplot, init_notebook_mode
from ipywidgets import VBox, IntSlider, FloatSlider, Dropdown
from scaling import *
import ipywidgets as widgets

init_notebook_mode(connected=False)

x1, x2, y = data()
mygd = plotGradientDescent(x1, x2, y)
fig, update, (lr, scaled, epochs, batch_size, m1, m2) = build_figure(mygd)

def f(lr, scaled, epochs, batch_size, m1, m2):
    """Refresh and render the scaling / gradient-descent figure for the current widget values.

    Used as the callback of `widgets.interactive_output`, which calls it with
    the current value of each control returned by `build_figure`.

    Args:
        lr, scaled, epochs, batch_size, m1, m2: current control values,
            forwarded unchanged (and in this order) to `update`.

    Returns:
        None. The module-level `fig` is updated and rendered as a side effect.
    """
    # `update` and `fig` are the module-level objects returned by build_figure(mygd)
    update(lr, scaled, epochs, batch_size, m1, m2)
    # iplot() renders the figure itself and returns None, so wrapping it in
    # display() would additionally display a stray "None" under the plot.
    iplot(fig)

configure_plotly_browser_state()
out = widgets.interactive_output(f, {'lr': lr, 'scaled': scaled, 'epochs': epochs, 'batch_size': batch_size, 'm1': m1, 'm2': m2})

# Exercise #2.2

There are two parameters, x1 and x2, and we're using Gradient Descent to try to reach the ***minimum*** indicated by the ***star***.

The dataset has only 50 data points.

The controls below allow you to:
- adjust the learning rate
- scale the features x1 and x2
- set the number of epochs (steps)
- batch size (since the dataset has 50 points, a size of 64 means using ***all*** points)
- starting point for x1 and x2 (initialization)

Use the controls to play with different configurations and answer the questions below.

In [None]:
configure_plotly_browser_state()
VBox((lr, scaled, epochs, batch_size, m1, m2, out))

#### Questions

1. ***Without scaling features***, start with the ***learning rate at minimum***:
    - change the batch size - try ***stochastic***, ***batch*** and ***mini-batch*** sizes - what happens to the trajectory? Why?
    - keeping ***maximum batch size***, increase ***learning rate*** to 0.000562 (three notches) - what happens to the trajectory? Why?
    - now reduce gradually ***batch size*** - what happens to the trajectory? Why?
    - go back to ***maximum batch size*** and, this time, increase ***learning rate*** a bit further - what happens to the trajectory? Why?
    - experiment with different settings (yet ***no scaling***), including initial values ***x1*** and ***x2*** and try to get as close as possible to the ***minimum*** - how hard is it?
    - what was the ***largest learning rate*** you managed to use successfully?


2. Check ***Scale Features*** - what happened to the surface (cost)? What about its level (look at the scale)?


3. ***Using scaled features***, answer the same items as in ***question 1***.


4. How do you compare the ***performance*** of gradient descent with and without ***scaling***? Why did this happen? (think about the partial derivatives with respect to each feature, especially without scaling)

# Exercise #2.3

In [None]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

## Scaling / Standardizing

In [None]:
...

x_train_sc = ...
x_val_sc = ...

## Training Loop

In [None]:
# Defines number of epochs
n_epochs = 100
# TODO(exercise): placeholder - the learning rate
# NOTE: this rebinds `lr`, which in Exercise #2.2 held the widget unpacked from
# build_figure(); re-running that exercise's display cell afterwards would pick
# up this value instead of the widget.
lr = ...

# Step 0
# Seeds NumPy's global random generator so the run is reproducible
np.random.seed(42)
# TODO(exercise): placeholder - presumably the random initialization of `b` and `w`,
# which the next cell prints (confirm)
...

for epoch in range(n_epochs):
    # Step 1
    # TODO(exercise): placeholder - the model's predictions
    yhat = ...
    
    # Step 2
    # TODO(exercise): placeholders - the prediction error and the loss
    error = ...
    loss = ...

    # Step 3    
    # TODO(exercise): placeholders - the gradients for `b` and `w`
    b_grad = ...
    w_grad = ...
    
    # Step 4
    # TODO(exercise): placeholder - presumably the parameter update using `lr`
    # and the gradients above (confirm)
    ...

In [None]:
print(b, w)

## Sanity Check

In [None]:
# Reference solution: ordinary least squares on the same scaled training data.
# fit() returns the fitted estimator, so it can be chained onto the constructor.
linr = LinearRegression().fit(x_train_sc, y_train)
# Intercept and first coefficient row, to compare against the `b` and `w` printed above
print(linr.intercept_, linr.coef_[0])

# Exercise #3

In [None]:
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler

## Dataset

In [None]:
x_train_tensor = ...
y_train_tensor = ...

x_val_tensor = ...
y_val_tensor = ...

## Training Loop

In [None]:
# TODO(exercise): placeholder - the learning rate
lr = ...
n_epochs = 100

# Step 0
# Seeds PyTorch's random generator so the run is reproducible
torch.manual_seed(42)
# TODO(exercise): placeholders - the parameters `b` and `w` (printed after the loop)
b = ...
w = ...

# TODO(exercise): placeholders - the loss function and the optimizer
loss_fn = ...
optimizer = ...

for epoch in range(n_epochs):
    # Step 1
    # TODO(exercise): placeholder - the model's predictions
    yhat = ...
    
    # Step 2
    # TODO(exercise): placeholder - the loss
    loss = ...

    # Step 3
    # TODO(exercise): placeholder
    ...

    # Step 4
    # TODO(exercise): placeholder
    ...
    
# Shows the parameters after the last epoch
print(b, w)

# Exercise #4

In [None]:
def make_train_step_fn(model, loss_fn, optimizer):
    """Build a function that performs one training step for `model`.

    Args:
        model: object exposing `.train()`; presumably a `torch.nn.Module` (confirm).
        loss_fn: captured by the closure; unused until the placeholders are filled in.
        optimizer: captured by the closure; unused until the placeholders are filled in.

    Returns:
        `perform_train_step_fn(x, y)`, which sets the model to train mode, runs
        Steps 1-4 (exercise placeholders) and returns `loss.item()`.

    Note:
        As shipped, `loss` is still the `...` placeholder, so calling the returned
        function fails at `loss.item()` until Step 2 is implemented.
    """
    # Builds function that performs a step in the train loop
    def perform_train_step_fn(x, y):
        # Sets model to TRAIN mode
        model.train()
        
        # Step 1
        # TODO(exercise): placeholder - the model's predictions
        yhat = ...
        # Step 2
        # TODO(exercise): placeholder - the loss
        loss = ...
        # Step 3
        # TODO(exercise): placeholder
        ...
        # Step 4
        # TODO(exercise): placeholder
        ...
        
        # Returns the loss
        return loss.item()
    
    # Returns the function that will be called inside the train loop
    return perform_train_step_fn

## Model Configuration

In [None]:
torch.manual_seed(13)
model = ...
loss_fn = ...
optimizer = ...

## Model Training

In [None]:
# TODO(exercise): placeholder - presumably built with
# make_train_step_fn(model, loss_fn, optimizer) (confirm)
train_step_fn = ...

# TODO(exercise): placeholder - the number of epochs
n_epochs = ...

# One training loss per epoch
losses = []
# For each epoch...
for epoch in range(n_epochs):
    # Performs one train step and returns the corresponding loss
    # TODO(exercise): placeholder
    loss = ...
    losses.append(loss)
    
# Checks model's parameters
print(model.state_dict())

# Exercise #5

In [None]:
from torch.utils.data import TensorDataset, DataLoader, random_split

In [None]:
def make_val_step_fn(model, loss_fn):
    """Build a function that performs one validation step for `model`.

    Args:
        model: object exposing `.eval()`; presumably a `torch.nn.Module` (confirm).
        loss_fn: captured by the closure; unused until the placeholders are filled in.

    Returns:
        `perform_val_step_fn(x, y)`, which sets the model to eval mode, runs
        Steps 1-2 (exercise placeholders) and returns `loss.item()`.

    Note:
        As shipped, `loss` is still the `...` placeholder, so calling the returned
        function fails at `loss.item()` until Step 2 is implemented.
    """
    # Builds function that performs a step in the validation loop
    def perform_val_step_fn(x, y):
        # Sets model to EVAL mode
        model.eval()
        
        # Step 1
        # TODO(exercise): placeholder - the model's predictions
        yhat = ...
        # Step 2
        # TODO(exercise): placeholder - the loss
        loss = ...

        # Returns the loss
        return loss.item()
    
    # Returns the function that will be called inside the validation loop
    return perform_val_step_fn

## Data Preparation

In [None]:
train_dataset = ...
val_dataset = ...

# builds a loader of each set
train_loader = ...
val_loader = ...

## Model Configuration

In [None]:
torch.manual_seed(13)
model = ...
loss_fn = ...
optimizer = ...

## Model Training

In [None]:
# TODO(exercise): placeholder - presumably built with make_train_step_fn (confirm)
train_step_fn = ...

# TODO(exercise): placeholder - presumably built with make_val_step_fn (confirm)
val_step_fn = ...

# TODO(exercise): placeholder - the number of epochs
n_epochs = ...

# One average loss per epoch, for training and validation respectively
losses = []
val_losses = []

# NOTE: `np` is imported in the Exercise #2.3 cell, so that cell must have been run first
for epoch in range(n_epochs):
    mini_batch_losses = []
    # inner loop for mini batches
    # TODO(exercise): placeholder - should append one loss per mini-batch to
    # `mini_batch_losses`; while it is empty, np.mean() below returns nan
    ...

    # Epoch training loss = average over the mini-batch losses
    loss = np.mean(mini_batch_losses)
    losses.append(loss)
    
    # VALIDATION
    # no gradients in validation!
    with torch.no_grad():
        mini_batch_losses = []
        # TODO(exercise): placeholder - same as above, for the validation mini-batches
        ...

        # Epoch validation loss = average over the mini-batch losses
        val_loss = np.mean(mini_batch_losses)
        val_losses.append(val_loss) 

# Checks model's parameters
print(model.state_dict())

In [None]:
# Training and validation loss curves on the current axes
plt.plot(losses, label='Training Loss')
plt.plot(val_losses, label='Validation Loss')
# Log-scaled y-axis and axis labels, applied in one call on the current axes
plt.gca().set(yscale='log', xlabel='Epochs', ylabel='Loss')
plt.legend()

## StepByStep

In [None]:
torch.manual_seed(13)
model = ...
loss_fn = ...
optimizer = ...

In [None]:
from v4 import StepByStep
sbs = ...

In [None]:
fig = sbs.plot_losses()

In [None]:
yhat_train = sbs.predict(x_train_tensor)

In [None]:
# True vs. predicted values on a square 5x5 figure (a single Axes is the default grid)
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(y_train, yhat_train)
# Dashed black diagonal from (0, 0) to (45, 45): points on it are perfect predictions
ax.plot([0, 45], [0, 45], color='k', ls='--', lw=1)
# Same labels and 0-45 range on both axes
ax.set(xlabel='True', ylabel='Predicted', xlim=[0, 45], ylim=[0, 45])
ax.set_title('MPG')

In [None]:
from sklearn.metrics import r2_score
r2_score(y_train, yhat_train)

## Seven Features

In [None]:
# Keeps the target plus six features (every column except 'origin') and drops
# any row that has a missing value in one of them
df_seven = df.loc[:, ['mpg', 'cyl', 'disp', 'hp', 'weight', 'acc', 'year']].dropna()

# Features: 2D array with six columns; target: 2D array with a single column
# (selecting with a list keeps 'mpg' as a column rather than a flat vector)
# NOTE: `x`, `y` and every name assigned below rebind the variables of the
# earlier exercises - re-running earlier cells after this one uses this data.
x = df_seven.loc[:, ['cyl', 'disp', 'hp', 'weight', 'acc', 'year']].values
y = df_seven.loc[:, ['mpg']].values

# TODO(exercise): placeholder - the train/validation split of `x` and `y`
x_train, x_val, y_train, y_val = ...

scaler = StandardScaler()

# TODO(exercise): placeholders - the scaled features, presumably produced with `scaler` (confirm)
x_train_sc = ...
x_val_sc = ...

# TODO(exercise): placeholders - training tensors
x_train_tensor = ...
y_train_tensor = ...

# TODO(exercise): placeholders - validation tensors
x_val_tensor = ...
y_val_tensor = ...

# builds datasets
# TODO(exercise): placeholders
train_dataset = ...
val_dataset = ...

# builds a loader of each set
# TODO(exercise): placeholders
train_loader = ...
val_loader = ...

In [None]:
torch.manual_seed(13)
model = ...
loss_fn = ...
optimizer = ...

In [None]:
sbs = ...

In [None]:
fig = sbs.plot_losses()

In [None]:
yhat_train = sbs.predict(x_train_tensor)

In [None]:
plt.scatter(y_train, yhat_train)

In [None]:
r2_score(y_train, yhat_train)