In [1]:
import numpy as np
import pandas as pd
import torch

In [2]:
# Keep the first ten rows of the Pokemon table, ordered by defense.
# index_col=0 sets the index at read time; reset_index() turns it back into a
# regular column once the rows have been sorted.
df = pd.read_csv(
    "data/pokemon.csv",
    index_col=0,
    usecols=['name', 'defense', 'attack'],
).head(10).sort_values(by='defense').reset_index()

# Plain arrays for the regression: defense is the input, attack the target.
x, y = df['defense'].values, df['attack'].values

## GD with symbolic differentiation of the loss function

In [3]:
def gradient(x, y, w):
    """Derivative of the mean squared error of ``w * x`` against ``y`` w.r.t. ``w``.

    Evaluates ``2 * mean(x * (w * x - y))``.
    """
    residual = w * x - y  # prediction error for every observation
    weighted_residual = x * residual
    return 2 * weighted_residual.mean()

In [4]:
def gradient_descent(x, y, w, alpha, ϵ=2e-4, max_iterations=100, print_progress=10):
    """Gradient descent for optimizing slope in simple linear regression with no intercept.

    The gradient comes from the closed-form ``gradient(x, y, w)`` helper.

    Parameters
    ----------
    x, y : array-like
        Input and target values, passed straight through to ``gradient``.
    w : float
        Initial guess for the slope.
    alpha : float
        Learning rate; each step moves ``w`` by ``alpha * gradient(x, y, w)``.
    ϵ : float, default 2e-4
        Stopping tolerance: iteration ends once the absolute step size is
        no larger than ``ϵ``.
    max_iterations : int, default 100
        Upper bound on the number of update steps.
    print_progress : int, default 10
        Print ``w`` every ``print_progress`` iterations. ``0`` or ``None``
        turns off all printing.

    Returns
    -------
    float
        The slope after the last update step.
    """
    if print_progress:
        print(f"Iteration 0 \tw = {w:.3f}")
    iterations = 1  # number of the update about to be performed
    dw = 2 * ϵ      # any value above ϵ, so the first convergence check passes

    while abs(dw) > ϵ and iterations <= max_iterations:

        g = gradient(x, y, w)
        dw = alpha * g
        w -= dw

        # Guard the modulo: print_progress=0 would otherwise divide by zero.
        if print_progress and iterations % print_progress == 0:
            print(f"Iteration {iterations} \tw = {w:.3f}")

        iterations += 1

    # Hand the fitted slope back instead of leaving it visible only in the log.
    return float(w)

In [5]:
# Fit the slope from w = 0.5, logging every 10th iteration.
# The trailing semicolon keeps the notebook from echoing a return value, if any.
gradient_descent(
    x,
    y,
    w=0.5,
    alpha=1e-5,
    print_progress=10,
);

Iteration 0 	w = 0.500
Iteration 10 	w = 0.799
Iteration 20 	w = 0.885
Iteration 30 	w = 0.909
Iteration 40 	w = 0.916


## GD with automatic differentiation of the loss function

In [6]:
# Convert both arrays to torch tensors for the autograd version below.
# x and y are rebound to the converted values, so run this cell once per
# kernel session (NOTE(review): re-running feeds tensors back into torch.tensor).
x, y = torch.tensor(x), torch.tensor(y)

In [7]:
def loss(y, y_pred):
    """Mean squared error between targets ``y`` and predictions ``y_pred``."""
    residual = y_pred - y
    squared_error = residual ** 2
    return squared_error.mean()

In [8]:
def model(x, w):
    """Linear model without an intercept: scale the input ``x`` by the slope ``w``."""
    y_pred = x * w
    return y_pred

In [9]:
def gradient_descent(x, y, w, alpha, ϵ=2e-4, max_iterations=100, print_progress=10):
    """Gradient descent for optimizing slope in simple linear regression with no intercept.

    The gradient is obtained with autograd: each iteration evaluates
    ``loss(y, model(x, w))`` and calls ``backward()`` on it to fill ``w.grad``.

    Parameters
    ----------
    x, y : torch.Tensor
        Input and target values, passed to ``model`` and ``loss``.
    w : float or int
        Initial guess for the slope; it is converted to a tensor that
        tracks gradients.
    alpha : float
        Learning rate; each step moves ``w`` by ``alpha * w.grad``.
    ϵ : float, default 2e-4
        Stopping tolerance: iteration ends once the absolute step size is
        no larger than ``ϵ``.
    max_iterations : int, default 100
        Upper bound on the number of update steps.
    print_progress : int, default 10
        Print ``w`` every ``print_progress`` iterations. ``0`` or ``None``
        turns off all printing.

    Returns
    -------
    float
        The slope after the last update step.
    """
    if print_progress:
        print(f"Iteration 0 \tw = {w:.3f}")
    iterations = 1  # number of the update about to be performed
    dw = 2 * ϵ      # any value above ϵ, so the first convergence check passes

    w = torch.tensor(w)
    if not w.is_floating_point():
        # Only floating-point tensors can require gradients, so promote
        # integer starting values such as w=1.
        w = w.float()
    w.requires_grad_()

    while abs(dw) > ϵ and iterations <= max_iterations:

        y_pred = model(x, w)
        mse = loss(y, y_pred)  # argument order matches loss(y, y_pred)
        mse.backward()

        # Update w outside the autograd graph so the step itself is not tracked.
        with torch.no_grad():
            dw = alpha * w.grad
            w -= dw

        # backward() accumulates into w.grad, so clear it before the next pass.
        w.grad.zero_()

        # Guard the modulo: print_progress=0 would otherwise divide by zero.
        if print_progress and iterations % print_progress == 0:
            print(f"Iteration {iterations} \tw = {w:.3f}")

        iterations += 1

    # Hand the fitted slope back as a plain Python number.
    return w.item()
    

In [10]:
# Same starting point and learning rate as the analytic-gradient run above,
# now with gradients supplied by autograd.
# The trailing semicolon keeps the notebook from echoing a return value, if any.
gradient_descent(
    x,
    y,
    w=0.5,
    alpha=1e-5,
    print_progress=10,
);

Iteration 0 	w = 0.500
Iteration 10 	w = 0.799
Iteration 20 	w = 0.885
Iteration 30 	w = 0.909
Iteration 40 	w = 0.916
