# Thinking in tensors, writing in PyTorch

A hands-on course by [Piotr Migdał](https://p.migdal.pl) (2019).

<a href="https://colab.research.google.com/github/stared/thinking-in-tensors-writing-in-pytorch/blob/master/5%20Nonlinear%20regression.ipynb" target="_parent">
    <img src="https://colab.research.google.com/assets/colab-badge.svg"/>
</a>

## Notebook 5: Non-linear regression

Very **Work in Progress**

![](https://upload.wikimedia.org/wikipedia/commons/thumb/d/d4/Correlation_examples2.svg/400px-Correlation_examples2.svg.png)


### Exercise

Which of the following can be described by linear regression:

* without any modifications,
* after rescaling *x* or *y*,
* cannot be described by linear regression?

**TODO**

* Prepare examples
* 1d function with nonlinearities (by hand and automatically)
* More advanced

**Datasets to consider**

* https://en.wikipedia.org/wiki/Flight_airspeed_record

**TODO later**

* livelossplot `plot_extrema` error
* drawing a plot 
* consider using [hiddenlayer](https://github.com/waleedka/hiddenlayer)

In [None]:
%matplotlib inline

from matplotlib import pyplot as plt

import torch
from torch import nn
from torch import tensor
from livelossplot import PlotLosses

In [None]:
# Toy dataset: 30 evenly spaced inputs on [-2, 2] stored as a (30, 1) column,
# and a 1-D target that is flat at 0, rises linearly from 0 to 1, then stays at 1.
X = torch.linspace(-2.0, 2.0, steps=30).unsqueeze(dim=1)
Y = torch.cat((
    torch.zeros(10),
    torch.linspace(0.0, 1.0, steps=10),
    torch.ones(10),
))
plt.plot(X[:, 0].numpy(), Y.numpy(), 'r.')

In [None]:
# Built-in affine layer: 1 input feature -> 1 output feature (with bias).
linear_model = nn.Linear(1, 1)

In [None]:
def train(X, Y, model, loss_function, optim, num_epochs):
    """Fit `model` to `(X, Y)` with full-batch steps while plotting progress.

    Every epoch performs one forward pass over all of `X`, one backward pass
    and one optimizer step, then redraws the livelossplot chart: the loss
    curve plus an extra panel comparing the current predictions with `Y`.

    Args:
        X: Input tensor; plotted on the x-axis after ``squeeze(1)``.
        Y: Target tensor, passed as second argument to `loss_function` and
            plotted as the ground truth.
        model: Callable mapping `X` to predictions.
        loss_function: Callable ``(prediction, target) -> scalar tensor``.
        optim: Optimizer exposing ``zero_grad()`` and ``step()``.
        num_epochs: Number of full-batch iterations to run.

    Returns:
        None. The parameters of `model` are updated in place.
    """

    def extra_plot(*args):
        # Extra livelossplot panel: ground truth vs. current model output.
        plt.plot(X.squeeze(1).numpy(), Y.numpy(), 'r.', label="Ground truth")
        plt.plot(X.squeeze(1).numpy(), model(X).detach().numpy(), '-', label="Model")
        plt.title("Prediction")
        plt.legend(loc='lower right')

    liveloss = PlotLosses(extra_plots=[extra_plot], plot_extrema=False)

    for epoch in range(num_epochs):
        Y_pred = model(X)
        loss = loss_function(Y_pred, Y)

        # Clear gradients *before* backward so that anything accumulated on the
        # parameters earlier (e.g. by a manual backward call) cannot leak into
        # the first update.
        optim.zero_grad()
        loss.backward()
        optim.step()

        liveloss.update({
            'loss': loss.item(),
        })
        liveloss.draw()

## Linear model

$$y = a x + b$$

In [None]:
class Linear(nn.Module):
    """Affine model ``y = a x + b`` with explicitly declared parameters.

    `layer_weights` has shape (1, 1) and `layer_bias` has shape (1,); both are
    initialised from a standard normal distribution.
    """

    def __init__(self):
        super().__init__()

        self.layer_weights = nn.Parameter(torch.randn(1, 1))
        self.layer_bias = nn.Parameter(torch.randn(1))

    def forward(self, x):
        """Return ``x @ layer_weights + layer_bias`` without the feature axis.

        Args:
            x: Tensor whose last dimension has size 1, e.g. shape (N, 1).

        Returns:
            Tensor with the trailing size-1 axis removed, e.g. shape (N,).
        """
        # squeeze(-1) removes only the trailing feature axis; a bare squeeze()
        # would also drop a batch axis of size 1 and return a 0-d tensor.
        return x.matmul(self.layer_weights).add(self.layer_bias).squeeze(-1)

In [None]:
# Hand-written linear model, optimised with plain SGD on the mean squared error.
# These three names are notebook globals reused by the cells below.
linear_model = Linear()
optim = torch.optim.SGD(linear_model.parameters(), lr=0.03)
loss_function = nn.MSELoss()

In [None]:
# Show the parameter tensors currently registered on linear_model.
list(linear_model.parameters())

In [None]:
# Forward pass over all inputs with the model's current parameters.
linear_model(X)

In [None]:
# 50 full-batch training steps of the linear model, with live plotting.
train(X, Y, linear_model, loss_function, optim, num_epochs=50)

## Nonlinear

$$ x \mapsto h \mapsto y$$

In [None]:
class Nonlinear(nn.Module):
    """One-hidden-layer ReLU network: ``x -> h -> y``.

    Parameters are declared by hand and drawn from a standard normal
    distribution:

    * `layer_1_weights` (1, hidden_size), `layer_1_bias` (hidden_size,)
    * `layer_2_weights` (hidden_size, 1), `layer_2_bias` (1,)

    Args:
        hidden_size: Number of hidden units.
    """

    def __init__(self, hidden_size=2):
        super().__init__()

        self.layer_1_weights = nn.Parameter(torch.randn(1, hidden_size))
        self.layer_1_bias = nn.Parameter(torch.randn(hidden_size))

        self.layer_2_weights = nn.Parameter(torch.randn(hidden_size, 1))
        self.layer_2_bias = nn.Parameter(torch.randn(1))

    def forward(self, x):
        """Apply affine -> ReLU -> affine and drop the output feature axis.

        Args:
            x: Tensor whose last dimension has size 1, e.g. shape (N, 1).

        Returns:
            Tensor with the trailing size-1 axis removed, e.g. shape (N,).
        """
        x = x.matmul(self.layer_1_weights).add(self.layer_1_bias)
        x = x.relu()
        x = x.matmul(self.layer_2_weights).add(self.layer_2_bias)
        # squeeze(-1) removes only the trailing feature axis; a bare squeeze()
        # would also drop a batch axis of size 1 and return a 0-d tensor.
        return x.squeeze(-1)

    def nonrandom_init(self):
        """Replace all parameters with fixed values for two hidden units.

        The values are assigned regardless of the `hidden_size` used at
        construction, so after this call the model always has two hidden
        units. Assignment goes through `.data`, so the Parameter objects
        themselves stay the same.
        """
        self.layer_1_weights.data = tensor([[1.1, 0.8]])
        self.layer_1_bias.data = tensor([0.5, -0.7])
        self.layer_2_weights.data = tensor([[0.3], [-0.7]])
        self.layer_2_bias.data = tensor([0.2])

In [None]:
# Two-hidden-unit ReLU network; nonrandom_init() replaces the random draw with
# fixed parameter values so that training starts from the same point each run.
nonlinear_model = Nonlinear(hidden_size=2)
nonlinear_model.nonrandom_init()

optim = torch.optim.SGD(nonlinear_model.parameters(), lr=0.2)
# Alternative optimizer, kept for experimentation:
# optim = torch.optim.Adam(nonlinear_model.parameters(), lr=0.1)
loss_function = nn.MSELoss()

In [None]:
# 200 full-batch training steps of the ReLU network on the ramp data.
train(X, Y, nonlinear_model, loss_function, optim, num_epochs=200)

## Other shapes and activations

In [None]:
# The models in this notebook return a 1-D tensor of shape (30,), so the target
# has to be 1-D as well. A (30, 1) target would make nn.MSELoss broadcast both
# tensors to (30, 30) and compare every prediction with every target value.
Y_sin = (2 * X).sin().squeeze(1)
plt.plot(X.squeeze().numpy(), Y_sin.numpy(), 'r.')

In [None]:
# Fit a randomly initialised ReLU network with 10 hidden units to the sine data.
# Warning:
# for 1-d problems it rarely works (often gets stuck in some local minimum).
# NOTE(review): the model's forward squeezes its output to 1-D, so Y_sin must be
# 1-D too; with a (30, 1) target nn.MSELoss would broadcast the two shapes,
# which could also explain poor fits here - confirm the shape of Y_sin.
nonlinear_model = Nonlinear(hidden_size=10)

optim = torch.optim.Adam(nonlinear_model.parameters(), lr=0.01)
loss_function = nn.MSELoss()
train(X, Y_sin, nonlinear_model, loss_function, optim, num_epochs=100)

In [None]:
class NonlinearSigmoid2(nn.Module):
    """One-hidden-layer network with a sigmoid after each of its two layers.

    Parameters are declared by hand and drawn from a standard normal
    distribution:

    * `layer_1_weights` (1, hidden_size), `layer_1_bias` (hidden_size,)
    * `layer_2_weights` (hidden_size, 1), `layer_2_bias` (1,)

    Args:
        hidden_size: Number of hidden units.
    """

    def __init__(self, hidden_size=2):
        super().__init__()

        self.layer_1_weights = nn.Parameter(torch.randn(1, hidden_size))
        self.layer_1_bias = nn.Parameter(torch.randn(hidden_size))

        self.layer_2_weights = nn.Parameter(torch.randn(hidden_size, 1))
        self.layer_2_bias = nn.Parameter(torch.randn(1))

    def forward(self, x):
        """Apply affine -> sigmoid -> affine -> sigmoid, then drop the last axis.

        Args:
            x: Tensor whose last dimension has size 1, e.g. shape (N, 1).

        Returns:
            Tensor with the trailing size-1 axis removed, e.g. shape (N,).
            The final sigmoid keeps every value between 0 and 1.
        """
        x = x.matmul(self.layer_1_weights).add(self.layer_1_bias)
        x = x.sigmoid()
        x = x.matmul(self.layer_2_weights).add(self.layer_2_bias)
        x = x.sigmoid()
        # squeeze(-1) removes only the trailing feature axis; a bare squeeze()
        # would also drop a batch axis of size 1 and return a 0-d tensor.
        return x.squeeze(-1)

In [None]:
# Second toy dataset: same 30 inputs on [-2, 2]; the 1-D target is a "bump"
# that equals 1 on the middle ten points and 0 elsewhere.
X1 = torch.linspace(-2.0, 2.0, steps=30).unsqueeze(dim=1)
Y1 = torch.cat((torch.zeros(10), torch.ones(10), torch.zeros(10)))
plt.plot(X1[:, 0].numpy(), Y1.numpy(), 'r.')

In [None]:
# Fit the double-sigmoid network (2 hidden units, random init) to the bump data
# using Adam for 100 full-batch steps.
nonlinear_model = NonlinearSigmoid2(hidden_size=2)
# Alternative optimizer, kept for experimentation:
# optim = torch.optim.SGD(nonlinear_model.parameters(), lr=0.1)
optim = torch.optim.Adam(nonlinear_model.parameters(), lr=0.1)
loss_function = nn.MSELoss()
train(X1, Y1, nonlinear_model, loss_function, optim, num_epochs=100)

## Nonlinear model - by hand

In [None]:
# Fresh ReLU network with two hidden units; its random parameters are
# overwritten by hand in the cells that follow.
my_nonlinear_model = Nonlinear(hidden_size=2)

In [None]:
# First layer set by hand: the two hidden units become relu(x + 1) and relu(x - 1).
my_nonlinear_model.layer_1_weights.data = tensor([[1. , 1.]])
my_nonlinear_model.layer_1_bias.data = tensor([1. , -1.])

In [None]:
# Hidden-layer activations for every input: same computation as the first two
# lines of Nonlinear.forward (affine map followed by ReLU).
X.matmul(my_nonlinear_model.layer_1_weights).add(my_nonlinear_model.layer_1_bias).relu()

In [None]:
# Second layer set by hand: output = 0.5 * h1 - 0.5 * h2 with zero bias.
# Assuming the first layer still holds relu(x + 1) and relu(x - 1), this gives
# 0 for x <= -1, a straight ramp on [-1, 1] and 1 for x >= 1.
my_nonlinear_model.layer_2_weights.data = tensor([[0.5], [-0.5]])
my_nonlinear_model.layer_2_bias.data = tensor([0.])

In [None]:
# Full forward pass of the hand-set network on all inputs.
my_nonlinear_model(X)

In [None]:
# Compare the data (red dots) with the output of the hand-set network (line).
plt.plot(X.squeeze(1).numpy(), Y.numpy(), 'r.')
# detach() is needed because the model output is part of the autograd graph.
plt.plot(X.squeeze(1).numpy(), my_nonlinear_model(X).detach().numpy(), '-')