In [None]:
%matplotlib inline

import matplotlib.pyplot as plt

import numpy as np

import torch
from torch import Tensor
from torch.nn import Module
from torch.nn.parameter import Parameter

from boundary import plot_decision_boundary
from gaussians import n_gaussians

## Data set

First we generate a toy data set for binary classification. Each class has its
own centre point, and its instances are drawn from a Gaussian distribution around
that centre.

In [None]:
# Centre point of each class (one row per class) and the matching class
# labels. Both splits below are generated from the same settings.
CLASS_CENTERS = [[0.8, 0.2], [0.2, 0.8]]
CLASS_LABELS = [0, 1]

# Training split: converted to tensors so it can be fed to the model.
X_train, Y_train = n_gaussians(
    np.array(CLASS_CENTERS),
    np.array(CLASS_LABELS, dtype=np.float32),
    200,
    stddev=0.3,
)
X_train = torch.tensor(X_train, dtype=torch.float)
Y_train = torch.tensor(Y_train, dtype=torch.int64)

# Validation split: left exactly as `n_gaussians` returns it.
X_validate, Y_validate = n_gaussians(
    np.array(CLASS_CENTERS),
    np.array(CLASS_LABELS, dtype=np.float32),
    200,
    stddev=0.3,
)

Let's plot the training instances to get a better idea of the distribution
of the data.

In [None]:
# First feature on the x-axis, second feature on the y-axis; each point is
# coloured by its class label.
plt.scatter(
    x=X_train[:, 0],
    y=X_train[:, 1],
    c=Y_train,
    cmap=plt.cm.Spectral,
)

We will create a simple linear classifier without a probabilistic interpretation.
The model's outputs are just the raw values of $\mathbf{w}\cdot\mathbf{x} + b$.
Classification is performed by checking on which side of the decision boundary
($\mathbf{w}\cdot\mathbf{x} + b = 0$) a data point lies.

In [None]:
class SimpleLinear(torch.nn.Module):
    """Linear binary classifier without a probabilistic interpretation.

    The model computes the raw activation ``w . x + b`` for every instance.
    `classify` maps the sign of that activation to a class label.
    """

    def __init__(self, n_features: int):
        """Create randomly initialised weights and bias.

        Args:
            n_features: Number of input features, i.e. the length of the
                weight vector.
        """
        super().__init__()
        # The `Parameter` class wraps the parameters and registers them
        # so that we can e.g. get them through the `parameters` method
        # of module.
        self.weights = Parameter(torch.randn(n_features))
        self.bias = Parameter(torch.randn(()))

    def classify(self, x: Tensor) -> Tensor:
        """Assign a class label to each instance in `x`.

        Args:
            x: Instances laid out as `[n_instances, n_features]`.

        Returns:
            A tensor holding 0 where the activation is negative and 1
            everywhere else (including an activation of exactly zero).
        """
        # Call the module itself rather than `forward` directly, so that
        # hooks registered on the module are run as well.
        activation = self(x)
        return torch.where(activation < 0, 0, 1)

    def forward(self, x: Tensor) -> Tensor:
        """Compute the raw activation ``x @ w + b``.

        Args:
            x: Instances laid out as `[n_instances, n_features]`.

        Returns:
            One raw activation per instance.
        """
        # Note that we flip `w` and `x` here. The reason is that `x` is
        # not a single instance, but can be multiple instances and is
        # therefore laid out as a matrix `[n_instances, n_features]`. The
        # `@` operator requires that the last dimension of the first
        # operand (the features of `x`) has the same size as the first
        # dimension of the second operand (the length of `w`). We will
        # discuss why this is the case in the lecture on matrix
        # multiplication.
        return x @ self.weights + self.bias

Such a linear classifier can be trained with [hinge loss](https://en.wikipedia.org/wiki/Hinge_loss).

In [None]:
class HingeLoss(torch.nn.Module):
    """Mean hinge loss between raw classifier scores and class labels."""

    def forward(self, predictions: Tensor, targets: Tensor):
        """Return the mean of ``max(0, 1 - t * p)`` over all elements.

        Args:
            predictions: Raw (unsquashed) scores of the classifier.
            targets: Class labels. Entries equal to 0 are treated as -1;
                all other entries are used as they are.

        Returns:
            A scalar tensor holding the average hinge loss.
        """
        # Build a new tensor with every 0 label replaced by -1, so the
        # caller's `targets` tensor is never modified.
        signed_targets = targets.masked_fill(targets == 0, -1)
        # The margin is positive when a score is on the wrong side of the
        # boundary or closer than 1 to it on the correct side.
        margins = 1.0 - signed_targets * predictions
        return torch.maximum(margins, torch.tensor(0.)).mean()
        

In [None]:
# `SimpleLinear` draws its initial weights and bias with `torch.randn`.
# Seeding torch's global RNG first makes the initialisation, and therefore
# the training run below, repeatable whenever this cell is re-run.
torch.manual_seed(0)

# One weight per input feature (the second dimension of the training data).
model = SimpleLinear(X_train.shape[1])
loss_function = HingeLoss()

In [None]:
# Plain stochastic gradient descent over all registered parameters of the
# model (its weights and bias), using a learning rate of 0.5.
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.5)

In [None]:
# Full-batch training: every step uses the complete training set.
for step in range(100):
    # Gradients accumulate across `backward` calls, so clear them first.
    optimizer.zero_grad()
    loss = loss_function(model(X_train), Y_train)
    loss.backward()
    optimizer.step()

    # Accuracy is measured after the parameter update. No gradients are
    # needed for this, so tracking is switched off.
    with torch.no_grad():
        acc = model.classify(X_train).eq(Y_train).to(torch.float).mean()

    # `.item()` extracts plain Python floats, so the log shows numbers
    # rather than tensor reprs such as `tensor(..., grad_fn=...)`.
    print(f"Step: {step}, loss: {loss.item():.4f}, acc: {acc.item():.4f}")


In [None]:
# A single axes on an 8x6 inch figure.
fig, sub = plt.subplots(figsize=(8, 6))


def classify(x):
    """Classify `x` with the trained model and return the labels as NumPy.

    The input is converted to a float tensor before it is passed to the
    model, and the resulting label tensor is converted back to an array.
    """
    instances = torch.tensor(x, dtype=torch.float)
    labels = model.classify(instances)
    return labels.detach().numpy()


# The plotted region spans the extent of the validation data in both
# feature dimensions.
first_feature = X_validate[:, 0]
second_feature = X_validate[:, 1]

plot_decision_boundary(
    sub,
    classify,
    X_validate,
    Y_validate,
    batch_size=X_validate.shape[0],
    x_range=(first_feature.min(), first_feature.max()),
    y_range=(second_feature.min(), second_feature.max()),
)