# Introduction

This notebook is for generating plots for Chapter 4.2 (Experimental Results, Synthetic Data - Multiclass).

It generates plots of the decision regions of different linear multiclass classifiers for 2D data, learned with different loss functions and with post-hoc updates to the discriminant functions.

# Imports and set-up

In [None]:
import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt
from copy import deepcopy
import os

In [None]:
import matplotlib.pylab as pylab

# Use the same enlarged font size for the legend, axis labels, titles and tick labels.
params = {
    setting: "x-large"
    for setting in (
        "legend.fontsize",
        "axes.labelsize",
        "axes.titlesize",
        "xtick.labelsize",
        "ytick.labelsize",
    )
}
pylab.rcParams.update(params)

Use GPU if available

In [None]:
# Pick the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Report the chosen device under a short header and a 60-character rule.
print("Device", "-" * 60, device, sep="\n")

Create data directory, if it doesn't exist.

In [None]:
# Create the output directory for the saved figures. exist_ok=True makes this a
# no-op when the directory is already there, without a separate existence check
# that could race with another process creating it.
os.makedirs("../data", exist_ok=True)

# Generate the data

In [None]:
# Total number of samples to generate across all classes.
N = 2000

# Class priors, one entry per class: one class at 32/80, two at 16/80,
# two at 6/80 and four at 1/80.
class_frequencies = [
    32 / 80,
    16 / 80,
    16 / 80,
    6 / 80,
    6 / 80,
    1 / 80,
    1 / 80,
    1 / 80,
    1 / 80,
]

In [None]:
np.random.seed(2022)
covariance = np.identity(2) * 0.25

# A is the most dominant class, {B,C} are next most dominant class, then {D,E} then {F,G,H,I}
# Class means in label order (A=0, ..., I=8). The order also fixes the order in
# which samples are drawn from the seeded generator, so it must not be changed.
class_means = [
    [0, 0],  # A
    [0, 2],  # B
    [0, -2],  # C
    [2, 0],  # D
    [-2, 0],  # E
    [-2, 2],  # F
    [2, 2],  # G
    [-2, -2],  # H
    [2, -2],  # I
]
assert len(class_means) == len(class_frequencies), "one mean per class is required"

# Number of samples per class, computed once. round() (rather than truncation)
# guards against N * frequency landing just below an integer through float error.
class_counts = [int(round(N * frequency)) for frequency in class_frequencies]

# Draw each class from an isotropic Gaussian around its mean.
class_samples = [
    np.random.multivariate_normal(mean, covariance, count)
    for mean, count in zip(class_means, class_counts)
]
# Per-class arrays are kept under their own names for the plotting cell.
sampA, sampB, sampC, sampD, sampE, sampF, sampG, sampH, sampI = class_samples

X_values = np.concatenate(class_samples, axis=0)
# Integer label i repeated class_counts[i] times, in the same order as X_values.
y_flat = np.repeat(np.arange(len(class_counts), dtype=np.int64), class_counts)

In [None]:
# Shuffle samples and labels with one shared permutation so every row keeps its label.
# A fixed seed is used (instead of reseeding from OS entropy) so that the sample
# order is the same on every run.
np.random.seed(2022)
index_shuffle = np.arange(len(X_values))
np.random.shuffle(index_shuffle)

X_values = X_values[index_shuffle]
y_flat = y_flat[index_shuffle]

In [None]:
# Features: copy the NumPy samples into a float tensor, then move it to the device.
features_cpu = torch.tensor(X_values).type(torch.FloatTensor)
X_train = features_cpu.to(device)

# Labels: one column of integer class indices, moved to the same device.
labels_column = torch.from_numpy(y_flat).view(-1, 1)
y_train = labels_column.type(torch.LongTensor).to(device)

# Visualise the data

In [None]:
# Dotted black lines mark the decision boundaries of the Bayes classifier.
# Only the first line carries a label, so the legend gets a single entry for them.
plt.axvline(x=1, color="k", linestyle=":", label="Bayes classifier")
plt.axvline(x=-1, color="k", linestyle=":")
for boundary in (1, -1):
    plt.axhline(y=boundary, color="k", linestyle=":")
plt.xlim([-4, 4])
plt.ylim([-4, 4])

# One entry per marker style: (format string, legend label, samples drawn in that style).
# Within a group only the first sample set is labelled, to avoid duplicate legend entries.
class_groups = [
    ("cx", "Dominant", (sampA,)),
    (".y", "Medium", (sampB, sampC)),
    (".g", "Medium", (sampD, sampE)),
    ("sr", "Rare", (sampF, sampG, sampH, sampI)),
]
for marker_format, legend_label, members in class_groups:
    for position, points in enumerate(members):
        if position == 0:
            plt.plot(points[:, 0], points[:, 1], marker_format, label=legend_label)
        else:
            plt.plot(points[:, 0], points[:, 1], marker_format)

# Title, axis labels, tick sizes and legend.
plt.title("Generated data by class", fontsize=25)
plt.xlabel("x1", fontsize=20)
plt.ylabel("x2", fontsize=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.legend(loc="upper left", fontsize=15)

# Equal scaling on both axes, fixed to the [-4, 4] x [-4, 4] window.
plt.axis("scaled")
plt.axis([-4, 4, -4, 4])

# Square 8x8 inch figure, saved to disk before being displayed.
figure = plt.gcf()
figure.set_size_inches(8, 8)
plt.savefig("../data/imbalMultiData", dpi=1500)
plt.show()

# Model training functions

In [None]:
# Now there are 9 classes, so 9 logits/output nodes


class LinearClassifier(nn.Module):
    """
    Linear multiclass classifier: a single affine layer giving one score per class.

    Args:
        input_dim (int): Number of input features. Defaults to 2.
        output_dim (int): Number of output classes (scores). Defaults to 9.

    Attributes:
        linear (nn.Linear): The affine layer mapping features to class scores.

    """

    def __init__(self, input_dim=2, output_dim=9):
        super().__init__()
        self.linear = nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, x):
        """
        Compute the class scores for a batch of inputs.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, input_dim).

        Returns:
            torch.Tensor: Scores of shape (batch_size, output_dim), as returned
            by the linear layer (no activation is applied).
        """
        return self.linear(x)

In [None]:
def modelTrain(
    model: nn.Module,
    optimizer: torch.optim.Optimizer,
    criterion,
    X_train: torch.Tensor,
    y_train: torch.Tensor,
    num_epochs: int = 50000,
) -> None:
    """
    Train a PyTorch model in place using a given optimizer and loss criterion.

    Every epoch is one full-batch update: the gradients are zeroed, the whole of
    X_train is passed through the model, the loss is back-propagated and the
    optimizer takes a single step.

    Args:
        model (nn.Module): The PyTorch model to be trained.
        optimizer (torch.optim.Optimizer): The optimizer used for updating model parameters.
        criterion: The loss criterion, called as criterion(output, targets).
        X_train (torch.Tensor): The input data tensor for training.
        y_train (torch.Tensor): The target labels tensor for training. It is
            flattened with view(-1) before being passed to the criterion.
        num_epochs (int): Number of full-batch updates to perform. Defaults to
            50000; zero or a negative value performs no update.

    Returns:
        None
    """
    # The flattened targets do not change between epochs, so build them once.
    targets = y_train.view(-1)

    for _ in range(num_epochs):
        # zero the gradients
        optimizer.zero_grad()

        # forward
        output = model(X_train)
        loss = criterion(output, targets)

        # backward
        loss.backward()

        # optimize
        optimizer.step()

# Train the models

In [None]:
# Seed PyTorch's generator before constructing the model so that the randomly
# initialised weights (the common starting point copied for every training run)
# are the same each time the notebook is executed.
torch.manual_seed(2022)
model_orig = LinearClassifier().to(device)

## Vanilla ERM

In [None]:
# Unweighted cross-entropy objective.
criterion_erm = nn.CrossEntropyLoss()

# Train on a private copy so the shared initial model stays untouched.
model_erm = deepcopy(model_orig)
optimizer_erm = torch.optim.SGD(params=model_erm.parameters(), lr=0.01)

In [None]:
# Fit model_erm in place with the unweighted cross-entropy loss (vanilla ERM).
modelTrain(model_erm, optimizer_erm, criterion_erm, X_train, y_train)

## Weighted ERM

The weights are inverse class probabilities

In [None]:
# Per-class loss weights: the reciprocal of each class frequency, as a float
# tensor on the training device.
inverse_frequency_weights = torch.tensor(np.reciprocal(class_frequencies))
inverse_frequency_weights = inverse_frequency_weights.type(torch.FloatTensor).to(device)

# class-weight the cross-entropy
criterion_w = nn.CrossEntropyLoss(weight=inverse_frequency_weights)

# Train on a private copy so the shared initial model stays untouched.
model_w = deepcopy(model_orig)
optimizer_w = torch.optim.SGD(params=model_w.parameters(), lr=0.01)

In [None]:
# Fit model_w in place with the class-weighted cross-entropy loss.
modelTrain(model_w, optimizer_w, criterion_w, X_train, y_train)

# Plots

A mesh for visualising decision boundaries

In [None]:
# Build a dense 800 x 800 grid of points covering [-4, 4]^2.
x1_fit = np.linspace(-4, 4, 800)
x2_fit = np.linspace(-4, 4, 800)

mesh_grid = np.meshgrid(x1_fit, x2_fit)

# Flatten each coordinate grid into a column and place the two side by side,
# giving one (x1, x2) row per grid point, stored as a float32 tensor.
x1_column, x2_column = (axis_values.reshape(-1, 1) for axis_values in mesh_grid)
X_mesh = torch.from_numpy(np.hstack([x1_column, x2_column]).astype(np.float32))

In [None]:
# Produces scatter plot, with Bayes classifier. Each point on the dense mesh is coloured by the predicted class


def mesh_plot(y_pred: torch.Tensor, title: str, fileName: str):
    """Plot decision regions by colouring every point of X_mesh by its predicted class.

    The dotted lines x = +/-1 and y = +/-1 are drawn first, then the mesh points
    are scattered on top. The figure is saved to disk and then displayed.

    Args:
        y_pred (torch.Tensor): Predicted class index for each row of X_mesh
        title (str): Title of the plot
        fileName (str): Filename or path to save the file (you need to include an extension, like .png)
    """
    # Reference boundaries, drawn as dotted black lines.
    for x_boundary in (1, -1):
        plt.axvline(x=x_boundary, color="k", linestyle=":")
    for y_boundary in (1, -1):
        plt.axhline(y=y_boundary, color="k", linestyle=":")
    plt.title(title, fontsize=25)

    # Colour per class index; classes sharing a colour are drawn as one region.
    palette = ["c", "y", "y", "g", "g", "r", "r", "r", "r"]
    predicted_classes = y_pred.cpu().detach().numpy()
    y_pred_colours = [palette[class_index] for class_index in predicted_classes]
    plt.scatter(X_mesh[:, 0], X_mesh[:, 1], c=y_pred_colours, marker=".")

    # Axis labels and tick sizes.
    plt.xlabel("x1", fontsize=20)
    plt.ylabel("x2", fontsize=20)
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)

    # Equal scaling on both axes, fixed to the [-4, 4] x [-4, 4] window.
    plt.axis("scaled")
    plt.axis([-4, 4, -4, 4])

    # Square 8x8 inch figure, saved before it is shown.
    plt.gcf().set_size_inches(8, 8)
    plt.savefig(fileName, dpi=800)
    plt.show()

## Vanilla ERM

In [None]:
# Score every mesh point with the ERM model. Gradients are not needed for
# plotting, so the forward pass runs under no_grad; the module is called
# directly (rather than via .forward) so that any registered hooks still run.
with torch.no_grad():
    logits_erm = model_erm(X_mesh.to(device))

# Predicted class = index of the largest score in each row.
y_pred_erm = torch.argmax(logits_erm, dim=1)
mesh_plot(y_pred_erm, "ERM", "../data/imbalMultiVanilla.png")

## Vanilla ERM + additive update

In [None]:
# Per-class offsets: the log of each class frequency, as a float tensor on the device.
adjustments = (
    torch.from_numpy(np.log(class_frequencies)).type(torch.FloatTensor).to(device)
)

# modified predictions with additive update: subtract the log-frequency of each
# class from its score before taking the argmax. Gradients are not needed for
# plotting, so the forward pass runs under no_grad.
with torch.no_grad():
    adjusted_logits = model_erm(X_mesh.to(device)) - adjustments
y_pred_adjusted = torch.argmax(adjusted_logits, dim=1)

mesh_plot(y_pred_adjusted, "Additive update", "../data/imbalMultiAdditive.png")

## Weighted ERM

This uses the built-in class weighting of PyTorch's `CrossEntropyLoss` (its `weight` argument). It gives the loss function in Menon (4), which corresponds to 'balancing' the loss.

In [None]:
# Score every mesh point with the class-weighted model. Gradients are not
# needed for plotting, so the forward pass runs under no_grad; the module is
# called directly (rather than via .forward) so that any registered hooks still run.
with torch.no_grad():
    logits_w = model_w(X_mesh.to(device))

# Predicted class = index of the largest score in each row.
y_pred_w = torch.argmax(logits_w, dim=1)
mesh_plot(y_pred_w, "Class-weighted ERM", "../data/imbalMultiWeighted.png")