In [None]:
import torch
import torch.nn as nn

**MultiClassClassificationSoftmax Notes**

Let $\mathbf{x} \in \mathbb{R}^n$ be the input feature vector,

$W \in \mathbb{R}^{C \times n}$ be the weight matrix,

$b \in \mathbb{R}^C$ be the bias vector, and

$C$ be the number of classes.

The model computes the vector of class scores (logits) $z \in \mathbb{R}^C$:
$$
z = W\mathbf{x} + b
$$

The softmax function converts these scores into probabilities:
$$
\text{softmax}(z_i) = \frac{e^{z_i}}{\sum_{j=1}^C e^{z_j}}
$$

where $z_i$ is the score for class $i$.

**Interpretation:**
- The output is a probability distribution over $C$ classes.
- Each element is in $[0, 1]$ and the sum over all classes is $1$.
- The predicted class is usually $\arg\max_i \text{softmax}(z_i)$.

**Usage:**
- Suitable for multi-class classification where each input belongs to exactly one class.
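- Commonly used with the cross-entropy loss. Note that PyTorch's `nn.CrossEntropyLoss` expects the raw scores $z$ (it applies log-softmax internally), so pass it $z$ rather than the softmax output.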

In [None]:
class MultiClassClassificationSoftmax(torch.nn.Module):
    """Single linear layer followed by a softmax over the class scores.

    Args:
        input_dim: Size of the last dimension of the input tensor.
        n_classes: Number of classes, i.e. size of the last output dimension.

    Note:
        ``forward`` returns probabilities, not raw scores. Losses that apply
        log-softmax themselves (e.g. ``nn.CrossEntropyLoss``) should be fed
        ``self.fc(x)`` instead.
    """

    def __init__(self, input_dim, n_classes):
        super().__init__()
        # Attribute name ``fc`` is kept so existing state_dict keys still match.
        self.fc = nn.Linear(input_dim, n_classes)

    def forward(self, x):
        """Return class probabilities for ``x``.

        Args:
            x: Tensor whose last dimension has size ``input_dim``; any number
                of leading dimensions (including none) is accepted.

        Returns:
            Tensor shaped like ``x`` with the last dimension replaced by
            ``n_classes``; entries along that dimension sum to 1.
        """
        scores = self.fc(x)
        # nn.Linear puts the class scores on the last dimension, so normalize
        # there. A hard-coded dim=1 raises on unbatched 1-D input and picks the
        # wrong axis for inputs with more than two dimensions.
        return nn.functional.softmax(scores, dim=-1)

In [None]:
# One all-ones sample with 10 features, pushed through a 4-class softmax model.
x = torch.ones(1, 10)
model = MultiClassClassificationSoftmax(input_dim=10, n_classes=4)
model(x)

tensor([[0.1124, 0.4962, 0.2699, 0.1215]], grad_fn=<SoftmaxBackward0>)

In [8]:
# Add up the outputs of each row (dim=1); the recorded result for the softmax model is 1.
torch.sum(model(x), dim=1)

tensor([1.], grad_fn=<SumBackward1>)

In [20]:
class MultiBinaryClassificationSigmoid(nn.Module):
    """Single linear layer followed by an element-wise sigmoid.

    Each of the ``n_classes`` outputs is squashed independently, so the
    outputs are not normalized against each other and need not sum to 1.

    Args:
        input_dim: Size of the last dimension of the input tensor.
        n_classes: Number of independent outputs produced per sample.
    """

    def __init__(self, input_dim, n_classes):
        super().__init__()
        self.fc = nn.Linear(input_dim, n_classes)

    def forward(self, x):
        """Return the per-output sigmoid of the linear scores for ``x``."""
        logits = self.fc(x)
        return torch.sigmoid(logits)

In [21]:
# Same all-ones sample, now through the 4-output sigmoid model.
x = torch.ones(1, 10)
model = MultiBinaryClassificationSigmoid(input_dim=10, n_classes=4)
model(x)

tensor([[0.6422, 0.5714, 0.2768, 0.5464]], grad_fn=<SigmoidBackward0>)

In [22]:
# Add up the outputs of each row (dim=1); the recorded result for the sigmoid model is not 1.
torch.sum(model(x), dim=1)

tensor([2.0367], grad_fn=<SumBackward1>)