In [1]:
import numpy as np
import torch

## 多项式分布（Multinomial distribution）

- 离散型概率分布
    - PMF
$$
\begin{align}
f(x_1,\ldots,x_k;n,p_1,\ldots,p_k) & {} = \Pr(X_1 = x_1 \text{ and } \dots \text{ and } X_k = x_k) \\
& {} = \begin{cases} { \displaystyle {n! \over x_1!\cdots x_k!}p_1^{x_1}\times\cdots\times p_k^{x_k}}, \quad &
\text{when } \sum_{i=1}^k x_i=n \\  \\
0 & \text{otherwise,} \end{cases}
\end{align}
$$

## `np.random.multinomial`

In [5]:
# Two independent experiments, each distributing 20 trials over 6 equally
# likely categories (a fair die); every row of X holds one experiment's counts.
n_trials = 20
pvals = [1 / 6] * 6
X = np.random.multinomial(n_trials, pvals, size=2)
X

array([[2, 4, 0, 4, 5, 5],
       [2, 6, 2, 6, 2, 2]])

- 第一个参数是 `n`，第二个参数是 `p_1, p_2, ..., p_k`，返回的是 `x_1, x_2, ..., x_k`；`size=2` 表示独立重复 2 次实验，输出的每一行对应一次实验的计数结果

In [7]:
# Sum the counts within each row: every experiment must add up to n = 20.
np.sum(X, axis=1)

array([20, 20])

## `torch.multinomial`

- https://pytorch.org/docs/stable/generated/torch.multinomial.html

- 输入的是 weights（不要求是加和为 1 的概率分布），输出的是（基于 weights）采样得到的 index；
    - must be non-negative, finite and have a non-zero sum.
    - 内部其实还是会将 weights normalize；
- replacement：默认为 `False`，即无放回抽样，一个 index 只会被抽到一次；
    - 无放回时，文档要求 `num_samples` 不超过非零权重的个数，否则结果里会出现权重为 0 的 index（见下面抽 4 个样本的例子）；

In [9]:
# Unnormalized sampling weights: indices 0 and 3 carry zero weight.
weights = torch.tensor([0.0, 10.0, 3.0, 0.0], dtype=torch.float32)

In [32]:
# Draw 2 indices without replacement (replacement is left at its default).
torch.multinomial(weights, num_samples=2)

tensor([2, 1])

In [34]:
# Gotcha: this asks for 4 draws without replacement although only 2 weights are
# non-zero. The torch.multinomial docs require num_samples not to exceed the
# number of non-zero entries in this mode; the recorded output contains
# indices 0 and 3, whose weight is 0.
torch.multinomial(weights, num_samples=4)

tensor([2, 1, 0, 3])

In [37]:
# With replacement, the same index may be drawn repeatedly.
torch.multinomial(weights, num_samples=4, replacement=True)

tensor([2, 1, 1, 1])

In [30]:
# Sanity check: all probability mass sits on index 0, so torch.multinomial
# must never return any other index. Prints the percentage of draws that did.
torch.manual_seed(0)

# Single source of truth for the trial count: it is used both as the loop
# bound and as the denominator of the reported percentage.
NUM_TRIALS = 1_000_000
probs = torch.tensor([1.0] + [0.0] * 999, dtype=torch.double)

wrongs = 0
# One multinomial call per trial on purpose: the check targets the
# single-sample code path, so the draws are not batched into one call.
for _trial in range(NUM_TRIALS):
    sampled = torch.multinomial(probs, num_samples=1)
    if sampled.item() != 0:
        wrongs += 1

print(f"{100 * wrongs / NUM_TRIALS:.4f}")

0.0000


## `torch.distributions.categorical.Categorical`

- https://pytorch.org/docs/stable/distributions.html

In [8]:
from torch.distributions import Categorical

In [20]:
# Build the categorical distribution from the unnormalized weights; they are
# passed as the `probs` argument, which Categorical rescales to sum to 1.
c = Categorical(probs=weights)

In [21]:
# Normalized probabilities: the recorded output is the weights rescaled to sum
# to 1 (10/13 ≈ 0.7692 and 3/13 ≈ 0.2308; zero weights stay at 0).
c.probs

tensor([0.0000, 0.7692, 0.2308, 0.0000])

In [27]:
# Ten independent draws, each returned as its own scalar index tensor.
num_draws = 10
[c.sample() for _draw in range(num_draws)]

[tensor(1),
 tensor(2),
 tensor(1),
 tensor(1),
 tensor(1),
 tensor(1),
 tensor(2),
 tensor(1),
 tensor(1),
 tensor(2)]