# Chapter 2
**Probability Distributions Using PyTorch**

In [1]:
import torch
from torch.autograd import Variable
import torch.distributions as distributions

## Recipe 2-1. Sampling Tensors
Weight initialization is an important task in training a neural network and any kind of deep learning model, such as a convolutional neural network (CNN), a deep neural network (DNN), and a recurrent neural network (RNN).

In [2]:
torch.manual_seed(20210620)
torch.randn(4, 4)

tensor([[-1.2169,  1.5546, -2.1656, -0.7989],
        [ 1.1052, -0.6994,  1.0420,  0.1689],
        [ 0.5121,  0.9921, -0.5160, -1.3243],
        [ 1.2463,  0.2345,  0.9326, -0.5185]])

Uniform distribution
$$P(X)=\begin{cases}\frac{1}{b-a}&a\le X\le b\\0&X>a,X<b\end{cases}$$

In [3]:
torch.Tensor(4, 4).uniform_(0, 1)

tensor([[0.6739, 0.1073, 0.8637, 0.1027],
        [0.5573, 0.6963, 0.3788, 0.1305],
        [0.5607, 0.7941, 0.4270, 0.4455],
        [0.3962, 0.7553, 0.7715, 0.6997]])

Bernoulli distribution
$$\begin{cases}P(X=1)=p\\P(X=0)=1-p\end{cases}$$

In [4]:
# way 1
print(torch.Tensor(4, 4).bernoulli(0.5))
# way 2
print(torch.bernoulli(torch.Tensor(4, 4).uniform_(0, 1)))

tensor([[1., 1., 0., 1.],
        [0., 0., 0., 1.],
        [0., 1., 0., 0.],
        [0., 1., 1., 0.]])
tensor([[1., 0., 0., 1.],
        [0., 0., 1., 1.],
        [1., 1., 0., 0.],
        [1., 0., 0., 1.]])


Multinomial distribution
$$\mathbf X\thicksim \text{Mult}_k(n, \mathbf p)$$

In [5]:
# without replacement
print(torch.multinomial(
    torch.Tensor(16).uniform_(0, 1),
    num_samples=3
))
# with replacement
print(torch.multinomial(
    torch.Tensor(16).uniform_(0, 1),
    num_samples=5,
    replacement=True
))

tensor([ 2, 14,  9])
tensor([ 8,  8, 14, 14,  4])


Normal distribution
$$X\thicksim\mathcal N(\mu,\sigma^2)$$

In [6]:
# each element is in distribution of N(mean, std^2)
print(torch.normal(
    mean=torch.linspace(1, 10, 10),
    std=torch.linspace(1, 0.1, 10)
))

print(torch.normal(
    mean=0.5,
    std=torch.linspace(1, 5, 5)
))

print(torch.normal(
    mean=-0.5, std=1.0, size=(10,)
))


tensor([-0.7136,  0.0261,  3.0238,  3.9522,  5.3662,  5.7489,  6.8118,  7.5106,
         8.8585,  9.9411])
tensor([ 0.5567, -2.2587,  1.3832,  2.8100,  8.0504])
tensor([-0.8894, -1.0986, -1.6995, -1.2653,  1.5386, -0.4245,  0.2302,  1.8631,
        -0.4695,  0.9538])


## Recipe 2-2. Variable Tensors
What is a variable in PyTorch and how is it defined? What is a random variable in PyTorch?

**Computational Graph**

In [7]:
row = 10
col = 15
# using autograd module to create a varible
x1 = Variable(torch.randn(row, col), 
              requires_grad=True)
x2 = Variable(torch.randn(row, col), 
              requires_grad=True)
x3 = Variable(torch.randn(row, col), 
              requires_grad=True)

In [8]:
a = torch.sum(x1 * x2 * x3)
print(a)
a.backward()
print(a)

tensor(-10.3499, grad_fn=<SumBackward0>)
tensor(-10.3499, grad_fn=<SumBackward0>)


## Recipe 2-3. Basic Statistics Problem
How do we compute basic statistics, such as mean, median, mode, and so forth, from a Torch tensor?

In [9]:
n = torch.normal(mean=1, std=2, size=(4, 5))
# mean
print(torch.mean(n))
# mean of axis 0 (or dim=0)
print(torch.mean(n, axis=0))
# mean of axis 1 (or dim=1)
print(torch.mean(n, axis=1))

tensor(0.4291)
tensor([-0.7599,  1.1393,  1.9067, -0.4588,  0.3184])
tensor([0.8684, 0.2926, 0.4962, 0.0594])


In [10]:
# mode, default axis=-1
print(torch.mode(n))
# the same for median, std, var...
print(torch.median(n))
print(torch.std(n))
print(torch.var(n))

torch.return_types.mode(
values=tensor([-1.7531, -3.4031, -0.0736, -2.2050]),
indices=tensor([0, 4, 4, 0]))
tensor(0.2540)
tensor(1.9757)
tensor(3.9035)


## Recipe 2-4. Gradient Computation Problem
How do we compute basic gradients from the sample tensors?

**Actually, it is the first neuron netword done by me independently.**

In [11]:
# input
x = torch.tensor([[11.0, 22.0, 33.0]]).T
# output
y = torch.tensor([[21.0, 14.0, 64.0, 72.0]]).T
# weights
w = Variable(torch.randn(y.shape[0], x.shape[0]), requires_grad=True)
# bias
b = Variable(torch.randn(y.shape[0], 1), requires_grad=True)

In [12]:
epochs = 10
learing_rate = 0.001
for epoch in range(epochs):
    # forwarding
    y_pred = torch.matmul(w, x) + b
    # calculating loss
    loss_val = torch.nn.functional.mse_loss(y_pred, y)
    # backwarding
    loss_val.backward()
    # gradient descent
    w.data = w.data - learing_rate * w.grad.data
    # set the gradient to 0 after updating weights
    w.grad.data.zero_()
# check result
print("w = ", w.data, sep='\n')
print("b = ", b.data, sep='\n')
print("w * x + b = ", (torch.matmul(w, x) + b).data, sep='\n')

w = 
tensor([[-0.7932,  0.4884,  0.6597],
        [ 0.9782, -0.1713,  0.2143],
        [ 1.2128,  1.2078,  0.7503],
        [ 0.6340,  0.9193,  1.3641]])
b = 
tensor([[-2.7905],
        [-0.0658],
        [-0.6719],
        [-0.2135]])
w * x + b = 
tensor([[21.0000],
        [14.0000],
        [64.0000],
        [72.0000]])


## Recipe 2-5. Tensor Operations
How do we compute or perform operations based on variables such as matrix multiplication?

In [13]:
x = Variable(torch.Tensor(4, 4).uniform_(-4, 5))
y = Variable(torch.Tensor(4, 4).uniform_(0, 1))
z = torch.mm(x, y)
# some featurs
print(z.shape)
print(z.requires_grad)
print(z.grad)

torch.Size([4, 4])
False
None


## Recipe 2-6. Tensor Operations
How do we compute or perform operations based on variables such as matrix-vector computation, and matrix-matrix and vector-vector calculation?

In [14]:
mat1 = torch.Tensor(4, 4).uniform_(0, 1)
mat2 = torch.Tensor(5, 4).uniform_(0, 1)
vec1 = torch.Tensor(4).uniform_(0, 1)
print(mat1 + 10)
print(mat1 + vec1)
print(mat1 * mat1)
print(mat2 @ mat1)

tensor([[10.0659, 10.0311, 10.4802, 10.9511],
        [10.2063, 10.4168, 10.7995, 10.2437],
        [10.9632, 10.6569, 10.8092, 10.1809],
        [10.7521, 10.4165, 10.8021, 10.2873]])
tensor([[0.6744, 0.5480, 1.2231, 1.5265],
        [0.8148, 0.9337, 1.5424, 0.8191],
        [1.5717, 1.1737, 1.5521, 0.7564],
        [1.3606, 0.9334, 1.5449, 0.8628]])
tensor([[0.0043, 0.0010, 0.2306, 0.9045],
        [0.0426, 0.1738, 0.6392, 0.0594],
        [0.9278, 0.4315, 0.6548, 0.0327],
        [0.5656, 0.1735, 0.6433, 0.0826]])
tensor([[1.2316, 0.9109, 1.4944, 0.5181],
        [0.6588, 0.5922, 1.0882, 0.6472],
        [0.8588, 0.6898, 1.1993, 0.5359],
        [1.1917, 0.9253, 1.5775, 0.5027],
        [1.0030, 0.8662, 1.5930, 0.7474]])


## Recipe 2-7. Tensor Operations
How do we know which distributions to
use and when to use them?

In [15]:
# B(0.5, 0.5)
d = distributions.beta.Beta(
    torch.tensor([0.5]), torch.tensor([0.5])
)
d.sample()

tensor([0.4026])

In [16]:
# multi_k(0, 0.2, 0.8, 1.0)
d = distributions.binomial.Binomial(
    100, torch.tensor([0, 0.2, 0.8, 1.0])
)
d.sample()

tensor([  0.,  24.,  83., 100.])

In [17]:
# event probability
d = distributions.categorical.Categorical(
    torch.tensor([0.2, 0.2, 0.2, 0.2, 0.2])
)
d.sample()

tensor(3)

In [18]:
# L(10, 0.99)
d = distributions.laplace.Laplace(
    torch.tensor([10.0]), torch.tensor([0.99])
)
d.sample()

tensor([11.0169])

In [19]:
# N(100, 10)
d = distributions.normal.Normal(
    torch.tensor([100.0]), torch.tensor([10.0])
)
d.sample()

tensor([114.9374])