In [215]:
import torch
import numpy as np
from torch import nn
import torch.nn.functional as F

In [33]:
epsilon = 1e-6

# 1. Activation Functions

## Tanh

In [42]:
def tanh(x):
    return (np.exp(x)-np.exp(-x))/(np.exp(x)+np.exp(-x))

In [51]:
x = torch.randn(5, requires_grad=False)
y = nn.Tanh()(x).numpy()
y_ = tanh(x.numpy())
print(y-y_)

[ 0.0000000e+00  0.0000000e+00  5.9604645e-08  0.0000000e+00
 -2.9802322e-08]


## Sigmoid

In [41]:
def sigmoid(x):
    return 1. / (1. + np.exp(-x))

In [52]:
x = torch.randn(5, requires_grad=False)
y = nn.Sigmoid()(x).numpy()
y_ = sigmoid(x.numpy())
print(y-y_)

[0. 0. 0. 0. 0.]


## Cross Entropy

In [206]:
def softmax(x, axis=None):
    return np.exp(x)/np.exp(x).sum(axis=axis, keepdims=True)

In [207]:
x = torch.randn(5, 3, requires_grad=False)
y = nn.Softmax()(x).numpy()
y_ = softmax(x.numpy(), axis=1)
print(y-y_)

[[-7.4505806e-09  0.0000000e+00 -5.9604645e-08]
 [ 0.0000000e+00  5.9604645e-08  2.9802322e-08]
 [ 2.9802322e-08  7.4505806e-09  5.9604645e-08]
 [ 5.9604645e-08  0.0000000e+00  2.9802322e-08]
 [ 1.4901161e-08  5.9604645e-08  2.9802322e-08]]


  y = nn.Softmax()(x).numpy()


## ReLU

In [70]:
def relu(x):
    return np.maximum(x, np.zeros_like(x))

In [71]:
x = torch.randn(5, requires_grad=False)
y = nn.ReLU()(x).numpy()
y_ = relu(x.numpy())
print(y-y_)

[0. 0. 0. 0. 0.]


# 2. Loss

## MSE Loss

In [182]:
def mse_loss(y_pred, y_true):
    # y_pred: n x c
    # y_true: n x c
    return ((y_true-y_pred)**2).mean()

In [183]:
# veryfication:
loss = nn.MSELoss()
input = torch.randn(3, 5, requires_grad=False)
target = torch.randn(3, 5, requires_grad=False)
output = loss(input, target)

In [184]:
input_np = input.numpy()
target_np = target.numpy()
output_np = mse_loss(input_np, target_np)

In [185]:
assert np.abs(output.item() - output_np) < epsilon, "wrong implementation"

## BCE Loss

In [186]:
def bce_loss(y_pred, y_true):
    # y_pred = n x 2 (float range (0~1))
    # y_true = n x 2 (one hot integer)
    y_pred = np.clip(y_pred, epsilon, 1-epsilon)
    return -(y_true * np.log(y_pred) + (1-y_true)*np.log(1-y_pred)).mean()

In [187]:
m = nn.Sigmoid()
loss = nn.BCELoss()
input = torch.randn(3, 2, requires_grad=False)
target = torch.rand(3, 2, requires_grad=False)
output = loss(m(input), target)

In [188]:
input_np = input.numpy()
target_np = target.numpy()
output_np = bce_loss(sigmoid(input_np), target_np)

In [189]:
assert np.abs(output.item() - output_np) < epsilon, "wrong implementation"

## NLL Loss

In [190]:
def nllloss(y_pred, y_true):
    # y_pred: n x c float array after log softmax
    # y_true: n integer array
    indices = np.arange(len(y_true)).astype(int)
    return -y_pred[indices, y_true.astype(int), ...].mean()

In [191]:
m = nn.LogSoftmax(dim=1)
loss = nn.NLLLoss()
input = m(torch.randn(3, 5, requires_grad=False))
target = torch.empty(3, dtype=torch.long).random_(5)
output = loss(input, target)

In [192]:
input_np = input.numpy()
target_np = target.numpy()
output_np = nllloss(input_np, target_np)

In [193]:
assert np.abs(output.item() - output_np) < epsilon, "wrong implementation"

## CrossEntropy Loss

In [257]:
def cross_entropy(y_pred, y_true):
    # y_pred: n x c float array
    # y_true: n integer array
    y_pred = np.log(softmax(y_pred, axis=1))
    indices = np.arange(len(y_true)).astype(int)
    return -y_pred[indices, y_true.astype(int), ...].mean()

In [258]:
loss = nn.CrossEntropyLoss()
input = torch.randn(3, 5, requires_grad=False)
target = torch.empty(3, dtype=torch.long, requires_grad=False).random_(5)
output = loss(input, target)

In [259]:
input_np = input.numpy()
target_np = target.numpy()
output_np = cross_entropy(input_np, target_np)

In [260]:
assert np.abs(output.item() - output_np) < epsilon, "wrong implementation"

## KLDiv Loss

In [261]:
def kldivloss(y_pred, y_true):
    # y_pred: n x c after log softmax
    # y_true: n x c after softmax
    return (y_true * (np.log(y_true) - y_pred)).mean()

In [262]:
kl_loss = nn.KLDivLoss(reduction="mean")
input = F.log_softmax(torch.randn(3, 5, requires_grad=False), dim=1)
target = F.softmax(torch.rand(3, 5), dim=1)
output = kl_loss(input, target)

In [263]:
input_np = input.numpy()
target_np = target.numpy()
output_np = kldivloss(input_np, target_np)

In [264]:
assert np.abs(output.item() - output_np) < epsilon, "wrong implementation"