# Demos of some calculations

In [1]:
import torch
import numpy as np
# Display whether PyTorch can see a CUDA device. The tensors in the demos
# below are created without a device argument, so this is informational only.
torch.cuda.is_available()

True

Set $x = 3$, then differentiate $x$ with respect to itself. The expected result is:

\begin{align*}
\frac{\partial x}{\partial x}\Big|_{x=3} = 1
\end{align*}

In [2]:
# Leaf tensor holding x = 3 that autograd is asked to track.
x = torch.tensor(3.0, requires_grad=True)

# Back-propagating from x itself yields dx/dx = 1, stored in x.grad.
x.backward()
print(x, x.grad)

# Drop the stored gradient so the second backward() call starts from scratch
# instead of adding to the value left by the first one.
x.grad = None

# y is only another name for the very same tensor, so y.grad is x.grad.
y = x
y.backward()
print(y, y.grad)

tensor(3., requires_grad=True) tensor(1.)
tensor(3., requires_grad=True) tensor(1.)


Execute several calculations and see the results.

- $y = x^2$: In this case the expected result is:
\begin{align*}
\frac{\partial y}{\partial x}\Big|_{x=3} = \frac{\partial x^2}{\partial x}\Big|_{x=3} = 2x\big|_{x=3} = 6
\end{align*}

- $y = x^2 + x^3$: In this case the expected result is:
\begin{align*}
\frac{\partial y}{\partial x}\Big|_{x=3} = \left(\frac{\partial x^2}{\partial x} + \frac{\partial x^3}{\partial x}\right)\Bigg|_{x=3} = (2x + 3x^2)\big|_{x=3} = 33
\end{align*}

In [3]:
# Fresh leaf tensor at x = 3 for the polynomial examples.
x = torch.tensor(3.0, requires_grad=True)

# y = x^2  ->  dy/dx = 2x = 6 at x = 3
y = x.pow(2)
y.backward()
print(y, x.grad)

# Clear the stored gradient; otherwise the next backward() would add to it.
x.grad = None

# y = x^2 + x^3  ->  dy/dx = 2x + 3x^2 = 33 at x = 3
y = x.pow(2) + x.pow(3)
y.backward()
print(y, x.grad)

tensor(9., grad_fn=<PowBackward0>) tensor(6.)
tensor(36., grad_fn=<AddBackward0>) tensor(33.)


## Slightly more complicated functions

The so-called *sigmoid function* is defined as $\varsigma(x) = \frac{1}{1 + \exp(-x)}$. Its derivative is $\varsigma^\prime(x) = \frac{\exp(-x)}{(1 + \exp(-x))^2}$.

The *hyperbolic tangent function* is defined as $\tanh(x) = \frac{\exp(x) - \exp(-x)}{\exp(x) + \exp(-x)}$ and its derivative is $\tanh^\prime(x) = \frac{4}{(\exp(x) + \exp(-x))^2}$.

Then set $y = \varsigma(x^2) + \tanh(x^3)$ and differentiate it using the chain rule:

\begin{align*}
\frac{\partial y}{\partial x} \Big|_{x=3} &= \varsigma^\prime(x^2)\cdot (2x)\Big|_{x=3} + \tanh^\prime(x^3)\cdot (3x^2)\Big|_{x=3} \\
&= \frac{\exp(-x^2)}{(1 + \exp(-x^2))^2}\cdot (2x)\Big|_{x=3} + \frac{4}{(\exp(x^3) + \exp(-x^3))^2}\cdot (3x^2)\Big|_{x=3} \simeq 0.00074
\end{align*}

In [4]:
# Validation with NumPy: evaluate y = sigmoid(x^2) + tanh(x^3) at x = 3 and
# compare the hand-derived derivative with a finite-difference estimate.
#
# The helpers below are reused by later validation cells. They are the naive
# textbook formulas, which is fine for the small arguments used in this
# notebook (np.exp overflows float64 only for arguments above roughly 709).


def sigmoid(x):
    """Logistic sigmoid: 1 / (1 + exp(-x))."""
    return 1/(1 + np.exp(-x))


def sigmoid_prime(x):
    """Derivative of the sigmoid: exp(-x) / (1 + exp(-x))**2."""
    return np.exp(-x)/(1 + np.exp(-x))**2


def tanh(x):
    """Hyperbolic tangent: (exp(x) - exp(-x)) / (exp(x) + exp(-x))."""
    return (np.exp(x) - np.exp(-x))/(np.exp(x) + np.exp(-x))


def tanh_prime(x):
    """Derivative of tanh: 4 / (exp(x) + exp(-x))**2."""
    return 4/(np.exp(x) + np.exp(-x))**2


def f(x):
    """Function under test: sigmoid(x**2) + tanh(x**3)."""
    return sigmoid(x**2) + tanh(x**3)


y = f(3)

# Direct calculation via the chain rule:
#   sigmoid'(x^2) * 2x + tanh'(x^3) * 3x^2   evaluated at x = 3.
y_prime = sigmoid_prime(3**2) * (2*3) + tanh_prime(3**3) * (3 * 3**2)

# Numerical approximation: central difference around x = 3 with step 0.001.
y_prime_approx = (f(3.0005) - f(2.9995))/.001

print(y, y_prime, y_prime_approx)

1.9998766054240136 0.0007402760985890935 0.0007402770232367573


In [5]:
# Same function as the NumPy check, now differentiated by autograd:
#   y = sigmoid(x^2) + tanh(x^3)   at x = 3
x = torch.tensor(3.0, requires_grad=True)

sigmoid_term = torch.sigmoid(x**2)
tanh_term = torch.tanh(x**3)
y = sigmoid_term + tanh_term

# Populates x.grad with dy/dx.
y.backward()
print(y, x.grad)

tensor(1.9999, grad_fn=<AddBackward0>) tensor(0.0007)


## multiple variables

Set $x=3$ and $z = 2$. Then we consider $y = \varsigma(x^2 + z) + \tanh(x^3 z^2)$. In this case its partial derivatives are calculated as:

\begin{align*}
\frac{\partial y}{\partial x} \Big|_{\substack{x=3\\z=2}} &= \varsigma^\prime(x^2 + z)\cdot (2x)\Big|_{\substack{x=3\\z=2}} + \tanh^\prime(x^3 z^2)\cdot (3x^2 z^2)\Big|_{\substack{x=3\\z=2}} \\
&= \frac{\exp(-(x^2 + z))}{(1 + \exp(-(x^2 + z)))^2}\cdot (2x)\Big|_{\substack{x=3\\z=2}} + \frac{4}{(\exp(x^3 z^2) + \exp(-x^3 z^2))^2}\cdot (3x^2 z^2)\Big|_{\substack{x=3\\z=2}} \\
&\simeq 0.0001002 \\
\frac{\partial y}{\partial z} \Big|_{\substack{x=3\\z=2}} &= \varsigma^\prime(x^2 + z)\cdot 1\Big|_{\substack{x=3\\z=2}} + \tanh^\prime(x^3 z^2)\cdot (2x^3 z)\Big|_{\substack{x=3\\z=2}} \\
&= \frac{\exp(-(x^2 + z))}{(1 + \exp(-(x^2 + z)))^2}\Big|_{\substack{x=3\\z=2}} + \frac{4}{(\exp(x^3 z^2) + \exp(-x^3 z^2))^2}\cdot (2x^3 z)\Big|_{\substack{x=3\\z=2}} \\
&\simeq 0.0000167
\end{align*}

In [6]:
# NumPy cross-check for y = sigmoid(x^2 + z) + tanh(x^3 * z^2) at x = 3, z = 2.
# Uses the sigmoid / tanh helpers (and their derivatives) defined in an
# earlier cell.

# Arguments fed to the two outer functions.
sigmoid_arg = 3**2 + 2
tanh_arg = 3**3 * 2**2

y = sigmoid(sigmoid_arg) + tanh(tanh_arg)

# Chain rule with respect to x: inner derivatives are 2x and 3x^2 z^2.
y_prime_x = (
    sigmoid_prime(sigmoid_arg) * (2*3)
    + tanh_prime(tanh_arg) * (3 * 3**2 * 2**2)
)

# Chain rule with respect to z: inner derivatives are 1 and 2x^3 z.
y_prime_z = (
    sigmoid_prime(sigmoid_arg)
    + tanh_prime(tanh_arg) * (2 * 3**3 * 2)
)

print(y, y_prime_x, y_prime_z)

1.999983298578152 0.0001002068574636206 1.6701142910603434e-05


In [7]:
# Two tracked leaves: x = 3 and z = 2.
x = torch.tensor(3.0, requires_grad=True)
z = torch.tensor(2.0, requires_grad=True)

# y = sigmoid(x^2 + z) + tanh(x^3 * z^2)
sigmoid_input = x**2 + z
tanh_input = x**3 * z**2
y = torch.sigmoid(sigmoid_input) + torch.tanh(tanh_input)

# A single backward pass fills in both partial derivatives.
y.backward()
print(y, x.grad, z.grad)

tensor(2.0000, grad_fn=<AddBackward0>) tensor(0.0001) tensor(1.6689e-05)


## skip-connection

Set $x=3$. Then we consider $y = \tanh(x + \varsigma(x^2))$. In this case its derivative is calculated as:

\begin{align*}
\frac{\partial y}{\partial x} \Big|_{x=3} &= \tanh^\prime(x + \varsigma(x^2))\cdot (1 + \varsigma^\prime (x^2) \cdot (2x)) \Big|_{x=3} \\
&= \frac{4}{(\exp(x + \varsigma(x^2)) + \exp(-(x + \varsigma(x^2))))^2}\cdot \left( 1 + \frac{\exp(-x^2)}{(1 + \exp(-x^2))^2} \cdot (2x) \right) \Bigg|_{x=3} \\
&\simeq 0.00134
\end{align*}

In [8]:
# NumPy cross-check for the skip-connection example
#   y = tanh(x + sigmoid(x^2))   at x = 3,
# using the sigmoid / tanh helpers defined in an earlier cell.

# Argument of the outer tanh: the input plus the sigmoid branch.
tanh_arg = 3 + sigmoid(3**2)

y = tanh(tanh_arg)

# Chain rule: tanh'(inner) * d(inner)/dx, with d(inner)/dx = 1 + sigmoid'(x^2) * 2x.
y_prime = tanh_prime(tanh_arg) * (1 + sigmoid_prime(3**2) * (2*3))

print(y, y_prime)

0.9993291342526205 0.0013422743524949985


In [9]:
# Skip-connection example differentiated by autograd:
#   y = tanh(x + sigmoid(x^2))   at x = 3
x = torch.tensor(3.0, requires_grad=True)

# x reaches the tanh both directly and through the sigmoid branch.
skip_sum = x + torch.sigmoid(x**2)
y = torch.tanh(skip_sum)

y.backward()
print(y, x.grad)

tensor(0.9993, grad_fn=<TanhBackward>) tensor(0.0013)
