In [8]:
import cupy as cp
import numpy
import torch

In [9]:
def forward_propagation_sm(g):
    # apply simple softmax
    y = cp.exp(g)
    return y / cp.sum(y)

def backward_propagation_sm(g, upstream_gradient):

    matrix = cp.zeros((len(upstream_gradient), len(upstream_gradient)))
    for i in range(len(upstream_gradient)):
        for j in range(len(upstream_gradient)):
            e1 = cp.exp(g[i])
            e2 = cp.exp(g[j])
            s1 = 1 / cp.sum(cp.exp(g))
            s2 = -1 * (1 / cp.sum(cp.exp(g))**2)
            if i == j:
                matrix[i][j] = e1*s2*e2 + e1*s1
            else:
                matrix[i][j] = e1*s2*e2
    return  upstream_gradient @ matrix


def forward_propagation_sm_normalization(g):
    # apply simple softmax and normalize (subtract maximum)
    y = cp.exp(g - cp.max(g))
    return y / cp.sum(y)

def backward_propagation_sm_normalization(g, upstream_gradient):
    # the same, because the normalization does not affect the gradient computation
    matrix = cp.zeros((len(upstream_gradient), len(upstream_gradient)))
    for i in range(len(upstream_gradient)):
        for j in range(len(upstream_gradient)):
            e1 = cp.exp(g[i])
            e2 = cp.exp(g[j])
            s1 = 1 / cp.sum(cp.exp(g))
            s2 = -1 * (1 / cp.sum(cp.exp(g))**2)
            if i == j:
                matrix[i][j] = e1*s2*e2 + e1*s1
            else:
                matrix[i][j] = e1*s2*e2
    return cp.dot(upstream_gradient, matrix)

In [10]:
# task 1

input = cp.array([0, -1, 21, 4])
dL_dy = cp.array([1, 3, 4, 2])

# forward pass
y_value = forward_propagation_sm(input)

# backpropogation
dL_dx = backward_propagation_sm(input, dL_dy)

print(f"Forward pass: {y_value}")
print(f"Backpropagation: {dL_dx}")

print()

input_tensor = torch.Tensor(input)
input_tensor.requires_grad = True
softmax = torch.nn.Softmax()
output_tensor = softmax(input_tensor)
print(f"Forward pass (torch): {output_tensor.data.numpy()}")
output_tensor.backward(torch.Tensor(dL_dy))
print(f"Backpropagation (torch): {input_tensor.grad.numpy()}")

Forward pass: [7.58256011e-10 2.78946797e-10 9.99999958e-01 4.13993754e-08]
Backpropagation: [-2.27476797e-09 -2.78946774e-10  8.53524618e-08 -8.27987473e-08]

Forward pass (torch): [7.5825601e-10 2.7894681e-10 1.0000000e+00 4.1399375e-08]
Backpropagation (torch): [-2.274768e-09 -2.789468e-10  0.000000e+00 -8.279875e-08]


  output_tensor = softmax(input_tensor)


In [11]:
def forward_propagation_rl(g):
    # note that d(ReLU)/dx (0) = 0 was chosen
    return cp.vectorize(lambda x: max(0, x))(g)

def backward_propagation_rl(g, upstream_gradient):
    matrix = cp.zeros((len(upstream_gradient), len(upstream_gradient)))
    for i in range(len(upstream_gradient)):
        for j in range(len(upstream_gradient)):
            if i == j:
                matrix[i][j] = 1 if g[i] > 0 else 0
            else:
                matrix[i][j] = 0
    return cp.dot(upstream_gradient, matrix)

In [12]:
# task 2

input = cp.array([0, -1, 21, 4])
dL_dy = cp.array([1, 3, 4, 2])

# forward pass
y_value = forward_propagation_rl(input)

# backpropogation
dL_dx = backward_propagation_rl(input, dL_dy)

print(f"Forward pass: {y_value}")
print(f"Backpropagation: {dL_dx}")

print()

input_tensor = torch.Tensor(input)
input_tensor.requires_grad = True
relu = torch.nn.ReLU()
output_tensor = relu(input_tensor)
print(f"Forward pass (torch): {output_tensor.data.numpy()}")
output_tensor.backward(torch.Tensor([1, 3, 4, 2]))
print(f"Backpropagation (torch): {input_tensor.grad.numpy()}")

Forward pass: [ 0  0 21  4]
Backpropagation: [0. 0. 4. 2.]

Forward pass (torch): [ 0.  0. 21.  4.]
Backpropagation (torch): [0. 0. 4. 2.]


In [13]:
def create_W(input_dim, output_dim):
    W_dim = (output_dim[0], input_dim[0])
    W = cp.random.rand(*W_dim)
    return W

def forward_pass(g, W):
    return W @ g

def backpropagation(W, upstream_gradient):
    return W.T.dot(upstream_gradient)

In [14]:
# task 3

input_dim =(2, 3)
output_dim = (5, 3)
W = create_W(input_dim, output_dim)

input = cp.random.rand(*input_dim)
dL_dy = cp.random.rand(*output_dim)

# forward pass
y_value = forward_pass(input, W)

# backpropogation
dL_dx = backpropagation(W, dL_dy)

print(f"Forward pass: {y_value}")
print(f"Backpropagation: {dL_dx}")

print()

input_tensor = torch.Tensor(input)
input_tensor.requires_grad = True
output_tensor = torch.Tensor(W) @ input_tensor
print(f"Forward pass (torch): {output_tensor.data.numpy()}")
output_tensor.backward(torch.Tensor(dL_dy))
print(f"Backpropagation (torch): {input_tensor.grad.numpy()}")

Forward pass: [[0.2515419  0.39103843 0.28332879]
 [0.55773177 0.75237831 0.53440061]
 [0.32120092 0.3022164  0.20051022]
 [0.67417465 0.66349992 0.44472393]
 [0.36416323 0.49937562 0.35557392]]
Backpropagation: [[1.31684986 1.0139439  0.9456753 ]
 [1.42927258 1.55612058 1.51959441]]

Forward pass (torch): [[0.2515419  0.39103842 0.28332877]
 [0.55773175 0.7523783  0.5344006 ]
 [0.32120094 0.3022164  0.20051023]
 [0.6741747  0.66349995 0.44472393]
 [0.36416325 0.49937564 0.35557392]]
Backpropagation (torch): [[1.3168498 1.0139439 0.9456753]
 [1.4292725 1.5561205 1.5195944]]
