In [None]:
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
# Display the installed PyTorch version so the recorded outputs can be tied to a build.
torch.__version__

'1.13.1+cu116'

## Task 1

In [None]:
# Two input samples with three features each.
x1, x2 = [1.0, 2.0, 3.0], [4.0, 5.0, 6.0]

# First-layer weights: 3 rows (one per input feature) by 4 columns.
w1 = [
    [0.1, 0.2, 0.3, 0.4],
    [0.5, 0.6, 0.7, 0.8],
    [0.9, 1.0, 1.1, 1.2],
]

# Second-layer weights: 4 rows by 2 columns.
w2 = [
    [0.2, 0.3],
    [0.4, 0.5],
    [0.6, 0.7],
    [0.8, 0.9],
]

In [None]:
# torch version
# Inputs are plain tensors; both weight matrices are created with
# requires_grad=True so autograd can differentiate with respect to them.
x1_t, x2_t = torch.tensor(x1), torch.tensor(x2)
w1_t = torch.tensor(w1, requires_grad=True)
w2_t = torch.tensor(w2, requires_grad=True)

# Layer 1: stack the two samples row-wise, apply the linear map, then ReLU.
z1_t = torch.stack([x1_t, x2_t]) @ w1_t
print(z1_t)
a1_t = torch.relu(z1_t)
print(a1_t)

# Layer 2: linear map followed by a softmax over each row (dim=1).
z2_t = a1_t @ w2_t
print(z2_t)
a2_t = torch.softmax(z2_t, dim=1)
print('\nfinal output of torch version:')
print(a2_t)

tensor([[ 3.8000,  4.4000,  5.0000,  5.6000],
        [ 8.3000,  9.8000, 11.3000, 12.8000]], grad_fn=<MmBackward0>)
tensor([[ 3.8000,  4.4000,  5.0000,  5.6000],
        [ 8.3000,  9.8000, 11.3000, 12.8000]], grad_fn=<ReluBackward0>)
tensor([[10.0000, 11.8800],
        [22.6000, 26.8200]], grad_fn=<MmBackward0>)

final output of torch version:
tensor([[0.1324, 0.8676],
        [0.0145, 0.9855]], grad_fn=<SoftmaxBackward0>)


In [None]:
# numpy version
# Mirror the shared Python lists as ndarrays (same order: inputs, then weights).
x1_n, x2_n, w1_n, w2_n = (np.array(values) for values in (x1, x2, w1, w2))

def np_softmax(x, axis=1):
    """Numerically stable softmax of ``x`` along ``axis``.

    Args:
        x: array-like of scores; with the default ``axis=1`` it must have at
            least two dimensions (one row per sample).
        axis: axis along which the probabilities are normalised (default 1).

    Returns:
        ndarray of the same shape as ``x`` whose slices along ``axis`` sum to 1.
    """
    x = np.asarray(x)
    # Shift by the max of *each slice*, not the global max: a row lying far
    # below the global max would otherwise underflow to all zeros and
    # normalise to 0/0 = nan.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    e_x = np.exp(shifted)
    return e_x / e_x.sum(axis=axis, keepdims=True)

# Layer 1: stack the two samples into a (2, 3) batch, linear map, then ReLU.
batch_n = np.array([x1_n, x2_n])
z1_n = batch_n @ w1_n
print(z1_n)
a1_n = np.maximum(z1_n, 0)
print(a1_n)

# Layer 2: linear map, then softmax over each row.
z2_n = a1_n @ w2_n
print(z2_n)
a2_n = np_softmax(z2_n)
print('\nfinal output of numpy version:')
print(a2_n)

[[ 3.8  4.4  5.   5.6]
 [ 8.3  9.8 11.3 12.8]]
[[ 3.8  4.4  5.   5.6]
 [ 8.3  9.8 11.3 12.8]]
[[10.   11.88]
 [22.6  26.82]]

final output of numpy version:
[[0.13238887 0.86761113]
 [0.01448572 0.98551428]]


## Task 2

In [None]:
# Two-element 0/1 targets, one list per sample (y1 pairs with x1, y2 with x2).
y1, y2 = [0, 1], [1, 0]

# Learning rate; not consumed by any cell visible in this section.
learning_rate = 0.01

In [None]:
# torch version
# Targets as tensors of the default floating dtype, stacked row-wise in the
# same order as the input batch.
y1_t = torch.tensor(y1, dtype=torch.get_default_dtype())
y2_t = torch.tensor(y2, dtype=torch.get_default_dtype())
y_t = torch.stack((y1_t, y2_t))

def cross_entopy_loss_torch(y_true, y_pred):
    """Summed cross-entropy ``-sum(y_true * log(y_pred))`` over all elements.

    The (misspelled) function name is kept as-is so existing calls keep working.

    Args:
        y_true: target tensor, broadcastable against ``y_pred``.
        y_pred: tensor of predicted probabilities.

    Returns:
        0-dim tensor holding the summed loss.
    """
    if not y_pred.is_floating_point():
        y_pred = y_pred.to(torch.get_default_dtype())
    # Clamp to the smallest positive normal value of the dtype before the log:
    # an exactly-zero probability paired with a zero target would otherwise
    # evaluate 0 * log(0) = 0 * -inf = nan and poison the whole sum.
    tiny = torch.finfo(y_pred.dtype).tiny
    return -torch.sum(y_true * torch.log(y_pred.clamp(min=tiny)))
    
loss_t = cross_entopy_loss_torch(y_t, a2_t)

# backward() adds into .grad instead of overwriting it, so running this cell
# more than once (or after any other backward pass on the same weights) would
# report a sum of gradients. Clear both weight gradients first so dw1_t is the
# gradient of this loss only.
w1_t.grad = None
w2_t.grad = None
# retain_graph=True keeps the forward graph alive so this cell can be re-run
# without re-executing the forward-pass cell.
loss_t.backward(retain_graph=True)
dw1_t = w1_t.grad.clone()
dw1_t

tensor([[0.5714, 0.5714, 0.5714, 0.5714],
        [0.6994, 0.6994, 0.6994, 0.6994],
        [0.8274, 0.8274, 0.8274, 0.8274]])

In [None]:
# numpy version
# Targets as ndarrays, then combined into one array with one row per sample.
y1_n, y2_n = np.array(y1), np.array(y2)
y_n = np.array([y1_n, y2_n])

def cross_entopy_loss_np(y_true, y_pred):
    """Summed cross-entropy ``-sum(y_true * log(y_pred))`` over all elements.

    The (misspelled) function name is kept as-is so existing calls keep working.

    Args:
        y_true: array-like of targets, broadcastable against ``y_pred``.
        y_pred: array-like of predicted probabilities.

    Returns:
        The summed loss as a numpy float scalar.
    """
    y_pred = np.asarray(y_pred, dtype=float)
    # Clip to the smallest positive normal float before the log: an
    # exactly-zero probability paired with a zero target would otherwise
    # evaluate 0 * log(0) = 0 * -inf = nan and poison the whole sum.
    tiny = np.finfo(y_pred.dtype).tiny
    return -np.sum(y_true * np.log(np.clip(y_pred, tiny, None)))

loss_n = cross_entopy_loss_np(y_n, a2_n)

# Calculate gradients
# Partial derivative of L = -sum(y * log(a2)) with respect to the softmax
# output. Kept for reference only; it is not used below.
dL_da2_n = -y_n / a2_n
# Chaining dL/da2 through the softmax Jacobian simplifies to (a2 - y) when
# every target row sums to 1, as the one-hot rows of y_n do. Computing it
# directly avoids dividing by a2 * (1 - a2), which is 0 once the softmax
# saturates to exactly 0 or 1 and would turn the gradient into nan/inf.
dL_dz2_n = a2_n - y_n
dL_dw2_n = np.matmul(a1_n.T, dL_dz2_n)
dL_da1_n = np.matmul(dL_dz2_n, w2_n.T)
# ReLU derivative: gradient flows only through units whose pre-activation was positive.
dL_dz1_n = dL_da1_n * (z1_n > 0)
dL_dw1_n = np.matmul(np.array([x1_n, x2_n]).T, dL_dz1_n)
print(dL_dw1_n)

[[0.38096682 0.38096682 0.38096682 0.38096682]
 [0.46627936 0.46627936 0.46627936 0.46627936]
 [0.5515919  0.5515919  0.5515919  0.5515919 ]]
