In [7]:
import numpy as np
import torch
from torch import nn

In [8]:
np.__version__, torch.__version__ #check version

('1.25.2', '2.2.1+cu121')

In [9]:
torch.manual_seed(42)

<torch._C.Generator at 0x7bad90209a30>

### Task1 (np vs torch)

In [192]:
# Task1 using Numpy
x1_n = np.array([1.0, 2.0, 3.0])
x2_n = np.array([4.0, 5.0, 6.0])

w1_n = np.array([[0.1, 0.2, 0.3, 0.4],
               [0.5, 0.6, 0.7, 0.8],
               [0.9, 1.0, 1.1, 1.2]])
w2_n = np.array([[0.2, 0.3],
                [0.4, 0.5],
                [0.6, 0.7],
                [0.8, 0.9]])

def ReLU(x): # define ReLU using numpy # x.shape (4,)
  return np.maximum(x, 0)

def Softmax(x): # define Softmax using numpy # x.shape (2,)
  return np.exp(x)/sum(np.exp(x))


def NN_np(input, dropout = False, p = 0, random_seed = 42): # define neural network
  hidden_nodes = np.dot(input, w1_n) # shape (4,)
  hidden_nodes_act = ReLU(hidden_nodes) # apply activatin function ReLU
  if dropout == True:
    np.random.seed(random_seed)
    drop = np.random.rand(4) # 제거될 확률
    drop = drop < p
    hidden_nodes_act = hidden_nodes_act * drop
    hidden_nodes_act /= (1-p)
  output_nodes_np = np.dot(hidden_nodes_act, w2_n) # shape (2,)
  output_nodes_act = Softmax(output_nodes_np) # apply activation function Softmax
  return hidden_nodes, hidden_nodes_act, output_nodes_np, output_nodes_act

print('when input is x1: ', np.round(NN_np(x1_n)[3], 4))
print('when input is x2: ', np.round(NN_np(x2_n)[3], 4))

when input is x1:  [0.1324 0.8676]
when input is x2:  [0.0145 0.9855]


In [135]:
# Task1 using Pytorch
x1_t = torch.tensor([1.0, 2.0, 3.0])
x2_t = torch.tensor([4.0, 5.0, 6.0])

w1_t = torch.tensor([[0.1, 0.2, 0.3, 0.4],
                   [0.5, 0.6, 0.7, 0.8],
                   [0.9, 1.0, 1.1, 1.2]]) # (3, 4)
w2_t = torch.tensor([[0.2, 0.3],
                   [0.4, 0.5],
                   [0.6, 0.7],
                   [0.8, 0.9]])  # (4, 2)

class NN(nn.Module):
  def __init__(self):
    super().__init__()
    self.linear1 = nn.Linear(3, 4, bias = False)
    self.linear1.weight.data = w1_t.T
    self.act1 = nn.ReLU()
    self.dropout = nn.Dropout(p=0.4)
    self.linear2 = nn.Linear(4, 2, bias = False)
    self.linear2.weight.data = w2_t.T
    self.act2 = nn.Softmax(dim = -1)

  def forward(self, x, dropout = False):
    x = self.linear1(x)
    x = self.act1(x)
    if dropout == True:
      x = self.dropout(x)
    x = self.linear2(x)
    x = self.act2(x)
    return x

NN_torch = NN()
print('when input is x1: ', NN_torch(x1_t))
print('when input is x2: ', NN_torch(x2_t))

when input is x1:  tensor([0.1324, 0.8676], grad_fn=<SoftmaxBackward0>)
when input is x2:  tensor([0.0145, 0.9855], grad_fn=<SoftmaxBackward0>)


### Task2 (np vs torch) - w1 gradient without updates

In [206]:
# Task2 using Numpy
w1_n = np.array([[0.1, 0.2, 0.3, 0.4],
               [0.5, 0.6, 0.7, 0.8],
               [0.9, 1.0, 1.1, 1.2]])
w2_n = np.array([[0.2, 0.3],
                [0.4, 0.5],
                [0.6, 0.7],
                [0.8, 0.9]])
y1_n = np.array([0, 1])
y2_n = np.array([1, 0])
def CEE_np(y_pred, y_target):
  y_pred = Softmax(y_pred)
  return -np.sum(y_target*np.log(y_pred))

def gradient_w1(input, target, i, j, dropout = False, p = 0, random_seed = 42): # i_1: start, j_1: end
  if dropout == True:
    output = NN_np(input, True, p, random_seed = 42)[-1]
  else:
    output = NN_np(input)[-1]
  dL_do = Softmax(output) - target # error
  do_dy = np.zeros((2,2))
  for i_1 in range(2):
    for j_1 in range(2):
      if i_1 == j_1:
        do_dy[i_1][j_1] = output[0]*output[1]
      else:
        do_dy[i_1][j_1] = -output[0]*output[1]
  dy_dr_j = w2_n[j].T
  dr_j_dh = np.array([1 if NN_np(input)[0][j] > 0 else 0])
  dh_dw_ij = input[i]
  result = np.dot(dL_do, do_dy)
  result = np.dot(result, dy_dr_j)
  result = np.dot(result, dr_j_dh)
  result = np.dot(result, dh_dw_ij)
  return round(result.item(), 4)

def get_weight1_grad_np(input, target, dropout = False, p = 0, random_seed = 42):
  weight_grad = np.zeros((3,4))
  for start in range(3):
    for end in range(4):
      if dropout == True:
        weight_grad[start][end] = gradient_w1(input, target, start, end, dropout, p, random_seed = 42)
        idx = np.where(NN_np(input, True, p, random_seed)[1] == 0)[0] # 00인 지점은 가중치 0으로 세팅
        for i in idx:
          weight_grad[:, i] = 0.0
      else:
        weight_grad[start][end] = gradient_w1(input, target, start, end)

  return weight_grad

print('when input is x1: \n', get_weight1_grad_np(x1_n, y1_n))
print()
print('when input is x2: \n', get_weight1_grad_np(x2_n, y2_n))

when input is x1: 
 [[-0.0074 -0.0074 -0.0074 -0.0074]
 [-0.0149 -0.0149 -0.0149 -0.0149]
 [-0.0223 -0.0223 -0.0223 -0.0223]]

when input is x2: 
 [[0.0083 0.0083 0.0083 0.0083]
 [0.0104 0.0104 0.0104 0.0104]
 [0.0124 0.0124 0.0124 0.0124]]


In [208]:
# Task2 using Pytorch
import torch.optim as optim
w1_t = torch.tensor([[0.1, 0.2, 0.3, 0.4],
                   [0.5, 0.6, 0.7, 0.8],
                   [0.9, 1.0, 1.1, 1.2]]) # (3, 4)
w2_t = torch.tensor([[0.2, 0.3],
                   [0.4, 0.5],
                   [0.6, 0.7],
                   [0.8, 0.9]])  # (4, 2)
y1_t = torch.tensor([0, 1], dtype = torch.float32)
y2_t = torch.tensor([1, 0], dtype = torch.float32)

def get_weight_grad_t(input, target):
  NN_torch = NN()
  loss_fn = nn.CrossEntropyLoss()

  pred = NN_torch(input)
  loss = loss_fn(pred, target)
  loss.backward()
  result = NN_torch.linear1.weight.grad
  return result.T

print('when input is x1:\n', get_weight_grad_t(x1_t, y1_t))
print()
print('when input is x2:\n', get_weight_grad_t(x2_t, y2_t))

when input is x1:
 tensor([[-0.0074, -0.0074, -0.0074, -0.0074],
        [-0.0149, -0.0149, -0.0149, -0.0149],
        [-0.0223, -0.0223, -0.0223, -0.0223]])

when input is x2:
 tensor([[0.0083, 0.0083, 0.0083, 0.0083],
        [0.0104, 0.0104, 0.0104, 0.0104],
        [0.0124, 0.0124, 0.0124, 0.0124]])


### Task3 (np vs torch) - updated w1, w2 (100 epoch)

In [210]:
# Before task3 - w2 gradient using np
def gradient_w2(input, target, i, j, dropout = False, p = 0, random_seed = 42): # i_1: start, j_1: end
  if dropout == True:
    output = NN_np(input, dropout, p, random_seed)[-1]
  else:
    output = NN_np(input)[-1]
  dL_do = Softmax(output) - target
  do_dy = np.zeros((2,2))
  for i_1 in range(2):
    for j_1 in range(2):
      if i_1 == j_1:
        do_dy[i_1][j_1] = output[0]*output[1]
      else:
        do_dy[i_1][j_1] = -output[0]*output[1]
  dy_dr_j = NN_np(input)[1][i] # 히든 after_act 값만 얻으면 됨. (즉, dropout되든 안되든 일단 곱하고 뒤에서 dropout된 것들의 값은 0으로 초기화시킴.)
  result = np.dot(dL_do, do_dy)[j]
  result = result * dy_dr_j
  return round(result.item(), 4)



def get_weight2_grad_np(input, target, dropout = False, p = 0, random_seed = 42):
  weight_grad = np.zeros((4,2))
  for start in range(4):
    for end in range(2):
      if dropout == True:
        weight_grad[start][end] = gradient_w2(input, target, start, end, dropout, p, random_seed)
        idx = np.where(NN_np(input, True, p, random_seed)[1] == 0)[0] # 00인 지점은 가중치 0으로 세팅 #idx가 고정되는 error 발생
        for i in idx:
          weight_grad[i, :] = 0.0
      else:
        weight_grad[start][end] = gradient_w2(input, target, start, end)
  return weight_grad

In [211]:
# Task3 using numpy - input x1
w1_n = np.array([[0.1, 0.2, 0.3, 0.4],
               [0.5, 0.6, 0.7, 0.8],
               [0.9, 1.0, 1.1, 1.2]])
w2_n = np.array([[0.2, 0.3],
                [0.4, 0.5],
                [0.6, 0.7],
                [0.8, 0.9]])

for i in range(100): # 100 epochs
  random_seed = i+40
  w1_grad = get_weight1_grad_np(x1_n, y1_n, True, 0.4, random_seed)
  w2_grad = get_weight2_grad_np(x1_n, y1_n, True, 0.4, random_seed)
  w1_n -= 0.01*w1_grad
  w2_n -= 0.01*w2_grad
print('input: x1')
print()
print('w1: ', np.round(w1_n, 4))
print()
print('w2: ', np.round(w2_n, 4))


input: x1

w1:  [[0.1107 0.2103 0.3085 0.4107]
 [0.5214 0.6207 0.7169 0.8213]
 [0.9322 1.031  1.1254 1.232 ]]

w2:  [[0.1191 0.3809]
 [0.3274 0.5726]
 [0.5234 0.7766]
 [0.7254 0.9746]]


In [212]:
# Task3 using numpy - input x2
w1_n = np.array([[0.1, 0.2, 0.3, 0.4],
               [0.5, 0.6, 0.7, 0.8],
               [0.9, 1.0, 1.1, 1.2]])
w2_n = np.array([[0.2, 0.3],
                [0.4, 0.5],
                [0.6, 0.7],
                [0.8, 0.9]])

for i in range(100): # 100 epochs
  random_seed = i+40
  w1_grad = get_weight1_grad_np(x2_n, y2_n, True, 0.4, random_seed)
  w2_grad = get_weight2_grad_np(x2_n, y2_n, True, 0.4, random_seed)
  w1_n -= 0.01*w1_grad
  w2_n -= 0.01*w2_grad
print('input: x2')
print()
print('w1: ', np.round(w1_n, 4))
print()
print('w2: ', np.round(w2_n, 4))

input: x2

w1:  [[0.1104 0.2102 0.3097 0.4128]
 [0.513  0.6127 0.7122 0.8161]
 [0.9156 1.0152 1.1146 1.2193]]

w2:  [[0.3463 0.1537]
 [0.5362 0.3638]
 [0.7412 0.5588]
 [0.9364 0.7636]]


In [213]:
# Task3 using Pytorch - input x1
w1_t = torch.tensor([[0.1, 0.2, 0.3, 0.4],
                   [0.5, 0.6, 0.7, 0.8],
                   [0.9, 1.0, 1.1, 1.2]]) # (3, 4)
w2_t = torch.tensor([[0.2, 0.3],
                   [0.4, 0.5],
                   [0.6, 0.7],
                   [0.8, 0.9]])  # (4, 2)

NN_torch1 = NN()
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(NN_torch1.parameters(), lr = 0.01)

# input: x1
for epoch in range(100):
  pred = NN_torch1(x1_t, True)
  loss = loss_fn(pred, y1_t)
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()
print('input: x1\n')
print('w1: ', NN_torch1.linear1.weight.T)
print('w2: ', NN_torch1.linear2.weight.T)

input: x1

w1:  tensor([[0.1038, 0.2022, 0.3031, 0.4022],
        [0.5076, 0.6045, 0.7062, 0.8043],
        [0.9114, 1.0067, 1.1093, 1.2065]], grad_fn=<PermuteBackward0>)
w2:  tensor([[0.1184, 0.3816],
        [0.3374, 0.5626],
        [0.5136, 0.7864],
        [0.7277, 0.9723]], grad_fn=<PermuteBackward0>)


In [214]:
# Task3 using Pytorch - input x2
w1_t = torch.tensor([[0.1, 0.2, 0.3, 0.4],
                   [0.5, 0.6, 0.7, 0.8],
                   [0.9, 1.0, 1.1, 1.2]]) # (3, 4)
w2_t = torch.tensor([[0.2, 0.3],
                   [0.4, 0.5],
                   [0.6, 0.7],
                   [0.8, 0.9]])  # (4, 2)

NN_torch1 = NN()
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(NN_torch1.parameters(), lr = 0.01)

# input: x2
for epoch in range(100):
  pred = NN_torch1(x2_t, True)
  loss = loss_fn(pred, y2_t)
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()
print('input: x2\n')
print('w1: ', NN_torch1.linear1.weight.T)
print('w2: ', NN_torch1.linear2.weight.T)

input: x2

w1:  tensor([[0.1032, 0.2008, 0.3007, 0.4000],
        [0.5040, 0.6010, 0.7009, 0.8000],
        [0.9048, 1.0012, 1.1011, 1.2000]], grad_fn=<PermuteBackward0>)
w2:  tensor([[0.3584, 0.1416],
        [0.5356, 0.3644],
        [0.7389, 0.5611],
        [0.9234, 0.7766]], grad_fn=<PermuteBackward0>)
