In [7]:
conda install pytorch torchvision -c pytorch

Collecting package metadata (current_repodata.json): done
Solving environment: done


  current version: 4.10.3
  latest version: 4.11.0

Please update conda by running

    $ conda update -n base -c defaults conda



## Package Plan ##

  environment location: /Users/young/opt/anaconda3/envs/GPT_BERT_book

  added / updated specs:
    - pytorch
    - torchvision


The following NEW packages will be INSTALLED:

  blas               pkgs/main/osx-64::blas-1.0-mkl
  bzip2              pkgs/main/osx-64::bzip2-1.0.8-h1de35cc_0
  ffmpeg             pytorch/osx-64::ffmpeg-4.3-h0a44026_0
  freetype           pkgs/main/osx-64::freetype-2.11.0-hd8bbffd_0
  gettext            pkgs/main/osx-64::gettext-0.21.0-h7535e17_0
  giflib             pkgs/main/osx-64::giflib-5.2.1-haf1e3a3_0
  gmp                pkgs/main/osx-64::gmp-6.2.1-h23ab428_2
  gnutls             pkgs/main/osx-64::gnutls-3.6.15-hed9c0bf_0
  icu                pkgs/main/osx-64::icu-58.2-h0a44026_3
  intel-openmp       pkgs/main/osx-


Note: you may need to restart the kernel to use updated packages.


In [9]:
import torch

x = torch.tensor([
  [1.0, 0.0, 1.0, 0.0],
  [0.0, 2.0, 0.0, 2.0],
  [1.0, 1.0, 1.0, 1.0],  
])
w_query = torch.tensor([
  [1.0, 0.0, 1.0],
  [1.0, 0.0, 0.0],
  [0.0, 0.0, 1.0],
  [0.0, 1.0, 1.0]
])
w_key = torch.tensor([
  [0.0, 0.0, 1.0],
  [1.0, 1.0, 0.0],
  [0.0, 1.0, 0.0],
  [1.0, 1.0, 0.0]
])
w_value = torch.tensor([
  [0.0, 2.0, 0.0],
  [0.0, 3.0, 0.0],
  [1.0, 0.0, 3.0],
  [1.0, 1.0, 0.0]
])

In [10]:
keys = torch.matmul(x, w_key)
querys = torch.matmul(x, w_query)
values = torch.matmul(x, w_value)

In [11]:
attn_scores = torch.matmul(querys, keys.T)

In [12]:
attn_scores

tensor([[ 2.,  4.,  4.],
        [ 4., 16., 12.],
        [ 4., 12., 10.]])

In [14]:
import numpy as np
from torch.nn.functional import softmax

key_dim_sqrt = np.sqrt(keys.shape[-1]) # 키 벡터 차원수?
attn_scores_softmax = softmax(attn_scores / key_dim_sqrt, dim=-1)

In [15]:
attn_scores_softmax

tensor([[1.3613e-01, 4.3194e-01, 4.3194e-01],
        [8.9045e-04, 9.0884e-01, 9.0267e-02],
        [7.4449e-03, 7.5471e-01, 2.3785e-01]])

소프트맥스 확률과 밸류를 가중합하기

In [17]:
weighted_values = torch.matmul(attn_scores_softmax, values)

In [18]:
weighted_values

tensor([[1.8639, 6.3194, 1.7042],
        [1.9991, 7.8141, 0.2735],
        [1.9926, 7.4796, 0.7359]])

피드포워드 뉴럴 네트워크 계산 예시

기본 값들 설정

In [21]:
import torch
x = torch.tensor([2,1])
w1 = torch.tensor([[3,2,-4],[2,-3,1]])
b1 = 1
w2 = torch.tensor([[-1, 1], [1,2], [3,1]])
b2 = -1

실제 계산 수행하는 코드

In [27]:
h_preact = torch.matmul(x, w1) + b1 # 입력층
h = torch.nn.functional.relu(h_preact) # 은닉층
y = torch.matmul(h, w2) + b2 # 출력층

In [28]:
h_preact

tensor([ 9,  2, -6])

In [29]:
h

tensor([9, 2, 0])

In [30]:
y

tensor([-8, 12])

레이어 정규화 예시

입력모양: 배치크기(2) x 피처의 차원수(3)

In [33]:
import torch
input = torch.tensor([[1.0, 2.0, 3.0], [1.0, 1.0, 1.0]]) # 배치 2; 피처의 차원수 3
m = torch.nn.LayerNorm(input.shape[-1]) # 레이어 정규화 수행
output = m(input)

In [34]:
output

tensor([[-1.2247,  0.0000,  1.2247],
        [ 0.0000,  0.0000,  0.0000]], grad_fn=<NativeLayerNormBackward0>)

감마 (weight), 바이어스 (bias)

In [35]:
m.weight

Parameter containing:
tensor([1., 1., 1.], requires_grad=True)

In [36]:
m.bias

Parameter containing:
tensor([0., 0., 0.], requires_grad=True)

드롭아웃 구현

In [37]:
import torch
m = torch.nn.Dropout(p=0.2) # 드롭아웃 수행비율 평균적으로 20% 만듦
input = torch.randn(1, 10)
output = m(input)

In [38]:
m

Dropout(p=0.2, inplace=False)

In [39]:
input

tensor([[ 0.7341, -1.0007, -0.9627,  1.1245, -0.9849, -0.3998, -0.2957, -0.1473,
         -0.4646,  0.8293]])

In [40]:
output

tensor([[ 0.9176, -1.2508, -1.2033,  0.0000, -1.2311, -0.4997, -0.3696, -0.0000,
         -0.5807,  0.0000]])

-> 위에서 보듯, 8번째, 10번째가 0으로 대치됨 (드롭아웃 적용)

아담옵티마이저 구현

In [43]:
from torch.optim import Adam
model = output
optimizer = Adam(model.parameters(), lr=model.learning_rate)

AttributeError: 'Tensor' object has no attribute 'parameters'