## 데이터 불러오기

In [None]:
### Mount Google Drive into the Colab runtime at /content/drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
### Change the working directory (IPython %cd magic) to a folder under the mounted Drive
%cd '/content/drive/MyDrive/KDT/비정형텍스트분석'

/content/drive/MyDrive/KDT/비정형텍스트분석


In [None]:
### 필요한 라이브러리 임포트
import tensorflow as tf
import numpy as np
import re
import json
import matplotlib.pyplot as plt

In [None]:
### Load the previously saved results
# Arrays stored as .npy files in the current working directory.
enc_inputs = np.load('enc_inputs.npy')
dec_inputs = np.load('dec_inputs.npy')
dec_targets = np.load('dec_targets.npy')
# Decode the JSON explicitly as UTF-8 so parsing does not depend on the
# platform's default locale encoding.
with open('data_configs.json', 'r', encoding='utf-8') as f:
    data_configs = json.load(f)

## 랜덤 시드 설정

In [None]:
# Fix the global TensorFlow and NumPy random seeds to the same value
tf.random.set_seed(99)
np.random.seed(99)

## 모델 하이퍼파라미터 정의

In [None]:
# Values read from the loaded data_configs dictionary
word2idx = data_configs['word2idx']
# NOTE: despite its name, this holds data_configs['eos_symbol'], which is used
# below as a KEY into word2idx (not as an integer index).
eos_index = data_configs['eos_symbol']
model_name = 'transformer'
vocab_size = data_configs['vocab_size']
MAX_SEQUENCE = 25
# NOTE(review): batch_size, epochs and valid_split are not used in the visible
# part of the file; presumably consumed by the training step later on.
batch_size = 2
epochs = 30
valid_split = 0.1

# Keyword arguments unpacked into the model classes / builder functions
kargs = {
    'model_name':model_name,
    'num_layers':2,
    'd_model':512,
    'num_heads':8,
    'dff':2048,
    'input_vocab_size':vocab_size,  # the same vocabulary size is used for input and target
    'target_vocab_size':vocab_size,
    'eos_token_ids':word2idx[eos_index],  # value stored in word2idx under the EOS symbol
    'rate':0.1,  # passed to the Dropout layers
    'maximum_position_encoding':MAX_SEQUENCE  # number of positions in the positional-encoding table
}

## 모델 생성

### position encoding

In [None]:
### Compute the angle applied at each index of the embedding vector
def get_angles(pos, i, d_model):
    """Return the positional-encoding angles ``pos / 10000**(2*(i//2) / d_model)``.

    Args:
        pos: Token position(s); a scalar or an array that broadcasts against ``i``.
        i: Integer embedding index/indices.
        d_model: Size of the embedding vector.

    Returns:
        ``pos`` multiplied by the per-index angle rate (broadcast result).
    """
    # Floor-divide BEFORE doubling so each (even, odd) index pair shares one
    # frequency. The unparenthesised ``2*i//2`` parses as ``(2*i)//2 == i``.
    angle_rates = 1 / np.power(10000, (2 * (i // 2)) / np.float32(d_model))
    return pos * angle_rates

In [None]:
### Walk-through of the angle function for the embedding indices --> meaning of the arguments
# The note below (kept verbatim) says: 1. pos is the word position in the
# sentence (0..24), reshaped to (25, 1); 2. the embedding-vector indices
# (0..511) are reshaped to (1, 512).
'''
1. pos : 문장을 구성하고 있는 단어의 위치(0,...,24) --> reshape --> (25,1)
2. 임베딩 벡터의 인덱스(0,...,511)  --> reshape --> (1, 512)
'''
angle_rads = get_angles(np.arange(25).reshape(25,1), np.arange(512).reshape(1,512), 512)

# Check the result
print(f'단어 별로 생성된 임베딩 각도 : \n{angle_rads}')

단어 별로 생성된 임베딩 각도 : 
[[0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [1.00000000e+00 9.82171889e-01 9.64661620e-01 ... 1.05544960e-04
  1.03663293e-04 1.01815172e-04]
 [2.00000000e+00 1.96434378e+00 1.92932324e+00 ... 2.11089920e-04
  2.07326586e-04 2.03630344e-04]
 ...
 [2.20000000e+01 2.16077816e+01 2.12225556e+01 ... 2.32198912e-03
  2.28059244e-03 2.23993379e-03]
 [2.30000000e+01 2.25899535e+01 2.21872173e+01 ... 2.42753408e-03
  2.38425574e-03 2.34174896e-03]
 [2.40000000e+01 2.35721253e+01 2.31518789e+01 ... 2.53307904e-03
  2.48791903e-03 2.44356413e-03]]


In [None]:
### pos example: the 25 word positions (0..24) as a column vector of shape (25, 1)
copy_array = np.arange(25)[:, np.newaxis]
print(copy_array)

[[ 0]
 [ 1]
 [ 2]
 [ 3]
 [ 4]
 [ 5]
 [ 6]
 [ 7]
 [ 8]
 [ 9]
 [10]
 [11]
 [12]
 [13]
 [14]
 [15]
 [16]
 [17]
 [18]
 [19]
 [20]
 [21]
 [22]
 [23]
 [24]]


In [None]:
# Embedding indices 0..511 as a row vector of shape (1, 512)
np.arange(512)[np.newaxis, :]

array([[  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
         13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
         26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
         39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
         52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
         65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
         78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
         91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
        104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
        117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
        130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
        143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
        156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
        169, 170, 171, 172, 173, 174, 175, 176, 177

In [None]:
def get_angles_test(i, d_model):
    """Return only the per-index angle rate ``1 / 10000**(2*(i//2) / d_model)``.

    Args:
        i: Integer embedding index/indices.
        d_model: Size of the embedding vector.

    Returns:
        The angle rate for every index in ``i``.
    """
    # ``2*i//2`` parses as ``(2*i)//2 == i``; the floor division has to be
    # applied first so each (even, odd) index pair gets the same rate.
    angle_rates = 1 / np.power(10000, (2 * (i // 2)) / np.float32(d_model))
    return angle_rates

# Angle rates for the 512 embedding indices, passed in as a (1, 512) row
angle_rates = get_angles_test(np.arange(512).reshape(1,512), 512)

# Check the result
print(angle_rates)

[[1.00000000e+00 9.82171889e-01 9.64661620e-01 9.47463526e-01
  9.30572041e-01 9.13981699e-01 8.97687132e-01 8.81683067e-01
  8.65964323e-01 8.50525815e-01 8.35362547e-01 8.20469611e-01
  8.05842188e-01 7.91475544e-01 7.77365030e-01 7.63506080e-01
  7.49894209e-01 7.36525012e-01 7.23394163e-01 7.10497411e-01
  6.97830585e-01 6.85389584e-01 6.73170382e-01 6.61169026e-01
  6.49381632e-01 6.37804384e-01 6.26433537e-01 6.15265410e-01
  6.04296390e-01 5.93522927e-01 5.82941535e-01 5.72548788e-01
  5.62341325e-01 5.52315842e-01 5.42469094e-01 5.32797895e-01
  5.23299115e-01 5.13969680e-01 5.04806572e-01 4.95806824e-01
  4.86967525e-01 4.78285814e-01 4.69758882e-01 4.61383968e-01
  4.53158364e-01 4.45079406e-01 4.37144481e-01 4.29351021e-01
  4.21696503e-01 4.14178451e-01 4.06794432e-01 3.99542056e-01
  3.92418976e-01 3.85422887e-01 3.78551525e-01 3.71802666e-01
  3.65174127e-01 3.58663762e-01 3.52269465e-01 3.45989166e-01
  3.39820833e-01 3.33762469e-01 3.27812115e-01 3.21967844e-01
  3.1622

In [None]:
def positional_encoding(position, d_model):
    """Build the sinusoidal positional-encoding table.

    Args:
        position: Number of positions (rows) to generate.
        d_model: Size of the embedding vector (columns).

    Returns:
        A float32 tensor of shape (1, position, d_model) holding the sine of
        the angle at even embedding indices and the cosine at odd ones.
    """
    positions = np.arange(position)[:, np.newaxis]
    dims = np.arange(d_model)[np.newaxis, :]
    angles = get_angles(positions, dims, d_model)

    # Even embedding indices take the sine, odd indices the cosine.
    table = np.where(dims % 2 == 0, np.sin(angles), np.cos(angles))

    # Prepend a leading axis of size 1 before casting.
    return tf.cast(table[np.newaxis, ...], dtype=tf.float32)

### masking

### 5/26 실습

In [None]:
### Padding mask
def create_padding_mask(seq):
    """Mark the entries of ``seq`` that are equal to 0.

    Args:
        seq: Tensor/array of token ids; assumed 2-D (batch, seq_len) -- TODO confirm.

    Returns:
        A float32 tensor that is 1.0 where ``seq == 0`` and 0.0 elsewhere, with
        two size-1 axes inserted after the first axis; for a 2-D input this
        gives (batch, 1, 1, seq_len).
    """
    is_pad = tf.math.equal(seq, 0)
    mask = tf.cast(is_pad, tf.float32)
    return mask[:, tf.newaxis, tf.newaxis, :]

In [None]:
### 패딩 마스크 구현 함수 사용 예시
# seq = seq * -1e9
# print(seq)

In [None]:
# seq = tf.cast(tf.math.equal(enc_inputs[0], 0), tf.float32)
# print(seq)

In [None]:
# Show the first row of the encoder inputs
enc_inputs[0]

array([116,  87,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0])

### self attention

In [None]:
### Check the TensorFlow version
print(tf.__version__)

2.12.0


In [None]:
def scaled_dot_product_attention(q, k, v, mask):
    """Compute softmax(q @ k^T / sqrt(d_k)) @ v.

    Args:
        q: Query tensor.
        k: Key tensor; the size of its last axis is used as d_k.
        v: Value tensor.
        mask: Optional tensor added to the logits after being multiplied by
            -1e9, so entries equal to 1 receive (almost) zero weight. Pass
            ``None`` to skip masking.

    Returns:
        A tuple ``(output, attention_weights)``.
    """
    # Scale the raw q.k^T scores by sqrt(d_k).
    key_depth = tf.cast(tf.shape(k)[-1], tf.float32)
    logits = tf.linalg.matmul(a=q, b=k, transpose_b=True) / tf.math.sqrt(key_depth)

    # Push masked positions towards -inf before the softmax.
    if mask is not None:
        logits += (mask * -1e9)

    # Normalise over the last axis.
    attention_weights = tf.nn.softmax(logits, axis=-1)

    # Weighted sum of the values.
    return tf.matmul(attention_weights, v), attention_weights

### MultiHeadAttention

In [None]:
class MultiHeadAttention(tf.keras.layers.Layer):
    """Multi-head attention built on ``scaled_dot_product_attention``.

    Reads ``num_heads`` and ``d_model`` from the keyword arguments;
    ``d_model`` must be divisible by ``num_heads``.
    """

    def __init__(self, **kargs):
        super().__init__()
        self.num_heads = kargs['num_heads']
        self.d_model = kargs['d_model']

        assert self.d_model % self.num_heads == 0

        # Width of a single head.
        self.depth = self.d_model // self.num_heads

        # Linear projections for query, key and value.
        self.wq = tf.keras.layers.Dense(self.d_model)
        self.wk = tf.keras.layers.Dense(self.d_model)
        self.wv = tf.keras.layers.Dense(self.d_model)

        # Output projection applied after the heads are merged.
        self.dense = tf.keras.layers.Dense(self.d_model)

    def split_heads(self, x, batch_size):
        """Reshape the last axis into (num_heads, depth) and move the head axis to position 1."""
        per_head = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth))
        return tf.transpose(per_head, perm=[0, 2, 1, 3])

    def call(self, v, k, q, mask):
        """Run multi-head attention.

        Note the argument order: value, key, query, mask.

        Returns:
            A tuple ``(output, attention_weights)``; ``output`` is
            (batch_size, seq_len_q, d_model) and ``attention_weights`` is
            (batch_size, num_heads, seq_len_q, seq_len_k).
        """
        batch_size = tf.shape(q)[0]

        # Project to (batch_size, seq_len, d_model), then split into
        # (batch_size, num_heads, seq_len, depth).
        q = self.split_heads(self.wq(q), batch_size)
        k = self.split_heads(self.wk(k), batch_size)
        v = self.split_heads(self.wv(v), batch_size)

        # per_head: (batch_size, num_heads, seq_len_q, depth)
        per_head, attention_weights = scaled_dot_product_attention(q, k, v, mask)

        # Back to (batch_size, seq_len_q, num_heads, depth), then merge the
        # heads into (batch_size, seq_len_q, d_model).
        heads_last = tf.transpose(per_head, perm=[0, 2, 1, 3])
        merged = tf.reshape(heads_last, (batch_size, -1, self.d_model))

        return self.dense(merged), attention_weights

In [None]:
### Check how the q, k, v matrices are produced

# enc_inputs --> Embedding layer --> resulting embedding array --> shape for a single sentence: (1,25,512)
# NOTE(review): the name `input` shadows the Python builtin; left unchanged here
# because other cells may refer to it.
input = tf.keras.Input(shape=(1,25,512))

# Input --> Dense layer --> multiplied with the weight matrix --> q, k, v
wq = tf.keras.layers.Dense(units=512)
wk = tf.keras.layers.Dense(units=512)
wv = tf.keras.layers.Dense(units=512)

q = wq(input)
k = wk(input)
v = wv(input)

# Check the result
print(q.shape)
print(k.shape)
print(v.shape)

(None, 1, 25, 512)
(None, 1, 25, 512)
(None, 1, 25, 512)


### FFN

In [None]:
def point_wise_feed_forward_network(**kargs):
    """Build the two-layer feed-forward network.

    Reads ``dff`` (units of the first, ReLU-activated Dense layer) and
    ``d_model`` (units of the second Dense layer) from the keyword arguments.

    Returns:
        A ``tf.keras.Sequential`` containing the two Dense layers.
    """
    hidden = tf.keras.layers.Dense(units=kargs['dff'], activation='relu')
    projection = tf.keras.layers.Dense(units=kargs['d_model'])
    return tf.keras.Sequential([hidden, projection])

### Encoder Layer

In [None]:
class EncoderLayer(tf.keras.layers.Layer):
    """One encoder block: self-attention and a feed-forward network.

    Each sub-block is followed by dropout, a residual connection and layer
    normalisation. The dropout rate is read from ``kargs['rate']``.
    """

    def __init__(self, **kargs):
        super().__init__()

        self.mha = MultiHeadAttention(**kargs)
        self.ffn = point_wise_feed_forward_network(**kargs)

        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

        self.dropout1 = tf.keras.layers.Dropout(kargs['rate'])
        self.dropout2 = tf.keras.layers.Dropout(kargs['rate'])

    def call(self, x, mask):
        """Apply the block to ``x``; ``mask`` is forwarded to the attention layer."""
        # Self-attention: x is used as value, key and query.
        attended, _ = self.mha(x, x, x, mask)
        out1 = self.layernorm1(x + self.dropout1(attended))

        # Feed-forward sub-block on the normalised attention output.
        out2 = self.layernorm2(out1 + self.dropout2(self.ffn(out1)))
        return out2

### Encoder

In [None]:
class Encoder(tf.keras.layers.Layer):
    """Embedding + positional encoding followed by a stack of EncoderLayer blocks.

    Reads ``d_model``, ``num_layers``, ``input_vocab_size``,
    ``maximum_position_encoding`` and ``rate`` from the keyword arguments.
    """

    def __init__(self, **kargs):
        super().__init__()

        self.d_model = kargs['d_model']
        self.num_layers = kargs['num_layers']

        self.embedding = tf.keras.layers.Embedding(kargs['input_vocab_size'], self.d_model)
        # Table precomputed once for the maximum number of positions.
        self.pos_encoding = positional_encoding(
            kargs['maximum_position_encoding'], self.d_model)

        self.enc_layers = [EncoderLayer(**kargs) for _ in range(self.num_layers)]

        self.dropout = tf.keras.layers.Dropout(kargs['rate'])

    def call(self, x, mask):
        """Encode the token ids ``x``; ``mask`` is passed to every encoder layer.

        Returns:
            Tensor of shape (batch_size, input_seq_len, d_model).
        """
        seq_len = tf.shape(x)[1]

        # Token embeddings: (batch_size, input_seq_len, d_model).
        # NOTE: scaling the embeddings by sqrt(d_model) is not applied here
        # (the line was commented out in the original).
        embedded = self.embedding(x)

        # Add the positional encoding for the first seq_len positions.
        hidden = embedded + self.pos_encoding[:, :seq_len, :]
        hidden = self.dropout(hidden)

        for layer in self.enc_layers:
            hidden = layer(hidden, mask)

        return hidden

In [None]:
### Create the Encoder object from the hyperparameter dictionary
encoder = Encoder(**kargs)

In [None]:
### Build the inputs --> run them through the Encoder --> inspect the result

# Build the padding mask
enc_padding_mask = create_padding_mask(enc_inputs)

# Invoke call() directly
# NOTE(review): this bypasses the Keras Layer.__call__ wrapper; the usual entry
# point is `encoder(enc_inputs, enc_padding_mask)` -- confirm that calling
# call() directly is intentional.
enc_output = encoder.call(enc_inputs, enc_padding_mask)
print(enc_output)
print(enc_output.shape)

tf.Tensor(
[[[ 3.44942838e-01 -2.11669159e+00 -3.79023552e-02 ...  5.57053983e-01
   -4.45747912e-01 -7.38333881e-01]
  [ 8.58155251e-01 -2.37815475e+00  3.81547719e-01 ...  4.83715981e-01
   -4.63531494e-01 -6.78970635e-01]
  [ 8.99092615e-01 -2.96937084e+00  3.11517656e-01 ...  6.24412894e-01
   -4.86593038e-01 -5.60582638e-01]
  ...
  [ 5.73798597e-01 -2.69557405e+00 -2.01548606e-01 ...  6.77049458e-01
   -7.16934979e-01 -3.78748365e-02]
  [ 9.49558765e-02 -2.65950894e+00 -6.86962664e-01 ...  6.87113345e-01
   -7.99432039e-01 -7.11470917e-02]
  [ 7.23279919e-03 -2.25313592e+00 -1.04625964e+00 ...  6.18758202e-01
   -8.59830618e-01 -1.57082051e-01]]

 [[ 4.66473907e-01 -1.99182892e+00 -3.10489181e-02 ...  4.64415967e-01
   -1.19833566e-01 -5.17452955e-01]
  [ 9.60671067e-01 -2.26855803e+00  4.59999174e-01 ...  4.12631631e-01
   -1.52203068e-01 -4.61881369e-01]
  [ 1.04618597e+00 -2.90945625e+00  3.82258326e-01 ...  5.26612997e-01
   -1.40830308e-01 -3.69827062e-01]
  ...
  [ 7.348016