Transformer 모델의 구성 -> Transformer_Implement.ipynb

In [1]:
import import_ipynb
from Transformer_Implement import Transformer, LearningRate
from CommonModule.Handle_Dir import mkdir_p, del_folder
from CommonModule.Embedding import Embedding
from CommonModule.Encoder import IntegerEncoder

import re
import os
import numpy as np
from pathlib import Path
from glob import iglob
from glove import Corpus, Glove
import tensorflow as tf

importing Jupyter notebook from Transformer_Implement.ipynb
importing Jupyter notebook from /data/ksb/TestSampleDir/articleSummary-Jupyter/Transformer/CommonModule/Handle_Dir.ipynb
importing Jupyter notebook from /data/ksb/TestSampleDir/articleSummary-Jupyter/Transformer/CommonModule/Embedding.ipynb
importing Jupyter notebook from /data/ksb/TestSampleDir/articleSummary-Jupyter/Transformer/CommonModule/Encoder.ipynb


경로 설정

In [2]:
BASE_DIR = "/data/ksb/TestSampleDir"
DATA_BASE_DIR = os.path.join(BASE_DIR, 'articles')

PREPROCESSED_PATH = os.path.join(DATA_BASE_DIR,"Preprocessed-Data")

SUMMARY_PREPROCESSED_PATH = os.path.join(DATA_BASE_DIR,"Summary-Preprocessed-Data")

WORD_EMBEDDING_DIR = os.path.join(os.path.join(BASE_DIR, 'articleSummary-Jupyter'), 'Word-Embedding-Model')
MODEL_DIR = os.path.join(os.path.join(BASE_DIR, 'articleSummary-Jupyter'), 'Transformer-Model')

In [3]:
D_MODEL = 128
MIN_COUNT = 10

In [4]:
glove_model_path = os.path.join(WORD_EMBEDDING_DIR, 'glove-{d_model}-{mincount}.model'.format(d_model=D_MODEL, mincount=MIN_COUNT))
input_corpus_model_path = os.path.join(WORD_EMBEDDING_DIR, 'input-corpus-{d_model}-{mincount}.model'.format(d_model=D_MODEL, mincount=MIN_COUNT))
summary_corpus_model_path = os.path.join(WORD_EMBEDDING_DIR, 'summary-corpus-{d_model}-{mincount}.model'.format(d_model=D_MODEL, mincount=MIN_COUNT))


말뭉치와 함께 임베딩 모델 불러오기

In [5]:
glove = Glove.load(glove_model_path)
corpus = Corpus.load(input_corpus_model_path)
summary_corpus = Corpus.load(summary_corpus_model_path)

INPUT_VOCAB_SIZE = len(corpus.dictionary)
SUMMARY_VOCAB_SIZE = len(summary_corpus.dictionary)

In [6]:
origin_embedding = Embedding(PREPROCESSED_PATH, corpus=corpus, glove=glove, model='GloVe')
origin_encoded_list = origin_embedding.get_embedded_list()

In [7]:
len(origin_encoded_list)

47646

In [8]:
summary_embedding = Embedding(SUMMARY_PREPROCESSED_PATH, corpus=corpus, glove=glove, model='GloVe')
summary_encoded_list = summary_embedding.get_embedded_list()

In [9]:
len(summary_encoded_list)

47646

In [10]:
options = {
    'model-type' : 'GloVe',
    'inv_wv' : None,
    'corpus' : corpus
}
output_encoded_list = IntegerEncoder(options, filepaths=list(iglob(os.path.join(SUMMARY_PREPROCESSED_PATH, '**.csv'), recursive=False))).encoder()


In [12]:
LAYER_NUM = 6
NUM_HEADS = 8
DFF = 512
VOCAB_SIZE = len(corpus.dictionary)

BATCH_SIZE = 64
BUFFER_SIZE = 5000

WARMUP_STEPS = 50
EPOCHS = 70

In [13]:
get_max_length = lambda x : np.max([x[idx].shape[0] for idx in range(len(x))]) 
MAX_LEN = get_max_length(origin_encoded_list)
MAX_LEN

278

In [14]:
# Padding
origin_encoded_matrix = tf.keras.preprocessing.sequence.pad_sequences(
    origin_encoded_list, maxlen=MAX_LEN, padding='post')
summary_encoded_matrix = tf.keras.preprocessing.sequence.pad_sequences(
    summary_encoded_list, maxlen=MAX_LEN, padding='post')
output_encoded_matrix = tf.keras.preprocessing.sequence.pad_sequences(
    output_encoded_list, maxlen=MAX_LEN, padding='post')

In [15]:
print('Contents Shape : {}'.format(origin_encoded_matrix.shape))
print('Summaries Shape : {}'.format(summary_encoded_matrix.shape))
print('Output Shape : {}'.format(output_encoded_matrix.shape))

Contents Shape : (47646, 278, 128)
Summaries Shape : (47646, 278, 128)
Output Shape : (47646, 278)


In [16]:
# LIMIT = 20000

In [17]:
# origin_encoded_matrix = origin_encoded_matrix[:LIMIT,:,:]
# summary_encoded_matrix = summary_encoded_matrix[:LIMIT,:,:]
# output_encoded_matrix = output_encoded_matrix[:LIMIT,:]

In [18]:
print('Contents Shape : {}'.format(origin_encoded_matrix.shape))
print('Summaries Shape : {}'.format(summary_encoded_matrix.shape))
print('Output Shape : {}'.format(output_encoded_matrix.shape))

Contents Shape : (47646, 278, 128)
Summaries Shape : (47646, 278, 128)
Output Shape : (47646, 278)


In [19]:
dataset = tf.data.Dataset.from_tensor_slices((
    {
        'encoder_inputs': origin_encoded_matrix, # Encoder Input
        'decoder_inputs': summary_encoded_matrix[:, :-1, :] # Decoder Input
    },
    {
        # Decoder Output, Remove <SOS>
        'Output': output_encoded_matrix[:, 1:]  
    },
))

In [20]:
dataset = dataset.cache()
dataset = dataset.shuffle(BUFFER_SIZE)
dataset = dataset.batch(BATCH_SIZE)
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

In [21]:
for dict_1, dict_2 in dataset.as_numpy_iterator():
    print("encoder input : {enc}, decoder input : {dec}".format(enc= dict_1['encoder_inputs'].shape,
                                                               dec=dict_1['decoder_inputs'].shape))
    print("output shape : {}".format(dict_2['Output'].shape))

encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), 

encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), 

encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), 

output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder

encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), 

encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), 

encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), 

encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), decoder input : (64, 277, 128)
output shape : (64, 277)
encoder input : (64, 278, 128), 

학습률 Learning Rate 조정

In [22]:
lrate_scheduler = LearningRate(d_model=D_MODEL)

Optimizer 정의

In [23]:
beta_1 = 0.9  
beta_2 = 0.98
epsilon = 10 ** -9

optimizer = tf.keras.optimizers.Adam(lrate_scheduler, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

Transformer 모델 정의

In [24]:
with tf.device('/CPU:0'):
    model = Transformer(
        vocab_size=VOCAB_SIZE,
        layer_num=LAYER_NUM,
        dff=DFF,
        d_model=D_MODEL,
        num_heads=NUM_HEADS,
        dropout = 0.3).get_transformer()

(1, 58112, 128)
encoder_layer_0 sub-layer 1
[Input] Q shape : (None, None, 128), K shape : (None, None, 128), V shape : (None, None, 128)

[Dense] Q shape : (None, None, 128), K shape : (None, None, 128), V shape : (None, None, 128)

[Splited] Q shape : (None, 8, None, 16), K shape : (None, 8, None, 16), V shape : (None, 8, None, 16)

encoder_layer_0 sub-layer 2
encoder_layer_1 sub-layer 1
[Input] Q shape : (None, None, 128), K shape : (None, None, 128), V shape : (None, None, 128)

[Dense] Q shape : (None, None, 128), K shape : (None, None, 128), V shape : (None, None, 128)

[Splited] Q shape : (None, 8, None, 16), K shape : (None, 8, None, 16), V shape : (None, 8, None, 16)

encoder_layer_1 sub-layer 2
encoder_layer_2 sub-layer 1
[Input] Q shape : (None, None, 128), K shape : (None, None, 128), V shape : (None, None, 128)

[Dense] Q shape : (None, None, 128), K shape : (None, None, 128), V shape : (None, None, 128)

[Splited] Q shape : (None, 8, None, 16), K shape : (None, 8, None, 1

In [25]:
def loss_function(y_true, y_pred):
    y_true = tf.reshape(y_true, shape=(-1, MAX_LEN - 1))
    loss = tf.keras.losses.SparseCategoricalCrossentropy(
      from_logits=True, reduction='none')(y_true, y_pred)
    mask = tf.cast(tf.not_equal(y_true, 0), tf.float32)
    loss = tf.multiply(loss, mask)
    
    return tf.reduce_mean(loss+ 1e-9)

In [26]:
from tensorflow.keras.losses import sparse_categorical_crossentropy

with tf.device('/CPU:0'):
    model.compile(optimizer=optimizer, loss=loss_function, metrics=[sparse_categorical_crossentropy])
    model.summary()

Model: "Transformer"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_inputs (InputLayer)     [(None, None, 128)]  0                                            
__________________________________________________________________________________________________
decoder_inputs (InputLayer)     [(None, None, 128)]  0                                            
__________________________________________________________________________________________________
Encoder (Functional)            (None, None, 128)    1189632     encoder_inputs[0][0]             
__________________________________________________________________________________________________
Decoder (Functional)            (None, None, 128)    1587456     decoder_inputs[0][0]             
                                                                 Encoder[0][0]          

In [None]:
with tf.device('/GPU:0'):
    model.fit(dataset, batch_size=BATCH_SIZE, epochs=50, verbose=2, shuffle=True)


Epoch 1/50


In [None]:
mkdir_p(MODEL_DIR)
model_num = len(list(iglob(os.path.join(MODEL_DIR, '**.h5'), recursive=False)))
print(model_num)
model_path = os.path.join(MODEL_DIR, 'transformer-{}.h5'.format(model_num))

model.save(model_path)
