In [18]:
import train_module
import json
import pandas as pd
import tensorflow as tf

# Evaluate

The following steps are used for evaluation:


1. 입력 문장을 Encode한다.
    - 고어 -> id 변환 (id 없으면 `<OOV>`)
    - `<START>`와 `<END>` 토큰 추가
2. 디코더의 입력은 `<START>`토큰.
3. padding mask와 look-ahead mask를 계산한다.
4. 디코더는 인코더의 출력과 자신의 출력을 계산해서 예측한다.
5. 마지막 단어를 고르고 그 단어의 argmax를 계산한다.
6. 출력 단어를 디코더에 다시 통과시킨다.
7. In this approach, the decoder predicts the next word based on the previous words it predicted.

In [116]:
# Read a small sample for evaluation: `stop=32` ends the row selection at row 32
# instead of loading the whole HDF file.
df = pd.read_hdf('../data/tokenized_10thousand.hdf', stop=32)

In [3]:
# Load the two vocabularies from JSON. Each maps a token to its integer id
# (they are inverted into id -> token tables in the next cell).
with open('./id_dict/input_id.json', 'r') as fp:
    input_id = json.load(fp)
with open('./id_dict/target_id.json', 'r') as fp:
    target_id = json.load(fp)

In [4]:
# Invert the token -> id vocabularies into id -> token tables used for decoding.
reverse_input_id = {i: char for char, i in input_id.items()}
reverse_input_id[1] = '_'  # id 1 is the fallback id for unknown tokens; render it as '_'
reverse_target_id = {i: char for char, i in target_id.items()}
reverse_target_id[1] = '_'

# evaluate() uses len(vocab) as the start id and len(vocab) + 1 as the end id.
# Neither exists in the JSON vocabularies, so without these entries decoding an
# evaluate() result (which begins with the start id) raises KeyError.
# setdefault keeps any real token that already occupies one of these ids.
# NOTE(review): id 0 is assumed to be the padding id (pad_sequences' default
# fill value) - confirm against the training pipeline.
for _reverse, _vocab in ((reverse_input_id, input_id), (reverse_target_id, target_id)):
    _reverse.setdefault(0, '<PAD>')
    _reverse.setdefault(len(_vocab), '<START>')
    _reverse.setdefault(len(_vocab) + 1, '<END>')

In [5]:
# Upper bound on the number of decoding iterations performed by evaluate().
MAX_LENGTH = 200

In [81]:
# Hyper-parameters handed to the Transformer constructor in the next cell.
# NOTE(review): presumably these have to equal the values used when the
# checkpoint was trained - confirm against the training notebook.
num_layers = 4
d_model = 128
dff = 512
num_heads = 8

# "+ 2" leaves room for the two extra ids that evaluate() uses as start/end
# markers (len(vocab) and len(vocab) + 1).
# NOTE(review): the traceback recorded at the end of this notebook reports a
# checkpoint tensor of shape (51112, 128) against (10005, 128) built here, on
# the decoder embedding path. That suggests target_id.json is not the
# vocabulary the checkpoint was trained with - verify.
input_vocab_size = len(input_id) + 2
target_vocab_size = len(target_id) +2
dropout_rate = 0.1

In [97]:
# Build the model with the hyper-parameters defined above. The positional
# encoding lengths are set to the vocabulary sizes, as in the original cell.
transformer = train_module.functions.Transformer(
    num_layers, d_model, num_heads, dff,
    input_vocab_size, target_vocab_size,
    pe_input=input_vocab_size,
    pe_target=target_vocab_size,
    rate=dropout_rate)

checkpoint_path = "./model/"

# Only the model is tracked here; nothing else is needed for inference.
ckpt = tf.train.Checkpoint(transformer=transformer)

ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=5)

# If a checkpoint exists, restore the latest one. Restoration is deferred until
# the model's variables are created on its first call, so a vocabulary or shape
# mismatch only surfaces at that point.
if ckpt_manager.latest_checkpoint:
    # expect_partial(): the checkpoint may hold objects that are not tracked
    # here (e.g. an optimizer saved during training), which would otherwise
    # trigger "unresolved object" warnings.
    ckpt.restore(ckpt_manager.latest_checkpoint).expect_partial()
    print ('Latest checkpoint restored!!')
else:
    # Make the failure mode visible instead of silently evaluating an
    # untrained, randomly initialised model.
    print('WARNING: no checkpoint found in {!r}; the model is NOT trained.'.format(checkpoint_path))































Latest checkpoint restored!!


In [108]:
def evaluate(inp_sentence):
    """Greedily decode `inp_sentence` with the global `transformer`.

    Args:
        inp_sentence: iterable of input tokens. Each token is looked up in
            `input_id`; tokens that are not in the vocabulary map to id 1.

    Returns:
        A tuple `(output, attention_weights)`:
        * `output` - 1-D int64 tensor of decoded target ids. It starts with the
          target start id (`len(target_id)`); the end id is not appended.
        * `attention_weights` - whatever the transformer returned as its second
          output on the last decoding step.
    """
    start_token = [len(input_id)]
    end_token = [len(input_id) + 1]

    # dict.get, not setdefault: setdefault would insert every unknown token
    # into `input_id`, growing the vocabulary and shifting the start/end ids
    # (which are derived from len(input_id)) on the next call.
    token_ids = start_token + [input_id.get(tok, 1) for tok in inp_sentence] + end_token
    # Post-truncate to MAX_LENGTH ids, as the original did with a literal 200.
    # NOTE(review): for long inputs this drops the end token - confirm this
    # matches how the training data was truncated.
    encoder_input = tf.expand_dims(
        tf.convert_to_tensor(token_ids[:MAX_LENGTH], dtype=tf.int64), 0)

    # The decoder is seeded with the target start id only. Padding the seed to
    # a fixed length would make `predictions[:, -1:, :]` below read the
    # prediction at a padding position instead of the next-token prediction.
    end_id = len(target_id) + 1
    output = tf.constant([[len(target_id)]], dtype=tf.int64)
    attention_weights = None

    for _ in range(MAX_LENGTH):
        enc_padding_mask, combined_mask, dec_padding_mask = \
            train_module.functions.create_masks(encoder_input, output)

        # predictions.shape == (batch_size, seq_len, vocab_size)
        predictions, attention_weights = transformer(
            encoder_input,
            output,
            False,
            enc_padding_mask,
            combined_mask,
            dec_padding_mask)

        # Select the last position from the seq_len dimension.
        predictions = predictions[:, -1:, :]  # (batch_size, 1, vocab_size)

        # Keep the dtype of `output`: tf.concat rejects mixed int32/int64 inputs.
        predicted_id = tf.cast(tf.argmax(predictions, axis=-1), output.dtype)

        # Stop as soon as the end id is predicted.
        if int(predicted_id[0, 0]) == end_id:
            break

        # Append the predicted id; it is fed back to the decoder next step.
        output = tf.concat([output, predicted_id], axis=-1)

    return tf.squeeze(output, axis=0), attention_weights


In [121]:
    # tf.function trace-compiles the decoding step into a TF graph for faster
    # execution. The function specializes to the precise shape of its argument
    # tensors; the decoder input grows by one id per step, so input_signature
    # declares generic (None, None) shapes to avoid re-tracing on every step.
    train_step_signature = [
        tf.TensorSpec(shape=(None, None), dtype=tf.int64),
        tf.TensorSpec(shape=(None, None), dtype=tf.int64),
    ]

    @tf.function(input_signature=train_step_signature)
    def train_step(encoder_input, output):
        """Run one inference step of the decoder (despite the name, no training).

        Args:
            encoder_input: int64 tensor of input ids, shape (batch, input_len).
            output: int64 tensor of the target ids decoded so far,
                shape (batch, decoded_len).

        Returns:
            `(predicted_id, attention_weights)`: the argmax id at the last
            position, with shape (batch, 1) and the dtype of `output`, and the
            transformer's second output for this step.
        """
        enc_padding_mask, combined_mask, dec_padding_mask = \
            train_module.functions.create_masks(encoder_input, output)
        # No GradientTape: this is inference, nothing is differentiated.
        predictions, attention_weights = transformer(encoder_input,
                                                     output,
                                                     False,
                                                     enc_padding_mask,
                                                     combined_mask,
                                                     dec_padding_mask)
        # Prediction for the last position only: (batch, 1, vocab_size).
        predictions = predictions[:, -1:, :]
        # Same dtype as `output` so the caller can tf.concat the two.
        predicted_id = tf.cast(tf.argmax(predictions, axis=-1), output.dtype)
        # Always return the same structure. The end-of-sequence check is done
        # eagerly by the caller, because a tensor-dependent `if` that returns
        # from only one branch cannot be converted to a graph.
        return predicted_id, attention_weights

    def evaluate(inp_sentence):
        """Greedily decode `inp_sentence` using the graph-compiled `train_step`.

        Args:
            inp_sentence: iterable of input tokens; tokens that are not in
                `input_id` map to id 1.

        Returns:
            `(output, attention_weights)`: the 1-D int64 tensor of decoded ids
            (starting with the target start id, end id not appended) and the
            attention weights from the last decoding step.
        """
        start_token = [len(input_id)]
        end_token = [len(input_id) + 1]

        # dict.get, not setdefault: setdefault would add unknown tokens to the
        # vocabulary and shift the length-derived start/end ids on later calls.
        token_ids = start_token + [input_id.get(tok, 1) for tok in inp_sentence] + end_token
        encoder_input = tf.expand_dims(
            tf.convert_to_tensor(token_ids[:MAX_LENGTH], dtype=tf.int64), 0)

        # Seed the decoder with the target start id only, so the last position
        # is always the most recently decoded id rather than padding.
        end_id = len(target_id) + 1
        output = tf.constant([[len(target_id)]], dtype=tf.int64)
        attention_weights = None

        for _ in range(MAX_LENGTH):
            predicted_id, attention_weights = train_step(encoder_input, output)
            # Stop as soon as the end id is predicted.
            if int(predicted_id[0, 0]) == end_id:
                break
            # Feed the predicted id back to the decoder on the next step.
            output = tf.concat([output, predicted_id], axis=-1)

        return tf.squeeze(output, axis=0), attention_weights



In [47]:
def plot_attention_weights(attention, sentence, result, layer):
    """Plot one attention map per head for a single attention layer.

    Args:
        attention: mapping from layer key to an attention tensor whose leading
            axis has size 1 (it is squeezed away) followed by the head axis.
        sentence: iterable of input tokens, used for the x tick labels.
        result: decoded target ids (tensor or sequence of ints), used for the
            y tick labels.
        layer: key selecting which entry of `attention` to plot.
    """
    # Imported locally because no matplotlib import is visible in the
    # notebook's import cell; without it `plt` is undefined here.
    import matplotlib.pyplot as plt

    fig = plt.figure(figsize=(16, 8))

    # dict.get, not setdefault: plotting must not add unknown tokens to the
    # vocabulary (that would shift the length-derived start/end ids).
    sentence = [input_id.get(tok, 1) for tok in sentence]

    # Convert to plain ints: elements obtained by iterating an eager tensor are
    # tensors, which cannot be used as dictionary keys.
    result = [int(tok) for tok in result]
    # The start/end ids are not part of the JSON vocabulary; label them here.
    special_labels = {len(target_id): '<start>', len(target_id) + 1: '<end>'}

    attention = tf.squeeze(attention[layer], axis=0)

    # Four plots per row; the row count follows the number of heads instead of
    # assuming exactly eight (8 heads still gives the original 2 x 4 grid).
    n_heads = attention.shape[0]
    n_cols = 4
    n_rows = max(1, -(-n_heads // n_cols))

    fontdict = {'fontsize': 10}

    for head in range(n_heads):
        ax = fig.add_subplot(n_rows, n_cols, head + 1)

        # Plot the attention weights (the last row is left out).
        ax.matshow(attention[head][:-1, :], cmap='viridis')

        # +2 accounts for the start and end markers around the input sentence.
        ax.set_xticks(range(len(sentence) + 2))
        ax.set_yticks(range(len(result)))

        ax.set_ylim(len(result) - 1.5, -0.5)

        ax.set_xticklabels(
            ['<start>'] + [reverse_input_id[i] for i in sentence] + ['<end>'],
            fontdict=fontdict, rotation=90)

        ax.set_yticklabels(
            [reverse_target_id.get(i, special_labels.get(i, '_')) for i in result],
            fontdict=fontdict)

        ax.set_xlabel('Head {}'.format(head + 1))

    plt.tight_layout()
    plt.show()

In [48]:
def translate(sentence, plot=''):
    """Decode `sentence`, print the prediction and optionally plot attention.

    Args:
        sentence: iterable of input tokens, passed to `evaluate`.
        plot: key of the attention layer to plot; an empty value (the default)
            skips plotting.
    """
    result, attention_weights = evaluate(sentence)

    # Convert to plain ints: elements obtained by iterating an eager tensor are
    # tensors, which cannot be used as dictionary keys.
    result_ids = [int(tok) for tok in result]

    # The start/end ids used by evaluate() are not part of the JSON vocabulary,
    # so give them explicit labels instead of failing with KeyError.
    special_labels = {len(target_id): '<START>', len(target_id) + 1: '<END>'}
    predicted_sentence = [
        reverse_target_id.get(i, special_labels.get(i, '_')) for i in result_ids
    ]

    print('Input: {}'.format(sentence))
    print('Predicted translation: {}'.format(predicted_sentence))

    if plot:
        plot_attention_weights(attention_weights, sentence, result_ids, plot)

In [122]:
# Smoke test: decode the `original` entry labelled 4 of the sample frame.
evaluate(df.original[4])

















ValueError: in converted code:

    <ipython-input-121-97bb971e393a>:12 train_step  *
        predictions, attention_weights = transformer(encoder_input,
    C:\Users\skarn\Anaconda3\envs\advpjt\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py:847 __call__
        outputs = call_fn(cast_inputs, *args, **kwargs)
    E:\BOAZ\ADV\notebook\train_module\functions.py:310 call  *
        dec_output, attention_weights = self.decoder(
    C:\Users\skarn\Anaconda3\envs\advpjt\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py:847 __call__
        outputs = call_fn(cast_inputs, *args, **kwargs)
    E:\BOAZ\ADV\notebook\train_module\functions.py:275 call  *
        x = self.embedding(x)  # (batch_size, target_seq_len, d_model)
    C:\Users\skarn\Anaconda3\envs\advpjt\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py:817 __call__
        self._maybe_build(inputs)
    C:\Users\skarn\Anaconda3\envs\advpjt\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py:2141 _maybe_build
        self.build(input_shapes)
    C:\Users\skarn\Anaconda3\envs\advpjt\lib\site-packages\tensorflow_core\python\keras\utils\tf_utils.py:306 wrapper
        output_shape = fn(instance, input_shape)
    C:\Users\skarn\Anaconda3\envs\advpjt\lib\site-packages\tensorflow_core\python\keras\layers\embeddings.py:146 build
        constraint=self.embeddings_constraint)
    C:\Users\skarn\Anaconda3\envs\advpjt\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py:522 add_weight
        aggregation=aggregation)
    C:\Users\skarn\Anaconda3\envs\advpjt\lib\site-packages\tensorflow_core\python\training\tracking\base.py:725 _add_variable_with_custom_getter
        name=name, shape=shape)
    C:\Users\skarn\Anaconda3\envs\advpjt\lib\site-packages\tensorflow_core\python\training\tracking\base.py:792 _preload_simple_restoration
        checkpoint_position=checkpoint_position, shape=shape)
    C:\Users\skarn\Anaconda3\envs\advpjt\lib\site-packages\tensorflow_core\python\training\tracking\base.py:75 __init__
        self.wrapped_value.set_shape(shape)
    C:\Users\skarn\Anaconda3\envs\advpjt\lib\site-packages\tensorflow_core\python\framework\ops.py:1074 set_shape
        (self.shape, shape))

    ValueError: Tensor's shape (51112, 128) is not compatible with supplied shape (10005, 128)
