In [3]:
import torch
import json
import sentencepiece as spm
from model import Transformer, Encoder, Decoder
import os 

In [7]:
model_path = 'model/transformer/last_model.pth'
config_path = os.path.join(os.path.dirname(model_path), 'configuration.json')

# Load the training configuration stored next to the checkpoint.
# JSON is UTF-8 by specification, so do not depend on the platform locale.
with open(config_path, 'r', encoding='utf-8') as f:
    config = json.load(f)

# Load the SentencePiece tokenizer model.
sp = spm.SentencePieceProcessor()
sp.Load('data/bpe_4000.model')

# Use the first GPU when one is present; otherwise fall back to the CPU so the
# notebook still runs end-to-end on a machine without CUDA.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
num_vocab = sp.get_piece_size()

# Encoder and decoder share the tokenizer vocabulary size; layer/head counts
# and the hidden size come from the saved configuration.
encoder = Encoder(
    input_dim=num_vocab,
    hidden_dim=config['hidden_dim'],
    n_layers=config['enc_layer'],
    n_heads=config['enc_head'],
    pf_dim=512,
    dropout_ratio=0.1,
    device=device)

decoder = Decoder(
    output_dim=num_vocab,
    hidden_dim=config['hidden_dim'],
    n_layers=config['dec_layer'],
    n_heads=config['dec_head'],
    pf_dim=512,
    dropout_ratio=0.1,
    device=device)

# The tokenizer's pad id is passed for both pad-index arguments.
model = Transformer(
    encoder,
    decoder,
    sp.pad_id(),
    sp.pad_id(),
    device
).to(device)

# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# on a GPU can also be restored on a CPU-only machine.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source (or pass weights_only=True if the installed torch supports it).
ckpt = torch.load(model_path, map_location=device)
model.load_state_dict(ckpt)
print(model)


Transformer(
  (encoder): Encoder(
    (tok_embedding): Embedding(4000, 256)
    (pos_embedding): Embedding(500, 256)
    (layers): ModuleList(
      (0-5): 6 x EncoderLayer(
        (self_attn_layer_norm): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
        (ff_layer_norm): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
        (self_attention): MultiHeadAttentionLayer(
          (fc_q): Linear(in_features=256, out_features=256, bias=True)
          (fc_k): Linear(in_features=256, out_features=256, bias=True)
          (fc_v): Linear(in_features=256, out_features=256, bias=True)
          (fc_o): Linear(in_features=256, out_features=256, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (positionwise_feedforward): PositionwiseFeedforwardLayer(
          (fc_1): Linear(in_features=256, out_features=512, bias=True)
          (fc_2): Linear(in_features=512, out_features=256, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
     

In [8]:

def convert_to_sentence(token_list):
    """Join tokenizer pieces into a plain sentence.

    Pieces equal to '<eos>' are skipped, every '▁' marker inside the remaining
    pieces is turned into a space, and the joined text is returned with
    leading and trailing whitespace removed.
    """
    pieces = []
    for piece in token_list:
        if piece == '<eos>':
            continue
        pieces.append(piece.replace('▁', ' '))
    joined = ''.join(pieces)
    return joined.strip()


def inference(txt, max_len=500):
    """Translate ``txt`` with greedy decoding and return the decoded sentence.

    Uses the notebook-level ``model``, ``sp``, ``device`` and
    ``convert_to_sentence`` defined in earlier cells.

    Args:
        txt: Source sentence as a plain string.
        max_len: Upper bound on the number of decoding steps. Defaults to 500,
            the limit that was previously hard-coded. The printed model shows
            a 500-entry positional embedding, so larger values presumably
            overflow it -- TODO confirm against model.py.

    Returns:
        The generated text, post-processed by ``convert_to_sentence``.
    """
    model.eval()
    with torch.no_grad():
        # Build <bos> + ids + <eos> as one int64 tensor. An explicit dtype
        # matters for empty input: torch.tensor([]) alone defaults to float32.
        src_ids = [sp.bos_id()] + list(sp.encode_as_ids(txt)) + [sp.eos_id()]
        src = torch.tensor(src_ids, dtype=torch.long, device=device).unsqueeze(0)
        src_mask = model.make_src_mask(src)
        enc_src = model.encoder(src, src_mask)

        eos_id = sp.eos_id()
        tgt_indices = [sp.bos_id()]

        for _ in range(max_len):
            # Re-run the decoder on everything generated so far and keep the
            # highest-scoring token at the last position (greedy search).
            tgt_tensor = torch.tensor(
                tgt_indices, dtype=torch.long, device=device
            ).unsqueeze(0)
            tgt_mask = model.make_tgt_mask(tgt_tensor)
            output, _attention = model.decoder(tgt_tensor, enc_src, tgt_mask, src_mask)
            pred_token = output.argmax(2)[:, -1].item()
            tgt_indices.append(pred_token)

            if pred_token == eos_id:
                break

        # Drop the leading <bos>; convert_to_sentence filters the '<eos>' piece.
        translate = sp.id_to_piece(tgt_indices[1:])

        return convert_to_sentence(translate)


In [11]:
input_text = '하영 속았수다'
output_text = inference(input_text)
print(f'입력 : {input_text}')
print(f'번역 : {output_text}')

입력 : 하영 속았수다
번역 : 많이 고생했습니다
