In [1]:
%%capture
%run ../config/config.ipynb
%run ./Transformer.ipynb

In [2]:
import torch

In [3]:
# Select the saved checkpoint to evaluate: `model_name` is the file name and
# `model_path` is that name joined onto `model_dir`.
# NOTE(review): `model_dir` is not defined in this notebook; presumably it is
# provided by the `%run ../config/config.ipynb` cell above — confirm.
model_name = "model_20241119_142930_2"
model_path = model_dir / model_name

In [4]:
# Rebuild the network architecture. Every value below is a name that is not
# defined in this notebook; they are expected to be in scope from the `%run`
# cells at the top.
transformer_kwargs = {
    "src_pad_token": src_pad_token,
    "trg_pad_token": trg_pad_token,
    "enc_voc_size": enc_voc_size,
    "dec_voc_size": dec_voc_size,
    "n_head": n_head,
    "max_len": max_len,
    "d_model": d_model,
    "ffn_hidden": ffn_hidden,
    "n_layers": n_layers,
    "drop_prob": drop_prob,
    "device": device,
}
model = Transformer(**transformer_kwargs).to(device)

# Restore the trained parameters from the checkpoint file. `map_location`
# places the loaded tensors on `device`; `weights_only=True` is passed through
# to `torch.load` unchanged.
model.load_state_dict(
    torch.load(model_path, weights_only=True, map_location=device)
)

# Switch to evaluation mode. Kept as the last expression of the cell so the
# module tree is displayed as the cell output.
model.eval()

Transformer(
  (encoder): Encoder(
    (emb): TransformerEmbedding(
      (token_emb): TokenEmbeddings(65001, 400, padding_idx=1)
      (position_emb): PositionalEncoding()
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (layers): ModuleList(
      (0-3): 4 x EncoderLayer(
        (attention): MultiHeadAttentionBlock(
          (attention): AttentionBlock(
            (softmax): Softmax(dim=-1)
          )
          (Wq): Linear(in_features=400, out_features=400, bias=True)
          (Wk): Linear(in_features=400, out_features=400, bias=True)
          (Wv): Linear(in_features=400, out_features=400, bias=True)
          (Wconcat): Linear(in_features=400, out_features=400, bias=True)
        )
        (norm1): LayerNorm()
        (dropout1): Dropout(p=0.1, inplace=False)
        (ffn): FeedForwardBlock(
          (linear1): Linear(in_features=400, out_features=1200, bias=True)
          (linear2): Linear(in_features=1200, out_features=400, bias=True)
          (relu): ReLU()
   

In [5]:
def tensor2tokenid(tensor):
    """Return the index of the largest value along the last dimension.

    Args:
        tensor: A ``torch.Tensor`` of scores. The last dimension is the one
            that gets reduced.

    Returns:
        A tensor of indices with the last dimension removed (a 0-dim tensor
        when ``tensor`` is 1-D).
    """
    return torch.argmax(tensor, dim=-1)

In [21]:
def inference(kr_sentence):
    """Translate a Korean sentence into English, one token at a time.

    The source sentence is passed through ``model.encoder`` once. The decoder
    is then called repeatedly on the target sequence built so far; at every
    step the arg-max token (via ``tensor2tokenid``) of the last position is
    appended. Generation stops when ``trg_eos_token`` is produced or after
    ``max_len - 2`` steps, so the target sequence never holds more than
    ``max_len - 1`` ids including the start token.

    Uses names defined outside this function: ``model``, ``kr_tokenizer``,
    ``en_tokenizer``, ``device``, ``max_len``, ``trg_eos_token`` and
    ``tensor2tokenid``.

    Args:
        kr_sentence: Korean text to translate.

    Returns:
        The decoded target sequence as a string. Special tokens are kept
        (``skip_special_tokens=False``), so the start and end markers are
        part of the returned text.

    Side effects:
        Prints the flattened tensor of target token ids before returning.
    """
    with torch.no_grad():
        # Target side: the sequence starts as the ids of the literal '</s>'
        # string (no extra special tokens added), shaped (1, n) for a batch
        # of one.
        start_ids = en_tokenizer.encode('</s>', return_tensors="pt", add_special_tokens=False)
        trg_token_ids = start_ids.reshape((1, -1)).to(device)

        # Source side: tokenize, pad/truncate to exactly max_len ids, and add
        # the batch axis.
        src_encoding = kr_tokenizer(
            kr_sentence,
            add_special_tokens=True,
            padding="max_length",
            max_length=max_len,
            truncation=True,
        )
        src_token_ids = torch.tensor(src_encoding.input_ids).reshape((1, -1)).to(device)
        src_mask = model.make_src_mask(src_token_ids)

        # The encoder call takes only the source ids and mask, so it is run
        # once here and its output is reused by every decoder step below.
        enc_src = model.encoder(src_token_ids, src_mask)

        for _ in range(max_len - 2):
            trg_mask = model.make_trg_mask(trg_token_ids)
            decoder_out = model.decoder(trg_token_ids, enc_src, trg_mask, src_mask)

            # Scores at the last position of the single batch item decide
            # the next token.
            next_token_id = tensor2tokenid(decoder_out[0, -1, :])
            trg_token_ids = torch.cat([trg_token_ids, next_token_id.reshape(1, -1)], dim=1)

            # The end token is appended before stopping, so it is part of
            # the returned text.
            if next_token_id == trg_eos_token:
                break

        print(f'{trg_token_ids.reshape(-1)}')
        return en_tokenizer.decode(trg_token_ids.reshape(-1), skip_special_tokens=False)

In [24]:
# Smoke test of `inference` on one sentence. The Korean input means
# "Hello, my name is Choi Jinho."
# `inference` itself prints the generated token ids; the print below shows
# the decoded text.
# NOTE(review): in the saved run the decoded text does not match the meaning
# of the input sentence.
translated_sentence = inference("안녕하세요, 제 이름은 최진호입니다.")
print(translated_sentence)

tensor([    0,    16,    12,   107,     9,   639,  6229,    10,     9,  2253,
         5419, 23269,  3539,   101,     3,     8,     9,  3184,     6,     4,
            9,  4952,  4747,     2,     0], device='mps:0')
</s> I'm always atmosphere, and one of the person.</s>
