In [12]:
import os
import json
import torch

from src.model.transformer import TransformerModel
from src.model import check_model_params, build_model
import src.data.loader as loader
from src.trainer import EncDecTrainer
from src.evaluation.evaluator import EncDecEvaluator, convert_to_text

import myutils

In [2]:
MODEL_PATH = 'mlm_enfr_1024.pth'
SRC_LANG = 'en' # distinction between SRC/TGT not really necessary?
TGT_LANG = 'fr'

In [3]:
parser = myutils.get_parser()
params = parser.parse_args([
    '--dump_path', './dumped/',
    '--reload_model', '{0},{0}'.format(MODEL_PATH),
    
    ## data
    '--data_path', './data/processed/en-fr/',
    '--lgs', '{0}-{1}'.format(SRC_LANG, TGT_LANG),
    '--ae_steps', '{0},{1}'.format(SRC_LANG, TGT_LANG),
    '--bt_steps', '{0}-{1}-{0},{1}-{0}-{1}'.format(SRC_LANG, TGT_LANG),
    '--word_shuffle', '3',
    '--word_dropout', '0.1',
    '--word_blank', '0.1',
    '--lambda_ae', '0:1,100000:0.1,300000:0',
    
    ## transformer
    '--encoder_only', 'false',
    '--emb_dim', '1024',                                                
    '--n_layers', '6',                                                  
    '--n_heads', '8',                                                 
    '--dropout', '0.1',                                       
    '--attention_dropout', '0.1',                                      
    '--gelu_activation', 'true',
    
    ## optimization
    '--tokens_per_batch', '2000',                                       # use batches with a fixed number of words
    '--batch_size', '32',                                               # batch size (for back-translation)
    '--bptt', '256',                                                    # sequence length
    '--optimizer', 'adam_inverse_sqrt,beta1=0.9,beta2=0.98,lr=0.0001',  # optimizer
    '--epoch_size', '200000',                                           # number of sentences per epoch
    '--eval_bleu', 'true',                                              # also evaluate the BLEU score
    '--stopping_criterion', 'valid_en-fr_mt_bleu,10',                   # validation metric (when to save the best model)
    '--validation_metrics', 'valid_en-fr_mt_bleu',
])

# print(params)

In [4]:
myutils.check_data_params(params)
check_model_params(params)

In [5]:
# GPU stuff

params.n_nodes = 1
params.node_id = 0
params.local_rank = 0
params.global_rank = 0
params.world_size = 1
params.n_gpu_per_node = 1
params.is_master = params.node_id == 0 and params.local_rank == 0
params.multi_node = params.n_nodes > 1
params.multi_gpu = params.world_size > 1

In [6]:
data_dict = loader.load_data(params)

data_dict

{'mono': {'en': {'train': <src.data.dataset.Dataset at 0x7f781f7f6748>,
   'valid': <src.data.dataset.Dataset at 0x7f781f0f4278>,
   'test': <src.data.dataset.Dataset at 0x7f781bc112b0>},
  'fr': {'train': <src.data.dataset.Dataset at 0x7f781b24c3c8>,
   'valid': <src.data.dataset.Dataset at 0x7f781bd0d438>,
   'test': <src.data.dataset.Dataset at 0x7f781b34c4a8>}},
 'mono_stream': {'en': {'train': <src.data.dataset.StreamDataset at 0x7f781f7f64a8>,
   'valid': <src.data.dataset.StreamDataset at 0x7f789c5e0320>,
   'test': <src.data.dataset.StreamDataset at 0x7f781bc112e8>},
  'fr': {'train': <src.data.dataset.StreamDataset at 0x7f781b24c400>,
   'valid': <src.data.dataset.StreamDataset at 0x7f781bd0d470>,
   'test': <src.data.dataset.StreamDataset at 0x7f781b34c4e0>}},
 'dico': <src.data.dictionary.Dictionary at 0x7f781f7f6470>,
 'para': {('en',
   'fr'): {'valid': <src.data.dataset.ParallelDataset at 0x7f781b34c5c0>, 'test': <src.data.dataset.ParallelDataset at 0x7f781c012fd0>}}}

In [7]:
encoder, decoder = build_model(params, data_dict['dico'])

Parameter layer_norm15.0.weight not found.
Parameter layer_norm15.0.bias not found.
Parameter encoder_attn.0.q_lin.weight not found.
Parameter encoder_attn.0.q_lin.bias not found.
Parameter encoder_attn.0.k_lin.weight not found.
Parameter encoder_attn.0.k_lin.bias not found.
Parameter encoder_attn.0.v_lin.weight not found.
Parameter encoder_attn.0.v_lin.bias not found.
Parameter encoder_attn.0.out_lin.weight not found.
Parameter encoder_attn.0.out_lin.bias not found.
Parameter layer_norm15.1.weight not found.
Parameter layer_norm15.1.bias not found.
Parameter encoder_attn.1.q_lin.weight not found.
Parameter encoder_attn.1.q_lin.bias not found.
Parameter encoder_attn.1.k_lin.weight not found.
Parameter encoder_attn.1.k_lin.bias not found.
Parameter encoder_attn.1.v_lin.weight not found.
Parameter encoder_attn.1.v_lin.bias not found.
Parameter encoder_attn.1.out_lin.weight not found.
Parameter encoder_attn.1.out_lin.bias not found.
Parameter layer_norm15.2.weight not found.
Parameter lay

In [14]:
# encoder, decoder = encoder.cuda(), decoder.cuda()

In [8]:
trainer = EncDecTrainer(encoder, decoder, data_dict, params)
evaluator = EncDecEvaluator(trainer, data_dict, params)

In [9]:
it = data_dict['para'][('en', 'fr')]['valid'].get_iterator(
    shuffle=False, group_by_size=True, n_sentences=-1)

In [10]:
params.langs

['en', 'fr']

In [17]:
from tqdm import tqdm

lang1_id = params.lang2id[params.langs[0]]
lang2_id = params.lang2id[params.langs[1]]

for i,batch in tqdm(enumerate(it)):
    batch = batch[::-1]
    (x1, len1), (x2, len2) = batch
    # just create same shape tensor filled with lang_id
    langs1 = x1.clone().fill_(lang1_id)
    langs2 = x2.clone().fill_(lang2_id)
    
    alen = torch.arange(len2.max(), dtype=torch.long, device=len2.device)
    pred_mask = alen[:, None] < len2[None] - 1
    y = x2[1:].masked_select(pred_mask[:-1].bool())
    
    # encode source sentence
    enc1 = encoder('fwd', x=x1, lengths=len1, langs=langs1, causal=False)
    enc1 = enc1.transpose(0, 1)
    # enc1 = enc1.half() if params.fp16 else enc1

    dec2 = decoder('fwd', x=x2, lengths=len2, langs=langs2, causal=True, src_enc=enc1, src_len=len1)
    
    max_len = int(1.5 * len2.max().item() + 10)
    generated, lengths = decoder.generate(dec2, len2, lang2_id, max_len=max_len)
    print(convert_to_text(generated, lengths, data_dict['dico'], params))
    # decode target sentence
    # dec2 = decoder('fwd', x=x2, lengths=len2, langs=langs2, causal=True, src_enc=enc1, src_len=len1)
    
    break

0it [00:00, ?it/s]


AssertionError: 

In [None]:
scores = evaluator.run_mt(trainer)
for k, v in scores.items():
    print("%s -> %.6f" % (k, v))
    
print("__log__:%s" % json.dumps(scores))



