In [1]:
import os
import json
import torch

from src.model.transformer import TransformerModel
from src.model import check_model_params, build_model
import src.data.loader as loader
from src.trainer import EncDecTrainer
from src.evaluation.evaluator import EncDecEvaluator

import myutils

FAISS library was not found.
FAISS not available. Switching to standard nearest neighbors search implementation.


In [2]:
# Checkpoint file handed to --reload_model when the arguments are parsed.
MODEL_PATH = 'mlm_enfr_1024.pth'

# Language pair used to build the --lgs / --ae_steps / --bt_steps arguments.
# Open question kept from the original author: is the SRC/TGT distinction
# really necessary?
SRC_LANG, TGT_LANG = 'en', 'fr'

In [3]:
# Parse the run configuration from an explicit argument list (nothing is read
# from the command line). Every language-dependent argument is derived from
# SRC_LANG / TGT_LANG so that changing the pair in one place keeps the step
# definitions and the validation metric names in agreement.
parser = myutils.get_parser()
params = parser.parse_args([
    '--dump_path', './dumped/',
    # the same checkpoint path is listed twice, comma-separated
    '--reload_model', '{0},{0}'.format(MODEL_PATH),

    ## data
    # NOTE(review): the directory name is still hard-coded to en-fr; it is left
    # untouched because the on-disk naming convention is not visible here.
    '--data_path', './data/processed/en-fr/',
    '--lgs', '{0}-{1}'.format(SRC_LANG, TGT_LANG),
    '--ae_steps', '{0},{1}'.format(SRC_LANG, TGT_LANG),
    '--bt_steps', '{0}-{1}-{0},{1}-{0}-{1}'.format(SRC_LANG, TGT_LANG),
    '--word_shuffle', '3',
    '--word_dropout', '0.1',
    '--word_blank', '0.1',
    '--lambda_ae', '0:1,100000:0.1,300000:0',

    ## transformer
    '--encoder_only', 'false',
    '--emb_dim', '1024',
    '--n_layers', '6',
    '--n_heads', '8',
    '--dropout', '0.1',
    '--attention_dropout', '0.1',
    '--gelu_activation', 'true',

    ## optimization
    '--tokens_per_batch', '2000',                                       # use batches with a fixed number of words
    '--batch_size', '32',                                               # batch size (for back-translation)
    '--bptt', '256',                                                    # sequence length
    '--optimizer', 'adam_inverse_sqrt,beta1=0.9,beta2=0.98,lr=0.0001',  # optimizer
    '--epoch_size', '200000',                                           # number of sentences per epoch
    '--eval_bleu', 'true',                                              # also evaluate the BLEU score
    # validation metric (when to save the best model); the metric name follows
    # the configured language pair instead of a hard-coded "en-fr"
    '--stopping_criterion', 'valid_{0}-{1}_mt_bleu,10'.format(SRC_LANG, TGT_LANG),
    '--validation_metrics', 'valid_{0}-{1}_mt_bleu'.format(SRC_LANG, TGT_LANG),
])

# print(params)

In [4]:
# Run the data-side and model-side parameter checks on the parsed arguments
# before any dataset or model is created.
# NOTE(review): check_data_params presumably also fills in params.mono_dataset
# and params.para_dataset (both are printed in a later cell) -- confirm in myutils.
myutils.check_data_params(params)
check_model_params(params)

In [5]:
# GPU stuff
#
# Fill in the distributed-topology fields by hand for a single process:
# one node, one GPU per node, every rank/id equal to 0.
for _field, _value in (
    ('n_nodes', 1),
    ('node_id', 0),
    ('local_rank', 0),
    ('global_rank', 0),
    ('world_size', 1),
    ('n_gpu_per_node', 1),
):
    setattr(params, _field, _value)

# Flags derived from the fields assigned above.
params.is_master = (params.node_id, params.local_rank) == (0, 0)
params.multi_node = params.n_nodes > 1
params.multi_gpu = params.world_size > 1

In [6]:
# Display the epoch limit stored on params. --max_epoch is not among the
# arguments passed to parse_args, so this is whatever value the parser supplies.
params.max_epoch

100000

In [11]:
# Print the monolingual and then the parallel dataset path mappings stored on
# params, one mapping per line.
for _attr in ('mono_dataset', 'para_dataset'):
    print(getattr(params, _attr))

{'en': {'train': './data/processed/en-fr/train.en.pth', 'valid': './data/processed/en-fr/valid.en.pth', 'test': './data/processed/en-fr/test.en.pth'}, 'fr': {'train': './data/processed/en-fr/train.fr.pth', 'valid': './data/processed/en-fr/valid.fr.pth', 'test': './data/processed/en-fr/test.fr.pth'}}
{('en', 'fr'): {'valid': ('./data/processed/en-fr/valid.en-fr.en.pth', './data/processed/en-fr/valid.en-fr.fr.pth'), 'test': ('./data/processed/en-fr/test.en-fr.en.pth', './data/processed/en-fr/test.en-fr.fr.pth')}}


In [7]:
# Load the datasets described by params. The resulting dictionary is used by
# the following cells: its 'dico' entry is passed to build_model, and the whole
# dictionary is passed to the trainer and the evaluator.
data_dict = loader.load_data(params)

# data_dict  # uncomment to display the loaded structure

Monolingual data   - train -           en:   5000000
Monolingual data   - valid -           en:      3000
Monolingual data   -  test -           en:      3003
Monolingual data   - train -           fr:   5000000
Monolingual data   - valid -           fr:      3000
Monolingual data   -  test -           fr:      3003
Parallel data      - valid -        en-fr:      3000
Parallel data      -  test -        en-fr:      3003


{'mono': {'en': {'train': <src.data.dataset.Dataset at 0x7fb7c845ae10>,
   'valid': <src.data.dataset.Dataset at 0x7fb7c7c8c4a8>,
   'test': <src.data.dataset.Dataset at 0x7fb7c47bc4e0>},
  'fr': {'train': <src.data.dataset.Dataset at 0x7fb7c3df75f8>,
   'valid': <src.data.dataset.Dataset at 0x7fb7c48b6668>,
   'test': <src.data.dataset.Dataset at 0x7fb7c3eb66d8>}},
 'mono_stream': {'en': {'train': <src.data.dataset.StreamDataset at 0x7fb7c845ada0>,
   'valid': <src.data.dataset.StreamDataset at 0x7fb844118080>,
   'test': <src.data.dataset.StreamDataset at 0x7fb7c47bc518>},
  'fr': {'train': <src.data.dataset.StreamDataset at 0x7fb7c3df7630>,
   'valid': <src.data.dataset.StreamDataset at 0x7fb7c48b66a0>,
   'test': <src.data.dataset.StreamDataset at 0x7fb7c3eb6710>}},
 'dico': <src.data.dictionary.Dictionary at 0x7fb7c845ae48>,
 'para': {('en',
   'fr'): {'valid': <src.data.dataset.ParallelDataset at 0x7fb7c3eb67f0>, 'test': <src.data.dataset.ParallelDataset at 0x7fb7c4c3dfd0>}}}

In [7]:
# Build the encoder/decoder pair from params and the loaded dictionary.
# NOTE(review): the log printed by this cell reports layer_norm15.* and
# encoder_attn.* parameters as "not found" -- presumably these are decoder
# weights that the reloaded checkpoint does not contain; confirm before
# trusting the evaluation scores.
encoder, decoder = build_model(params, data_dict['dico'])

Parameter layer_norm15.0.weight not found.
Parameter layer_norm15.0.bias not found.
Parameter encoder_attn.0.q_lin.weight not found.
Parameter encoder_attn.0.q_lin.bias not found.
Parameter encoder_attn.0.k_lin.weight not found.
Parameter encoder_attn.0.k_lin.bias not found.
Parameter encoder_attn.0.v_lin.weight not found.
Parameter encoder_attn.0.v_lin.bias not found.
Parameter encoder_attn.0.out_lin.weight not found.
Parameter encoder_attn.0.out_lin.bias not found.
Parameter layer_norm15.1.weight not found.
Parameter layer_norm15.1.bias not found.
Parameter encoder_attn.1.q_lin.weight not found.
Parameter encoder_attn.1.q_lin.bias not found.
Parameter encoder_attn.1.k_lin.weight not found.
Parameter encoder_attn.1.k_lin.bias not found.
Parameter encoder_attn.1.v_lin.weight not found.
Parameter encoder_attn.1.v_lin.bias not found.
Parameter encoder_attn.1.out_lin.weight not found.
Parameter encoder_attn.1.out_lin.bias not found.
Parameter layer_norm15.2.weight not found.
Parameter lay

In [14]:
# encoder, decoder = encoder.cuda(), decoder.cuda()

In [8]:
# Wrap the two models and the loaded data in a trainer, then create the
# evaluator on top of that trainer with the same data and params.
trainer = EncDecTrainer(encoder, decoder, data_dict, params)
evaluator = EncDecEvaluator(trainer, data_dict, params)

In [9]:
# Run the machine-translation evaluation and report every returned score.
scores = evaluator.run_mt(trainer)
for score_entry in scores.items():
    # Each item is a (name, value) pair, which fills both placeholders directly.
    print("%s -> %.6f" % score_entry)

# The same scores again as one JSON line prefixed with "__log__:".
print("__log__:%s" % json.dumps(scores))





















epoch -> 0.000000
valid_en-fr_mt_ppl -> 52451.574560
valid_en-fr_mt_acc -> 2.231640
valid_en-fr_mt_bleu -> 0.000000
valid_fr-en_mt_ppl -> 145008.872124
valid_fr-en_mt_acc -> 1.020254
valid_fr-en_mt_bleu -> 0.000000
test_en-fr_mt_ppl -> 57607.222301
test_en-fr_mt_acc -> 2.299841
test_en-fr_mt_bleu -> 0.000000
test_fr-en_mt_ppl -> 136549.540339
test_fr-en_mt_acc -> 0.744115
test_fr-en_mt_bleu -> 0.000000
__log__:{"epoch": 0, "valid_en-fr_mt_ppl": 52451.57455993151, "valid_en-fr_mt_acc": 2.2316401283526064, "valid_en-fr_mt_bleu": 0.0, "valid_fr-en_mt_ppl": 145008.8721235925, "valid_fr-en_mt_acc": 1.0202538259518472, "valid_fr-en_mt_bleu": 0.0, "test_en-fr_mt_ppl": 57607.22230133627, "test_en-fr_mt_acc": 2.2998410815408374, "test_en-fr_mt_bleu": 0.0, "test_fr-en_mt_ppl": 136549.5403391505, "test_fr-en_mt_acc": 0.7441154138192863, "test_fr-en_mt_bleu": 0.0}
