# Demo

### First, let's import the libraries and configure the logger.
Initializing the model manager will print the lists of implemented models and data loaders.

In [1]:
import logging
import os

from config import basic_conf as conf
from config.constants import HyperParamKey
from libs import ModelManager as mm

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Turn on DEBUG-level logging and grab the notebook's logger.
conf.init_logger(logging.DEBUG)
logger = logging.getLogger('__main__')

# Location of the IWSLT vi-en data. Set the IWSLT_DATA_PATH environment
# variable to point at your own copy; when it is unset, the original
# hard-coded location is used so existing setups keep working.
DATA_PATH = os.environ.get('IWSLT_DATA_PATH', '/Users/xliu/Downloads/iwslt-vi-en/')

# Override the manager's default data path, then build the manager in
# notebook mode.
config_update = {'data_path': DATA_PATH}
mgr = mm.ModelManager(mode='notebook', control_overrides=config_update)

[2018-11-04 11:30:42] [INFO] Initializing Model Manager, version 0.4.0 ...
[2018-11-04 11:30:42] [INFO] 
=== Models Available ===
BagOfWords
[2018-11-04 11:30:42] [INFO] 
=== Loaders Available ===
IMDB
IWSLT
[2018-11-04 11:30:42] [INFO] 
*********** Model Manager Details ***********
-- self.hparams.num_epochs = 1
-- self.hparams.lr = 0.01
-- self.hparams.train_plus_val_size = 25000
-- self.hparams.test_size = 25000
-- self.hparams.val_size = 5000
-- self.hparams.voc_size = 100000
-- self.hparams.train_loop_check_freq = 100
-- self.hparams.embedding_dim = 50
-- self.hparams.batch_size = 32
-- self.hparams.ngram_size = 2
-- self.hparams.remove_punc = True
-- self.hparams.check_early_stop = True
-- self.hparams.es_look_back = 5
-- self.hparams.es_req_prog = 0.01
-- self.hparams.optim_enc = <class 'torch.optim.adam.Adam'>
-- self.hparams.optim_dec = <class 'torch.optim.adam.Adam'>
-- self.hparams.scheduler = <class 'torch.optim.lr_scheduler.ExponentialLR'>
-- self.hparams.scheduler_gamma =

### Now let's load the data for the translation task: IWSLT

In [3]:
# Load the dataset through the IWSLT loader listed in ModelManager's loaderRegister.
# In the logged run below, the step between fetching the target-language data and
# "Generated indexer" took several minutes.
mgr.load_data(mm.loaderRegister.IWSLT)

[2018-11-04 11:30:44] [INFO] Loading data using IWSLT ...
[2018-11-04 11:30:44] [INFO] Get source language datum list...
[2018-11-04 11:30:46] [INFO] Get target language datum list...
[2018-11-04 11:34:37] [INFO] Generated indexer for both src/target languages!
[2018-11-04 11:34:37] [INFO] Convert token to index for source language ...
[2018-11-04 11:34:39] [INFO] Convert token to index for target language ...


In [4]:
# Pick one example from the loaded source-language data to inspect.
# NOTE(review): the meaning of the two indices ([0][0]) is not visible in this
# notebook — confirm against the IWSLT loader.
source_sample = mgr.dataloader.data['source'][0][0]

In [5]:
# Display the sample's tokens (a list of token strings in the output below).
source_sample.tokens

['Khoa_học', 'đằng_sau', 'một', 'tiêu_đề', 'về', 'khí_hậu']

In [6]:
# Display the sample's token indices (one integer per token in the output below).
source_sample.token_indices

[2718, 1361, 7, 3495, 31, 882]

### Try loading the indexers from file

In [7]:
# Run the same load a second time. Judging by the log output below, the language
# indexer is now found and loaded instead of being generated again.
mgr.load_data(mm.loaderRegister.IWSLT)

[2018-11-04 12:07:28] [INFO] Loading data using IWSLT ...
[2018-11-04 12:07:29] [INFO] Get source language datum list...
[2018-11-04 12:07:31] [INFO] Get target language datum list...
[2018-11-04 12:07:32] [INFO] Language indexer found and loaded!
[2018-11-04 12:07:32] [INFO] Convert token to index for source language ...
[2018-11-04 12:07:33] [INFO] Convert token to index for target language ...


In [8]:
# Fetch a sample from the reloaded data; note the first index is 2 here
# (the earlier inspection cell used 0).
# NOTE(review): index semantics are not visible in this notebook — confirm
# against the IWSLT loader.
source_sample = mgr.dataloader.data['source'][2][0]
# Display the sample's tokens.
source_sample.tokens

['Làm_sao',
 'tôi',
 'có_thể',
 'trình_bày',
 'trong',
 '10',
 'phút',
 'về',
 'sợi',
 'dây',
 'liên_kết',
 'những',
 'người',
 'phụ_nữ',
 'qua',
 'ba',
 'thế_hệ',
 ',',
 'về',
 'việc_làm',
 'thế_nào',
 'những',
 'sợi',
 'dây',
 'mạnh_mẽ',
 'đáng',
 'kinh_ngạc',
 'ấy',
 'đã',
 'níu',
 'chặt',
 'lấy',
 'cuộc_sống',
 'của',
 'một',
 'cô',
 'bé',
 'bốn',
 'tuổi',
 'co_quắp',
 'với',
 'đứa',
 'em_gái',
 'nhỏ',
 'của',
 'cô',
 'bé',
 ',',
 'với',
 'mẹ',
 'và',
 'bà',
 'trong_suốt',
 'năm',
 'ngày_đêm',
 'trên',
 'con',
 'thuyền',
 'nhỏ',
 'lênh_đênh',
 'trên',
 'Biển',
 'Đông',
 'hơn',
 '30',
 'năm',
 'trước',
 ',',
 'những',
 'sợi',
 'dây',
 'liên_kết',
 'đã',
 'níu',
 'lấy',
 'cuộc_đời',
 'cô_bé',
 'ấy',
 'và',
 'không',
 'bao_giờ',
 'rời',
 'đi',
 '-',
 '-',
 'cô_bé',
 'ấy',
 'giờ',
 'sống',
 'ở',
 'San',
 'Francisco',
 'và',
 'đang',
 'nói_chuyện',
 'với',
 'các',
 'bạn',
 'hôm_nay',
 '?']