In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension and set it to mode 2, so edits to imported
# .py modules (e.g. project helper modules) are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

## References

- [Natural Language Understanding with Sequence to Sequence Models](https://towardsdatascience.com/natural-language-understanding-with-sequence-to-sequence-models-e87d41ad258b)
- [BERT for dummies](https://towardsdatascience.com/bert-for-dummies-step-by-step-tutorial-fb90890ffe03)
- [ATIS Dataset from MS CNTK](https://www.kaggle.com/siddhadev/atis-dataset-from-ms-cntk)

In [9]:
import glob
import itertools

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import matplotlib.font_manager as font_manager
# Register every font file found in the Nanum font directory with matplotlib,
# then make NanumGothic the default font family for all figures.
# NOTE(review): presumably needed so Korean text renders in plots — confirm.
font_dirs = ['/usr/share/fonts/truetype/nanum']
font_files = font_manager.findSystemFonts(fontpaths=font_dirs)
if hasattr(font_manager.fontManager, 'addfont'):
    # `createFontList` was deprecated in Matplotlib 3.2 and removed in 3.3;
    # `FontManager.addfont` is the supported way to register a font file.
    for font_file in font_files:
        try:
            font_manager.fontManager.addfont(font_file)
        except (OSError, RuntimeError, UnicodeError) as err:
            # Keep the best-effort behaviour of the legacy helper, which
            # skipped font files it could not read instead of aborting.
            print('Skipping unreadable font %s: %s' % (font_file, err))
else:
    # Legacy path for Matplotlib releases that predate `addfont`.
    font_list = font_manager.createFontList(font_files)
    font_manager.fontManager.ttflist.extend(font_list)
plt.rcParams['font.family'] = 'NanumGothic'

In [29]:
from tutorial_bert_func import load_ds, load_atis

# Load the ATIS training split. The loader's twelve results are kept together
# in `data_train` (for bulk inspection) and also bound to individual names.
data_train = list(load_atis('atis.train.pkl'))
(
    t2i_train,
    s2i_train,
    in2i_train,
    i2t_train,
    i2s_train,
    i2in_train,
    input_tensor_train,
    target_tensor_train,
    query_data_train,
    intent_data_train,
    intent_data_label_train,
    slot_data_train,
) = data_train

# Display labels for the entries of `data_train`, in the same order.
var_train = [
    't2i_train',
    's2i_train',
    'in2i_train',
    'i2t_train',
    'i2s_train',
    'i2in_train',
    'input_tensor_train',
    'target_tensor_train',
    'query_data_train',
    'intent_data_train',
    'intent_data_label_train',
    'slot_data_train',
]

# Load the ATIS testing split into the matching `*_test` names.
(
    t2i_test,
    s2i_test,
    in2i_test,
    i2t_test,
    i2s_test,
    i2in_test,
    input_tensor_test,
    target_tensor_test,
    query_data_test,
    intent_data_test,
    intent_data_label_test,
    slot_data_test,
) = load_atis('atis.test.pkl')

Done  loading:  data/ms-cntk-atis/atis.train.pkl
      samples: 4978
   vocab_size:  943
   slot count:  129
 intent count:   26
Query text: BOS give me the flights from phoenix to milwaukee on wednesday EOS
Query vector:  [178 449 581 827 429 444 681 851 595 654 908 179]
Intent label:  flight
Slot text:  O O O O O O B-fromloc.city_name O B-toloc.city_name O B-depart_date.day_name O
Slot vector:  [128, 128, 128, 128, 128, 128, 48, 128, 78, 128, 26, 128]
**************************************************************************
Query text: BOS i would like a flight as early as possible in the day leaving from boston and to denver EOS
Query vector:  [178 479 932 545 180 428 239 388 239 690 482 827 342 539 444 266 215 851
 351 179]
Intent label:  flight
Slot text:  O O O O O O O B-flight_mod O O O O O O O B-fromloc.city_name O O B-toloc.city_name O
Slot vector:  [128, 128, 128, 128, 128, 128, 128, 42, 128, 128, 128, 128, 128, 128, 128, 48, 128, 128, 78, 128]
******************************

In [32]:
import inspect

def retrieve_name(var):
    """Return the names in the caller's local namespace bound to ``var``.

    The caller's frame is inspected and every variable name whose value is
    the very same object as ``var`` (identity, not equality) is collected.

    Args:
        var: The object to look up.

    Returns:
        A list of matching variable names; empty when nothing in the
        caller's locals refers to ``var``.
    """
    # The frame lookup must happen directly in this function body so that
    # ``f_back`` is the frame of whoever called ``retrieve_name``.
    caller_frame = inspect.currentframe().f_back
    matches = []
    for name, value in caller_frame.f_locals.items():
        if value is var:
            matches.append(name)
    return matches


def check_data(var_list, variable_list):
    """Print a short preview of each object next to its label.

    For every (label, object) pair a blank line and the label are printed,
    followed by:
      * the first two key/value pairs, for a dict;
      * the first two elements, for a list or a NumPy array;
      * the text 'Unseen Type', for anything else.

    Args:
        var_list: Labels to print, one per object.
        variable_list: Objects to preview, in the same order as ``var_list``.
            Pairing uses ``zip``, so surplus entries in the longer sequence
            are ignored.

    Returns:
        None. The function only writes to standard output.
    """
    for label, value in zip(var_list, variable_list):
        print('\n%s' % label)

        if isinstance(value, dict):
            # Dicts cannot be sliced; take the first two items lazily.
            preview = dict(itertools.islice(value.items(), 2))
        elif isinstance(value, (list, np.ndarray)):
            preview = value[0:2]
        else:
            preview = 'Unseen Type'
        print(preview)
# Preview the first entries of every training-split object, labelled by name.
check_data(var_train, data_train)


t2i_train
{"'d": 0, "'hare": 1}

s2i_train
{'B-aircraft_code': 0, 'B-airline_code': 1}

in2i_train
{'abbreviation': 0, 'aircraft': 1}

i2t_train
{0: "'d", 1: "'hare"}

i2s_train
{0: 'B-aircraft_code', 1: 'B-airline_code'}

i2in_train
{0: 'abbreviation', 1: 'aircraft'}

input_tensor_train
[array([178, 479, 902, 851, 431, 444, 266, 240, 168, 210, 215, 236, 482,
       351, 240,  27, 482, 827, 606, 179]), array([178, 916, 429, 228, 244, 444, 682, 851, 247, 654, 845, 606, 179])]

target_tensor_train
[[128, 128, 128, 128, 128, 128, 48, 128, 35, 100, 128, 128, 128, 78, 128, 14, 128, 128, 12, 128], [128, 128, 128, 128, 128, 128, 48, 128, 78, 128, 26, 33, 128]]

query_data_train
[' i want to fly from boston at 838 am and arrive in denver at 1110 in the morning '
 ' what flights are available from pittsburgh to baltimore on thursday morning ']

intent_data_train
['flight' 'flight']

intent_data_label_train
[14 14]

slot_data_train
[' O O O O O B-fromloc.city_name O B-depart_time.time I-depart_