In [10]:
ls

atis/                  atis.test.pkl          atis.train.query.csv
atis.dict.intent.csv   atis.test.query.csv    atis.train.slots.csv
atis.dict.slots.csv    atis.test.slots.csv    atis.zip*
atis.dict.vocab.csv    atis.train.intent.csv  explore.ipynb
atis.test.intent.csv   atis.train.pkl


In [28]:
import pickle
import numpy
import os
import pandas as pd

In [75]:
def load_ds(fname):
    """Unpickle a ``(ds, dicts)`` pair from ``fname`` and print a size summary.

    Parameters
    ----------
    fname : str
        Path of the pickle file to read (opened in binary mode).

    Returns
    -------
    tuple
        The ``(ds, dicts)`` pair exactly as stored in the file. ``ds`` must
        provide a ``'query'`` entry and ``dicts`` must provide ``'token_ids'``,
        ``'slot_ids'`` and ``'intent_ids'`` entries, since their lengths are
        reported.

    Raises
    ------
    KeyError
        If one of the entries used for the summary is missing.
    """
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(fname, 'rb') as handle:
        dataset, lookups = pickle.load(handle)

    print('Done  loading: ', fname)

    # (format template, container, key) -- looked up lazily so the lines are
    # emitted one by one in this order.
    report = (
        ('      samples: {:4d}', dataset, 'query'),
        ('   vocab_size: {:4d}', lookups, 'token_ids'),
        ('   slot count: {:4d}', lookups, 'slot_ids'),
        (' intent count: {:4d}', lookups, 'intent_ids'),
    )
    for template, container, key in report:
        print(template.format(len(container[key])))

    return dataset, lookups

In [82]:
# Read the pickled ATIS test split together with its lookup dictionaries.
test_ds, test_dicts = load_ds(
    os.path.join('./', 'atis.test.pkl')
)


Done  loading:  ./atis.test.pkl
      samples:  893
   vocab_size:  943
   slot count:  129
 intent count:   26


In [109]:
# Show which fields the loaded test split provides.
test_ds.keys()

dict_keys(['slot_labels', 'query', 'intent_labels'])

In [83]:
# Read the pickled ATIS training split together with its lookup dictionaries.
train_ds, train_dicts = load_ds(
    os.path.join('./', 'atis.train.pkl')
)

Done  loading:  ./atis.train.pkl
      samples: 4978
   vocab_size:  943
   slot count:  129
 intent count:   26


In [63]:
class TokenProcessor(object):
    """Two-way lookup between tokens and their ids."""

    def __init__(self, token_ids):
        """Store the token -> id mapping and derive the id -> token inverse.

        Parameters
        ----------
        token_ids : dict
            Mapping from token to id. It is kept by reference as
            ``token2id``; if several tokens share an id, the last one
            iterated wins in ``id2token``.
        """
        self.token2id = token_ids
        self.id2token = {idx: token for token, idx in token_ids.items()}

    def get_token(self, id_):
        """Return the token registered under ``id_``.

        Raises ``KeyError`` when ``id_`` is not a known id.
        """
        return self.id2token[id_]

In [64]:
class SlotProcessor(object):
    """Two-way lookup between slot labels and their ids."""

    def __init__(self, slot_ids):
        """Store the slot -> id mapping and derive the id -> slot inverse.

        Parameters
        ----------
        slot_ids : dict
            Mapping from slot label to id. It is kept by reference as
            ``slot2id``; if several labels share an id, the last one
            iterated wins in ``id2slot``.
        """
        self.slot2id = slot_ids
        self.id2slot = {idx: slot for slot, idx in slot_ids.items()}

    def get_slot(self, id_):
        """Return the slot label registered under ``id_``.

        Raises ``KeyError`` when ``id_`` is not a known id.
        """
        return self.id2slot[id_]

In [65]:
class IntentProcessor(object):
    """Two-way lookup between intent labels and their ids."""

    def __init__(self, intent_ids):
        """Store the intent -> id mapping and derive the id -> intent inverse.

        Parameters
        ----------
        intent_ids : dict
            Mapping from intent label to id. It is kept by reference as
            ``intent2id``; if several labels share an id, the last one
            iterated wins in ``id2intent``.
        """
        self.intent2id = intent_ids
        self.id2intent = {idx: intent for intent, idx in intent_ids.items()}

    def get_intent(self, id_):
        """Return the intent label registered under ``id_``.

        Raises ``KeyError`` when ``id_`` is not a known id.
        """
        return self.id2intent[id_]

In [112]:
# Display the token -> id vocabulary from the training dictionaries.
# NOTE(review): this dumps the whole mapping; consider showing only a slice.
train_dicts['token_ids']

{"'d": 0,
 "'hare": 1,
 "'ll": 2,
 "'m": 3,
 "'re": 4,
 "'s": 5,
 "'t": 6,
 "'ve": 7,
 '0900': 8,
 '1': 9,
 '10': 10,
 '100': 11,
 '1000': 12,
 '1017': 13,
 '1020': 14,
 '1024': 15,
 '1026': 16,
 '1030': 17,
 '1039': 18,
 '1045': 19,
 '1055': 20,
 '1059': 21,
 '106': 22,
 '1083': 23,
 '11': 24,
 '110': 25,
 '1100': 26,
 '1110': 27,
 '1115': 28,
 '1130': 29,
 '1133': 30,
 '1145': 31,
 '1158': 32,
 '12': 33,
 '1200': 34,
 '1201': 35,
 '1205': 36,
 '1207': 37,
 '1209': 38,
 '1220': 39,
 '1222': 40,
 '1230': 41,
 '124': 42,
 '1245': 43,
 '1288': 44,
 '1291': 45,
 '130': 46,
 '1300': 47,
 '137338': 48,
 '139': 49,
 '150': 50,
 '1500': 51,
 '1505': 52,
 '1600': 53,
 '163': 54,
 '1700': 55,
 '1765': 56,
 '1800': 57,
 '1850': 58,
 '19': 59,
 '1940': 60,
 '1991': 61,
 '1992': 62,
 '1993': 63,
 '1994': 64,
 '2': 65,
 '20': 66,
 '200': 67,
 '201': 68,
 '21': 69,
 '210': 70,
 '2100': 71,
 '212': 72,
 '2134': 73,
 '2153': 74,
 '217': 75,
 '225': 76,
 '229': 77,
 '230': 78,
 '257': 79,
 '269': 80,
 

In [92]:
# Decoder for query token ids, built from the training vocabulary.
token_processor = TokenProcessor(token_ids=train_dicts['token_ids'])

In [98]:
# Decoder for slot label ids, built from the training dictionaries.
slot_processor = SlotProcessor(slot_ids=train_dicts['slot_ids'])

In [104]:
# Decoder for intent label ids, built from the training dictionaries.
intent_processor = IntentProcessor(intent_ids=train_dicts['intent_ids'])

In [71]:
# Split the test set into its three parallel fields; a missing field yields None.
query_test, slots_test, intent_test = (
    test_ds.get(field) for field in ('query', 'slot_labels', 'intent_labels')
)

In [78]:
# Split the training set into its three parallel fields; a missing field yields None.
query_train, slots_train, intent_train = (
    train_ds.get(field) for field in ('query', 'slot_labels', 'intent_labels')
)

In [79]:
# Decode the first training query from ids back to tokens as a sanity check.
list(map(token_processor.get_token, query_train[0]))

['BOS',
 'i',
 'want',
 'to',
 'fly',
 'from',
 'boston',
 'at',
 '838',
 'am',
 'and',
 'arrive',
 'in',
 'denver',
 'at',
 '1110',
 'in',
 'the',
 'morning',
 'EOS']

In [111]:
# Display the intent labels of the training split.
# NOTE(review): this dumps every label; consider showing only a slice.
intent_train

[array([14]),
 array([14]),
 array([19]),
 array([3]),
 array([3]),
 array([14]),
 array([1]),
 array([14]),
 array([14]),
 array([21]),
 array([14]),
 array([14]),
 array([8]),
 array([14]),
 array([14]),
 array([3]),
 array([21]),
 array([14]),
 array([14]),
 array([14]),
 array([14]),
 array([14]),
 array([14]),
 array([1]),
 array([3]),
 array([14]),
 array([6]),
 array([14]),
 array([21]),
 array([14]),
 array([3]),
 array([14]),
 array([14]),
 array([14]),
 array([14]),
 array([3]),
 array([6]),
 array([14]),
 array([14]),
 array([14]),
 array([13]),
 array([14]),
 array([6]),
 array([6]),
 array([14]),
 array([6]),
 array([21]),
 array([0]),
 array([14]),
 array([14]),
 array([19]),
 array([14]),
 array([14]),
 array([20]),
 array([14]),
 array([0]),
 array([14]),
 array([14]),
 array([14]),
 array([14]),
 array([14]),
 array([6]),
 array([14]),
 array([21]),
 array([6]),
 array([14]),
 array([14]),
 array([8]),
 array([14]),
 array([14]),
 array([0]),
 array([14]),
 array([14])