In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

## References

- [Natural Language Understanding with Sequence to Sequence Models](https://towardsdatascience.com/natural-language-understanding-with-sequence-to-sequence-models-e87d41ad258b)
- [BERT for dummies](https://towardsdatascience.com/bert-for-dummies-step-by-step-tutorial-fb90890ffe03)
- [ATIS Dataset from MS CNTK](https://www.kaggle.com/siddhadev/atis-dataset-from-ms-cntk)

In [2]:
import glob
import itertools

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import matplotlib.font_manager as font_manager
# Register the Nanum fonts with Matplotlib and make NanumGothic the default family.
font_dirs = ['/usr/share/fonts/truetype/nanum']
font_files = font_manager.findSystemFonts(fontpaths=font_dirs)
if hasattr(font_manager.fontManager, 'addfont'):
    # Matplotlib >= 3.2: createFontList was deprecated in 3.2 and later removed,
    # so fonts are registered one file at a time through FontManager.addfont.
    for font_file in font_files:
        font_manager.fontManager.addfont(font_file)
else:
    # Older Matplotlib releases that do not provide FontManager.addfont.
    font_list = font_manager.createFontList(font_files)
    font_manager.fontManager.ttflist.extend(font_list)
plt.rcParams['font.family'] = 'NanumGothic'

In [3]:
from tutorial_bert_func import load_ds, load_atis

# ATIS training split: lookup dictionaries, encoded sequences and raw text columns.
(
    t2i_train, s2i_train, in2i_train,
    i2t_train, i2s_train, i2in_train,
    input_tensor_train, target_tensor_train,
    query_data_train, intent_data_train, intent_data_label_train,
    slot_data_train,
) = load_atis('atis.train.pkl')

# The loaded objects and their display names, kept in matching order so they
# can be walked together (see check_data).
data_train = [
    t2i_train, s2i_train, in2i_train,
    i2t_train, i2s_train, i2in_train,
    input_tensor_train, target_tensor_train,
    query_data_train, intent_data_train, intent_data_label_train,
    slot_data_train,
]
var_train = [
    't2i_train', 's2i_train', 'in2i_train',
    'i2t_train', 'i2s_train', 'i2in_train',
    'input_tensor_train', 'target_tensor_train',
    'query_data_train', 'intent_data_train', 'intent_data_label_train',
    'slot_data_train',
]

# ATIS testing split, unpacked in the same order as the training split.
(
    t2i_test, s2i_test, in2i_test,
    i2t_test, i2s_test, i2in_test,
    input_tensor_test, target_tensor_test,
    query_data_test, intent_data_test, intent_data_label_test,
    slot_data_test,
) = load_atis('atis.test.pkl')

Done  loading:  data/ms-cntk-atis/atis.train.pkl
      samples: 4978
   vocab_size:  943
   slot count:  129
 intent count:   26
Query text: BOS could you please give me the round trip fare from denver to atlanta EOS
Query vector:  [178 329 938 688 449 581 827 730 870 414 444 351 851 242 179]
Intent label:  airfare
Slot text:  O O O O O O O B-round_trip I-round_trip O O B-fromloc.city_name O B-toloc.city_name O
Slot vector:  [128, 128, 128, 128, 128, 128, 128, 66, 119, 128, 128, 48, 128, 78, 128]
**************************************************************************
Query text: BOS list the flights from salt lake city to st. petersburg EOS
Query vector:  [178 549 827 429 444 736 521 301 851 789 677 179]
Intent label:  flight
Slot text:  O O O O O B-fromloc.city_name I-fromloc.city_name I-fromloc.city_name O B-toloc.city_name I-toloc.city_name O
Slot vector:  [128, 128, 128, 128, 128, 48, 110, 110, 128, 78, 125, 128]
******************************************************************

In [4]:
import inspect

def retrieve_name(var):
    callers_local_vars = inspect.currentframe().f_back.f_locals.items()
    return [var_name for var_name, var_val in callers_local_vars if var_val is var]


def check_data(var_list, variable_list) :
    """Print each variable's name followed by a two-item preview of its value.

    Dicts are previewed by their first two items, lists and NumPy arrays by
    their first two elements; any other type prints ``Unseen Type``.

    Args:
        var_list: Names to print, one per variable.
        variable_list: The objects to preview, in the same order as ``var_list``.

    Returns:
        None
    """
    for label, value in zip(var_list, variable_list):
        print('\n%s' % label)

        if isinstance(value, dict):
            # islice avoids materialising the whole dict just to show two items.
            preview = dict(itertools.islice(value.items(), 2))
        elif isinstance(value, (list, np.ndarray)):
            preview = value[0:2]
        else:
            preview = 'Unseen Type'
        print(preview)
# Print a short preview of every object loaded for the training split.
check_data(var_train, data_train)


t2i_train
{"'d": 0, "'hare": 1}

s2i_train
{'B-aircraft_code': 0, 'B-airline_code': 1}

in2i_train
{'abbreviation': 0, 'aircraft': 1}

i2t_train
{0: "'d", 1: "'hare"}

i2s_train
{0: 'B-aircraft_code', 1: 'B-airline_code'}

i2in_train
{0: 'abbreviation', 1: 'aircraft'}

input_tensor_train
[array([178, 479, 902, 851, 431, 444, 266, 240, 168, 210, 215, 236, 482,
       351, 240,  27, 482, 827, 606, 179]), array([178, 916, 429, 228, 244, 444, 682, 851, 247, 654, 845, 606, 179])]

target_tensor_train
[[128, 128, 128, 128, 128, 128, 48, 128, 35, 100, 128, 128, 128, 78, 128, 14, 128, 128, 12, 128], [128, 128, 128, 128, 128, 128, 48, 128, 78, 128, 26, 33, 128]]

query_data_train
[' i want to fly from boston at 838 am and arrive in denver at 1110 in the morning '
 ' what flights are available from pittsburgh to baltimore on thursday morning ']

intent_data_train
['flight' 'flight']

intent_data_label_train
[14 14]

slot_data_train
[' O O O O O B-fromloc.city_name O B-depart_time.time I-depart_

In [5]:
# Full training table: one row per utterance.
df = pd.DataFrame({'query':query_data_train,
                   'intent':intent_data_train,
                   'slot filling':slot_data_train})

# Keep the first utterance of each intent, in order of first appearance.
# drop_duplicates does this in one pass instead of growing a frame row by row.
df_small = df.drop_duplicates(subset='intent').reset_index(drop=True)

# max_colwidth=None turns off cell truncation so whole sentences are visible.
# The former sentinel -1 was deprecated in pandas 1.0 and is rejected by
# newer releases.
with pd.option_context('display.max_colwidth', None):
    display(df_small)

Unnamed: 0,query,intent,slot filling
0,i want to fly from boston at 838 am and arrive in denver at 1110 in the morning,flight,O O O O O B-fromloc.city_name O B-depart_time.time I-depart_time.time O O O B-toloc.city_name O B-arrive_time.time O O B-arrive_time.period_of_day
1,what is the arrival time in san francisco for the 755 am flight leaving washington,flight_time,O O O B-flight_time I-flight_time O B-fromloc.city_name I-fromloc.city_name O O B-depart_time.time I-depart_time.time O O B-fromloc.city_name
2,cheapest airfare from tacoma to orlando,airfare,B-cost_relative O O B-fromloc.city_name O B-toloc.city_name
3,what kind of aircraft is used on a flight from cleveland to dallas,aircraft,O O O O O O O O O O B-fromloc.city_name O B-toloc.city_name
4,what kind of ground transportation is available in denver,ground_service,O O O O O O O O B-city_name
5,what 's the airport at orlando,airport,O O O O O B-city_name
6,which airline serves denver pittsburgh and atlanta,airline,O O O B-fromloc.city_name B-fromloc.city_name O B-fromloc.city_name
7,how far is it from orlando airport to orlando,distance,O O O O O B-fromloc.airport_name I-fromloc.airport_name O B-toloc.city_name
8,what is fare code h,abbreviation,O O O O B-fare_basis_code
9,how much does the limousine service cost within pittsburgh,ground_fare,O O O O B-transport_type O O O B-city_name


In [6]:
# Lay the slot labels out as a table of 7 columns with 15 labels each.
# NOTE(review): this shows 7 * 15 = 105 labels, while the loader output reports
# 129 slots, so the remaining labels are not displayed - confirm this is intended.
i2s_train_values = list(i2s_train.values())
df = pd.DataFrame({
    str(col): i2s_train_values[col * 15:(col + 1) * 15]
    for col in range(7)
})
df

Unnamed: 0,0,1,2,3,4,5,6
0,B-aircraft_code,B-arrive_time.time_relative,B-depart_date.year,B-flight_time,B-return_date.day_name,B-today_relative,I-arrive_time.time_relative
1,B-airline_code,B-booking_class,B-depart_time.end_time,B-fromloc.airport_code,B-return_date.day_number,B-toloc.airport_code,I-city_name
2,B-airline_name,B-city_name,B-depart_time.period_mod,B-fromloc.airport_name,B-return_date.month_name,B-toloc.airport_name,I-class_type
3,B-airport_code,B-class_type,B-depart_time.period_of_day,B-fromloc.city_name,B-return_date.today_relative,B-toloc.city_name,I-cost_relative
4,B-airport_name,B-compartment,B-depart_time.start_time,B-fromloc.state_code,B-return_time.period_mod,B-toloc.country_name,I-depart_date.day_name
5,B-arrive_date.date_relative,B-connect,B-depart_time.time,B-fromloc.state_name,B-return_time.period_of_day,B-toloc.state_code,I-depart_date.day_number
6,B-arrive_date.day_name,B-cost_relative,B-depart_time.time_relative,B-meal,B-round_trip,B-toloc.state_name,I-depart_date.today_relative
7,B-arrive_date.day_number,B-day_name,B-economy,B-meal_code,B-state_code,B-transport_type,I-depart_time.end_time
8,B-arrive_date.month_name,B-day_number,B-fare_amount,B-meal_description,B-state_name,I-airline_name,I-depart_time.period_of_day
9,B-arrive_date.today_relative,B-days_code,B-fare_basis_code,B-mod,B-stoploc.airport_code,I-airport_name,I-depart_time.start_time


In [None]:
from tutorial_seq2seq_func import create_tensors, get_vocab_size

# Build the model inputs/targets for the training split.
(
    input_data_train,
    teacher_data_train,
    target_data_train,
    len_input_train,
    len_target_train,
) = create_tensors(input_tensor_train, target_tensor_train)

# The test split is built with max_len set to the training input length.
(
    input_data_test,
    teacher_data_test,
    target_data_test,
    len_input_test,
    len_target_test,
) = create_tensors(input_tensor_test, target_tensor_test, max_len=len_input_train)

# Vocabulary sizes are derived from both splits' token and slot dictionaries.
vocab_in_size, vocab_out_size = get_vocab_size(t2i_train, t2i_test, s2i_train, s2i_test)

In [None]:
# First two encoded queries as loaded, i.e. before create_tensors is applied.
input_tensor_train[0:2]

In [None]:
# The same two queries as returned by create_tensors, for comparison with the raw sequences.
input_data_train[0:2]

In [None]:
from tutorial_seq2seq_func import create_model
from keras.utils import plot_model

# Single source of truth for the batch size: used to build the model here and
# again when calling model.fit.
BATCH_SIZE = 64
# Pass the constant instead of repeating the literal, so changing BATCH_SIZE
# cannot leave the model and the training call out of sync.
model = create_model(input_data_train, len_input_train, vocab_in_size, vocab_out_size, BATCH_SIZE=BATCH_SIZE)
model.summary()
plot_model(model)

In [None]:
#from livelossplot.keras import PlotLossesCallback
from keras_tqdm import TQDMNotebookCallback

# Train the model for a fixed number of epochs.
epochs = 50
train_inputs = [input_data_train, teacher_data_train]
# NOTE(review): the test split is used as validation data here.
val_inputs = [input_data_test, teacher_data_test]
hist = model.fit(
    train_inputs,
    target_data_train,
    batch_size=BATCH_SIZE,
    epochs=epochs,
    verbose=0,  # Keras' own logging is off; presumably the tqdm callback reports progress
    callbacks=[TQDMNotebookCallback()],
    validation_data=(val_inputs, target_data_test),
)

In [None]:
def plot_training_accuracy(history):
    """Plot training and validation accuracy against the epoch number.

    Args:
        history: Object whose ``history`` mapping holds the
            ``'sparse_categorical_accuracy'`` and
            ``'val_sparse_categorical_accuracy'`` series (for example the
            value returned by ``model.fit``).

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    metrics = history.history
    train_curve = metrics['sparse_categorical_accuracy']
    val_curve = metrics['val_sparse_categorical_accuracy']

    # Epochs are numbered from 1 on the x axis.
    epoch_axis = range(1, len(train_curve) + 1)

    curves = (
        (train_curve, 'bo', 'Training accuracy'),
        (val_curve, 'r', 'Validation accuracy'),
    )
    for series, fmt, label in curves:
        plt.plot(epoch_axis, series, fmt, label=label)

    plt.title('Training and validation accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.show()
  
# Plot the accuracy curves recorded by model.fit.
plot_training_accuracy(hist)