In [1]:
import os
from operator import itemgetter
from collections import defaultdict
import re

from hcn.modules.util import read_dialogs
from hcn.modules.entities import EntityTracker
from hcn.modules.actions import ActionTracker

In [2]:
# Folder holding the bAbI Task 6 (DSTC2) data files; the path is relative,
# so it is resolved against the current working directory.
BABI_FOLDER = os.path.join('hcn', 'data')

In [3]:
def mine_custom_slot_values(in_candidates, in_kb, in_slot_regexps=None):
    """Add slot values found in candidate utterances to the knowledge base.

    Every pattern is applied to every utterance with ``re.findall`` and all
    matches are merged into the set stored under the pattern's slot name.
    ``in_kb`` is modified in place.

    Args:
        in_candidates: iterable of utterance strings to scan.
        in_kb: mapping from slot name to a set of values. A slot that is not
            present yet is created (with an empty set) the first time one of
            its patterns is applied, so a plain dict works as well as a
            defaultdict.
        in_slot_regexps: mapping from slot name to an iterable of regex
            patterns. When omitted, the notebook-level ``SLOT_REGEXPS`` is
            used; it is looked up at call time, so it only has to exist
            before the function is called, not before it is defined.

    Returns:
        None.
    """
    if in_slot_regexps is None:
        in_slot_regexps = SLOT_REGEXPS

    # Compile each pattern once instead of once per utterance.
    compiled_patterns = [
        (slot_name, re.compile(regex))
        for slot_name, regexps in in_slot_regexps.items()
        for regex in regexps
    ]

    for utterance in in_candidates:
        for slot_name, pattern in compiled_patterns:
            in_kb.setdefault(slot_name, set()).update(pattern.findall(utterance))

In [4]:
# Read the candidate system responses, one per line. Each line is stripped,
# lower-cased, and everything up to (and including) the first space is dropped.
# The encoding is given explicitly, as for the KB file, so the result does not
# depend on the platform's default locale.
candidates_path = os.path.join(BABI_FOLDER, 'dialog-babi-task6-dstc2-candidates.txt')
with open(candidates_path, encoding='utf-8') as cand_in:
    candidates = [line.strip().lower().partition(' ')[2] for line in cand_in]

In [5]:
# Load the train / dev / test splits; with_indices=True makes read_dialogs
# return a second value next to the dialogs, stored here as *_indices.
trn_path = os.path.join(BABI_FOLDER, 'dialog-babi-task6-dstc2-trn.txt')
dev_path = os.path.join(BABI_FOLDER, 'dialog-babi-task6-dstc2-dev.txt')
tst_path = os.path.join(BABI_FOLDER, 'dialog-babi-task6-dstc2-tst.txt')

train_dialogs, train_indices = read_dialogs(trn_path, with_indices=True)
dev_dialogs, dev_indices = read_dialogs(dev_path, with_indices=True)
test_dialogs, test_indices = read_dialogs(tst_path, with_indices=True)

In [6]:
# Patterns used by mine_custom_slot_values to pull extra slot values out of the
# candidate system responses. Each slot name maps to a list of regexes with one
# capturing group; whatever the group matches is added to that slot's values.
#
# Raw strings are used so that `\w` reaches the regex engine as written instead
# of being an invalid string escape (which newer Pythons warn about).
#
# NOTE(review): the trailing '.' and '?' in several patterns are unescaped, so
# they act as regex metacharacters (any character / optional 't') rather than
# literal punctuation. They are left as they were so the mined values do not
# change; confirm whether literal matches were intended.
SLOT_REGEXPS = {
    'R_cuisine': [r'you are looking for a (\w+) restaurant right?'],
    'R_name': [r'the post code of (\w+) is \w+',
               r'^(.+) is a great restaurant',
               r'(.+) is a good restaurant'],
    'R_address': [r'^the address of .+ is (.+) .$',
                  r'is a good restaurant on (.+) and'],
    'R_phone': [r'their phone number is (.+) .'],
    'R_post_code': [r', at (.+)$', r'and their post code is (.+)'],
}

In [7]:
# Build the knowledge base: slot name -> set of every value seen for that slot.
# Restaurant names are additionally collected under the 'R_name' key.
kb = defaultdict(set)
kb_path = os.path.join(BABI_FOLDER, 'dialog-babi-task6-dstc2-kb.txt')
with open(kb_path, encoding='utf-8') as kb_in:
    for line in kb_in:
        fields = line.split()
        if not fields:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        # A record must have exactly four whitespace-separated fields; the
        # unpacking below still fails loudly on any other non-empty line.
        _idx, rest_name, key, value = fields
        kb[key].add(value)
        kb['R_name'].add(rest_name)

In [8]:
# Extend kb in place with the slot values that the SLOT_REGEXPS patterns find
# in the candidate responses.
mine_custom_slot_values(candidates, kb)

In [9]:
# Inspect the post-code values now in the KB (entries read from the KB file
# plus any mined from the candidate responses).
kb['R_post_code']

{'ali_baba_post_code',
 'anatolia_post_code',
 'ask_post_code',
 'backstreet_bistro_post_code',
 'bangkok_city_post_code',
 'bedouin_post_code',
 'bloomsbury_restaurant_post_code',
 'c.b 1, 3 n.f',
 'c.b 2, 1 d.p',
 'c.b 2, 1 u.f',
 'c.b 4, 3 l.e',
 'caffe_uno_post_code',
 'cambridge_lodge_restaurant_post_code',
 'charlie_chan_post_code',
 'chiquito_restaurant_bar_post_code',
 'city_stop_restaurant_post_code',
 'clowns_cafe_post_code',
 'cocum_post_code',
 'cote_post_code',
 'cotto_post_code',
 'curry_garden_post_code',
 'curry_king_post_code',
 'curry_prince_post_code',
 'curry_queen_post_code',
 'da_vince_pizzeria_post_code',
 'da_vinci_pizzeria_post_code',
 'darrys_cookhouse_and_wine_shop_post_code',
 'de_luca_cucina_and_bar_post_code',
 'dojo_noodle_bar_post_code',
 'don_pasquale_pizzeria_post_code',
 'efes_restaurant_post_code',
 'eraina_post_code',
 'fitzbillies_restaurant_post_code',
 'frankie_and_bennys_post_code',
 'galleria_post_code',
 'golden_house_post_code',
 'golden_wok_

In [10]:
# The entity tracker is built from the KB; the action tracker receives the
# candidates file path together with that entity tracker.
action_candidates_path = os.path.join(BABI_FOLDER, 'dialog-babi-task6-dstc2-candidates.txt')
et = EntityTracker(kb)
at = ActionTracker(action_candidates_path, et)

In [11]:
# Show the action templates held by the action tracker.
at.action_templates

['_PAD',
 '_UNK',
 ' you are welcome',
 'R_name is a good restaurant on R_address and it is in the R_price price range',
 'R_name is a great restaurant',
 'R_name is a great restaurant serving R_cuisine food . their phone number is R_phone .',
 'R_name is a great restaurant serving R_cuisine food and it is in the R_price price range',
 'R_name is a great restaurant serving R_price R_cuisine food in the R_location of town .',
 'R_name is a nice place in the R_location of town',
 'R_name is a nice place in the R_location of town and the prices are R_price',
 'R_name is a nice place in the R_location of town serving tasty R_cuisine food',
 'R_name is a nice restaurant in the R_location of town in the R_price price range',
 'R_name is a nice restaurant in the R_location of town serving R_cuisine food',
 'R_name is in the R_location , at R_post_code',
 'R_name is in the R_location part of town .',
 'R_name is in the R_price price range',
 'R_name is in the R_price price range , and their po

bAbI Task 6 Dialog Actions Distribution
==

In [17]:
# Frequency of each system response in the training set, keyed by the string
# that et.extract_entities returns for it.
dialog_actions_freqdict = defaultdict(int)

for span in train_indices:
    turns = train_dialogs[span['start']: span['end']]
    # The tracker is reset once per dialogue, before any of its turns is processed.
    et.reset()
    for usr_utterance, sys_utterance in turns:
        action = et.extract_entities(sys_utterance)
        dialog_actions_freqdict[action] += 1

In [21]:
# Print the system actions, most frequent first.
ranked_actions = sorted(dialog_actions_freqdict.items(), key=itemgetter(1), reverse=True)
for action, count in ranked_actions:
    print('{}:\t{}'.format(action, count))

hello , welcome to the cambridge restaurant system . you can R_name for restaurants by area , price range or food type . how may i help you ?:	1618
 you are welcome:	1618
the phone number of R_name is R_phone:	1167
sure , R_name is on R_address:	962
what kind of food would you like?:	862
what part of town do you have in mind?:	636
R_name is a nice place in the R_location of town and the prices are R_price:	579
api_call r_cuisine R_location R_price:	540
i'm sorry but there is no restaurant serving R_cuisine food:	510
R_name is a great restaurant:	471
R_name serves R_cuisine food:	467
api_call R_cuisine r_location r_price:	442
R_name is in the R_price price range:	336
would you like something in the R_price , R_price , or R_price price range?:	324
api_call R_cuisine r_location R_price:	285
R_name is a nice place in the R_location of town:	261
api_call R_cuisine R_location r_price:	251
the post code of R_name is R_post_code:	242
R_name is a nice place in the R_location of town serving tas

In [28]:
# Load the training split of the OOD-augmented dataset. Note that this path
# starts at 'data', not at BABI_FOLDER.
noisy_trn_path = os.path.join('data',
                              'babi_task6_ood_dataset_v2_0.2_0.4',
                              'dialog-babi-task6-dstc2-trn.txt')
noisy_train_dialogs, noisy_train_indices = read_dialogs(noisy_trn_path, with_indices=True)

In [29]:
# Same count as for the clean training set, but over the noisy dialogs:
# frequency of each system response after et.extract_entities.
noisy_dialog_actions_freqdict = defaultdict(int)

for span in noisy_train_indices:
    first, last = span['start'], span['end']
    turns = noisy_train_dialogs[first: last]
    et.reset()  # once per dialogue
    for usr_utterance, sys_utterance in turns:
        noisy_dialog_actions_freqdict[et.extract_entities(sys_utterance)] += 1

In [30]:
# Print the system actions of the noisy set, most frequent first.
ranked_noisy_actions = sorted(noisy_dialog_actions_freqdict.items(),
                              key=itemgetter(1),
                              reverse=True)
for action, count in ranked_noisy_actions:
    print('{}:\t{}'.format(action, count))

sorry i didn't catch that. could you please repeat?:	3533
hello , welcome to the cambridge restaurant system . you can R_name for restaurants by area , price range or food type . how may i help you ?:	1618
 you are welcome:	1618
the phone number of R_name is R_phone:	1167
sure , R_name is on R_address:	962
what kind of food would you like?:	862
what part of town do you have in mind?:	636
R_name is a nice place in the R_location of town and the prices are R_price:	579
api_call r_cuisine R_location R_price:	540
i'm sorry but there is no restaurant serving R_cuisine food:	510
R_name is a great restaurant:	471
R_name serves R_cuisine food:	467
api_call R_cuisine r_location r_price:	442
R_name is in the R_price price range:	336
would you like something in the R_price , R_price , or R_price price range?:	324
api_call R_cuisine r_location R_price:	285
R_name is a nice place in the R_location of town:	261
api_call R_cuisine R_location r_price:	251
the post code of R_name is R_post_code:	242
R_

In [36]:
# Frequency of each user utterance in the training set, keyed by the string
# that et.extract_entities returns for it.
user_utterances_freqdict = defaultdict(int)

for span in train_indices:
    turns = train_dialogs[span['start']: span['end']]
    # The tracker is reset once per dialogue, before any of its turns is processed.
    et.reset()
    for usr_utterance, sys_utterance in turns:
        user_template = et.extract_entities(usr_utterance)
        user_utterances_freqdict[user_template] += 1

In [37]:
# Print the user utterances, most frequent first.
ranked_user_utterances = sorted(user_utterances_freqdict.items(), key=itemgetter(1), reverse=True)
for utterance, count in ranked_user_utterances:
    print('{}:\t{}'.format(utterance, count))

<silence>:	3882
thank you good bye:	1264
R_cuisine food:	459
phone number:	438
R_cuisine:	406
yes:	298
address:	292
i dont care:	283
R_location:	242
R_price:	187
thank you goodbye:	186
whats the address:	177
how about R_cuisine food:	173
what is the address:	159
any:	150
is there anything else:	134
what is the phone number:	134
whats the phone number:	120
dont care:	100
anything else:	91
post code:	86
how about R_cuisine:	78
no:	70
it doesnt matter:	69
R_location part of town:	63
what about R_cuisine food:	63
good bye:	63
doesnt matter:	61
price range:	54
and the phone number:	52
thank you:	50
R_price restaurant:	49
what type of food:	47
im looking for a R_price restaurant in the R_location part of town:	45
im looking for a R_pricely priced restaurant in the R_location part of town:	44
can i have the address:	44
what is the address and phone number:	43
type of food:	39
im looking for R_cuisine food:	39
right:	31
okay thank you good bye:	29
and the post code:	29
can i have the phone num

okay might i have the address:	1
i need a restaurant in the center of town that includes R_cuisine food:	1
thank you have a good day:	1
thank you can i have the phone number:	1
is there another alternative:	1
um is there anything else:	1
um i dont care about the price:	1
i need the phone number and post code:	1
i dont care about the price range what about R_cuisine food:	1
hi im looking for a R_price restaurant in the R_location part of town:	1
okay is that an R_price restaurant:	1
how about R_pricely priced:	1
where is the address:	1
im looking for a R_pricely priced rest:	1
opean:	1
i want to find a restaurant in the R_location part of town and it should serve:	1
okay how about an R_price:	1
cross over food:	1
the R_location side:	1
you said R_cuisine food:	1
airitran:	1
the adra:	1
can i find a restaurant that serves R_cuisine food:	1
im looking for barbecue food:	1
find me a R_pricely priced restaurant in the R_location part of town:	1
i am looking fora restaurant that should serve

In [40]:
# Same count as for the clean training set, but over the noisy dialogs:
# frequency of each user utterance after et.extract_entities.
noisy_user_utterances_freqdict = defaultdict(int)

for span in noisy_train_indices:
    first, last = span['start'], span['end']
    turns = noisy_train_dialogs[first: last]
    et.reset()  # once per dialogue
    for usr_utterance, sys_utterance in turns:
        noisy_user_utterances_freqdict[et.extract_entities(usr_utterance)] += 1

In [42]:
# Print the user utterances of the noisy set, most frequent first.
ranked_noisy_user_utterances = sorted(noisy_user_utterances_freqdict.items(),
                                      key=itemgetter(1),
                                      reverse=True)
for utterance, count in ranked_noisy_user_utterances:
    print('{}:\t{}'.format(utterance, count))

<silence>:	3882
thank you good bye:	1017
R_cuisine food:	385
phone number:	344
R_cuisine:	330
i dont care:	231
yes:	228
address:	227
R_location:	177
R_price:	152
how about R_cuisine food:	150
thank you goodbye:	144
whats the address:	139
any:	124
what is the address:	117
is there anything else:	109
what is the phone number:	99
whats the phone number:	93
dont care:	88
anything else:	75
post code:	67
how about R_cuisine:	62
no:	58
it doesnt matter:	52
what about R_cuisine food:	51
R_location part of town:	49
doesnt matter:	49
good bye:	44
and the phone number:	43
thank you:	42
R_price restaurant:	41
price range:	41
what type of food:	41
what is the address and phone number:	37
im looking for a R_pricely priced restaurant in the R_location part of town:	36
im looking for a R_price restaurant in the R_location part of town:	35
can i have the address:	33
type of food:	32
im looking for R_cuisine food:	29
and the post code:	27
right:	27
what is the price range:	27
R_pricely priced:	25
okay t

can you check guadalajara first? i'll be leaving from curitiba:	1
i'm sorry... thank you goodbye:	1
what about us:	1
you are correct. my mistake. uh looking for a restaurant in any area and that needs to serve:	1
what is available:	1
sorry for the conR_cuisine. yes:	1
hey sorry i mentioned it R_cuisine food:	1
sorry comrade lol R_cuisine:	1
uh whats the phone number:	1
yes i need the sixty one a schedule leaving wilkinsburg going to downtown:	1
yes sir, sorry sir. i would like a R_price restaurant in the R_location part of town:	1
yes i wanted to know what time the next sixty one c leaving mckeesport transportation center is:	1
sorry... :( thank you good bye:	1
i wanna know where theyre routing the fifty one c from R_location side:	1
ooo sorry .. R_location:	1
im sorry fam :x thank you good bye:	1
sorry i’ll get on that okay can i have the address:	1
im sorry bud. i dont care:	1
do they have R_cuisine food:	1
okay do you have their phone number:	1
i need to know what time the next sixt

and may i have the post code:	1
okay then any part of town:	1
pst brother! sorry about that phone number:	1
what is the phone number of that R_name:	1
what is the address and phone number and area:	1
i am sorry friend 😭 R_location part of town:	1
hi going on vacation with my family can you help me book a vacation?:	1
i am so sorry brother! restaurant R_location part of town R_cuisine food:	1
whoops sorry R_location:	1
omg. i'm so sorry! im looking for an R_price restaurant that serves R_cuisine food:	1
can i change the price range:	1
what time is the next bus from cmu to squirell hill:	1
the type of food:	1
who is going to yoga and when is it?:	1
😕sorry champ thank you good bye:	1
whoops, my mistake! and the address:	1
address phone number area:	1
i want to find a restaurant:	1
im looking for an R_price restaurant:	1
when is the next sixty four a leaving highland aR_name and penn aR_name going to squirrel hill:	1
sorry not sure ok talian food:	1
sorry not sorry R_cuisine:	1
okay. my mi

uh yes can i find a restaurant in the R_location part of town that serves R_cuisine:	1
nope, sorry. nothing. whats the phone number:	1
i am so sorry 😥 whats the address:	1
i'm good, thanks! i'm looking to book a trip for a group of children with me as their chaperone.:	1
i need an R_price restaurant serving R_cuisine food:	1
hi i've got investors to meet with in nagoya sometime in the near future. i can leave from anywhere:	1
sorry about that mistake. im looking for a restaurant that serves R_cuisine food:	1
sorry guys. and the phone number:	1
that can serve R_cuisine food:	1
okay can i have the phone number and the post code please:	1
i need to get to rome! it’s a family emergency! i’m in tel aviv!:	1
could i have their address and phone number please:	1
ay i need to get my parents out of sl this month ok??? can u book them somewhere from st. louis til the 28th for 4300 bucks and get em out of here today if u can:	1
toronto please and from belo horizonte:	1
hi there, i'd like to book 