In [33]:
import pandas as pd
import json
import sys

# Show full cell text instead of truncating long utterances in DataFrame output.
# NOTE(review): -1 is the legacy "no limit" value; newer pandas releases
# deprecate negative values in favour of None -- confirm before upgrading pandas.
pd.set_option('display.max_colwidth', -1)

In [2]:
# nice JSON viewer, from https://stackoverflow.com/questions/18873066/pretty-json-formatting-in-ipython-notebook
import uuid
from IPython.display import display_javascript, display_html, display
import json

class RenderJSON(object):
    """Interactive JSON tree for notebook output, rendered with renderjson.js.

    Wrap a value and pass the instance to ``display`` (or leave it as the last
    expression of a cell); IPython then calls ``_ipython_display_``.
    """

    def __init__(self, json_data):
        """Store ``json_data`` as JSON text plus a unique DOM id.

        Args:
            json_data: a ``dict``, ``list`` or ``tuple`` (serialized here with
                ``json.dumps``), or anything else, which is kept as-is and is
                expected to already be JSON text.
        """
        # Lists/tuples must be serialized as well: interpolating their Python
        # repr into the script below (u'..' prefixes, True/False/None) would
        # produce invalid JavaScript.
        if isinstance(json_data, (dict, list, tuple)):
            self.json_str = json.dumps(json_data)
        else:
            self.json_str = json_data
        # Unique id tying the placeholder <div> to the script that fills it.
        self.uuid = str(uuid.uuid4())

    def _ipython_display_(self):
        """Emit a placeholder <div>, then JavaScript that renders the tree into it."""
        display_html('<div id="{}" style="height: 600px; width:100%;"></div>'.format(self.uuid), raw=True)
        # renderjson.js is fetched from a remote URL through require(), so this
        # needs network access from the browser.
        # NOTE(review): json_str is pasted into the script unescaped -- only use
        # this with trusted data.
        display_javascript("""
        require(["https://rawgit.com/caldwell/renderjson/master/renderjson.js"], function() {
        document.getElementById('%s').appendChild(renderjson(%s))
        });
        """ % (self.uuid, self.json_str), raw=True)

In [4]:
sys.path.append('./')
from _dstc2_scripts.dataset_walker import dataset_walker
from _dstc2_scripts import misc

In [7]:
# Walker over the "dstc2_dev" dataset located under _Data/.
# NOTE(review): labels=True presumably makes each dialog yield (turn, label)
# pairs, which is how the cells below unpack it -- confirm in dataset_walker.
dataset = dataset_walker("dstc2_dev", dataroot="_Data/", labels=True)

In [53]:
# Position (in iteration order) of the single dialog to dump for inspection.
this_dial_id = 3

for m, call in enumerate(dataset):
    if m < this_dial_id:
        continue
    if m > this_dial_id:
        # Past the requested dialog: stop instead of walking the whole dataset.
        break
    for n, (turn, label) in enumerate(call):
        # Single-argument print(...) produces the same text whether it is parsed
        # as a Python 2 print statement or a Python 3 function call.
        print('{} {} {}'.format('-' * 20, n, '-' * 20))
        # For a collapsible tree instead of plain text, use
        # display(RenderJSON(turn['output'])) / display(RenderJSON(label)).
        print(json.dumps(turn['output'], indent=4))
        print(json.dumps(label, indent=4))

-------------------- 0 --------------------
{
    "start-time": 0.001117, 
    "end-time": 8.94, 
    "transcript": "Hello , welcome to the Cambridge restaurant system? You can ask for restaurants by area , price range or food type . How may I help you?", 
    "dialog-acts": [
        {
            "slots": [], 
            "act": "welcomemsg"
        }
    ], 
    "aborted": false
}
{
    "turn-index": 0, 
    "goal-labels": {
        "pricerange": "expensive"
    }, 
    "transcription": "expensive restaurant", 
    "method-label": "byconstraints", 
    "audio-file": "pt344x_0000894_0001028.wav", 
    "requested-slots": [], 
    "semantics": {
        "json": [
            {
                "slots": [
                    [
                        "pricerange", 
                        "expensive"
                    ]
                ], 
                "act": "inform"
            }
        ], 
        "cam": "inform(pricerange=expensive,type=restaurant)"
    }
}
--------------------

In [35]:
def json_to_cam(da_json):
    """Render a list of dialog-act dicts as one compact, readable string.

    Args:
        da_json: iterable of dicts, each with an ``'act'`` entry and a
            ``'slots'`` entry; every slot is indexed at positions 0 (name)
            and 1 (value).

    Returns:
        Each act formatted as ``act(name=value, name=value)``, with the acts
        joined by ``' | '``. An empty input gives an empty string.
    """
    rendered_acts = []
    for dialog_act in da_json:
        act_name = dialog_act['act']
        slot_text = ', '.join(
            '{}={}'.format(pair[0], pair[1]) for pair in dialog_act['slots']
        )
        rendered_acts.append('{}({})'.format(act_name, slot_text))
    return ' | '.join(rendered_acts)

In [36]:
# Flatten every turn of every dialog into one record:
# (dialog index, turn index, system utterance, system acts,
#  user transcription, user acts).
out = []
for m, call in enumerate(dataset):
    for n, (turn, label) in enumerate(call):
        sys_utt = turn['output']['transcript']
        usr_utt = label['transcription']
        sys_sem = json_to_cam(turn['output']['dialog-acts'])
        # Add a space after commas so user acts read like the ', '-joined
        # slots produced by json_to_cam.
        # NOTE(review): '|' between user acts is left unspaced (the output shows
        # e.g. 'thankyou()|bye()'), unlike the ' | ' used for system acts.
        usr_sem = label['semantics']['cam'].replace(',', ', ')
        out.append((m, n, sys_utt, sys_sem, usr_utt, usr_sem))

In [38]:
# Column names, in the same order as the fields of each tuple in `out`.
columns = ['dial_id', 'turn_id', 'sys_utt', 'sys_sem', 'usr_utt', 'usr_sem']
dstc_df = pd.DataFrame(data=out, columns=columns)

In [39]:
# Preview the first rows to sanity-check the flattened turn table.
dstc_df.head()

Unnamed: 0,dial_id,turn_id,sys_utt,sys_sem,usr_utt,usr_sem
0,0,0,"Hello , welcome to the Cambridge restaurant system? You can ask for restaurants by area , price range or food type . How may I help you?",welcomemsg(),i would like to find an expensive restaurant in the south part,"inform(pricerange=expensive, type=restaurant, area=south)"
1,0,1,What kind of food would you like?,request(slot=food),does not matter,inform(=dontcare)
2,0,2,There are restaurants in the expensive price range and the south of town . What type of food would you like?,request(slot=food) | impl-conf(pricerange=expensive) | impl-conf(area=south),any type of food is okay,inform(food=dontcare)
3,0,3,taj tandoori is a nice restaurant in the south of town serving indian food,offer(name=taj tandoori) | inform(food=indian) | inform(pricerange=expensive) | inform(area=south),what is the address,request(addr)
4,0,4,"Sure , taj tandoori is on 64 Cherry Hinton Road Cherry Hinton",offer(name=taj tandoori) | inform(addr=64 cherry hinton road cherry hinton),what is the phone number,request(phone)


In [49]:
# Show every turn of a single dialog (dial_id 9) as a worked example.
dstc_df[dstc_df['dial_id'] == 9]

Unnamed: 0,dial_id,turn_id,sys_utt,sys_sem,usr_utt,usr_sem
76,9,0,"Hello , welcome to the Cambridge restaurant system? You can ask for restaurants by area , price range or food type . How may I help you?",welcomemsg(),im looking for spanish food,"inform(food=spanish, task=find)"
77,9,1,la raza serves spanish food .,offer(name=la raza) | inform(food=spanish),what is their address,request(addr)
78,9,2,"Sure , la raza is on 4 - 6 Rose Crescent",offer(name=la raza) | inform(addr=4 - 6 rose crescent),what is their phone number,request(phone)
79,9,3,The phone number of la raza is 01223 464550 .,offer(name=la raza) | inform(phone=01223 464550),thank you good bye,thankyou()|bye()


In [43]:
# Parse the DSTC2 ontology JSON file shipped with the scripts' config.
with open('_dstc2_scripts/config/ontology_dstc2.json') as ontology_file:
    ontology = json.loads(ontology_file.read())

In [44]:
# Display the parsed ontology; the output is long (the 'informable' entry
# alone lists every area and food value).
ontology

{u'informable': {u'area': [u'centre', u'north', u'west', u'south', u'east'],
  u'food': [u'afghan',
   u'african',
   u'afternoon tea',
   u'asian oriental',
   u'australasian',
   u'australian',
   u'austrian',
   u'barbeque',
   u'basque',
   u'belgian',
   u'bistro',
   u'brazilian',
   u'british',
   u'canapes',
   u'cantonese',
   u'caribbean',
   u'catalan',
   u'chinese',
   u'christmas',
   u'corsica',
   u'creative',
   u'crossover',
   u'cuban',
   u'danish',
   u'eastern european',
   u'english',
   u'eritrean',
   u'european',
   u'french',
   u'fusion',
   u'gastropub',
   u'german',
   u'greek',
   u'halal',
   u'hungarian',
   u'indian',
   u'indonesian',
   u'international',
   u'irish',
   u'italian',
   u'jamaican',
   u'japanese',
   u'korean',
   u'kosher',
   u'latin american',
   u'lebanese',
   u'light bites',
   u'malaysian',
   u'mediterranean',
   u'mexican',
   u'middle eastern',
   u'modern american',
   u'modern eclectic',
   u'modern european',
   u'modern