In [24]:
import time
import sys
import numpy
import collections

import dataset_walker
import ontology_reader
from baseline import BaselineMethod1, BaselineMethod2

In [25]:
# Dataset selection and file names used by this notebook.
DATASET = "dstc5_dev"
DATAROOT = "../data"  # handed to dataset_walker as its `dataroot` argument
TRACKFILE = "baseline_dev.json"
METHOD = "1"

# Build the ontology reader once; later cells query it through ONTOLOGY.
ONTOLOGY_FILE = "config/ontology_dstc5.json"
ONTOLOGY = ontology_reader.OntologyReader(ONTOLOGY_FILE)

In [26]:
def get_dataset_info(in_dataset):
    """Collect simple size statistics for one dataset.

    The dataset is opened through ``dataset_walker`` (rooted at ``DATAROOT``)
    with labels and translations enabled, and every utterance is inspected.

    Args:
        in_dataset: Dataset name passed straight to the dataset walker.

    Returns:
        A ``defaultdict`` (missing keys read as 0) with these counters:
          * ``'dialogs'`` - number of calls yielded by the walker;
          * ``'utterances'`` - total number of utterances;
          * ``'segments'`` - utterances whose ``target_bio`` tag is ``'B'``;
          * ``'informative_utterances'`` - utterances whose ``target_bio``
            tag is anything other than ``'O'``.
    """
    counts = collections.defaultdict(lambda: 0)
    walker = dataset_walker.dataset_walker(
        in_dataset,
        dataroot=DATAROOT,
        labels=True,
        translations=True,
    )
    for session in walker:
        counts['dialogs'] += 1
        # Each item is an (utterance, translations, labels) triple; only the
        # utterance itself is needed for the statistics.
        for utterance, _translations, _labels in session:
            counts['utterances'] += 1
            tag = utterance['segment_info']['target_bio']
            if tag == 'O':
                # Tagged 'O': contributes to the utterance total only.
                continue
            if tag == 'B':
                counts['segments'] += 1
            counts['informative_utterances'] += 1
    return counts

In [27]:
def print_ontology_info(in_ontology):
    """Print the given ontology object using its default string form.

    Args:
        in_ontology: Object to print (the notebook passes ``ONTOLOGY``).
    """
    # The parenthesised single-argument form behaves the same under the
    # Python 2 print statement and the Python 3 print function.
    print(in_ontology)
# Quick tour of the ontology reader: its attributes, the topic names, the
# slots of one topic, and a sample of the plain and translated value lists.
# print(...) with a single argument works on both Python 2 and Python 3.
print(dir(ONTOLOGY))
print(ONTOLOGY.get_topics())
print(ONTOLOGY.get_slots('TRANSPORTATION'))
# Only the first ten entries of each FROM list are shown to keep output short.
print(ONTOLOGY.get_tagsets()['TRANSPORTATION']['FROM'][:10])
print(ONTOLOGY.get_translated_tagsets()['TRANSPORTATION']['FROM'][:10])

['__doc__', '__init__', '__module__', 'get_pilot_tagsets', 'get_slots', 'get_tagsets', 'get_topics', 'get_translated_tagsets', 'get_translations', 'ontology', 'pilot_tagsets', 'tagsets', 'translations']
[u'FOOD', u'ATTRACTION', u'TRANSPORTATION', u'SHOPPING', u'ACCOMMODATION']
[u'INFO', u'FROM', u'TO', u'STATION', u'LINE', u'TYPE', u'TICKET']
[u'1-Altitude Gallery & Bar', u'112 Katong', u'2am Dessert Bar', u'313@Somerset', u'5footway.inn Project Ann Siang', u'5footway.inn Project Boat Quay', u'5footway.inn Project Bugis', u'5footway.inn Project Chinatown', u'5footway.inn Project Chinatown 2', u'7-Eleven']
[{'entry_en': u'1-Altitude Gallery & Bar', 'translated_cn': [u'1-altitude\u5eca\u53ca\u9152\u5427', u'1-altitude\u5eca\u53ca\u9152\u5427', u'1-altitude\u53ca\u9152\u5427', u'1-altitude\u5eca\u53ca\u5f8b\u5e08', u'1-altitude\u53ca\u5f8b\u5e08', u'1-altitude\u753b\u5eca\u53ca\u9152\u5427']}, {'entry_en': u'112 Katong', 'translated_cn': [u'\u7b2c112\u52a0\u4e1c', u'\u7b2c112\u52a0\u4e1c'

DSTC5 - Train Set Info
==

In [28]:
# One "name:<TAB>count" line per statistic of the DSTC5 training set.
# .items() and print(...) run unchanged on both Python 2 and Python 3.
for key, value in get_dataset_info('dstc5_train').items():
    print('{}:\t{}'.format(key, value))

segments:	4296
informative_utterances:	25338
dialogs:	35
utterances:	31034


DSTC5 - Dev Set Info
==

In [29]:
# One "name:<TAB>count" line per statistic of the DSTC5 development set.
# .items() and print(...) run unchanged on both Python 2 and Python 3.
for key, value in get_dataset_info('dstc5_dev').items():
    print('{}:\t{}'.format(key, value))

segments:	253
informative_utterances:	2189
dialogs:	2
utterances:	3130


In [30]:
# Print the ontology reader object via the helper defined earlier in the notebook.
print_ontology_info(ONTOLOGY)

<ontology_reader.OntologyReader instance at 0x1127dc830>


DSTC4 - Train Set Info
==

In [31]:
# One "name:<TAB>count" line per statistic of the DSTC4 training set.
# .items() and print(...) run unchanged on both Python 2 and Python 3.
for key, value in get_dataset_info('dstc4_train').items():
    print('{}:\t{}'.format(key, value))

segments:	1747
informative_utterances:	9974
dialogs:	14
utterances:	12759


DSTC4 - Dev Set Info
==

In [32]:
# One "name:<TAB>count" line per statistic of the DSTC4 development set.
# .items() and print(...) run unchanged on both Python 2 and Python 3.
for key, value in get_dataset_info('dstc4_dev').items():
    print('{}:\t{}'.format(key, value))

segments:	632
informative_utterances:	4139
dialogs:	6
utterances:	4812


DSTC4 - Test Set Info
==

In [33]:
# One "name:<TAB>count" line per statistic of the DSTC4 test set.
# .items() and print(...) run unchanged on both Python 2 and Python 3.
for key, value in get_dataset_info('dstc4_test').items():
    print('{}:\t{}'.format(key, value))

segments:	1147
informative_utterances:	6528
dialogs:	9
utterances:	7848


In [35]:
# List the names (attributes and methods) available on the ontology reader.
dir(ONTOLOGY)

['__doc__',
 '__init__',
 '__module__',
 'get_pilot_tagsets',
 'get_slots',
 'get_tagsets',
 'get_topics',
 'get_translated_tagsets',
 'get_translations',
 'ontology',
 'pilot_tagsets',
 'tagsets',
 'translations']

In [50]:
# Show the topic names, then the full tagsets structure (a nested mapping
# keyed by topic and then by slot, so the printed output is very long).
# print(...) with a single argument works on both Python 2 and Python 3.
print(ONTOLOGY.get_topics())
print(ONTOLOGY.get_tagsets())

[u'FOOD', u'ATTRACTION', u'TRANSPORTATION', u'SHOPPING', u'ACCOMMODATION']
{u'FOOD': {u'INFO': [u'Booking', u'Cuisine', u'Delivery', u'Dish', u'History', u'Image', u'Ingredient', u'Itinerary', u'Location', u'Opening hour', u'Place', u'Preference', u'Pricerange', u'Promotion', u'Restriction', u'Spiciness', u'Type of place'], u'CUISINE': [u'African cuisine', u'American cuisine', u'Asian cuisine', u'Australian cuisine', u'Barbecue', u'Beer', u'Beijing cuisine', u'Cantonese cuisine', u'Chinese Islamic cuisine', u'Chinese cuisine', u'Dessert', u'Drink', u'Egyptian cuisine', u'English cuisine', u'European cuisine', u'Filipino cuisine', u'Finger food', u'Foochow cuisine', u'French cuisine', u'Fruit', u'Fujian cuisine', u'Fusion', u'German cuisine', u'Greek cuisine', u'Hainanese cuisine', u'Hakka cuisine', u'Halal cuisine', u'Hokkien cuisine', u'Hong Kong cuisine', u'Indian cuisine', u'Indochinese cuisine', u'Indonesian cuisine', u'International cuisine', u'Italian cuisine', u'Japanese cuisine