## dataset : multiwoz21, sgd1, tm1

In [1]:
import os, sys
from collections import defaultdict

In [2]:
from convlab.util import load_dataset, load_ontology, load_database
from convlab.util import load_nlu_data, create_delex_data

In [3]:
dataset = load_dataset('multiwoz21')
ontology = load_ontology('multiwoz21')
database = load_database('multiwoz21')

In [9]:
ontology['domains']['general']

{'description': 'general domain without slots', 'slots': {}}

In [22]:
slots = defaultdict(dict)
for domain in ontology['domains']:
  for slot in ontology['domains'][domain]['slots']:
    description = ontology['domains'][domain]['slots'][slot]['description']
    is_categorical = ontology['domains'][domain]['slots'][slot]['is_categorical']
    if 'possible_values' in ontology['domains'][domain]['slots'][slot]:
      possible_values = ontology['domains'][domain]['slots'][slot]['possible_values']
    if possible_values:
      slot_dict = {'description': description, 'is_categorical': is_categorical, 'possible_values': possible_values}
    else:
      slot_dict = {'description': description, 'is_categorical': is_categorical}
    slots[domain][slot] = slot_dict

In [24]:
slots['general']

{}

In [5]:
state = {"hotel": {"area": "east", "price range": "moderate"}}
res = database.query("hotel", state, topk=3)

In [6]:
split_dataset = load_dataset('multiwoz21', split2ratio={"train": 0.01})

In [7]:
nlu_data = load_nlu_data(split_dataset, "test", "user")

In [8]:
dataset, delex_vocab = create_delex_data(split_dataset)

In [9]:
delex_vocab

['[(attraction)-(address)]',
 '[(attraction)-(area)]',
 '[(attraction)-(choice)]',
 '[(attraction)-(entrance fee)]',
 '[(attraction)-(name)]',
 '[(attraction)-(open hours)]',
 '[(attraction)-(phone)]',
 '[(attraction)-(postcode)]',
 '[(attraction)-(type)]',
 '[(hospital)-(address)]',
 '[(hospital)-(department)]',
 '[(hospital)-(phone)]',
 '[(hospital)-(postcode)]',
 '[(hotel)-(address)]',
 '[(hotel)-(area)]',
 '[(hotel)-(book day)]',
 '[(hotel)-(book people)]',
 '[(hotel)-(book stay)]',
 '[(hotel)-(choice)]',
 '[(hotel)-(internet)]',
 '[(hotel)-(name)]',
 '[(hotel)-(parking)]',
 '[(hotel)-(phone)]',
 '[(hotel)-(postcode)]',
 '[(hotel)-(price range)]',
 '[(hotel)-(ref)]',
 '[(hotel)-(stars)]',
 '[(hotel)-(type)]',
 '[(police)-(address)]',
 '[(police)-(name)]',
 '[(police)-(phone)]',
 '[(police)-(postcode)]',
 '[(restaurant)-(address)]',
 '[(restaurant)-(area)]',
 '[(restaurant)-(book day)]',
 '[(restaurant)-(book people)]',
 '[(restaurant)-(book time)]',
 '[(restaurant)-(choice)]',
 '[(

In [7]:
# export OPENAI_API_KEY="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"
os.environ['OPENAI_API_KEY'] = "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"
from convlab.base_models.llm.nlu import LLM_NLU

In [11]:
texts = [
  "I would like a taxi from Saint John's college to Pizza Hut Fen Ditton.",
  "I want to leave after 17:15.",
  "Thank you for all the help! I appreciate it.",
  "Please find a restaurant called Nusha.",
  "I am not sure of the type of food but could you please check again and see if you can find it? Thank you.",
  "It's not a restaurant, it's an attraction. Nusha."
  ]
contexts = [
  [],
  ["I would like a taxi from Saint John's college to Pizza Hut Fen Ditton.",
  "What time do you want to leave and what time do you want to arrive by?"],
  ["I would like a taxi from Saint John's college to Pizza Hut Fen Ditton.",
  "What time do you want to leave and what time do you want to arrive by?",
  "I want to leave after 17:15.",
  "Booking completed! your taxi will be blue honda Contact number is 07218068540"],
  [],
  ["Please find a restaurant called Nusha.",
  "I don't seem to be finding anything called Nusha.  What type of food does the restaurant serve?"],
  ["Please find a restaurant called Nusha.",
  "I don't seem to be finding anything called Nusha.  What type of food does the restaurant serve?",
  "I am not sure of the type of food but could you please check again and see if you can find it? Thank you.",
  "Could you double check that you've spelled the name correctly? The closest I can find is Nandos."]
]
dataset = load_dataset('multiwoz21')
example_dialogs = dataset['train'][:3]
nlu = LLM_NLU(dataset_name='multiwoz21', api_type='openai', model_name_or_path='gpt-3.5-turbo', speaker='user', example_dialogs=example_dialogs)
# nlu = LLM_NLU('multiwoz21', 'huggingface', 'Llama-2-7b-chat-hf', 'user', example_dialogs)
for text, context in zip(texts, contexts):
  # print(text)
  print(nlu.predict(text, context))
  print('-'*50)

You are an excellent dialogue acts parser. Dialogue acts are used to represent the intention of the speaker. Dialogue acts are a list of tuples, each tuple is in the form of (intent, domain, slot, value). The "intent", "domain", "slot" are defines as follows:

"intents": {
    "inform": "inform the value of a slot",
    "request": "ask for the value of a slot",
    "nobook": "inform the user that the booking is failed",
    "reqmore": "ask the user for more instructions",
    "book": "book something for the user",
    "bye": "say goodbye to the user and end the conversation",
    "thank": "thanks for the help",
    "welcome": "you're welcome",
    "greet": "express greeting",
    "recommend": "recommend a choice to the user",
    "select": "provide several choices for the user",
    "offerbook": "ask the user if he or she needs booking",
    "offerbooked": "provide information about the booking",
    "nooffer": "inform the user that there is no result satisfies user requirements"
}

"d

In [14]:
texts = [
  "I would like a taxi from Saint John's college to Pizza Hut Fen Ditton.",
  "I want to leave after 17:15.",
  "Thank you for all the help! I appreciate it.",
  "Please find a restaurant called Nusha.",
  "I am not sure of the type of food but could you please check again and see if you can find it? Thank you.",
  "It's not a restaurant, it's an attraction. Nusha."
  ]
contexts = [
  [],
  ["I would like a taxi from Saint John's college to Pizza Hut Fen Ditton.",
  "What time do you want to leave and what time do you want to arrive by?"],
  ["I would like a taxi from Saint John's college to Pizza Hut Fen Ditton.",
  "What time do you want to leave and what time do you want to arrive by?",
  "I want to leave after 17:15.",
  "Booking completed! your taxi will be blue honda Contact number is 07218068540"],
  [],
  ["Please find a restaurant called Nusha.",
  "I don't seem to be finding anything called Nusha.  What type of food does the restaurant serve?"],
  ["Please find a restaurant called Nusha.",
  "I don't seem to be finding anything called Nusha.  What type of food does the restaurant serve?",
  "I am not sure of the type of food but could you please check again and see if you can find it? Thank you.",
  "Could you double check that you've spelled the name correctly? The closest I can find is Nandos."]
]
dataset = load_dataset('multiwoz21')
example_dialogs = dataset['train'][:3]
nlu = LLM_NLU(dataset_name='multiwoz21', api_type='openai', model_name_or_path='gpt-4-1106-preview', speaker='user', example_dialogs=example_dialogs)
# nlu = LLM_NLU('multiwoz21', 'huggingface', 'Llama-2-7b-chat-hf', 'user', example_dialogs)
for text, context in zip(texts, contexts):
  # print(text)
  print(nlu.predict(text, context))
  print('-'*50)

You are an excellent dialogue acts parser. Dialogue acts are used to represent the intention of the speaker. Dialogue acts are a list of tuples, each tuple is in the form of (intent, domain, slot, value). The "intent", "domain", "slot" are defines as follows:

"intents": {
    "inform": "inform the value of a slot",
    "request": "ask for the value of a slot",
    "nobook": "inform the user that the booking is failed",
    "reqmore": "ask the user for more instructions",
    "book": "book something for the user",
    "bye": "say goodbye to the user and end the conversation",
    "thank": "thanks for the help",
    "welcome": "you're welcome",
    "greet": "express greeting",
    "recommend": "recommend a choice to the user",
    "select": "provide several choices for the user",
    "offerbook": "ask the user if he or she needs booking",
    "offerbooked": "provide information about the booking",
    "nooffer": "inform the user that there is no result satisfies user requirements"
}

"d

## NLU

In [5]:
import json
import random
random.seed(1234)
os.environ['OPENAI_API_KEY'] = "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"

from convlab.base_models.llm.base import LLM
from convlab.nlu import NLU

In [6]:
class LLM_NLU(NLU):
  def __init__(self, dataset_name, api_type, model_name_or_path, speaker, generation_kwargs=None):
    assert speaker in ['user', 'system']
    self.speaker = speaker
    self.opponent = 'system' if speaker == 'user' else 'user'
    self.ontology = load_ontology(dataset_name)
    self.slots = None
    self.initial_system_instruction = self.format_system_instruction(self.ontology)
    self.system_instruction = None
    # print(self.system_instruction)
    self.model = LLM(api_type, model_name_or_path, self.system_instruction, generation_kwargs)

  def format_system_instruction(self, ontology):
    intents = {intent: ontology['intents'][intent]['description'] for intent in ontology['intents']}
    # domains = {domain: '' for domain in ontology['domains']}
    self.slots = {domain: {
          slot: ontology['domains'][domain]['slots'][slot]['description'] 
          for slot in ontology['domains'][domain]['slots']
        } for domain in ontology['domains']}
    
    # categorical_slot_values = {domain: {
    #               slot: ontology['domains'][domain]['slots'][slot]['possible_values']
    #               for slot in ontology['domains'][domain]['slots'] if ontology['domains'][domain]['slots'][slot]['is_categorical']
    #             } for domain in ontology['domains']}
    
    # example = ''
    # for example_dialog in example_dialogs:
    #   for i, turn in enumerate(example_dialog['turns']):
    #     if turn['speaker'] == self.speaker:
    #       if i > 0:
    #         example += example_dialog['turns'][i-1]['speaker']+': '+example_dialog['turns'][i-1]['utterance']+'\n'
    #       example += turn['speaker']+': '+turn['utterance']+'\n'
    #       das = []
    #       for da_type in turn['dialogue_acts']:
    #         for da in turn['dialogue_acts'][da_type]:
    #           intent, slot, value = da.get('intent'), da.get('slot', ''), da.get('value', '')
    #           das.append((intent, slot, value))
    #       example += '<DA>'+json.dumps(das)+'</DA>'+'\n\n'
    
    system_instruction = "\n\n".join([
      """You are an excellent dialogue acts parser. Dialogue acts are used to represent the intention of the speaker. Dialogue acts are a list of tuples, each tuple is in the form of (intent, slot, value). The "intent", "slot" are defines as follows:""",
      '"intents": '+json.dumps(intents, indent=4),
      # '"domains": '+json.dumps(domains, indent=4),
      # '"domain2slots": '+json.dumps(self.slots, indent=4),
      # """Here are example dialogue acts:""",
      # example,
      # """Now consider the following dialogue. Please generate the dialogue acts of the last utterance of {}. Start with <DA> token and end with </DA> token. Example: "<DA>[["inform", "name": "abc"]]</DA>". Do not generate intents, slots that are not defined above.""".format(self.speaker)
    ])
        
    return system_instruction

  def predict(self, utterance, context, domains, example_dialogs, example_cnt=5):
    filter_slots = {}
    for domain in domains:
      filter_slots[domain] = self.slots[domain]
    # domain specific example
    example = ''
    for example_dialog in example_dialogs:
      for i, turn in enumerate(example_dialog['turns']):
        if turn['speaker'] == self.speaker:
          if i > 0:
            example += example_dialog['turns'][i-1]['speaker']+': '+example_dialog['turns'][i-1]['utterance']+'\n'
          example += turn['speaker']+': '+turn['utterance']+'\n'
          das = []
          for da_type in turn['dialogue_acts']:
            for da in turn['dialogue_acts'][da_type]:
              intent, slot, value = da.get('intent'), da.get('slot', ''), da.get('value', '')
              das.append((intent, slot, value))
          example += '<DA>'+json.dumps(das[:example_cnt])+'</DA>'+'\n\n'
    
    add_system_instruction = "\n\n".join([
      '"domain2slots": '+json.dumps(filter_slots, indent=4),
      """Here are example dialogue acts:""",
      example,
      """Now consider the following dialogue. Please generate the dialogue acts of the last utterance of {}. Start with <DA> token and end with </DA> token. Example: "<DA>[["inform", "name": "abc"]]</DA>". Do not generate intents, slots that are not defined above.""".format(self.speaker)
    ])
    self.system_instruction = self.initial_system_instruction+'\n\n'+add_system_instruction
    # print(self.system_instruction)
    self.model.set_system_instruction(self.system_instruction)
    prompt = ""
    for i, turn in enumerate(context[::-1][:1]):
      # only the last utterance of the opponent is used
      if i % 2 == 0:
        prompt = self.opponent+': '+turn+'\n' + prompt
      else:
        prompt = self.speaker+': '+turn+'\n' + prompt
    prompt += self.speaker+': '+utterance+'\n'
    # print('='*50)
    # print('prompt')
    # print(prompt)
    response = self.model.chat(prompt)
    self.model.clear_chat_history()
    # print('response')
    # print(response)
    # print('='*50)
    dialogue_acts = self.normalize_response_to_dialogue_acts(response)
    return dialogue_acts
  
  def normalize_response_to_dialogue_acts(self, response):
    start_token, end_token = "<DA>", "</DA>"
    start_idx = response.find(start_token)
    end_idx = response.find(end_token)
    if start_idx == -1 or end_idx == -1:
      return {}
    response = response[start_idx+len(start_token):end_idx].strip()
    if response == "":
      return {}
    try:
      dialogue_acts = json.loads(response)
    except json.decoder.JSONDecodeError:
      # print('JSONDecodeError')
      # print('*'*30)
      # print([response])
      # print('*'*30)
      return {}
    return dialogue_acts

In [188]:
dataset = load_dataset('multiwoz21')

In [190]:
len(dataset['train'][0]['turns'])

10

### Goal Description
{'description': "You are looking for a <span class='emphasis'>place to stay</span>. The hotel should be in the <span class='emphasis'>cheap</span> price range and should be in the type of <span class='emphasis'>hotel</span>. The hotel should <span class='emphasis'>include free parking</span> and should <span class='emphasis'>include free wifi</span>. Once you find the <span class='emphasis'>hotel</span> you want to book it for <span class='emphasis'>6 people</span> and <span class='emphasis'>3 nights</span> starting from <span class='emphasis'>tuesday</span>. If the booking fails how about <span class='emphasis'>2 nights</span>. Make sure you get the <span class='emphasis'>reference number</span>",
 'inform': {'hotel': {'type': 'hotel',
   'parking': 'yes',
   'price range': 'cheap',
   'internet': 'yes',
   'book stay': '3|2',
   'book day': 'tuesday',
   'book people': '6'}},
 'request': {'hotel': {}}}

In [36]:
# 'dataset', 'data_split', 'dialogue_id', 'original_id', 'domains', 'goal', 'turns'
dataset['train'][0]['turns'][0]

{'speaker': 'user',
 'utterance': 'am looking for a place to to stay that has cheap price range it should be in a type of hotel',
 'utt_idx': 0,
 'dialogue_acts': {'categorical': [{'intent': 'inform',
    'domain': 'hotel',
    'slot': 'price range',
    'value': 'cheap'}],
  'non-categorical': [{'intent': 'inform',
    'domain': 'hotel',
    'slot': 'type',
    'value': 'hotel',
    'start': 87,
    'end': 92}],
  'binary': []},
 'state': {'attraction': {'type': '', 'name': '', 'area': ''},
  'hotel': {'name': '',
   'area': '',
   'parking': '',
   'price range': 'cheap',
   'stars': '',
   'internet': '',
   'type': 'hotel',
   'book stay': '',
   'book day': '',
   'book people': ''},
  'restaurant': {'food': '',
   'price range': '',
   'name': '',
   'area': '',
   'book time': '',
   'book day': '',
   'book people': ''},
  'taxi': {'leave at': '',
   'destination': '',
   'departure': '',
   'arrive by': ''},
  'train': {'leave at': '',
   'destination': '',
   'day': '',
   'a

In [7]:
# get texts & contexts from dataset
# dialogue : single from dataset['train'] (either 'valid', 'test')
def get_texts_contexts(dialogue, depth=None):
  turns = dialogue['turns']
  texts = []
  utterances = []
  contexts = []
  for turn in turns:
    if turn['speaker'] == 'user':
      texts.append(turn['utterance'])
      contexts.append(utterances[:])
    utterances.append(turn['utterance'])
  if depth:
    contexts = [context[-depth::] for context in contexts]
  return texts, contexts

In [8]:
def get_example_dialoges(dataset, domains, cnt=3):
  example_dialogs = []
  train_dataset = dataset['train']
  for data in train_dataset:
    if sorted(data['domains']) == sorted(domains):
      example_dialogs.append(data)
  if len(example_dialogs) == 0:
    for data in train_dataset:
      if len(set(data['domains']).intersection(domains)) > 0:
        example_dialogs.append(data)
  return random.sample(example_dialogs, cnt)

In [122]:
dataset = load_dataset('multiwoz21')

# gpt_model : gpt-3.5-turbo, gpt-4-1106-preview
nlu = LLM_NLU(dataset_name='multiwoz21', api_type='openai', model_name_or_path='gpt-3.5-turbo', speaker='user')
# nlu = LLM_NLU('multiwoz21', 'huggingface', 'Llama-2-7b-chat-hf', 'user', example_dialogs)
test_data = dataset['train'][100]
texts, contexts = get_texts_contexts(test_data)
example_dialogs = get_example_dialoges(dataset, test_data['domains'])
for text, context in zip(texts, contexts):
  # print(text)
  # print(test_data['domains'], example_dialogs, text, context)
  print(nlu.predict(text, context, test_data['domains'], example_dialogs))
  print('-'*50)

You are an excellent dialogue acts parser. Dialogue acts are used to represent the intention of the speaker. Dialogue acts are a list of tuples, each tuple is in the form of (intent, slot, value). The "intent", "slot" are defines as follows:

"intents": {
    "inform": "inform the value of a slot",
    "request": "ask for the value of a slot",
    "nobook": "inform the user that the booking is failed",
    "reqmore": "ask the user for more instructions",
    "book": "book something for the user",
    "bye": "say goodbye to the user and end the conversation",
    "thank": "thanks for the help",
    "welcome": "you're welcome",
    "greet": "express greeting",
    "recommend": "recommend a choice to the user",
    "select": "provide several choices for the user",
    "offerbook": "ask the user if he or she needs booking",
    "offerbooked": "provide information about the booking",
    "nooffer": "inform the user that there is no result satisfies user requirements"
}

"domain2slots": {
  

In [9]:
dataset = load_dataset('multiwoz21')


In [33]:
for d in dataset['test']:
  if 'restaurant' in d['domains'] and len(d['domains']) == 1:
    utter = [turn['utterance'] for turn in d['turns']]
    break

In [36]:
for idx, u in enumerate(utter):
  if idx%2 == 0:
    print('user: ', u)
  else:
    print('system: ', u)

user:  Hi, I am looking for a cheap restaurant in the town centre please.
system:  There are quite a few cheap restaurants in the town center. Is there a particular cuisine you prefer? 
user:  I prefer creative food, please.
system:  So sorry, I'm not seeing any creative restaurants. Will a different cuisine do?
user:  How about one that serves indian food?
system:  We have 3 located in the centre. kohinoor, the gandhi, and mahal of cambridge. Do any of these fancy you?
user:  The Kohinoor sounds good. Can you reserve a table for 4 on Wednesday? Let's say... 18:30 for the time if it's available.
system:  I'm sorry, that isn't available.  Can I try a different day or time perhaps?
user:  Try 17:30, okay?
system:  Your reservation has been booked on Wednesday for 4 people at 17:30. Reference number is M3TW6MPA.


In [125]:

# gpt_model : gpt-3.5-turbo, gpt-4-1106-preview
nlu = LLM_NLU(dataset_name='sgd1', api_type='openai', model_name_or_path='gpt-3.5-turbo', speaker='user')
# nlu = LLM_NLU('multiwoz21', 'huggingface', 'Llama-2-7b-chat-hf', 'user', example_dialogs)
test_data = dataset['train'][100]
texts, contexts = get_texts_contexts(test_data)
example_dialogs = get_example_dialoges(dataset, test_data['domains'])
for text, context in zip(texts, contexts):
  # print(text)
  # print(test_data['domains'], example_dialogs, text, context)
  print(nlu.predict(text, context, test_data['domains'], example_dialogs))
  print('-'*50)

You are an excellent dialogue acts parser. Dialogue acts are used to represent the intention of the speaker. Dialogue acts are a list of tuples, each tuple is in the form of (intent, slot, value). The "intent", "slot" are defines as follows:

"intents": {
    "inform": "Inform the value for a slot.",
    "request": "Request the value of a slot.",
    "confirm": "Confirm the value of a slot before making a transactional service call.",
    "offer": "Offer a certain value for a slot to the user.",
    "notify_success": "Inform the user that their request was successful.",
    "notify_failure": "Inform the user that their request failed.",
    "inform_count": "Inform the number of items found that satisfy the user's request.",
    "offer_intent": "Offer a new intent to the user.",
    "req_more": "Asking the user if they need anything else.",
    "goodbye": "End the dialogue.",
    "inform_intent": "Express the desire to perform a certain task to the system.",
    "negate_intent": "Negate

KeyboardInterrupt: 

In [None]:
dataset = load_dataset('tm1')
# gpt_model : gpt-3.5-turbo, gpt-4-1106-preview
nlu = LLM_NLU(dataset_name='tm1', api_type='openai', model_name_or_path='gpt-3.5-turbo', speaker='user')
# nlu = LLM_NLU('multiwoz21', 'huggingface', 'Llama-2-7b-chat-hf', 'user', example_dialogs)
test_data = dataset['train'][100]
texts, contexts = get_texts_contexts(test_data)
example_dialogs = get_example_dialoges(dataset, test_data['domains'])
for text, context in zip(texts, contexts):
  # print(text)
  # print(test_data['domains'], example_dialogs, text, context)
  print(nlu.predict(text, context, test_data['domains'], example_dialogs, ))
  print('-'*50)

# DST

In [128]:
import json
import random
random.seed(1234)

from copy import deepcopy
from convlab.base_models.llm.base import LLM
from convlab.dst import DST

In [175]:
class LLM_DST(DST):
  def __init__(self, dataset_name, api_type, model_name_or_path, generation_kwargs=None):
    self.ontology = load_ontology(dataset_name)
    self.slot_descriptions = None
    self.categorical_slot_values = None
    self.init_system_instruction = self.format_system_instruction(self.ontology)
    # print(self.system_instruction)
    self.model = LLM(api_type, model_name_or_path, self.init_system_instruction, generation_kwargs)
    self.state_update = []

  def format_system_instruction(self, ontology):
    # From paper "ChatGPT for Zero-shot Dialogue State Tracking: A Solution or an Opportunity?"
    # http://arxiv.org/abs/2306.01386
    state = ontology['state']
    self.slot_descriptions = deepcopy(ontology['state'])
    self.categorical_slot_values = deepcopy(ontology['state'])
    
    for domain in state:
      for slot in state[domain]:
        self.slot_descriptions[domain][slot] = ontology['domains'][domain]['slots'][slot]['description']
        if ontology['domains'][domain]['slots'][slot]['is_categorical']:
          self.categorical_slot_values[domain][slot] = ontology['domains'][domain]['slots'][slot]['possible_values']
        else:
          self.categorical_slot_values[domain].pop(slot)
      if self.categorical_slot_values[domain] == {}:
        self.categorical_slot_values.pop(domain)
    
    system_instruction = "\n\n".join([
      """Consider the following list of concepts , called "slots" provided to you as a json dictionary.""",
      # "\"slots\": "+json.dumps(slot_descriptions, indent=4),
      "\"slots\": "+"{{slot_descriptions}}",
      """Some "slots" can only take a value from predefined list:""",
      # "\"categorical\": "+json.dumps(categorical_slot_values, indent=4),
      "\"categorical\": "+"{{categorical_slot_values}}",
      """Now consider the following dialogue between two parties called the "system" and "user". Can you tell me which of the "slots" were updated by the "user" in its latest response to the "system"?""",
      """Present the updates in **JSON** format, start with <JSON> token and end with </JSON> token. Example: "<JSON>{"hotel": {"name": "abc"}}</JSON>". **Do not forget the "}" token**. If no "slots" were updated, return an empty JSON dictionary. If a user does not seem to care about a discussed "slot" fill it with "dontcare"."""
    ])
        
    return system_instruction
  
  def format_turn_prompt(self, user_utterance, system_utterance):
    return '"system": "{}"\n"user": "{}"'.format(system_utterance, user_utterance)

  def normalize_response_to_state_update(self, response):
    start_token, end_token = "<JSON>", "</JSON>"
    start_idx = response.find(start_token)
    end_idx = response.find(end_token)
    if start_idx == -1 or end_idx == -1:
      return {}
    response = response[start_idx+len(start_token):end_idx].strip()
    if response == "":
      return {}
    try:
      state_update = json.loads(response)
    except json.decoder.JSONDecodeError:
      # print('JSONDecodeError')
      # print('*'*30)
      # print([response])
      # print('*'*30)
      return {}
    return state_update

  def update(self, user_action=None):
    assert user_action == None
    context = self.state['history']
    assert len(context) > 0
    if type(context[0]) is list:
      assert len(context[0]) > 1
      context = [item[1] for item in context]
    if len(context) % 2 == 0:
      # system/user/system/user
      assert context[0] == ''
    else:
      # first turn: empty system utterance
      context.insert(0, '')
    
    assert len(context)//2 >= len(self.state_update) + 1
    for i in range(len(self.state_update), len(context)//2):
      system_utterance = context[2*i]
      user_utterance = context[2*i+1]
      turn_prompt = self.format_turn_prompt(user_utterance, system_utterance)
      response = self.model.chat(turn_prompt)
      state_update = self.normalize_response_to_state_update(response)
      # print(turn_prompt)
      # print(response)
      # print(state_update)
      # print('---'*50)
      self.state_update.append(state_update)

    self.state['belief_state'] = deepcopy(self.ontology['state'])
    for state_update in self.state_update:
      for domain in state_update:
        if domain not in self.state['belief_state']:
          continue
        for slot in state_update[domain]:
          if slot not in self.state['belief_state'][domain]:
            continue
          self.state['belief_state'][domain][slot] = state_update[domain][slot]
    return self.state
  
  def init_session(self, domains):
    filter_slot_descriptions = {}
    filter_categorical_slot_values = {}
    for domain in domains:
      if domain in self.slot_descriptions:
        filter_slot_descriptions[domain] = self.slot_descriptions[domain]
      if domain in self.categorical_slot_values:
        filter_categorical_slot_values[domain] = self.categorical_slot_values[domain]
    system_instruction = self.init_system_instruction.replace('{{slot_descriptions}}', json.dumps(filter_slot_descriptions, indent=4))
    if filter_categorical_slot_values:
      system_instruction = system_instruction.replace('{{categorical_slot_values}}', json.dumps(filter_categorical_slot_values, indent=4))
    else:
      system_instruction = system_instruction.replace('Some "slots" can only take a value from predefined list:\n\n', "")
      system_instruction = system_instruction.replace("\"categorical\": "+"{{categorical_slot_values}}\n\n", "")      
    print(system_instruction)
    self.state = dict()
    self.state['belief_state'] = deepcopy(self.ontology['state'])
    self.state['booked'] = dict()
    self.state['history'] = []
    self.state['system_action'] = []
    self.state['user_action'] = []
    self.state['terminated'] = False
    self.state_update = []
    self.model.set_system_instruction(system_instruction)
    self.model.clear_chat_history()

In [133]:
ontology = load_ontology('multiwoz21')
ontology.keys()

dict_keys(['domains', 'intents', 'state', 'dialogue_acts'])

In [156]:
# key : 'domains', 'intents', 'state', 'dialogue_acts'
ontology['domains']['taxi']

{'description': 'rent taxi to travel',
 'slots': {'destination': {'description': 'destination of taxi',
   'is_categorical': False,
   'possible_values': []},
  'departure': {'description': 'departure location of taxi',
   'is_categorical': False,
   'possible_values': []},
  'leave at': {'description': 'leaving time of taxi',
   'is_categorical': False,
   'possible_values': []},
  'arrive by': {'description': 'arrival time of taxi',
   'is_categorical': False,
   'possible_values': []},
  'phone': {'description': 'phone number of the taxi',
   'is_categorical': False,
   'possible_values': []},
  'type': {'description': 'car type of the taxi',
   'is_categorical': False,
   'possible_values': []}}}

In [166]:
# get texts & contexts from dataset
# dialogue : single from dataset['train'] (either 'valid', 'test')
def get_contexts(dialogue):
  turns = dialogue['turns']
  utterances = []
  contexts = []
  for turn in turns:
    utterances.append(turn['utterance'])
    if turn['speaker'] == 'user':
      contexts.append(utterances[:])
  return contexts

In [167]:
get_contexts(test_data)

[['I am looking for a place to grab a bite to eat.'],
 ['I am looking for a place to grab a bite to eat.',
  "What do you think you're hungry for?",
  "I'm looking for Mediterranean in Redwood City."],
 ['I am looking for a place to grab a bite to eat.',
  "What do you think you're hungry for?",
  "I'm looking for Mediterranean in Redwood City.",
  'There is only 1 restaurant I could find in Redwood City, name Rocknwraps And Kabobs.',
  'Do they have alcoholic drinks?'],
 ['I am looking for a place to grab a bite to eat.',
  "What do you think you're hungry for?",
  "I'm looking for Mediterranean in Redwood City.",
  'There is only 1 restaurant I could find in Redwood City, name Rocknwraps And Kabobs.',
  'Do they have alcoholic drinks?',
  "This restaurant doesn't serve liquor.",
  'Ok, I will go with that restaurant.'],
 ['I am looking for a place to grab a bite to eat.',
  "What do you think you're hungry for?",
  "I'm looking for Mediterranean in Redwood City.",
  'There is only 1 

In [172]:
dataset = load_dataset('multiwoz21')
test_data = dataset['train'][50]
domains = test_data['domains']

In [173]:
domains

['hotel', 'general']

In [176]:
contexts = get_contexts(test_data)
dst = LLM_DST('multiwoz21', 'openai', 'gpt-3.5-turbo')
dst.init_session(domains)
for context in contexts:
  dst.state['history'] = context
  # dst.update()
  print(dst.update())
  print('='*100)

Consider the following list of concepts , called "slots" provided to you as a json dictionary.

"slots": {
    "hotel": {
        "name": "name of the hotel",
        "area": "area or place of the hotel",
        "parking": "whether the hotel has parking",
        "price range": "price budget of the hotel",
        "stars": "star rating of the hotel",
        "internet": "whether the hotel has internet",
        "type": "what is the type of the hotel",
        "book stay": "length of stay at the hotel",
        "book day": "day of the hotel booking",
        "book people": "number of people for the hotel booking"
    }
}

Some "slots" can only take a value from predefined list:

"categorical": {
    "hotel": {
        "area": [
            "centre",
            "east",
            "north",
            "south",
            "west"
        ],
        "parking": [
            "free",
            "no",
            "yes"
        ],
        "price range": [
            "expensive",
          

# NLG

In [8]:
import json
import random
random.seed(1234)

from convlab.base_models.llm.base import LLM
from convlab.nlg import NLG

In [16]:
class LLM_NLG(NLG):
  def __init__(self, dataset_name, api_type, model_name_or_path, speaker, generation_kwargs=None):
    assert speaker in ['user', 'system']
    self.speaker = speaker
    self.opponent = 'system' if speaker == 'user' else 'user'
    self.ontology = load_ontology(dataset_name)
    self.slots = None
    self.init_system_instruction = self.format_system_instruction(self.ontology)
    # print(self.system_instruction)
    self.model = LLM(api_type, model_name_or_path, self.init_system_instruction, generation_kwargs)

  def format_system_instruction(self, ontology):
    intents = {intent: ontology['intents'][intent]['description'] for intent in ontology['intents']}
    # domains = {domain: '' for domain in ontology['domains']}
    self.slots = {domain: {
          slot: ontology['domains'][domain]['slots'][slot]['description'] 
          for slot in ontology['domains'][domain]['slots']
        } for domain in ontology['domains']}
    
    # categorical_slot_values = {domain: {
    #               slot: ontology['domains'][domain]['slots'][slot]['possible_values']
    #               for slot in ontology['domains'][domain]['slots'] if ontology['domains'][domain]['slots'][slot]['is_categorical']
    #             } for domain in ontology['domains']}
    
    system_instruction = "\n\n".join([
      """You are an excellent writing machine. You can generate fluent and precise natural language according to the given dialogue acts. Dialogue acts are a list of tuples, each tuple is in the form of (intent, domain, slot, value). The "intent", "domain", "slot" are defines as follows:""",
      '"intents": '+json.dumps(intents, indent=4),
      # '"domain2slots": '+json.dumps(slots, indent=4),
      '"domain2slots": '+"{{slot_descriptions}}",
      """Here are some examples:""",
      "{{example_dialogs}}",
      """Now consider the following dialogue acts. Please generate an utterance of {} that can express the given dialogue acts precisely. Start with <UTT> token and end with </UTT> token. Example: "<UTT>utterance</UTT>". Do not generate unrelated intents, domains and slots that are not in the given dialogue acts.""".format(self.speaker)
    ])
        
    return system_instruction
  
  def format_dialogue_acts(self, dialogue_acts):
    das = []
        
    if isinstance(dialogue_acts, dict):
      # da in unified format
      for da_type in dialogue_acts:
        for da in dialogue_acts[da_type]:
          intent, domain, slot, value = da['intent'], da['domain'], da['slot'], da.get('value', '')
          das.append((intent, domain, slot, value))
    elif isinstance(dialogue_acts[0], dict):
      # da without da type
      for da in dialogue_acts:
        intent, domain, slot, value = da['intent'], da['domain'], da['slot'], da.get('value', '')
        das.append((intent, domain, slot, value))
    elif isinstance(dialogue_acts[0], list):
      # da is a list of list (convlab-2 format)
      das = dialogue_acts
    else:
      raise ValueError(f"invalid dialog acts format {dialogue_acts}")
    return das

  def generate(self, dialogue_acts, context, example_dialogs, domains):
    filter_slots = {}
    for domain in domains:
        filter_slots[domain] = self.slots[domain]
    example = ''
    for example_dialog in example_dialogs:
      for i, turn in enumerate(example_dialog['turns']):
        if turn['speaker'] == self.speaker:
          if i > 0:
            example += example_dialog['turns'][i-1]['speaker']+': '+example_dialog['turns'][i-1]['utterance']+'\n'
          das = []
          for da_type in turn['dialogue_acts']:
            for da in turn['dialogue_acts'][da_type]:
              intent, domain, slot, value = da.get('intent'), da.get('domain'), da.get('slot', ''), da.get('value', '')
              das.append((intent, domain, slot, value))
          example += '<DA>'+json.dumps(das)+'</DA>'+'\n'
          example += turn['speaker']+': '+'<UTT>'+turn['utterance']+'</UTT>'+'\n\n'
    system_instruction = self.init_system_instruction.replace('{{slot_descriptions}}', json.dumps(filter_slots, indent=4))
    system_instruction = system_instruction.replace('{{example_dialogs}}', example)
    self.model.set_system_instruction(system_instruction)
    das = self.format_dialogue_acts(dialogue_acts)
    prompt = ""
    # # relevant concepts
    # prompt += "Relevant concepts:\n"
    # intents = set([da[0] for da in das])
    # prompt += '"intents": '+json.dumps({intent: self.ontology['intents'][intent]['description'] for intent in self.ontology['intents'] if intent in intents}, indent=4)+'\n\n'
    # slots = {}
    # for da in das:
    #   domain, slot = da[1], da[2]
    #   if domain not in slots:
    #     slots[domain] = {}
    #   if slot not in slots[domain] and slot in self.ontology['domains'][domain]['slots']:
    #     slots[domain][slot] = self.ontology['domains'][domain]['slots'][slot]['description']
    # prompt += '"domain2slots": '+json.dumps(slots, indent=4)+'\n\n'

    prompt += self.opponent+': '+context[-1]+'\n'
    prompt += '<DA>'+json.dumps(das)+'</DA>'+'\n\n'
    # print('='*50)
    # print('prompt')
    # print(prompt)
    response = self.model.chat(prompt)
    self.model.clear_chat_history()
    # print('response')
    # print(response)
    # print('='*100)
    response = self.normalize_response(response)
    return response
  
  def normalize_response(self, response):
    start_token, end_token = "<UTT>", "</UTT>"
    start_idx = response.find(start_token)
    end_idx = response.find(end_token)
    if start_idx == -1 or end_idx == -1:
      return {}
    response = response[start_idx+len(start_token):end_idx].strip()
    return response


In [180]:
das = [
    { # da in unified format
    "categorical": [],
    "non-categorical": [],
    "binary": [
      {
      "intent": "request",
      "domain": "taxi",
      "slot": "leave at"
      },
      {
      "intent": "request",
      "domain": "taxi",
      "slot": "arrive by"
      }
    ]
    },
    [ # da without da type
      {
      "intent": "inform",
      "domain": "taxi",
      "slot": "type",
      "value": "blue honda",
      "start": 38,
      "end": 48
      },
      {
      "intent": "inform",
      "domain": "taxi",
      "slot": "phone",
      "value": "07218068540",
      "start": 67,
      "end": 78
      }
    ],
    [ # da is a list of list (convlab-2 format)
      ["reqmore", "general", "", ""]
    ],
    {
    "categorical": [],
    "non-categorical": [],
    "binary": [
      {
      "intent": "bye",
      "domain": "general",
      "slot": ""
      }
    ]
    }
  ]
contexts = [
  ["I would like a taxi from Saint John's college to Pizza Hut Fen Ditton."],
  ["I would like a taxi from Saint John's college to Pizza Hut Fen Ditton.",
  "What time do you want to leave and what time do you want to arrive by?",
  "I want to leave after 17:15."],
  ["I would like a taxi from Saint John's college to Pizza Hut Fen Ditton.",
  "What time do you want to leave and what time do you want to arrive by?",
  "I want to leave after 17:15.",
  "Booking completed! your taxi will be blue honda Contact number is 07218068540",
  "Thank you for all the help! I appreciate it."],
  ["I would like a taxi from Saint John's college to Pizza Hut Fen Ditton.",
  "What time do you want to leave and what time do you want to arrive by?",
  "I want to leave after 17:15.",
  "Booking completed! your taxi will be blue honda Contact number is 07218068540",
  "Thank you for all the help! I appreciate it.",
  "You are welcome.  Is there anything else I can help you with today?",
  "No, I am all set.  Have a nice day.  Bye."],
]
dataset = load_dataset('multiwoz21')
example_dialogs = dataset['train'][:3]
nlg = LLM_NLG('multiwoz21', 'openai', 'gpt-3.5-turbo', 'system')
for da, context in zip(das, contexts):
  print(da)
  print(nlg.generate(da, context, domains))
  print()

{'categorical': [], 'non-categorical': [], 'binary': [{'intent': 'request', 'domain': 'taxi', 'slot': 'leave at'}, {'intent': 'request', 'domain': 'taxi', 'slot': 'arrive by'}]}
What time would you like to leave Saint John's college and what time would you like to arrive at Pizza Hut Fen Ditton?

[{'intent': 'inform', 'domain': 'taxi', 'slot': 'type', 'value': 'blue honda', 'start': 38, 'end': 48}, {'intent': 'inform', 'domain': 'taxi', 'slot': 'phone', 'value': '07218068540', 'start': 67, 'end': 78}]
I understand that you want to leave after 17:15. I have found a blue Honda taxi available at 07218068540. How can I assist you further?

[['reqmore', 'general', '', '']]
You're welcome! I'm glad I could help. Is there anything else I can assist you with?

{'categorical': [], 'non-categorical': [], 'binary': [{'intent': 'bye', 'domain': 'general', 'slot': ''}]}
Thank you! Have a nice day too. Goodbye!



In [10]:
def get_das_texts_contexts(dialogue, depth=None):
    turns = dialogue['turns']
    texts = []
    utterances = []
    contexts = []
    das = []
    for turn in turns:
        utterances.append(turn['utterance'])
        if turn['speaker'] == 'user':
            das.append(turn['dialogue_acts'])
            contexts.append(utterances[:])
        elif turn['speaker'] == 'system':
            texts.append(turn['utterance'])
    if depth:
        contexts = [context[-depth::] for context in contexts]
    return das, texts, contexts


def get_example_dialoges(dataset, domains, cnt=3, turn_threshold=10):
    filter_dataset = []
    example_dialogs = []
    train_dataset = dataset['train']
    for data in train_dataset:
        if len(data['turns']) < turn_threshold:
            filter_dataset.append(data)
    for data in filter_dataset:
        if sorted(data['domains']) == sorted(domains):
            example_dialogs.append(data)
    if len(example_dialogs) == 0:
        for data in filter_dataset:
            if len(set(data['domains']).intersection(domains)) > 0:
                example_dialogs.append(data)
    if len(example_dialogs) < cnt:
        sample = example_dialogs
    else:
        sample = random.sample(example_dialogs, cnt)
    return sample

In [203]:
test_dataset = dataset['train'][0]
das, texts, contexts = get_das_texts_contexts(test_dataset)


In [208]:
contexts

[['am looking for a place to to stay that has cheap price range it should be in a type of hotel'],
 ['am looking for a place to to stay that has cheap price range it should be in a type of hotel',
  'Okay, do you have a specific area you want to stay in?',
  "no, i just need to make sure it's cheap. oh, and i need parking"],
 ['am looking for a place to to stay that has cheap price range it should be in a type of hotel',
  'Okay, do you have a specific area you want to stay in?',
  "no, i just need to make sure it's cheap. oh, and i need parking",
  'I found 1 cheap hotel for you that includes parking. Do you like me to book it?',
  'Yes, please. 6 people 3 nights starting on tuesday.'],
 ['am looking for a place to to stay that has cheap price range it should be in a type of hotel',
  'Okay, do you have a specific area you want to stay in?',
  "no, i just need to make sure it's cheap. oh, and i need parking",
  'I found 1 cheap hotel for you that includes parking. Do you like me to bo

In [17]:
dataset = load_dataset('multiwoz21')
test_dataset = dataset['train'][0]
domains = test_dataset['domains']
example_dialogs = get_example_dialoges(dataset, domains)
das, texts, contexts = get_das_texts_contexts(test_dataset)
nlg = LLM_NLG('multiwoz21', 'openai', 'gpt-3.5-turbo', 'system')
for da, context in zip(das, contexts):
  print(nlg.generate(da, context, example_dialogs, domains))
  print('-----')

I found several hotels with a cheap price range. Is there a specific area you would like to stay in?
-----
Okay, I will find a cheap hotel with parking for you.
-----
Sure, I have booked a room for 6 people, starting on Tuesday and staying for 3 nights. Is there anything else I can assist you with?
-----
Alright, I have adjusted the booking to 2 nights. Is there anything else I can assist you with?
-----
Alright, if you have any more questions in the future, feel free to ask. Have a great day! Goodbye.
-----


In [18]:
len(texts)

5

In [187]:
for turn in dataset['train'][0]['turns']:
  print(turn['dialogue_acts'])

{'categorical': [{'intent': 'inform', 'domain': 'hotel', 'slot': 'price range', 'value': 'cheap'}], 'non-categorical': [{'intent': 'inform', 'domain': 'hotel', 'slot': 'type', 'value': 'hotel', 'start': 87, 'end': 92}], 'binary': []}
{'categorical': [], 'non-categorical': [], 'binary': [{'intent': 'request', 'domain': 'hotel', 'slot': 'area'}]}
{'categorical': [{'intent': 'inform', 'domain': 'hotel', 'slot': 'parking', 'value': 'yes'}], 'non-categorical': [], 'binary': []}
{'categorical': [{'intent': 'inform', 'domain': 'hotel', 'slot': 'price range', 'value': 'cheap'}], 'non-categorical': [{'intent': 'inform', 'domain': 'hotel', 'slot': 'choice', 'value': '1', 'start': 8, 'end': 9}], 'binary': [{'intent': 'inform', 'domain': 'hotel', 'slot': 'parking'}]}
{'categorical': [{'intent': 'inform', 'domain': 'hotel', 'slot': 'book day', 'value': 'tuesday'}], 'non-categorical': [{'intent': 'inform', 'domain': 'hotel', 'slot': 'book stay', 'value': '3', 'start': 22, 'end': 23}, {'intent': 'inf

In [182]:
print(nlg.system_instruction)

You are an excellent writing machine. You can generate fluent and precise natural language according to the given dialogue acts. Dialogue acts are a list of tuples, each tuple is in the form of (intent, domain, slot, value). The "intent", "domain", "slot" are defines as follows:

"intents": {
    "inform": "inform the value of a slot",
    "request": "ask for the value of a slot",
    "nobook": "inform the user that the booking is failed",
    "reqmore": "ask the user for more instructions",
    "book": "book something for the user",
    "bye": "say goodbye to the user and end the conversation",
    "thank": "thanks for the help",
    "welcome": "you're welcome",
    "greet": "express greeting",
    "recommend": "recommend a choice to the user",
    "select": "provide several choices for the user",
    "offerbook": "ask the user if he or she needs booking",
    "offerbooked": "provide information about the booking",
    "nooffer": "inform the user that there is no result satisfies user

In [25]:
dataset = load_dataset('sgd')
ontology = load_ontology('sgd')

In [29]:
# 'domains', 'intents', 'state', 'dialogue_acts'
ontology.keys()

dict_keys(['domains', 'intents', 'state', 'dialogue_acts'])

In [57]:
test_domains = []
for d in dataset['test']:
  test_domains.append('_'.join(sorted(d['domains'])))

train_domains = []
for d in dataset['train']:
  train_domains.append('_'.join(sorted(d['domains'])))

In [33]:
from collections import Counter

In [20]:
das = []
utters = []
for turn in dataset['train'][0]['turns']:
  da_per_turn = []
  for k, v in turn['dialogue_acts'].items():
    if v:
      if k == 'binary':
        da_per_turn.extend(v)
      else:
        for each_v in v:
          print(each_v)
          da_per_turn.append({'intent': each_v['intent'], 'domain': each_v['domain'], 'slot': each_v['slot'], 'value': each_v['value']})
  das.append(da_per_turn)
  utters.append(turn['utterance'])

{'intent': 'inform', 'domain': 'hotel', 'slot': 'price range', 'value': 'cheap'}
{'intent': 'inform', 'domain': 'hotel', 'slot': 'type', 'value': 'hotel', 'start': 87, 'end': 92}
{'intent': 'inform', 'domain': 'hotel', 'slot': 'parking', 'value': 'yes'}
{'intent': 'inform', 'domain': 'hotel', 'slot': 'price range', 'value': 'cheap'}
{'intent': 'inform', 'domain': 'hotel', 'slot': 'choice', 'value': '1', 'start': 8, 'end': 9}
{'intent': 'inform', 'domain': 'hotel', 'slot': 'book day', 'value': 'tuesday'}
{'intent': 'inform', 'domain': 'hotel', 'slot': 'book stay', 'value': '3', 'start': 22, 'end': 23}
{'intent': 'inform', 'domain': 'hotel', 'slot': 'book people', 'value': '6', 'start': 13, 'end': 14}
{'intent': 'nobook', 'domain': 'hotel', 'slot': 'book day', 'value': 'Tuesday'}
{'intent': 'inform', 'domain': 'hotel', 'slot': 'book stay', 'value': '2', 'start': 15, 'end': 16}
{'intent': 'inform', 'domain': 'hotel', 'slot': 'ref', 'value': '7GAWK763', 'start': 46, 'end': 54}


In [22]:
for idx, da in enumerate(das):
  if idx % 2 == 0:
    print(f'user: {da}')
  else:
    print(f'system: {da}')

user: [{'intent': 'inform', 'domain': 'hotel', 'slot': 'price range', 'value': 'cheap'}, {'intent': 'inform', 'domain': 'hotel', 'slot': 'type', 'value': 'hotel'}]
system: [{'intent': 'request', 'domain': 'hotel', 'slot': 'area'}]
user: [{'intent': 'inform', 'domain': 'hotel', 'slot': 'parking', 'value': 'yes'}]
system: [{'intent': 'inform', 'domain': 'hotel', 'slot': 'price range', 'value': 'cheap'}, {'intent': 'inform', 'domain': 'hotel', 'slot': 'choice', 'value': '1'}, {'intent': 'inform', 'domain': 'hotel', 'slot': 'parking'}]
user: [{'intent': 'inform', 'domain': 'hotel', 'slot': 'book day', 'value': 'tuesday'}, {'intent': 'inform', 'domain': 'hotel', 'slot': 'book stay', 'value': '3'}, {'intent': 'inform', 'domain': 'hotel', 'slot': 'book people', 'value': '6'}]
system: [{'intent': 'nobook', 'domain': 'hotel', 'slot': 'book day', 'value': 'Tuesday'}, {'intent': 'request', 'domain': 'hotel', 'slot': 'book stay'}, {'intent': 'request', 'domain': 'hotel', 'slot': 'book day'}]
user:

In [39]:
import json
import ast

sys_intents = []
user_intents = []
for k, v in ontology['dialogue_acts'].items():
  for da in v:
    try:
      #da = da.replace("'", '"')
      #da = json.loads(da)
      da = ast.literal_eval(da)
    except:
      print(da)
      break
    if da['user']:
      user_intents.append(da['intent'])
    if da['system']:
      sys_intents.append(da['intent'])

In [44]:
print(set(user_intents))

{'inform', 'bye', 'thank', 'request', 'greet'}


In [53]:
for data in dataset['train']:
  for turn in data['turns']:
    for k, v in turn['dialogue_acts'].items():
      for da in v:
        if da['intent'] == 'select':
          print(turn['utterance'])
          print(turn['dialouge_acts'])

City centre north b and b and Worth house are both guesthouses and located in the north. Would you like me to book one for you?


KeyError: 'state'

In [48]:
type(dataset['train'][0]['turns'][0]['dialogue_acts'])

dict

In [47]:
print(sorted(Counter(test_domains).keys()))
print(sorted(Counter(train_domains).keys()))

['Alarm_1', 'Buses_3', 'Events_3', 'Flights_4', 'Homes_2', 'Hotels_2', 'Hotels_4', 'Media_3', 'Messaging_1', 'Movies_1', 'Movies_3', 'Music_3', 'Payment_1', 'RentalCars_3', 'Restaurants_2', 'RideSharing_2', 'Services_1', 'Services_4', 'Trains_1', 'Travel_1', 'Weather_1']
['Banks_1', 'Buses_1', 'Buses_2', 'Calendar_1', 'Events_1', 'Events_2', 'Flights_1', 'Flights_2', 'Homes_1', 'Hotels_1', 'Hotels_2', 'Hotels_3', 'Media_1', 'Movies_1', 'Music_1', 'Music_2', 'RentalCars_1', 'RentalCars_2', 'Restaurants_1', 'RideSharing_1', 'RideSharing_2', 'Services_1', 'Services_2', 'Services_3', 'Travel_1', 'Weather_1']


In [59]:
single_test = []
for data in dataset['test']:
  if len(data['domains']) == 1:
    single_test.append(data['domains'][0])
len(single_test)

1331

In [39]:
import re
re.match(r'^\d+', '1. this is').group(0)

'1'

```

```