In [1]:
import pickle
import json
import import_ipynb
from utils import remove_empty_slots
from state_tracker import StateTracker
from dqn_agent import DQNAgent
from user_simulator import UserSimulator
from error_model_controller import ErrorModelController

importing Jupyter notebook from utils.ipynb
importing Jupyter notebook from dialogue_config.ipynb
importing Jupyter notebook from state_tracker.ipynb
importing Jupyter notebook from db_query.ipynb
importing Jupyter notebook from dqn_agent.ipynb


Using TensorFlow backend.


importing Jupyter notebook from user_simulator.ipynb
importing Jupyter notebook from error_model_controller.ipynb


In [2]:
# Special slot values (for reference only: the bare string literals below are
# expression statements with no effect; nothing is assigned by them)
'PLACEHOLDER'  # For informs
'UNK'  # For requests
'anything'  # Means any value works for the slot with this value
'no match available'  # When the intent of the agent is match_found yet no db match fits current constraints

# The goal of the agent is to inform a match for this key
usersim_default_key = 'ticket'

### Dialogue config constants used by Agent

In [3]:
# Slots the agent may inform about, and slots the agent may request
agent_inform_slots = [
    'moviename', 'theater', 'starttime', 'date', 'genre', 'state',
    'city', 'zip', 'critic_rating', 'mpaa_rating', 'distanceconstraints',
    'video_format', 'theater_chain', 'price', 'actor', 'description',
    'other', 'numberofkids',
]
agent_request_slots = [
    'moviename', 'theater', 'starttime', 'date', 'numberofpeople', 'genre',
    'state', 'city', 'zip', 'critic_rating', 'mpaa_rating',
    'distanceconstraints', 'video_format', 'theater_chain', 'price', 'actor',
    'description', 'other', 'numberofkids',
]

# Possible actions for agent: the two slot-less intents come first, followed by
# one single-slot inform action per inform slot and then one single-slot
# request action per request slot (both in list order)
agent_actions = [
    {'intent': 'done', 'inform_slots': {}, 'request_slots': {}},  # Triggers closing of conversation
    {'intent': 'match_found', 'inform_slots': {}, 'request_slots': {}},
]
agent_actions.extend(
    {'intent': 'inform', 'inform_slots': {slot_name: 'PLACEHOLDER'}, 'request_slots': {}}
    for slot_name in agent_inform_slots
)
agent_actions.extend(
    {'intent': 'request', 'inform_slots': {}, 'request_slots': {slot_name: 'UNK'}}
    for slot_name in agent_request_slots
)

# Request list for the rule-based policy
rule_requests = [
    'moviename',
    'starttime',
    'city',
    'date',
    'theater',
    'numberofpeople',
]
# Possible inform slot keys that cannot be used to query
no_query_keys = [
    'numberofpeople',
    usersim_default_key,
]

In [4]:
# JSON file holding the configuration constants used throughout the notebook
constants_file = 'constants.json'
# Decode explicitly as UTF-8 (the standard JSON encoding) instead of relying on
# the platform's locale default, so the file loads the same way on every machine
with open(constants_file, encoding='utf-8') as f:
    constants = json.load(f)

In [5]:
# File path constants, read from the 'db_file_paths' section
file_path_dict = constants['db_file_paths']
DATABASE_FILE_PATH, DICT_FILE_PATH, USER_GOALS_FILE_PATH = (
    file_path_dict[key] for key in ('database', 'dict', 'user_goals')
)

# Run constants, read from the 'run' section
run_dict = constants['run']
(USE_USERSIM, WARMUP_MEM, NUM_EP_TRAIN,
 TRAIN_FREQ, MAX_ROUND_NUM, SUCCESS_RATE_THRESHOLD) = (
    run_dict[key]
    for key in ('usersim', 'warmup_mem', 'num_ep_run',
                'train_freq', 'max_round_num', 'success_rate_threshold')
)

In [6]:
# Load movie DB
# Note: If you get an unpickling error here then run 'pickle_converter.py' and it should fix it
# NOTE(review): pickle can run arbitrary code on load; only unpickle files from a trusted source
# The context manager closes the file handle even if unpickling fails
with open(DATABASE_FILE_PATH, 'rb') as database_file:
    database = pickle.load(database_file, encoding='latin1')

# Clean DB (return value is not used, so this presumably edits `database` in place; verify in utils)
remove_empty_slots(database)

In [7]:
# NOTE(review): pickle can run arbitrary code on load; only unpickle files from a trusted source
# Each file is opened in a context manager so its handle is closed even if unpickling fails

# Load movie dict
with open(DICT_FILE_PATH, 'rb') as dict_file:
    db_dict = pickle.load(dict_file, encoding='latin1')

# Load goal file
with open(USER_GOALS_FILE_PATH, 'rb') as goals_file:
    user_goals = pickle.load(goals_file, encoding='latin1')

In [8]:
# Build the user simulator from the loaded user goals, the constants and the movie DB
user = UserSimulator(user_goals, constants, database)

In [9]:
# Build the error model controller (used below to infuse error into user actions)
# from the movie dict and the constants
emc = ErrorModelController(db_dict, constants)

In [10]:
# Build the state tracker from the movie DB and the constants
state_tracker = StateTracker(database, constants)


In [11]:
# Display the state size reported by the state tracker; the same value is
# passed to DQNAgent when the agent is constructed
state_tracker.get_state_size()

224

In [12]:
# NOTE(review): DQNAgent is already imported in the first cell; this repeated import is redundant
from dqn_agent import DQNAgent
# Build the agent, passing it the state tracker's state size and the constants
dqn_agent = DQNAgent(state_tracker.get_state_size(), constants)

In [13]:
# Step 1: Get the current state (the next state returned by the previous round, or the initial state if this is
#         the start of the episode) and send it as input to the agent's get-action method

###   Runs the warmup stage of training, which is used to fill the agent's memory. The agent uses its rule-based policy to choose actions, and its memory is filled as this runs. The loop terminates when the size of the memory reaches WARMUP_MEM or when the memory buffer is full.
    

In [14]:
def run_round(state, warmup=False):
    # 1) Agent takes action given state tracker's representation of dialogue (state)
    print('Get action...')
    agent_action_index, agent_action = dqn_agent.get_action(state, use_rule=warmup)
    print('Action : ',agent_action)
    # 2) Update state tracker with the agent's action
    state_tracker.update_state_agent(agent_action)
    print('UPDATED HISTORY')
    state_tracker.print_history()
    # 3) User takes action given agent action
    user_action, reward, done, success = user.step(agent_action)
    print('User action : {} , Reward : {} , Done : {} , Success : {}'.format(user_action, reward, done, success))
    if not done:
        # 4) Infuse error into semantic frame level of user action
        emc.infuse_error(user_action)
    # 5) Update state tracker with user action
    state_tracker.update_state_user(user_action)
    # 6) Get next state and add experience
    next_state = state_tracker.get_state(done)
    dqn_agent.add_experience(state, agent_action_index, reward, next_state, done)
    
    return next_state, reward, done, success

In [15]:
def warmup_run():
    """
    Run the warmup stage, which fills the agent's memory before training.

    Every round is played with warmup=True. Whole episodes are played until at
    least WARMUP_MEM rounds have been run or the agent reports that its memory
    is full. The condition is only checked between episodes, so an episode that
    has started is always played to its end.
    """

    print('Warmup Started...')
    total_step = 0
    # '<' rather than '!=': total_step grows by a whole episode between checks, so it can
    # step over WARMUP_MEM without ever being equal to it, which would keep the warmup
    # running until the memory is completely full
    while total_step < WARMUP_MEM and not dqn_agent.is_memory_full():
        # Reset episode
        episode_reset()
        done = False
        # Get initial state from state tracker
        state = state_tracker.get_state()
        print('State : ',state)
        while not done:
            next_state, _, done, _ = run_round(state, warmup=True)
            total_step += 1
            state = next_state

    print('...Warmup Ended')

In [16]:
def episode_reset():
    """
    Start a fresh episode/conversation for the warmup and training loops.

    Resets the state tracker, the user simulator and the agent, and feeds the
    initial user action (after error infusion) into the state tracker.
    """

    state_tracker.reset()

    # Resetting the user simulator yields the initial user action
    opening_action = user.reset()
    # The error model sees the action before the state tracker does
    emc.infuse_error(opening_action)
    state_tracker.update_state_user(opening_action)

    # The agent is reset last
    dqn_agent.reset()

### Runs the loop that trains the agent on the goal-oriented chatbot task. The agent's neural network is trained after every episode whose number is a multiple of TRAIN_FREQ. The loop terminates when the episode count reaches NUM_EP_TRAIN.

#### 1. Get state : Returns the state representation as a numpy array, which is fed into the agent's neural network. The state representation contains useful information for the agent about the current state of the conversation. It is processed by the agent before being fed into the neural network.

#### 2. Get action : Returns the action of the agent given the current state. Either the rule-based policy or the neural networks are used to respond.

#### 3. Update state tracker : Updates the dialogue history with the agent's action and augments the agent's action. The agent's action is sent to the state tracker's (ST's) update method for agent actions. In this method the ST updates its own history of the current conversation and also augments the agent action with database query information and any other necessary information.

#### 4. User step : The updated agent action is sent as input into the user's step method. In step, the user sim crafts its own rule-based response and also outputs reward and success information. Given the agent action, it crafts a response using rules that simulate (to some extent) a user; the rules are mostly deterministic, but some parts are stochastic. It also checks whether the agent has succeeded, has failed, or the conversation is still going.

In [17]:
def train_run():
    """
    Run the training loop for NUM_EP_TRAIN episodes.

    After every TRAIN_FREQ-th episode the success rate and average reward of the
    period just finished are computed. The agent's memory is emptied when that
    success rate is at least the best so far and at least SUCCESS_RATE_THRESHOLD.
    The weights are saved when it is a new best. The agent's copy() and train()
    are then called.
    """

    print('Training Started...')
    best_rate = 0.0
    reward_sum = 0
    success_count = 0
    episode = 0
    while episode < NUM_EP_TRAIN:
        episode_reset()
        episode += 1

        state = state_tracker.get_state()
        print('STATE')
        print(state)
        print('HISTORY')
        state_tracker.print_history()

        # Play the conversation to its end, accumulating the per-round rewards
        done = False
        while not done:
            state, reward, done, success = run_round(state)
            reward_sum += reward

        # Only the success flag of the final round counts for the episode
        success_count += success

        # Nothing more to do unless a training period has just been completed
        if episode % TRAIN_FREQ != 0:
            continue

        period_success_rate = success_count / TRAIN_FREQ
        period_avg_reward = reward_sum / TRAIN_FREQ

        # Flush the memory when the period matched or beat the best rate and met the threshold
        matches_best = period_success_rate >= best_rate
        meets_threshold = period_success_rate >= SUCCESS_RATE_THRESHOLD
        if matches_best and meets_threshold:
            dqn_agent.empty_memory()

        # A strictly better rate becomes the new best and triggers a weight save
        if period_success_rate > best_rate:
            print('Episode: {} NEW BEST SUCCESS RATE: {} Avg Reward: {}' .format(episode, period_success_rate, period_avg_reward))
            best_rate = period_success_rate
            dqn_agent.save_weights()

        # Start the next period from zero
        success_count = 0
        reward_sum = 0

        # Copy, then train
        dqn_agent.copy()
        dqn_agent.train()

    print('...Training Ended')

In [18]:
# NOTE(review): per the recorded execution counts this ran before warmup_run(); confirm that order is intended
train_run()

Training Started...
Current form :  {'moviename': 'kung fu panda 3'}
DB Results :  {'moviename': 35, 'matching_all_constraints': 35}
Latest agent action :  None
One hot intents user action :  [0. 1. 0. 0. 0. 0.]
Bag of inform slots :  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0.]
Bag of request slots :  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
 0. 0. 0.]
STATE REPRESENT :  [0.   1.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
 0.   0.   0.   1.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
 0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
 0.   0.   0.   0.   0.   0.   0.   0.   0.   1.   0.   0.   0.   0.
 0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
 0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
 0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
 0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
 0

{'intent': 'inform', 'request_slots': {}, 'inform_slots': {'date': 'tomorrow'}, 'round': 5, 'speaker': 'User'}
{'intent': 'inform', 'inform_slots': {'critic_rating': 'no match available'}, 'request_slots': {}, 'round': 6, 'speaker': 'Agent'}
{'intent': 'inform', 'request_slots': {}, 'inform_slots': {'starttime': '8:45 pm'}, 'round': 6, 'speaker': 'User'}
{'intent': 'inform', 'inform_slots': {'critic_rating': 'no match available'}, 'request_slots': {}, 'round': 7, 'speaker': 'Agent'}
SUCCESS :  0
Round : 7 , Reward : -1
User action : {'intent': 'inform', 'request_slots': {}, 'inform_slots': {'numberofpeople': '2'}} , Reward : -1 , Done : False , Success : False
Current form :  {'moviename': 'big short', 'critic_rating': 'no match available', 'video_format': 'anything', 'theater': 'regal meridian 16', 'city': 'seattle', 'date': 'tomorrow', 'starttime': '8:45 pm', 'numberofpeople': '2'}
DB Results :  {'moviename': 3, 'critic_rating': 0, 'video_format': 991, 'theater': 131, 'city': 303, 'd

{'intent': 'request', 'inform_slots': {}, 'request_slots': {'distanceconstraints': 'UNK'}, 'round': 2, 'speaker': 'Agent'}
{'intent': 'inform', 'request_slots': {}, 'inform_slots': {'distanceconstraints': 'anything'}, 'round': 2, 'speaker': 'User'}
{'intent': 'request', 'inform_slots': {}, 'request_slots': {'distanceconstraints': 'UNK'}, 'round': 3, 'speaker': 'Agent'}
{'intent': 'inform', 'request_slots': {}, 'inform_slots': {'distanceconstraints': 'your area'}, 'round': 3, 'speaker': 'User'}
{'intent': 'inform', 'inform_slots': {'critic_rating': 'no match available'}, 'request_slots': {}, 'round': 4, 'speaker': 'Agent'}
{'intent': 'inform', 'request_slots': {}, 'inform_slots': {'date': 'tomorrow'}, 'round': 4, 'speaker': 'User'}
{'intent': 'inform', 'inform_slots': {'theater': 'no match available'}, 'request_slots': {}, 'round': 5, 'speaker': 'Agent'}
{'intent': 'inform', 'request_slots': {}, 'inform_slots': {'numberofpeople': '5'}, 'round': 5, 'speaker': 'User'}
{'intent': 'inform',

Action :  {'intent': 'inform', 'inform_slots': {'mpaa_rating': 'PLACEHOLDER'}, 'request_slots': {}}
UPDATED HISTORY
{'intent': 'request', 'request_slots': {'ticket': 'UNK'}, 'inform_slots': {'moviename': 'the witch'}, 'round': 0, 'speaker': 'User'}
{'intent': 'inform', 'inform_slots': {'date': 'tomorrow'}, 'request_slots': {}, 'round': 1, 'speaker': 'Agent'}
{'intent': 'request', 'request_slots': {'ticket': 'UNK'}, 'inform_slots': {}, 'round': 1, 'speaker': 'User'}
{'intent': 'request', 'inform_slots': {}, 'request_slots': {'distanceconstraints': 'UNK'}, 'round': 2, 'speaker': 'Agent'}
{'intent': 'inform', 'request_slots': {}, 'inform_slots': {'distanceconstraints': 'anything'}, 'round': 2, 'speaker': 'User'}
{'intent': 'inform', 'inform_slots': {'mpaa_rating': 'no match available'}, 'request_slots': {}, 'round': 3, 'speaker': 'Agent'}
SUCCESS :  0
Round : 3 , Reward : -1
User action : {'intent': 'inform', 'request_slots': {}, 'inform_slots': {'numberofpeople': '2'}} , Reward : -1 , Do

In [19]:
# Run the warmup stage (the recorded output of this cell ends with a NameError for 'FAIL')
warmup_run()

Warmup Started...
Current form :  {'moviename': 'star wars'}
DB Results :  {'moviename': 40, 'matching_all_constraints': 40}
Latest agent action :  None
One hot intents user action :  [0. 1. 0. 0. 0. 0.]
Bag of inform slots :  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0.]
Bag of request slots :  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
 0. 0. 0.]
STATE REPRESENT :  [0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.
 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.
 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
 0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0

NameError: name 'FAIL' is not defined

In [43]:
# NOTE(review): the execution count jumps from 19 to 43 here; restart the kernel and run all cells
# to confirm the recorded output below reproduces
train_run()

Training Started...
Episode: 400 NEW BEST SUCCESS RATE: 0.03 Avg Reward: -36.03
Episode: 1000 NEW BEST SUCCESS RATE: 0.11 Avg Reward: -32.1
Episode: 1500 NEW BEST SUCCESS RATE: 0.31 Avg Reward: -19.09
Episode: 1600 NEW BEST SUCCESS RATE: 0.39 Avg Reward: -14.07
Episode: 2500 NEW BEST SUCCESS RATE: 0.41 Avg Reward: -10.16
Episode: 2600 NEW BEST SUCCESS RATE: 0.45 Avg Reward: -7.32
Episode: 2700 NEW BEST SUCCESS RATE: 0.51 Avg Reward: -3.41
Episode: 2800 NEW BEST SUCCESS RATE: 0.52 Avg Reward: -2.2
Episode: 3700 NEW BEST SUCCESS RATE: 0.55 Avg Reward: 0.27
Episode: 4400 NEW BEST SUCCESS RATE: 0.56 Avg Reward: 1.34
Episode: 4700 NEW BEST SUCCESS RATE: 0.62 Avg Reward: 6.99
Episode: 5100 NEW BEST SUCCESS RATE: 0.66 Avg Reward: 9.75
Episode: 5600 NEW BEST SUCCESS RATE: 0.75 Avg Reward: 15.19
Episode: 5900 NEW BEST SUCCESS RATE: 0.79 Avg Reward: 18.81
Episode: 6200 NEW BEST SUCCESS RATE: 0.81 Avg Reward: 19.09
Episode: 7200 NEW BEST SUCCESS RATE: 0.84 Avg Reward: 21.39
Episode: 8900 NEW BEST