In [None]:
import numpy as np
import operator, sys, os
import tensorflow as tf
import plotting
import itertools
import matplotlib
import collections
import random
matplotlib.style.use('ggplot')

In [None]:
# Question tables. Each table has one row per age group (young adult, middle age,
# old age) and one column per product (life, car, dental, health, disability).

# Follow-up questions named "insured_neg_*".
insured_neg_question_table = np.array([
                              ["insured_neg_yng_adult_life","insured_neg_yng_adult_car","insured_neg_yng_adult_dental","insured_neg_yng_adult_health","insured_neg_yng_adult_disability"],
                              ["insured_neg_middle-age_life","insured_neg_middle-age_car","insured_neg_middle-age_dental","insured_neg_middle-age_health","insured_neg_middle-age_disability"],
                              ["insured_neg_old-age_life","insured_neg_old-age_car","insured_neg_old-age_dental","insured_neg_old-age_health","insured_neg_old-age_disability"]
                               ])

# Follow-up questions named "insured_pos_*".
insured_pos_question_table = np.array([
                              ["insured_pos_yng_adult_life","insured_pos_yng_adult_car","insured_pos_yng_adult_dental","insured_pos_yng_adult_health","insured_pos_yng_adult_disability"],
                              ["insured_pos_middle-age_life","insured_pos_middle-age_car","insured_pos_middle-age_dental","insured_pos_middle-age_health","insured_pos_middle-age_disability"],
                              ["insured_pos_old-age_life","insured_pos_old-age_car","insured_pos_old-age_dental","insured_pos_old-age_health","insured_pos_old-age_disability"]
                               ])

# Opening questions named "open_*".
open_question_table = np.array([
                              ["open_yng_adult_life","open_yng_adult_car","open_yng_adult_dental","open_yng_adult_health","open_yng_adult_disability"],
                              ["open_middle-age_life","open_middle-age_car","open_middle-age_dental","open_middle-age_health","open_middle-age_disability"],
                              ["open_old-age_life","open_old-age_car","open_old-age_dental","open_old-age_health","open_old-age_disability"]
                               ])



def get_action_dict(open_question_table,insured_pos_question_table,insured_neg_question_table):
    """Map every question string to an integer action id.

    Args:
        open_question_table: 3-row table of opening questions.
        insured_pos_question_table: 3-row table of "insured_pos" follow-ups.
        insured_neg_question_table: 3-row table of "insured_neg" follow-ups.

    Returns:
        Nested dict ``{block: {age_group: {question: action_id}}}`` with the
        blocks "open", "insured_pos", "insured_neg" and "close".  Rows are
        numbered consecutively block by block; the single closing action is
        always id 45.
    """
    def _number(questions, first_id):
        # Consecutive ids for one table row, starting at first_id.
        return dict((question, action_id)
                    for action_id, question in enumerate(questions, first_id))

    opening = open_question_table
    positive = insured_pos_question_table
    negative = insured_neg_question_table

    action_dict = {
        "open": {
            "young_adult": _number(opening[0], 0),
            "middle_age": _number(opening[1], len(opening[1])),
            "old-age": _number(opening[2], 2 * len(opening[2])),
        },
        "insured_pos": {
            # The first offset is derived from the width of row 2, as in the
            # original expression.
            "young_adult": _number(positive[0], 3 * len(positive[2])),
            "middle_age": _number(positive[1], 4 * len(positive[1])),
            "old-age": _number(positive[2], 5 * len(positive[2])),
        },
        "insured_neg": {
            "young_adult": _number(negative[0], 6 * len(negative[0])),
            "middle_age": _number(negative[1], 7 * len(negative[1])),
            "old-age": _number(negative[2], 8 * len(negative[2])),
        },
        "close": {"all": {"Is the Customer Interested?": 45}},
    }
    return action_dict


In [None]:
class User:
    """Simulated customer that reacts to the bot's actions.

    The user is a small state machine driven by ``respond``:

    * state 0 -- waiting for an opening question
    * state 1 -- waiting for a follow-up question; the expected block is
      "insured_pos" when ``is_insured`` is 1 and "insured_neg" when it is 0
    * state 2 -- waiting for the closing action
    * state 3 / 4 -- end states; ``respond`` has no transition out of them

    ``user_class`` (0, 1 or 2) selects the age group whose questions the user
    understands plus the reward/transition details listed in ``_PROFILES``.
    """

    # Everything that differs between the three user classes.
    #   age_group         -- key of the question rows this user understands
    #   open_miss_reward  -- reward when the opening action is not understood
    #   ends_on_miss      -- True: any misunderstood action jumps to state 3,
    #                        False: the user stays in the current state
    #   resets_insured    -- True: a misunderstood opening also clears is_insured
    #   declined_reward   -- reward for the closing action after a "no" answer
    #   reports_bad_state -- True: print a diagnostic when respond() is called
    #                        in a state it does not handle
    _PROFILES = {
        # NOTE(review): class 0 rewards a declined close with +1 while classes
        # 1 and 2 use -1; kept as-is, confirm whether this is intended.
        0: dict(age_group="young_adult", open_miss_reward=-1, ends_on_miss=False,
                resets_insured=True, declined_reward=1, reports_bad_state=False),
        1: dict(age_group="middle_age", open_miss_reward=0, ends_on_miss=True,
                resets_insured=False, declined_reward=-1, reports_bad_state=True),
        # "old-age" (with a hyphen) is the key used by the action dictionaries
        # built in this notebook; looking up 'old_age' raised KeyError.
        2: dict(age_group="old-age", open_miss_reward=0, ends_on_miss=False,
                resets_insured=False, declined_reward=-1, reports_bad_state=True),
    }

    def __init__(self, user_class, action_dict):
        """Create a user in state 0.

        Args:
            user_class: 0, 1 or 2; selects the entry of ``_PROFILES``.
            action_dict: nested ``{block: {age_group: {question: action_id}}}``
                mapping with the blocks "open", "insured_pos", "insured_neg"
                and "close".
        """
        self.name="Simulated-user"
        self.user_class = user_class
        self.state = 0
        self.action_dict = action_dict
        self.prev_response = 0
        self.is_insured = 0

    def print_user_speech(self):
        """Print the last answer: yes for prev_response 1, no for 0."""
        if self.prev_response == 1:
            print("USER SAID YES")
        elif self.prev_response == 0:
            print("USER SAID NO")

    def respond(self,action):
        """Advance the simulated dialogue by one bot action.

        Args:
            action: integer action id chosen by the bot.

        Returns:
            ``(state, prev_response, is_insured, reward)`` after the
            transition, or ``None`` when the user class is unknown or the
            current state has no transition (e.g. an end state).
        """
        profile = self._PROFILES.get(self.user_class)
        if profile is None:
            return None
        age_group = profile["age_group"]

        if self.state == 0:
            if action in self.action_dict['open'][age_group].values():
                self.state = 1
                # The answer to the opening question doubles as the insured flag.
                self.prev_response = np.random.choice([0,1], p=[0.5,0.5])
                self.is_insured = 0 if self.prev_response == 0 else 1
                self.print_user_speech()
                return (self.state, self.prev_response, self.is_insured, 0)
            if profile["resets_insured"]:
                self.is_insured = 0
            return self._misunderstood(action, profile, 0, profile["open_miss_reward"])

        if self.state == 1 and self.is_insured == 1:
            return self._follow_up(action, 'insured_pos', profile)
        if self.state == 1 and self.is_insured == 0:
            return self._follow_up(action, 'insured_neg', profile)

        if self.state == 2:
            if action in self.action_dict['close']['all'].values():
                print("[Close] State reached; Need action: 45, Got action",action)
                if self.prev_response == 1:
                    self.state = 4
                    reward = 1
                else:
                    self.state = 3
                    reward = profile["declined_reward"]
                self.print_user_speech()
                return (self.state, self.prev_response, self.is_insured, reward)
            return self._misunderstood(action, profile, 2, -1)

        if profile["reports_bad_state"]:
            print("Something went wrong Inside. Cirrent State:",self.state)
        return None

    def _follow_up(self, action, block, profile):
        """Handle state 1: accept a question from ``block`` or report a miss."""
        if action in self.action_dict[block][profile["age_group"]].values():
            self.state = 2
            self.prev_response = np.random.choice([0,1], p=[0.2,0.8])
            self.print_user_speech()
            return (self.state, self.prev_response, self.is_insured, 0)
        return self._misunderstood(action, profile, 1, -1)

    def _misunderstood(self, action, profile, stay_state, reward):
        """Handle an action the user does not understand in the current state."""
        print("User said I DID NOT UNDERSTAND",action)
        self.state = 3 if profile["ends_on_miss"] else stay_state
        self.prev_response = 0
        return (self.state, self.prev_response, self.is_insured, reward)





In [None]:
class Environment:
    """Dialogue environment that forwards bot actions to a simulated user.

    The observation is the 5-element list
    ``[user_class, item_class, prev_response, dialog_state, is_insured]``.
    Dialog states 3 and 4 are terminal (see ``is_terminal``).
    """

    # Age-group key of the question rows for each user class; unknown classes
    # fall back to "young_adult".
    _AGE_GROUPS = {0: "young_adult", 1: "middle_age", 2: "old-age"}

    def __init__(self, uclass, iclass):
        """Store the user/item class and build the action dictionary.

        Args:
            uclass: user class, stored as the first observation entry.
            iclass: item class, stored as the second observation entry.
        """
        self.name="Chat-Environment"

        self.user_class = uclass
        self.item_class = iclass

        self.prev_response = 0

        self.dialog_state_dict = {"open":0, "ask":1, "close":2, "closed_0":3, "closed_1":4}
        self.dialog_state = 0
        self.is_insured = 0

        # State and Action Space
        self.action_dict = get_action_dict(open_question_table,insured_pos_question_table,insured_neg_question_table)

        # One action per question across every block and age group.
        self.N_actions = sum(len(questions)
                             for groups in self.action_dict.values()
                             for questions in groups.values())
        print("No. actions:", self.N_actions)

        self.state_dim = len(self._observation())
        self.action_dim = (self.N_actions,)

        self.reward = 0
        self.state = None

    def _observation(self):
        """Return the current observation as a plain list."""
        return [self.user_class,
                self.item_class,
                self.prev_response,
                self.dialog_state,
                self.is_insured]

    def define_rewards(self):
        """Return a reward based only on the dialog state (1, -0.1 or 0).

        Not used by ``step``, which forwards the reward reported by the user.
        """
        if self.dialog_state == 4:
            return 1
        if self.dialog_state == 3:
            return -0.1
        return 0

    def allowed_actions(self, action_probs):
        """Return the action ids valid in the current state and their probabilities.

        Args:
            action_probs: per-action probabilities indexed by action id.

        Returns:
            ``(actions, allowed_action_probs)``; both are empty lists when the
            dialog state has no valid actions (e.g. a terminal state).
        """
        state = self.state
        dialog_state = state[3]
        # Pick the question rows of this user's age group instead of always
        # using the young-adult rows.
        age_group = self._AGE_GROUPS.get(state[0], "young_adult")
        # state[4] is the insured flag. state[2] (the previous response) is
        # reset to 0 after a misunderstood action and would then select the
        # wrong follow-up block.
        insured = state[4]

        if dialog_state == 0:
            block = self.action_dict["open"][age_group]
        elif dialog_state == 1 and insured == 1:
            block = self.action_dict['insured_pos'][age_group]
        elif dialog_state == 1 and insured == 0:
            block = self.action_dict['insured_neg'][age_group]
        elif dialog_state == 2:
            block = self.action_dict["close"]['all']
        else:
            print("Dialog state is wrong or terminal state is reached")
            return [], []

        actions = list(block.values())
        # asarray so plain sequences can be indexed with a list of ids too.
        return actions, np.asarray(action_probs)[actions]

    def reset(self):
        """Start a new dialogue and return the initial observation array."""
        print("_______ENV RESET___________ ")
        self.dialog_state = 0
        self.is_insured = 0
        self.prev_response = 0
        # Keep the configured user/item class in the observation instead of
        # overwriting them with zeros.
        self.state = self._observation()
        return np.array(self.state).flatten()

    def is_terminal(self, state):
        """Return 1 when the dialog state in ``state[3]`` is 3 or 4, else 0."""
        return 1 if state[3] in (3, 4) else 0

    def step(self, action, user):
        """Send ``action`` to ``user`` and record the resulting state.

        Args:
            action: integer action id chosen by the bot.
            user: object whose ``respond(action)`` returns
                ``(dialog_state, prev_response, is_insured, reward)``.

        Returns:
            ``(observation array, done flag, reward)``.
        """
        print("Bot said ",action)
        next_dialog_state, prev_response,is_insured, reward = user.respond(action)
        self.reward = reward
        self.dialog_state = next_dialog_state
        self.prev_response = prev_response
        self.is_insured = is_insured
        self.state = self._observation()
        done = self.is_terminal(self.state) == 1

        return np.array(self.state), done, self.reward
    

        
        
        
        
        
            
            

In [None]:

# Module-level environment; the estimator classes below read env.state_dim and
# env.N_actions from it when they build their networks.
env = Environment(uclass=0, iclass=0)

In [None]:
class PolicyEstimator():
    """
    Policy Function approximator.

    A three-hidden-layer network (24, 48, 24 ReLU units) that maps a state
    to one logit per action.  Input width and number of actions are read from
    the module-level ``env`` (``env.state_dim`` and ``env.N_actions``).
    """

    def __init__(self, learning_rate=0.01, scope="policy_estimator"):
        """Build the graph.

        Args:
            learning_rate: learning rate passed to the Adam optimizer.
            scope: variable scope that holds the network's variables.
        """
        with tf.variable_scope(scope):
            self.state = tf.placeholder(tf.float32, [None,int(env.state_dim)], "state")
            self.action = tf.placeholder(dtype=tf.int32, name="action")
            self.target = tf.placeholder(dtype=tf.float32, name="target")

            hidden = self.state
            for units in (24, 48, 24):
                hidden = tf.layers.dense(hidden, units, activation=tf.nn.relu)
            self.output_layer = tf.layers.dense(hidden, env.N_actions)
            print("OUTPUT LAYER",self.output_layer)

            # squeeze drops size-1 dimensions, so this is a flat vector of
            # action probabilities when a single state is fed.
            self.action_probs = tf.squeeze(tf.nn.softmax(self.output_layer))
            print("In Estimator Action Probs:",self.action_probs)
            print("Shape", self.action_probs.shape)
            self.picked_action_prob = tf.gather(self.action_probs, self.action)

            # Loss and train op
            # Take the log-probability straight from the logits: log(softmax(x))
            # becomes -inf once a probability underflows to 0, which turns the
            # loss and the gradients into inf/NaN.
            log_action_probs = tf.squeeze(tf.nn.log_softmax(self.output_layer))
            self.loss = -tf.gather(log_action_probs, self.action) * self.target

            self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            self.train_op = self.optimizer.minimize(
                self.loss, global_step=tf.contrib.framework.get_global_step())

    def predict(self, state, sess=None):
        """Return the action probabilities for ``state``.

        Uses ``sess`` when given, otherwise the default TensorFlow session.
        """
        sess = sess or tf.get_default_session()
        return sess.run(self.action_probs, { self.state: state })

    def update(self, state, target, action, sess=None):
        """Run one optimizer step and return the loss.

        Args:
            state: state batch fed to the ``state`` placeholder.
            target: weight applied to the negative log-probability of ``action``.
            action: id of the action that was taken.
            sess: session to use; defaults to the default TensorFlow session.
        """
        sess = sess or tf.get_default_session()
        feed_dict = { self.state: state, self.target: target, self.action: action  }
        _, loss = sess.run([self.train_op, self.loss], feed_dict)
        return loss

In [None]:
class ValueEstimator():
    """
    Value Function approximator.

    A three-hidden-layer network (24, 48, 24 ReLU units) with a single linear
    output.  The input width is read from the module-level ``env``.
    """

    def __init__(self, learning_rate=0.01, scope="value_estimator"):
        """Build the graph inside variable scope ``scope``."""
        with tf.variable_scope(scope):
            self.state = tf.placeholder(tf.float32, [None,int(env.state_dim)], "state")
            self.target = tf.placeholder(dtype=tf.float32, name="target")

            features = self.state
            for width in (24, 48, 24):
                features = tf.layers.dense(features, width, activation=tf.nn.relu)
            self.output_layer = tf.layers.dense(features, 1)

            # squeeze drops the size-1 dimensions of the output.
            self.value_estimate = tf.squeeze(self.output_layer)
            self.loss = tf.squared_difference(self.value_estimate, self.target)

            self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            self.train_op = self.optimizer.minimize(
                self.loss,
                global_step=tf.contrib.framework.get_global_step())

    def predict(self, state, sess=None):
        """Return the value estimate for ``state`` (default session if ``sess`` is omitted)."""
        session = sess or tf.get_default_session()
        feeds = { self.state: state }
        return session.run(self.value_estimate, feeds)

    def update(self, state, target, sess=None):
        """Run one optimizer step towards ``target`` and return the loss."""
        session = sess or tf.get_default_session()
        feeds = { self.state: state, self.target: target }
        fetched = session.run([self.train_op, self.loss], feeds)
        return fetched[1]

In [None]:
def actor_critic(env, estimator_policy, estimator_value, num_episodes, discount_factor=1.0):
    """
    Actor Critic Algorithm. Optimizes the policy 
    function approximator using policy gradient.
    
    Args:
        env: Environment providing reset(), step(action, user), user_class,
            action_dict and dialog_state.
        estimator_policy: Policy Function to be optimized 
        estimator_value: Value function approximator, used as a critic
        num_episodes: Number of episodes to run for
        discount_factor: Time-discount factor
    
    Returns:
        An EpisodeStats object with two numpy arrays for episode_lengths and episode_rewards.
    """

    # Keeps track of useful statistics
    stats = plotting.EpisodeStats(
        episode_lengths=np.zeros(num_episodes),
        episode_rewards=np.zeros(num_episodes))    
    
    for i_episode in range(num_episodes):
        # Reset the environment and create a fresh simulated user
        print("New Episode", i_episode)
        state = env.reset()
        # Simulate the user class the environment was configured with.
        user = User(env.user_class, env.action_dict)
        
        # One step in the environment
        for t in itertools.count():
            # Batch of one; -1 avoids hard-coding the state width.
            state_batch = state.reshape([1, -1])

            # Greedy action with a 10% chance of a uniformly random action.
            action_probs = estimator_policy.predict(state_batch)
            expl_chance = np.random.choice([1,0], p=[0.1,0.9])
            if (expl_chance == 1):
                action = np.random.choice(np.arange(len(action_probs)))
            else:
                action = np.argmax(action_probs)

            print("ENV State BEFORE Step",state)
            next_state, done, reward = env.step(action,user)
            print("ENV State After Step",next_state)
            print("Step Rewards", reward)
            
            # Update statistics
            stats.episode_rewards[i_episode] += reward
            stats.episode_lengths[i_episode] = t
            
            # Calculate TD Target.  Nothing follows a terminal state, so its
            # value is 0 and the target must not bootstrap from the critic.
            if done:
                value_next = 0.0
            else:
                value_next = estimator_value.predict(next_state.reshape([1, -1]))
            td_target = reward + discount_factor * value_next
            td_error = td_target - estimator_value.predict(state_batch)
            
            # Update the value estimator
            estimator_value.update(state_batch, td_target)
            
            # Update the policy estimator
            # using the td error as our advantage estimate
            estimator_policy.update(state_batch, td_error, action)
            
            # Print out which step we're on, useful for debugging.
            print("\rStep {} @ Episode {}/{} ({})".format(
                    t, i_episode , num_episodes, stats.episode_rewards[i_episode]), end="")

            if done:
                print("Final Dialog State", env.dialog_state)
                print("Episode DONE")
                break
                
            state = next_state
    
    return stats

In [None]:
# Build a fresh graph, then train the actor-critic pair.
tf.reset_default_graph()

global_step = tf.Variable(0, name="global_step", trainable=False)
policy_estimator = PolicyEstimator()
value_estimator = ValueEstimator()

with tf.Session() as sess:
    # tf.initialize_all_variables is deprecated; this is its replacement.
    sess.run(tf.global_variables_initializer())
    # Note, due to randomness in the policy the number of episodes you need to learn a good
    # policy may vary. ~300 seemed to work well for me.
    # Arguments: 1000 episodes, discount factor 0.8.
    stats = actor_critic(env, policy_estimator, value_estimator, 1000, 0.8)

In [None]:
# Plot the statistics returned by actor_critic, smoothed over a window of 10.
plotting.plot_episode_stats(stats, smoothing_window=10)

In [None]:
# Display the raw statistics object returned by actor_critic.
stats

In [None]:
# FIXME(review): `nS` is not defined in any visible cell of this notebook, so this
# raises NameError on a fresh kernel unless it is defined elsewhere.
isd = np.zeros(nS)

In [None]:
# Inspect the shape of the array created in the previous cell.
isd.shape

In [None]:
# Scratch cell: re-declares open_question_table with the same contents as the
# definition near the top of the notebook.
open_question_table = np.array([
                              ["open_yng_adult_life","open_yng_adult_car","open_yng_adult_dental","open_yng_adult_health","open_yng_adult_disability"],
                              ["open_middle-age_life","open_middle-age_car","open_middle-age_dental","open_middle-age_health","open_middle-age_disability"],
                              ["open_old-age_life","open_old-age_car","open_old-age_dental","open_old-age_health","open_old-age_disability"]
                               ])


In [None]:
# Scratch cell: re-declares insured_pos_question_table with the same contents as
# the definition near the top of the notebook.
insured_pos_question_table = np.array([
                              ["insured_pos_yng_adult_life","insured_pos_yng_adult_car","insured_pos_yng_adult_dental","insured_pos_yng_adult_health","insured_pos_yng_adult_disability"],
                              ["insured_pos_middle-age_life","insured_pos_middle-age_car","insured_pos_middle-age_dental","insured_pos_middle-age_health","insured_pos_middle-age_disability"],
                              ["insured_pos_old-age_life","insured_pos_old-age_car","insured_pos_old-age_dental","insured_pos_old-age_health","insured_pos_old-age_disability"]
                               ])


In [None]:
# Scratch cell: re-declares insured_neg_question_table with the same contents as
# the definition near the top of the notebook.
insured_neg_question_table = np.array([
                              ["insured_neg_yng_adult_life","insured_neg_yng_adult_car","insured_neg_yng_adult_dental","insured_neg_yng_adult_health","insured_neg_yng_adult_disability"],
                              ["insured_neg_middle-age_life","insured_neg_middle-age_car","insured_neg_middle-age_dental","insured_neg_middle-age_health","insured_neg_middle-age_disability"],
                              ["insured_neg_old-age_life","insured_neg_old-age_car","insured_neg_old-age_dental","insured_neg_old-age_health","insured_neg_old-age_disability"]
                               ])




In [None]:
# Closing question as a one-element array.
# NOTE(review): not referenced by any visible cell, and the capitalisation differs
# from the "Is the Customer Interested?" key used in get_action_dict.
close = np.array(["Is the customer interested?"])

In [None]:
def get_actions(open_question_table,insured_pos_question_table,insured_neg_question_table):
    """Map every question string to an integer action id.

    Returns a nested dict ``{block: {age_group: {question: action_id}}}`` with
    the blocks "ask", "insured_pos" and "insured_neg"; rows are numbered
    consecutively block by block.  Unlike get_action_dict, the opening block is
    keyed "ask" and there is no "close" block.
    """
    def _ids_from(row, start):
        # Pair each question of the row with start, start + 1, ...
        numbered = {}
        for action_id, question in enumerate(row, start):
            numbered[question] = action_id
        return numbered

    ask_block = {
        "young_adult": _ids_from(open_question_table[0], 0),
        "middle_age": _ids_from(open_question_table[1], len(open_question_table[1])),
        "old-age": _ids_from(open_question_table[2], 2 * len(open_question_table[2])),
    }
    positive_block = {
        # The first offset uses the width of row 2, as in the original expression.
        "young_adult": _ids_from(insured_pos_question_table[0], 3 * len(insured_pos_question_table[2])),
        "middle_age": _ids_from(insured_pos_question_table[1], 4 * len(insured_pos_question_table[1])),
        "old-age": _ids_from(insured_pos_question_table[2], 5 * len(insured_pos_question_table[2])),
    }
    negative_block = {
        "young_adult": _ids_from(insured_neg_question_table[0], 6 * len(insured_neg_question_table[0])),
        "middle_age": _ids_from(insured_neg_question_table[1], 7 * len(insured_neg_question_table[1])),
        "old-age": _ids_from(insured_neg_question_table[2], 8 * len(insured_neg_question_table[2])),
    }
    return {"ask": ask_block, "insured_pos": positive_block, "insured_neg": negative_block}
    

In [None]:
# Scratch experiment. Every iteration writes the same "user_class1" key, so the
# inner dict ends up holding only the last row of open_question_table.
dict({"ask":{"user_class1":q for q in list(open_question_table)[0:]}})

In [None]:
# Scratch: literal example of the nested {block: {question: id}} layout.
{"open": {"open1":1}}

In [None]:
# Same mapping the literal spelled out here, built by get_actions (defined in an
# earlier cell) instead of repeating its body.
a = get_actions(open_question_table, insured_pos_question_table, insured_neg_question_table)

In [None]:
# Inspect the ids assigned to the middle-age opening questions.
a['ask']['middle_age']

In [None]:
# Print index and question for the first row of open_question_table.
for idx, val in enumerate(open_question_table[0]):
    print(idx, val)

In [None]:
# Rebinds `a` to a single 0/1 draw.
# NOTE(review): the draw happens once, before the loop, so all 100000 iterations
# print the same value -- confirm whether sampling inside the loop was intended.
a = np.random.choice([0,1], p=[0.5,0.5])
for i in range(100000):
    print(a)
    

In [None]:
# Display the current value of `a`.
a

In [None]:
# Rebinds `a` to the full action dictionary (including the "close" block).
a = get_action_dict(open_question_table,insured_pos_question_table,insured_neg_question_table)

In [None]:
# Total number of actions: one per question across every block and age group of `a`.
N_actions = sum(len(questions)
                for groups in a.values()
                for questions in groups.values())
print("No. actions:", N_actions)

In [None]:
# Number of opening questions for the young-adult group.
len(a['open']['young_adult'].items())

In [None]:
# Build the action dictionary again under the name `t`.
t = get_action_dict(open_question_table,insured_pos_question_table,insured_neg_question_table)

In [None]:
# Total number of actions in `t`, summed over every block and age group.
N_actions = sum(len(questions)
                for groups in t.values()
                for questions in groups.values())

print("No. actions:", N_actions)

In [None]:
# Display the full action dictionary.
t

In [None]:
import numpy as np

# Print 1000 independent draws that are 1 with probability 0.9.
for _ in range(1000):
    draw = np.random.choice([0,1], p=[0.1,0.9])
    print(draw)

In [None]:
# Scratch: rebinds `a` to a small float array for the indexing experiment below.
a = np.array([0.3,0.5,0.6,0.34])

In [None]:
# Scratch: list of positions to select.
actions = [1,2]

In [None]:
# Index the array with a list of positions (selects elements 1 and 2).
a[actions]

In [None]:
# Scratch: np.argmax returns the index of the largest element.
a = np.array([1,2,3,4,5])
np.argmax(a)