In [1]:
import numpy as np

In [2]:
# --- Game definition ---------------------------------------------------------
LOWER = 0               # lowest secret number; also the initial lower bound of the state
UPPER = 100             # exclusive bound when drawing the secret number; initial upper bound of the state
ACTION_SIZE = 100       # guessed numbers are 0-99
MAX_GUESSES = 100       # a played game is stopped after this many guesses

# --- Learning hyper-parameters -----------------------------------------------
N_EXPERIENCES = 1000    # number of training games
MIN_ALPHA = 0.02        # learning rate used for the last training game
# One learning rate per training game, decaying linearly from 1.0 to MIN_ALPHA
ALPHAS = np.linspace(start=1.0, stop=MIN_ALPHA, num=N_EXPERIENCES)
GAMMA = 0.9             # discount factor (intended for the Bellman update)
EPSILON = 0.1           # exploration rate (intended for the epsilon-greedy policy)

# --- Rewards -----------------------------------------------------------------
DUMP_REWARD = -10       # guess outside the currently known [lower, upper] range
SMART_REWARD = 50       # base value of the reward for a wrong guess inside the range
DONE_REWARD = 100       # correct guess

In [3]:
# Game state: the bounds currently known for the secret number n
#   - lower: n >= lower
#   - upper: n <= upper
#
# Action: a guess, i.e. an integer in [0, 99]
# Q-values: Q(state, action); in a given state the action with the highest Q-value is chosen

In [4]:
# Utility functions to convert/decode lower, upper <-> state
def decode_state(state):
  """Split an encoded state back into its (lower, upper) bounds.

  Inverse of encode_state: the last three decimal digits hold the upper
  bound, everything above them holds the lower bound.
  """
  # A state below 1000 has no lower part: lower is 0 and the state itself is upper
  if state < 1000:
    return 0, state
  # quotient -> lower, remainder -> upper
  return divmod(state, 1000)
  
def encode_state(lower, upper):
  """Pack the (lower, upper) bounds into a single integer state.

  The lower bound is shifted three decimal digits to the left and the upper
  bound is added, e.g. (5, 100) -> 5100.
  """
  shifted_lower = 1000 * lower
  return shifted_lower + upper

In [5]:
class GameAgent:
  """Tabular Q-learning agent for the number guessing game.

  Attributes:
    q_table: dictionary, each key is a state, each value is a NumPy array
      holding the Q-values of all actions for that state.
    guessed: list of flags, one per action (1 - already guessed in the
      current game, 0 - not yet).
  """

  def __init__(self):
    self.q_table = {}
    # Start with a full flag list so the agent is usable even before reset()
    self.guessed = [0] * ACTION_SIZE

  def reset(self):
    """Reset the agent after each game; the learned Q-table is kept."""
    self.guessed = [0] * ACTION_SIZE

  def number_was_not_guessed(self, number):
    """Return True if `number` has not been guessed in the current game."""
    return self.guessed[number] == 0

  def random_policy(self, state):
    """Pick a random number that was not guessed before and mark it as guessed.

    Numbers inside the [lower, upper] range decoded from `state` are
    preferred. If all of them were already tried, any number that was not
    guessed yet is used, and as a last resort any number at all.
    """
    lower, upper = decode_state(state)
    # Clip the range to the action space: the initial upper bound can exceed the largest action
    first = max(lower, 0)
    last = min(upper, ACTION_SIZE - 1)
    candidates = [n for n in range(first, last + 1) if self.number_was_not_guessed(n)]
    if not candidates:
      candidates = [n for n in range(ACTION_SIZE) if self.number_was_not_guessed(n)]
    if not candidates:
      candidates = list(range(ACTION_SIZE))

    action = int(np.random.choice(candidates))
    self.guessed[action] = 1
    return action

  def policy(self, state):
    """Return the not-yet-guessed action with the highest Q(state, action).

    The chosen action is marked as guessed. Ties resolve to the smallest
    number (np.argmax). If every number was already guessed, the plain
    argmax over all actions is returned.
    """
    q_values = self.Q(state)
    not_guessed = np.asarray(self.guessed) == 0
    if not_guessed.any():
      # Hide already guessed numbers so the agent never repeats a guess
      q_values = np.where(not_guessed, q_values, -np.inf)

    action = int(np.argmax(q_values))
    self.guessed[action] = 1
    return action

  def choose_action(self, state):
    """Epsilon-greedy policy: explore with probability EPSILON, else exploit."""
    if np.random.random() < EPSILON:
      return self.random_policy(state)
    return self.policy(state)

  def Q(self, state, action=None):
    """Return Q(state, action), or all Q-values of `state` if action is None.

    A state seen for the first time is added to the Q-table with all
    Q-values set to zero. The array returned for `action=None` is the one
    stored in the table, so writing into it updates the table.
    """
    if state not in self.q_table:
      self.q_table[state] = np.zeros(ACTION_SIZE)
    if action is None:
      return self.q_table[state]

    return self.q_table[state][action]

class Environment:
  """The guessing game: holds the secret number and the current state."""

  def __init__(self):
    self.state = 0
    self.number = -1

  def reset(self, interact=False):
    """Start a new game and return (secret number, initial state).

    With interact=True the secret number is read from the user, otherwise it
    is drawn with np.random.randint(LOWER, UPPER).
    """
    if interact:
      self.number = int(input('Enter a number to guess: '))
    else:
      self.number = np.random.randint(LOWER, UPPER)
    self.state = encode_state(LOWER, UPPER)
    return self.number, self.state

  def perform(self, action):
    """Apply a guess and return (new state, reward, done)."""
    lower, upper = decode_state(self.state)
    done = bool(action == self.number)

    if done:
      reward = DONE_REWARD
    elif action < lower or action > upper:
      # Guess outside the range that is already known
      reward = DUMP_REWARD
    else:
      # The closer the guess is to the middle of the range, the larger the reward
      middle = (upper + lower) // 2
      reward = (SMART_REWARD - np.abs(middle - action)) ** 3

    # Update lower and upper: a wrong guess can only tighten the bound on its side
    if action < self.number:
      lower = max(lower, action)
    elif action > self.number:
      upper = min(upper, action)
    # Get the new state from lower, upper
    self.state = encode_state(lower, upper)

    return self.state, reward, done

class GuessingGameModel:
  """Trains a GameAgent with Q-learning and lets it play against the user."""

  def train(self, verbose=False):
    """Train a new agent for N_EXPERIENCES games and return it.

    Each game uses its own learning rate ALPHAS[exp]. If `verbose` is set,
    the total reward and the number of guesses are printed after each game.
    """
    agent = GameAgent()
    env = Environment()

    for exp in range(N_EXPERIENCES):
      agent.reset()
      _, state = env.reset()

      total_reward = 0
      alpha = ALPHAS[exp]
      done = False
      n_guesses = 0

      while not done:
        action = agent.choose_action(state)
        if action is None:
          # Fail loudly instead of passing None on to the environment
          raise NotImplementedError('GameAgent.choose_action must return an action')
        n_guesses += 1

        next_state, reward, done = env.perform(action)
        total_reward += reward

        # Bellman update:
        #   Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
        # A finished game has no future reward.
        best_next = 0.0 if done else np.max(agent.Q(next_state))
        q_values = agent.Q(state)  # the stored array, so the write below updates the table
        q_values[action] += alpha * (reward + GAMMA * best_next - q_values[action])

        state = next_state
        # Safety cap so a single game can never run forever
        if n_guesses >= MAX_GUESSES:
          done = True

      if verbose:
        print('Experience {0}: total reward = {1}, guesses = {2}'.format(exp + 1, total_reward, n_guesses))

    # after training for a while, return trained agent
    return agent

  def play(self, agent):
    """Let `agent` guess a number entered by the user, printing every guess.

    The game stops when the number is found or after MAX_GUESSES guesses.
    """
    agent.reset()
    env = Environment()
    n, state = env.reset(interact=True)
    n_guesses = 0
    done = False

    while not done:
      action = agent.policy(state)
      print('Agent guess: {0}'.format(action))
      n_guesses += 1
      if action == n:
        print('Your guess is correct after {0} times'.format(n_guesses))
      elif action < n:
        print('Your guess is less than {0}'.format(n))
      elif action > n:
        print('Your guess is greater than {0}'.format(n))

      state, _, done = env.perform(action)

      # Only report failure when the last allowed guess was not the right one
      if not done and n_guesses >= MAX_GUESSES:
        print('You\'re so dump that you\'re out of possible guesses')
        done = True

In [None]:
# Train a new agent; verbose=True is passed through to train()
model = GuessingGameModel()
agent = model.train(verbose=True)

In [10]:
# Let the trained agent guess a number that play() asks the user to enter
model.play(agent)

Agent guess: 45
Your guess is greater than 25
Agent guess: 34
Your guess is greater than 25
Agent guess: 20
Your guess is less than 25
Agent guess: 25
Your guess is correct after 4 times
