In [None]:
import nbimporter
nbimporter.options['only_defs'] = False
import numpy as np
from copy import deepcopy
import random
from file_utils import save_pickle, load_pickle
from pacman_game import Action, ActionEvent, get_next_game_state_from_action, initialize_gamestate_from_file

In [None]:
def calculate_reward_for_move(action_event):
    """Translate the outcome of a move into a scalar reward.

    Args:
        action_event: The ``ActionEvent`` produced by the move.

    Returns:
        The reward associated with ``action_event``; 0 for any event that
        has no explicit entry in the table below.
    """
    # One entry per event that carries a non-default reward.
    reward_by_event = {
        ActionEvent.DOT: 2,
        ActionEvent.CAPTURED_BY_GHOST: -5,
        ActionEvent.NONE: -0.1,
        ActionEvent.WALL: -0.1,
        ActionEvent.WON: 20,
        ActionEvent.LOST: -10,
    }
    return reward_by_event.get(action_event, 0)

In [None]:
# Global Q-table: maps a game state to a dict of {action: Q-value estimate}.
# Entries are created the first time a state is seen (every action starts at
# 0.0) by pick_action / compute_max_q_value / train, and updated in place by
# train.
q_table = {}

In [None]:
def pick_optimal_action(state, printing=False):
    """Return a highest-valued action for ``state``, breaking ties at random.

    Args:
        state: A game state that already has an entry in ``q_table``.
        printing: When true, print the state and its action values before
            returning.

    Returns:
        One of the actions whose Q-value equals the maximum for ``state``,
        chosen with ``random.choice``.

    Raises:
        KeyError: If ``state`` has no entry in ``q_table``.
    """
    action_values = q_table[state]
    best_value = max(action_values.values())

    # Collect every action tied for the best value so ties are not biased
    # towards whichever action happens to come first in the dict.
    best_actions = [
        action for action, value in action_values.items() if value == best_value
    ]

    if printing:
        print(state)
        print(action_values)

    return random.choice(best_actions)

In [None]:
def pick_action(game_state, exploration_prob=0.30):
    """Choose an action for ``game_state`` with an epsilon-greedy policy.

    If ``game_state`` has not been seen before, it is first added to
    ``q_table`` with every action valued at 0.0.

    Args:
        game_state: The current game state (used as a ``q_table`` key).
        exploration_prob: Probability of taking a uniformly random action
            instead of the greedy one. 0 never explores; 1 always explores.
            Defaults to 0.30, the rate that was previously hard-coded.

    Returns:
        A random action (explore) or the result of ``pick_optimal_action``
        (exploit).
    """
    if game_state not in q_table:
        q_table[game_state] = {key: 0.0 for key in Action.get_all_actions()}

    # np.random.rand() is in [0, 1), so the comparison is true with
    # probability ``exploration_prob``.
    if exploration_prob > np.random.rand():
        # Explore
        return np.random.choice(Action.get_all_actions())

    # Exploit
    return pick_optimal_action(game_state)

In [None]:
def compute_max_q_value(state):
    """Return the largest Q-value recorded for ``state``.

    A state that is not yet in ``q_table`` is registered first, with every
    action valued at 0.0, so this call may grow the table.

    Args:
        state: The game state to look up.

    Returns:
        The maximum over all action values stored for ``state``.
    """
    action_values = q_table.get(state)
    if action_values is None:
        # First time this state is seen: create its zero-initialised entry.
        action_values = {action: 0.0 for action in Action.get_all_actions()}
        q_table[state] = action_values

    return max(action_values.values())

In [None]:
def train(level='level-0', num_episodes=10, discount=0.8, alpha=0.2):
    """Run tabular Q-learning on a level and pickle the resulting Q-table.

    Every episode starts from a fresh copy of the level's initial state and
    runs until the game reports WON or LOST, or until the per-episode step
    cap is exceeded. After each move the global ``q_table`` is updated with

        Q(s, a) += alpha * (reward + discount * max_a' Q(s', a') - Q(s, a))

    where the bootstrap term is taken as 0 when the move ended the episode.

    Args:
        level: Level name passed to ``initialize_gamestate_from_file``.
        num_episodes: Number of episodes to play.
        discount: Discount factor applied to the best next-state value.
            Defaults to 0.8, the value that was previously hard-coded.
        alpha: Learning rate of the update. Defaults to 0.2, the value that
            was previously hard-coded.

    Side effects:
        Mutates the global ``q_table``, prints progress every 50 episodes and
        on every win, and saves the table with ``save_pickle`` to
        ``'./q_table'`` once all episodes are done.
    """
    # An episode is abandoned once its step counter exceeds this value.
    step_cap = 500

    initial_game_state = initialize_gamestate_from_file(level)

    for episode in range(num_episodes):
        # Copy so that every episode starts from the same initial state.
        current_game_state = deepcopy(initial_game_state)
        episode_step = 0
        episode_done = False

        if episode % 50 == 0:
            print("Iteration number", episode)

        while not episode_done:
            action = pick_action(current_game_state)
            new_game_state, action_event = get_next_game_state_from_action(
                current_game_state, action.name
            )

            if action_event == ActionEvent.WON or action_event == ActionEvent.LOST:
                episode_done = True
                if action_event == ActionEvent.WON:
                    print("Won!!")

            reward = calculate_reward_for_move(action_event)

            if current_game_state not in q_table:
                q_table[current_game_state] = {
                    key: 0.0 for key in Action.get_all_actions()
                }
            action_values = q_table[current_game_state]

            # A terminal transition has no future return, so the target is
            # just the reward; this also avoids inserting terminal states
            # into q_table as all-zero entries.
            if episode_done:
                best_next_value = 0.0
            else:
                best_next_value = compute_max_q_value(new_game_state)

            td_target = reward + discount * best_next_value
            action_values[action] = action_values[action] + alpha * (
                td_target - action_values[action]
            )

            current_game_state = new_game_state

            episode_step += 1
            if episode_step > step_cap:
                break

    save_pickle('./q_table', q_table, True)

In [None]:
# Train for 250 episodes on the default level ('level-0'); train() pickles the
# resulting Q-table to './q_table' when it finishes.
# NOTE(review): neither `random` nor `np.random` is seeded anywhere visible in
# this notebook, so the learned table will differ between runs — consider
# seeding both before this call.
train(num_episodes=250)