In [1]:
import numpy as np
import random
from felibs.chapters.prologue import *
import warnings

warnings.filterwarnings("ignore")

In [2]:
#instantiate qtable

#5 spaces up, five spaces down, left, right but manhattan so triangle - 3 actions per move
action_space = 61*3
#dimensions of first map
state_space = 15*10

#qtable = np.zeros((state_space, action_space))
qtable = np.load('prologue_qtable_target_square_init.npy')
qtable.shape

(150, 183)

In [3]:
#these dictionaries allow us to go from states/actions to qtable indices
states_to_indices = {}
index = 0
for i in range(15):
    for j in range(10):
        states_to_indices[(i,j)] = index
        index += 1

indices_to_states = {}
for i, state in enumerate(states_to_indices.keys()):
    indices_to_states[i] = state

moves_and_actions_to_indices = {}
mdi = 0
for i in range(-5,6):
    for j in range(-5,6):
        #limiting our action space by manhattan distance
        if abs(i) + abs(j) > 5:
            continue
        else:
            for action in ['Wait', 'Item', 'Attack']:
                moves_and_actions_to_indices[(i,j, action)] = mdi
                mdi += 1
                
indices_to_moves_and_actions = {}
for i, action in enumerate(moves_and_actions_to_indices.keys()):
    indices_to_moves_and_actions[i] = action

In [4]:
#initialize qtable with values indicating which square is our target

start = (13, 2)
target = (4,7)

def dist(s, f):
    return ((f[0]-s[0])**2 + (f[1]-s[1])**2)**0.5

for i, row in enumerate(qtable):
    current_square = indices_to_states[i]
    for j, value in enumerate(row):
        action = indices_to_moves_and_actions[j]
        x_final = current_square[0] + action[0]
        y_final = current_square[1] + action[1]
        next_square = (x_final,y_final)
        init_value = 1 - (dist(next_square,target)/dist(start,target))
        qtable[i,j] = init_value

In [5]:
#Set hyperparameters

train_episodes = 100  #starting with 700 runs to go overnight
test_episodes = 5
max_turns = 15

learning_rate = 0.4
gamma = 0.5

#exploration/exploitation

exploration_rate = 1.0
#exploration_rate = 0.6
max_exp_rate = 1.0
#max_exp_rate = 0.5
min_exp_rate = 0.01
#increasing decay - I think this run will learn very quickly
er_decay = 0.09
current_episode = 0

In [12]:
#Select the Emulator
#time.sleep(2)

pyautogui.moveTo(7,54,0.2)
pyautogui.click()
time.sleep(0.3)


for episode in range(current_episode, train_episodes):
    reset_to_prologue()
    reward = 0
    ###starting spot is 13, 2
    state = (13,2)

    for step in range(max_turns):
        if reward == 2:
            break
        
        if state in [(2, 7), (3,6), (4,7)]:
            press_key('Enter')
            #break for the combat with boss
            time.sleep(5)
            info, reward = take_turn(False, qtable, (0,0,'Attack'))
            
        else:
            exp_exp_tradeoff = random.uniform(0,1)
            if exp_exp_tradeoff > exploration_rate:
                action_index = np.argmax(qtable[states_to_indices[state], :])
                act_tuple = indices_to_moves_and_actions[action_index]
                info, reward = take_turn(False, qtable, act_tuple)

            else:
                info, reward = take_turn(True, qtable, (0,0,''))
        
        #error in finding name means death
        if info[0] == '':
            qtable[states_to_indices[state],action_index] += -1
            break
        
        #makes a move to an impossible square
        if info[3] == "Invalid move":
            for choice in ['Wait', 'Item', 'Attack']:
                tup = (info[1], info[2], choice)
                action_index = moves_and_actions_to_indices[tup]
                qtable[states_to_indices[state],action_index] = -2
            break
        
        action_index = moves_and_actions_to_indices[tuple(info[1:])]
        
        new_state = [state[0] + info[1], state[1] + info[2]]
        
        #adjust new_state to reflect where we actually are
        if new_state[0] > 14:
            new_state[0] = 14
            reward -= 0.5
        elif new_state[0] < 0:
            new_state[0] = 0
            reward -= 0.5
        if new_state[1] > 9:
            new_state[1] = 9
            reward -= 0.5
        elif new_state[1] < 0:
            new_state[1] = 0
            reward -= 0.5
        
        new_state = tuple(new_state)
        
        reward += 1 - (dist(new_state,target)/dist(start,target))
        
        qtable[states_to_indices[state],action_index] = qtable[states_to_indices[state],action_index] + \
                                                            learning_rate * (reward + \
                                                            gamma*np.max(qtable[states_to_indices[new_state], :]) -\
                                                            qtable[states_to_indices[state],action_index])
        

        state = new_state
        
    episode += 1
    exploration_rate = min_exp_rate + (max_exp_rate - min_exp_rate)*np.exp(-er_decay*episode)
    
    np.save('prologue_qtable_target_square_init', qtable)
    current_episode = episode
    print(current_episode, exploration_rate)

In [14]:
#examine qtable
for state in range(150):
    print(qtable[state,:])

[-0.10743627 -0.10743627 -0.10743627 -0.09888451 -0.09888451 -0.09888451
 -0.03249103 -0.03249103 -0.03249103  0.02871414  0.02871414  0.02871414
 -0.10743627 -0.10743627 -0.10743627 -0.03249103 -0.03249103 -0.03249103
  0.03847605  0.03847605  0.03847605  0.10451868  0.10451868  0.10451868
  0.16446831  0.16446831  0.16446831 -0.13270423 -0.13270423 -0.13270423
 -0.05060629 -0.05060629 -0.05060629  0.02871414  0.02871414  0.02871414
  0.10451868  0.10451868  0.10451868  0.17583662  0.17583662  0.17583662
  0.24140149  0.24140149  0.24140149  0.2995958   0.2995958   0.2995958
 -0.17360917 -0.17360917 -0.17360917 -0.08593061 -0.08593061 -0.08593061
  0.          0.          0.          0.08369075  0.08369075  0.08369075
  0.16446831  0.16446831  0.16446831  0.24140149  0.24140149  0.24140149
  0.31319718  0.31319718  0.31319718  0.3780736   0.3780736   0.3780736
  0.43364789  0.43364789  0.43364789 -0.22859023 -0.22859023 -0.22859023
 -0.13686095 -0.13686095 -0.13686095 -0.04610689 -0.0

[ 0.31319718  0.31319718  0.31319718  0.22297131  0.22297131  0.22297131
  0.3200999   0.3200999   0.3200999   0.41722848  0.41722848  0.41722848
  0.12046324  0.12046324  0.12046324  0.2169243   0.2169243   0.2169243
  0.31319718  0.31319718  0.31319718  0.40918988  0.40918988  0.40918988
  0.50473944  0.50473944  0.50473944  0.00947889  0.00947889  0.00947889
  0.10451868  0.10451868  0.10451868  0.19905716  0.19905716  0.19905716
  0.29289322  0.29289322  0.29289322  0.38570488  0.38570488  0.38570488
  0.47694656  0.47694656  0.47694656  0.56562776  0.56562776  0.56562776
 -0.10743627 -0.10743627 -0.10743627 -0.01405221 -0.01405221 -0.01405221
  0.07855732  0.07855732  0.07855732  0.170133    0.170133    0.170133
  0.26029073  0.26029073  0.26029073  0.34844164  0.34844164  0.34844164
  0.43364789  0.43364789  0.43364789  0.51435707  0.51435707  0.51435707
  0.58791831  0.58791831  0.58791831 -0.22859023 -0.22859023 -0.22859023
 -0.13686095 -0.13686095 -0.13686095 -0.04610689 -0.04

In [7]:
#testing qtable values


#Select VBA
pyautogui.moveTo(7,54,0.2)
pyautogui.click()
time.sleep(0.3)

n_turns = 10

reset_to_prologue()
# set_options()
# time.sleep(1)
state = (13,2)
for i in range(n_turns):
    if state in [(2, 7), (3,6), (4,7)]:
        press_key('Enter')
        #break for the combat with boss
        time.sleep(5)
    
    #always take top-scoring action, no 'exploration'
    action_index = np.argmax(qtable[states_to_indices[state], :])
    act_tuple = indices_to_moves_and_actions[action_index]
    print(state, act_tuple)
    
    #take turn and update state
    info, reward = take_turn(False, qtable, act_tuple)
    state = (state[0] + act_tuple[0], state[1] + act_tuple[1])
    

(13, 2) (-5, 0, 'Wait')
(8, 2) (-2, 3, 'Wait')
(6, 5) (-2, 2, 'Wait')
(4, 7) (0, 0, 'Wait')
(4, 7) (0, 0, 'Wait')
(4, 7) (0, 0, 'Wait')
Broken for no name found
(4, 7) (0, 0, 'Wait')
Broken for no name found
(4, 7) (0, 0, 'Wait')
Broken for no name found


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/Users/aaronfrederick/Desktop/3venv/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3291, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-7-cd3a0811f1ab>", line 19, in <module>
    time.sleep(5)
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/aaronfrederick/Desktop/3venv/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2033, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'KeyboardInterrupt' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/aaronfrederick/Desktop/3venv/lib/python3.7/site-packages/IPython/core/ultratb.py", line 1095, in get_records
    return _fixed_getinnerframes(etb, number_of_lines_of_context, tb_offset)
  File "/Users/aaro

KeyboardInterrupt: 

In [17]:
#This cell is an example of the output of take_turn

#Select VBA
pyautogui.moveTo(7,54,0.2)
pyautogui.click()
time.sleep(0.3)

reset_to_prologue()
info, reward = take_turn(True, qtable)
print('Name:', info[0])
print('Movement:', (info[1],info[2]))
print('Selection:', info[3])
print('Enemies Killed:', reward)

Name: Lyn
Movement: (4, 1)
Selection: Item
Enemies Killed: 0
