# Lero's Quest
## Value Iteration Algorithm

In [45]:
import numpy as np
from copy import deepcopy
from functools import reduce
from operator import add

In [46]:
# Number of discrete levels tracked for each state attribute.
HEALTH_RANGE, ARROWS_RANGE, STAMINA_RANGE = 5, 4, 3

# Legal level indices for each attribute (0 .. RANGE - 1).
HEALTH_VALUES = tuple(range(HEALTH_RANGE))
ARROWS_VALUES = tuple(range(ARROWS_RANGE))
STAMINA_VALUES = tuple(range(STAMINA_RANGE))

# Real-world size of one level of each attribute.
HEALTH_FACTOR = 25   # health levels stand for 0, 25, 50, 75, 100
ARROWS_FACTOR = 1    # arrow levels stand for 0, 1, 2, 3
STAMINA_FACTOR = 50  # stamina levels stand for 0, 50, 100

# Action identifiers: SHOOT = 0, DODGE = 1, RECHARGE = 2.
NUM_ACTIONS = 3
ACTION_SHOOT, ACTION_DODGE, ACTION_RECHARGE = range(NUM_ACTIONS)

# The team number selects the divisor applied to the per-step penalty.
TEAM = 85
Y = [0.5, 1, 2]
PRIZE = 10                     # reward stored for states whose health level is 0
COST = -10 / Y[TEAM % 3]       # penalty added to every transition outcome

GAMMA = 0.99   # discount factor applied to the expected next-state utility
DELTA = 0.001  # value iteration stops once the largest update drops below this

In [47]:
class State:
    """A single game state made of three discrete level indices.

    Attributes:
        health: enemy health level, one of HEALTH_VALUES.
        arrows: number-of-arrows level, one of ARROWS_VALUES.
        stamina: stamina level, one of STAMINA_VALUES.
    """

    def __init__(self, enemy_health, num_arrows, stamina):
        """Validate the three levels and store them.

        Raises:
            ValueError: if any level is outside its allowed set of values.
        """
        checks = (
            ('enemy_health', enemy_health, HEALTH_VALUES),
            ('num_arrows', num_arrows, ARROWS_VALUES),
            ('stamina', stamina, STAMINA_VALUES),
        )
        for name, value, allowed in checks:
            if value not in allowed:
                # Name the offending field so a bad transition is easy to trace.
                raise ValueError(
                    f'{name}={value!r} is not one of {allowed}'
                )

        self.health = enemy_health
        self.arrows = num_arrows
        self.stamina = stamina

    def show(self):
        """Return the state as a (health, arrows, stamina) tuple, usable as an array index."""
        return (self.health, self.arrows, self.stamina)

    def __str__(self):
        """Return the compact '(health,arrows,stamina)' form used in printed output."""
        return f'({self.health},{self.arrows},{self.stamina})'


In [48]:
# Reward attached to each (health, arrows, stamina) state: PRIZE for every
# state whose health level is 0, zero everywhere else.
REWARD = np.zeros((HEALTH_RANGE, ARROWS_RANGE, STAMINA_RANGE))
REWARD[0, ...] = PRIZE

In [49]:
def action(action_type, state):
    """Describe what happens when `action_type` is taken in `state`.

    Args:
        action_type: one of ACTION_SHOOT, ACTION_DODGE, ACTION_RECHARGE.
        state: (health, arrows, stamina) tuple of level indices.

    Returns:
        (cost, choices) where `choices` is a list of (probability, State)
        outcomes and `cost` is the probability-weighted sum of
        COST + REWARD[next state] over those outcomes.
        (None, None) when the action is not available in `state`
        (SHOOT needs an arrow and stamina, DODGE needs stamina).

    Raises:
        ValueError: if `state` holds an out-of-range level or
            `action_type` is not a known action.
    """
    state = State(*state)

    if action_type == ACTION_SHOOT:
        if state.arrows == 0 or state.stamina == 0:
            return None, None

        # Shooting always spends one arrow and one stamina level;
        # it lowers the enemy's health by one level half of the time.
        new_arrows = state.arrows - 1
        new_stamina = state.stamina - 1
        hit_health = max(HEALTH_VALUES[0], state.health - 1)

        choices = [
            (0.5, State(hit_health, new_arrows, new_stamina)),
            (0.5, State(state.health, new_arrows, new_stamina)),
        ]

    elif action_type == ACTION_RECHARGE:
        # Stamina goes up one level (capped at the maximum) with probability 0.8.
        recharged = min(STAMINA_VALUES[-1], state.stamina + 1)

        choices = [
            (0.8, State(state.health, state.arrows, recharged)),
            (0.2, State(state.health, state.arrows, state.stamina)),
        ]

    elif action_type == ACTION_DODGE:
        if state.stamina == 0:
            return None, None

        # Dodging costs one or two stamina levels (floored at the minimum)
        # and may pick up one arrow (capped at the maximum).
        more_arrows = min(ARROWS_VALUES[-1], state.arrows + 1)
        stamina_minus_one = max(STAMINA_VALUES[0], state.stamina - 1)
        stamina_minus_two = max(STAMINA_VALUES[0], state.stamina - 2)

        choices = [
            (0.64, State(state.health, more_arrows, stamina_minus_one)),
            (0.16, State(state.health, state.arrows, stamina_minus_one)),
            (0.04, State(state.health, state.arrows, stamina_minus_two)),
            (0.16, State(state.health, more_arrows, stamina_minus_two)),
        ]

    else:
        # Without this branch an unknown action crashed later with an
        # UnboundLocalError on `choices`.
        raise ValueError(f'unknown action type: {action_type!r}')

    cost = sum(prob * (COST + REWARD[nxt.show()]) for prob, nxt in choices)

    return cost, choices


In [50]:
def show(i, utilities, policies):
    """Print the utility and chosen action of every state for iteration `i`.

    Args:
        i: iteration number printed in the header line.
        utilities: array of utilities indexed by (health, arrows, stamina).
        policies: array of action identifiers with the same indexing.

    States with health level 0 are printed with the action shown as -1.
    Any policy value that is not a known action is printed as its raw value.
    """
    action_names = {
        ACTION_SHOOT: 'SHOOT',
        ACTION_DODGE: 'DODGE',
        ACTION_RECHARGE: 'RECHARGE',
    }

    print('iteration={}'.format(i))
    utilities = np.around(utilities, 3)
    for state, util in np.ndenumerate(utilities):
        state = State(*state)
        if state.health == 0:
            print('{}:-1=[{:.3f}]'.format(state, util))
            continue

        policy = policies[state.show()]
        # Fall back to the raw value so an unset policy (e.g. -1) is never
        # reported under the previous state's action name.
        act_str = action_names.get(policy, str(policy))

        print('{}:{}=[{:.3f}]'.format(state, act_str, util))
    print('\n')

In [51]:
def _action_values(state, utilities):
    """Yield (action index, one-step lookahead value) for each action available in `state`."""
    for act_index in range(NUM_ACTIONS):
        cost, outcomes = action(act_index, state)

        if outcomes is None:  # action not available in this state
            continue

        expected_util = sum(prob * utilities[nxt.show()] for prob, nxt in outcomes)
        yield act_index, cost + GAMMA * expected_util


def value_iteration():
    """Run value iteration until the largest utility change is below DELTA.

    Each sweep computes new utilities from the previous sweep's utilities,
    then derives the greedy policy from the freshly updated utilities and
    prints both via `show`. States with health level 0 are skipped, so they
    keep utility 0 and policy -1.

    Returns:
        The number of sweeps performed.
    """
    shape = (HEALTH_RANGE, ARROWS_RANGE, STAMINA_RANGE)
    utilities = np.zeros(shape)
    policies = np.full(shape, -1, dtype='int')

    index = 0
    done = False
    while not done:  # one iteration of value iteration
        temp = np.zeros(utilities.shape)
        delta = 0.0  # largest absolute utility change seen in this sweep

        for state, util in np.ndenumerate(utilities):
            if state[0] == 0:
                continue

            # -np.inf replaces np.NINF, which no longer exists in NumPy 2.0.
            new_util = max(
                (value for _, value in _action_values(state, utilities)),
                default=-np.inf,
            )

            temp[state] = new_util
            delta = max(delta, abs(util - new_util))

        # `temp` is a fresh array every sweep, so it can be adopted directly.
        utilities = temp

        for state, _ in np.ndenumerate(utilities):
            if state[0] == 0:
                continue

            best_util = -np.inf
            best_action = None

            # Strict '>' keeps the lowest-numbered action when values tie.
            for act_index, action_util in _action_values(state, utilities):
                if action_util > best_util:
                    best_action = act_index
                    best_util = action_util

            policies[state] = best_action

        show(index, utilities, policies)
        index += 1
        done = delta < DELTA
    return index
        

In [52]:
# Run value iteration (it prints every sweep) and keep the number of sweeps performed.
iters = value_iteration()

iteration=0
(0,0,0):-1=[0.000]
(0,0,1):-1=[0.000]
(0,0,2):-1=[0.000]
(0,1,0):-1=[0.000]
(0,1,1):-1=[0.000]
(0,1,2):-1=[0.000]
(0,2,0):-1=[0.000]
(0,2,1):-1=[0.000]
(0,2,2):-1=[0.000]
(0,3,0):-1=[0.000]
(0,3,1):-1=[0.000]
(0,3,2):-1=[0.000]
(1,0,0):RECHARGE=[-10.000]
(1,0,1):DODGE=[-10.000]
(1,0,2):DODGE=[-10.000]
(1,1,0):RECHARGE=[-10.000]
(1,1,1):SHOOT=[-5.000]
(1,1,2):SHOOT=[-5.000]
(1,2,0):RECHARGE=[-10.000]
(1,2,1):SHOOT=[-5.000]
(1,2,2):SHOOT=[-5.000]
(1,3,0):RECHARGE=[-10.000]
(1,3,1):SHOOT=[-5.000]
(1,3,2):SHOOT=[-5.000]
(2,0,0):RECHARGE=[-10.000]
(2,0,1):DODGE=[-10.000]
(2,0,2):DODGE=[-10.000]
(2,1,0):RECHARGE=[-10.000]
(2,1,1):SHOOT=[-10.000]
(2,1,2):SHOOT=[-10.000]
(2,2,0):RECHARGE=[-10.000]
(2,2,1):SHOOT=[-10.000]
(2,2,2):SHOOT=[-10.000]
(2,3,0):RECHARGE=[-10.000]
(2,3,1):SHOOT=[-10.000]
(2,3,2):SHOOT=[-10.000]
(3,0,0):RECHARGE=[-10.000]
(3,0,1):DODGE=[-10.000]
(3,0,2):DODGE=[-10.000]
(3,1,0):RECHARGE=[-10.000]
(3,1,1):SHOOT=[-10.000]
(3,1,2):SHOOT=[-10.000]
(3,2,0):RECHARGE

(1,1,2):SHOOT=[-38.682]
(1,2,0):RECHARGE=[-43.654]
(1,2,1):SHOOT=[-32.018]
(1,2,2):SHOOT=[-26.480]
(1,3,0):RECHARGE=[-38.259]
(1,3,1):SHOOT=[-26.411]
(1,3,2):SHOOT=[-20.681]
(2,0,0):RECHARGE=[-140.062]
(2,0,1):DODGE=[-133.316]
(2,0,2):DODGE=[-127.497]
(2,1,0):RECHARGE=[-124.356]
(2,1,1):SHOOT=[-116.446]
(2,1,2):SHOOT=[-108.082]
(2,2,0):RECHARGE=[-106.105]
(2,2,1):SHOOT=[-97.202]
(2,2,2):SHOOT=[-87.906]
(2,3,0):RECHARGE=[-92.576]
(2,3,1):SHOOT=[-82.894]
(2,3,2):SHOOT=[-72.876]
(3,0,0):RECHARGE=[-166.934]
(3,0,1):DODGE=[-164.356]
(3,0,2):DODGE=[-161.893]
(3,1,0):RECHARGE=[-160.632]
(3,1,1):DODGE=[-156.897]
(3,1,2):SHOOT=[-152.648]
(3,2,0):RECHARGE=[-151.715]
(3,2,1):DODGE=[-146.821]
(3,2,2):SHOOT=[-141.412]
(3,3,0):RECHARGE=[-140.189]
(3,3,1):SHOOT=[-134.134]
(3,3,2):SHOOT=[-127.604]
(4,0,0):RECHARGE=[-173.286]
(4,0,1):DODGE=[-172.843]
(4,0,2):DODGE=[-172.319]
(4,1,0):RECHARGE=[-172.077]
(4,1,1):SHOOT=[-171.110]
(4,1,2):SHOOT=[-169.860]
(4,2,0):RECHARGE=[-169.618]
(4,2,1):SHOOT=[-167.972

iteration=36
(0,0,0):-1=[0.000]
(0,0,1):-1=[0.000]
(0,0,2):-1=[0.000]
(0,1,0):-1=[0.000]
(0,1,1):-1=[0.000]
(0,1,2):-1=[0.000]
(0,2,0):-1=[0.000]
(0,2,1):-1=[0.000]
(0,2,2):-1=[0.000]
(0,3,0):-1=[0.000]
(0,3,1):-1=[0.000]
(0,3,2):-1=[0.000]
(1,0,0):RECHARGE=[-84.738]
(1,0,1):DODGE=[-73.256]
(1,0,2):DODGE=[-63.945]
(1,1,0):RECHARGE=[-58.746]
(1,1,1):SHOOT=[-46.912]
(1,1,2):SHOOT=[-41.233]
(1,2,0):RECHARGE=[-46.068]
(1,2,1):SHOOT=[-34.057]
(1,2,2):SHOOT=[-28.202]
(1,3,0):RECHARGE=[-39.888]
(1,3,1):SHOOT=[-27.788]
(1,3,2):SHOOT=[-21.845]
(2,0,0):RECHARGE=[-167.591]
(2,0,1):DODGE=[-157.642]
(2,0,2):DODGE=[-149.525]
(2,1,0):RECHARGE=[-145.009]
(2,1,1):SHOOT=[-134.622]
(2,1,2):SHOOT=[-124.050]
(2,2,0):RECHARGE=[-121.453]
(2,2,1):SHOOT=[-110.656]
(2,2,2):SHOOT=[-99.683]
(2,3,0):RECHARGE=[-103.879]
(2,3,1):SHOOT=[-92.777]
(2,3,2):SHOOT=[-81.506]
(3,0,0):RECHARGE=[-233.470]
(3,0,1):DODGE=[-225.748]
(3,0,2):DODGE=[-219.337]
(3,1,0):RECHARGE=[-215.804]
(3,1,1):RECHARGE=[-207.436]
(3,1,2):SHOOT=[-

(3,1,0):RECHARGE=[-226.220]
(3,1,1):DODGE=[-216.715]
(3,1,2):SHOOT=[-207.059]
(3,2,0):RECHARGE=[-204.683]
(3,2,1):DODGE=[-194.839]
(3,2,2):SHOOT=[-184.847]
(3,3,0):RECHARGE=[-182.387]
(3,3,1):SHOOT=[-172.211]
(3,3,2):SHOOT=[-161.888]
(4,0,0):RECHARGE=[-310.819]
(4,0,1):DODGE=[-303.222]
(4,0,2):DODGE=[-296.983]
(4,1,0):RECHARGE=[-293.524]
(4,1,1):DODGE=[-285.481]
(4,1,2):SHOOT=[-277.251]
(4,2,0):RECHARGE=[-275.240]
(4,2,1):DODGE=[-266.781]
(4,2,2):SHOOT=[-258.145]
(4,3,0):RECHARGE=[-256.030]
(4,3,1):SHOOT=[-247.178]
(4,3,2):SHOOT=[-238.157]


iteration=54
(0,0,0):-1=[0.000]
(0,0,1):-1=[0.000]
(0,0,2):-1=[0.000]
(0,1,0):-1=[0.000]
(0,1,1):-1=[0.000]
(0,1,2):-1=[0.000]
(0,2,0):-1=[0.000]
(0,2,1):-1=[0.000]
(0,2,2):-1=[0.000]
(0,3,0):-1=[0.000]
(0,3,1):-1=[0.000]
(0,3,2):-1=[0.000]
(1,0,0):RECHARGE=[-85.146]
(1,0,1):DODGE=[-73.600]
(1,0,2):DODGE=[-64.247]
(1,1,0):RECHARGE=[-59.022]
(1,1,1):SHOOT=[-47.145]
(1,1,2):SHOOT=[-41.430]
(1,2,0):RECHARGE=[-46.254]
(1,2,1):SHOOT=[-34.214]
(1,2,2):SH

iteration=68
(0,0,0):-1=[0.000]
(0,0,1):-1=[0.000]
(0,0,2):-1=[0.000]
(0,1,0):-1=[0.000]
(0,1,1):-1=[0.000]
(0,1,2):-1=[0.000]
(0,2,0):-1=[0.000]
(0,2,1):-1=[0.000]
(0,2,2):-1=[0.000]
(0,3,0):-1=[0.000]
(0,3,1):-1=[0.000]
(0,3,2):-1=[0.000]
(1,0,0):RECHARGE=[-85.175]
(1,0,1):DODGE=[-73.625]
(1,0,2):DODGE=[-64.268]
(1,1,0):RECHARGE=[-59.042]
(1,1,1):SHOOT=[-47.161]
(1,1,2):SHOOT=[-41.444]
(1,2,0):RECHARGE=[-46.267]
(1,2,1):SHOOT=[-34.225]
(1,2,2):SHOOT=[-28.345]
(1,3,0):RECHARGE=[-40.023]
(1,3,1):SHOOT=[-27.902]
(1,3,2):SHOOT=[-21.941]
(2,0,0):RECHARGE=[-171.405]
(2,0,1):DODGE=[-160.953]
(2,0,2):DODGE=[-152.485]
(2,1,0):RECHARGE=[-147.755]
(2,1,1):SHOOT=[-137.002]
(2,1,2):SHOOT=[-126.112]
(2,2,0):RECHARGE=[-123.424]
(2,2,1):SHOOT=[-112.361]
(2,2,2):SHOOT=[-101.158]
(2,3,0):RECHARGE=[-105.288]
(2,3,1):RECHARGE=[-93.994]
(2,3,2):SHOOT=[-82.558]
(3,0,0):RECHARGE=[-249.149]
(3,0,1):DODGE=[-239.735]
(3,0,2):DODGE=[-232.102]
(3,1,0):RECHARGE=[-227.841]
(3,1,1):SHOOT=[-218.140]
(3,1,2):SHOOT=[

(1,2,2):SHOOT=[-28.346]
(1,3,0):RECHARGE=[-40.024]
(1,3,1):SHOOT=[-27.903]
(1,3,2):SHOOT=[-21.942]
(2,0,0):RECHARGE=[-171.466]
(2,0,1):DODGE=[-161.005]
(2,0,2):DODGE=[-152.532]
(2,1,0):RECHARGE=[-147.798]
(2,1,1):RECHARGE=[-137.039]
(2,1,2):SHOOT=[-126.143]
(2,2,0):RECHARGE=[-123.454]
(2,2,1):SHOOT=[-112.387]
(2,2,2):SHOOT=[-101.180]
(2,3,0):RECHARGE=[-105.309]
(2,3,1):SHOOT=[-94.012]
(2,3,2):SHOOT=[-82.574]
(3,0,0):RECHARGE=[-249.563]
(3,0,1):DODGE=[-240.097]
(3,0,2):DODGE=[-232.428]
(3,1,0):RECHARGE=[-228.144]
(3,1,1):SHOOT=[-218.405]
(3,1,2):SHOOT=[-208.542]
(3,2,0):RECHARGE=[-206.107]
(3,2,1):DODGE=[-196.088]
(3,2,2):SHOOT=[-185.942]
(3,3,0):RECHARGE=[-183.437]
(3,3,1):RECHARGE=[-173.130]
(3,3,2):SHOOT=[-162.693]
(4,0,0):RECHARGE=[-320.063]
(4,0,1):DODGE=[-311.523]
(4,0,2):DODGE=[-304.601]
(4,1,0):RECHARGE=[-300.736]
(4,1,1):SHOOT=[-291.941]
(4,1,2):SHOOT=[-283.031]
(4,2,0):RECHARGE=[-280.832]
(4,2,1):SHOOT=[-271.778]
(4,2,2):SHOOT=[-262.605]
(4,3,0):RECHARGE=[-260.342]
(4,3,1):REC

(3,0,1):DODGE=[-240.144]
(3,0,2):DODGE=[-232.470]
(3,1,0):RECHARGE=[-228.183]
(3,1,1):DODGE=[-218.439]
(3,1,2):SHOOT=[-208.571]
(3,2,0):RECHARGE=[-206.135]
(3,2,1):SHOOT=[-196.112]
(3,2,2):SHOOT=[-185.963]
(3,3,0):RECHARGE=[-183.457]
(3,3,1):SHOOT=[-173.148]
(3,3,2):SHOOT=[-162.708]
(4,0,0):RECHARGE=[-320.351]
(4,0,1):DODGE=[-311.777]
(4,0,2):DODGE=[-304.831]
(4,1,0):RECHARGE=[-300.951]
(4,1,1):DODGE=[-292.130]
(4,1,2):SHOOT=[-283.198]
(4,2,0):RECHARGE=[-280.993]
(4,2,1):SHOOT=[-271.918]
(4,2,2):SHOOT=[-262.729]
(4,3,0):RECHARGE=[-260.461]
(4,3,1):SHOOT=[-251.126]
(4,3,2):SHOOT=[-241.673]


iteration=102
(0,0,0):-1=[0.000]
(0,0,1):-1=[0.000]
(0,0,2):-1=[0.000]
(0,1,0):-1=[0.000]
(0,1,1):-1=[0.000]
(0,1,2):-1=[0.000]
(0,2,0):-1=[0.000]
(0,2,1):-1=[0.000]
(0,2,2):-1=[0.000]
(0,3,0):-1=[0.000]
(0,3,1):-1=[0.000]
(0,3,2):-1=[0.000]
(1,0,0):RECHARGE=[-85.180]
(1,0,1):DODGE=[-73.629]
(1,0,2):DODGE=[-64.272]
(1,1,0):RECHARGE=[-59.045]
(1,1,1):SHOOT=[-47.164]
(1,1,2):SHOOT=[-41.446]
(1,2,0):RE

(0,0,0):-1=[0.000]
(0,0,1):-1=[0.000]
(0,0,2):-1=[0.000]
(0,1,0):-1=[0.000]
(0,1,1):-1=[0.000]
(0,1,2):-1=[0.000]
(0,2,0):-1=[0.000]
(0,2,1):-1=[0.000]
(0,2,2):-1=[0.000]
(0,3,0):-1=[0.000]
(0,3,1):-1=[0.000]
(0,3,2):-1=[0.000]
(1,0,0):RECHARGE=[-85.180]
(1,0,1):DODGE=[-73.629]
(1,0,2):DODGE=[-64.272]
(1,1,0):RECHARGE=[-59.045]
(1,1,1):SHOOT=[-47.164]
(1,1,2):SHOOT=[-41.446]
(1,2,0):RECHARGE=[-46.269]
(1,2,1):SHOOT=[-34.227]
(1,2,2):SHOOT=[-28.346]
(1,3,0):RECHARGE=[-40.024]
(1,3,1):SHOOT=[-27.903]
(1,3,2):SHOOT=[-21.942]
(2,0,0):RECHARGE=[-171.473]
(2,0,1):DODGE=[-161.012]
(2,0,2):DODGE=[-152.537]
(2,1,0):RECHARGE=[-147.803]
(2,1,1):SHOOT=[-137.043]
(2,1,2):SHOOT=[-126.147]
(2,2,0):RECHARGE=[-123.457]
(2,2,1):RECHARGE=[-112.390]
(2,2,2):SHOOT=[-101.182]
(2,3,0):RECHARGE=[-105.311]
(2,3,1):SHOOT=[-94.015]
(2,3,2):SHOOT=[-82.575]
(3,0,0):RECHARGE=[-249.625]
(3,0,1):DODGE=[-240.150]
(3,0,2):DODGE=[-232.475]
(3,1,0):RECHARGE=[-228.188]
(3,1,1):DODGE=[-218.443]
(3,1,2):SHOOT=[-208.575]
(3,

In [53]:
# Display the number of sweeps value iteration performed before stopping.
iters

119