# Lero's Quest
## Value Iteration Algorithm

In [1]:
import numpy as np
from copy import deepcopy
from functools import reduce
from operator import add

In [10]:
HEALTH_RANGE = 2
ARROWS_RANGE = 2
STAMINA_RANGE = 2

HEALTH_VALUES = tuple(range(HEALTH_RANGE))
ARROWS_VALUES = tuple(range(ARROWS_RANGE))
STAMINA_VALUES = tuple(range(STAMINA_RANGE))

HEALTH_FACTOR = 25 # 0, 25, 50, 75, 100
ARROWS_FACTOR = 1 # 0, 1, 2, 3
STAMINA_FACTOR = 50 # 0, 50, 100

NUM_ACTIONS = 3
ACTION_SHOOT = 2
ACTION_DODGE = 1
ACTION_RECHARGE = 0

TEAM = 85
Y = [1/2, 1,2]
PRIZE = 10
COST = -2.5

GAMMA = 0.1
DELTA = 1e-10

In [11]:
class State:
    def __init__(self, enemy_health, num_arrows, stamina):
        if (enemy_health not in HEALTH_VALUES) or (num_arrows not in ARROWS_VALUES) or (stamina not in STAMINA_VALUES):
            raise ValueError
        
        self.health = enemy_health 
        self.arrows = num_arrows 
        self.stamina = stamina 

    def show(self):
        return (self.health, self.arrows, self.stamina)

    def get_index(self):
        return ((ARROWS_RANGE + STAMINA_RANGE) * self.health +
                STAMINA_RANGE * self.arrows +
                self.stamina)
    
    def is_final(self):
        return (self.health == 0)

    def __str__(self):
        return f'({self.health * HEALTH_FACTOR}, {self.arrows * ARROWS_FACTOR}, {self.stamina * STAMINA_FACTOR})'

    @classmethod
    def from_index(cls, index):
        if index not in range(60):
            raise ValueError

        enemy_health = index // (ARROWS_RANGE + STAMINA_RANGE)
        index = index % (ARROWS_RANGE + STAMINA_RANGE)

        num_arrows = index // STAMINA_RANGE
        index = index % STAMINA_RANGE

        stamina = index

        return State(enemy_health, num_arrows, stamina)


In [12]:
REWARD = np.zeros((HEALTH_RANGE, ARROWS_RANGE, STAMINA_RANGE))
REWARD[0, :, :] = PRIZE

In [13]:
def action(action_type, state):
    # returns cost, array of tuple of (probability, state)
    
    state = State(*state)

    if action_type == ACTION_SHOOT:
        if state.arrows == 0 or state.stamina == 0:
            return None, None

        new_arrows = state.arrows - 1
        new_stamina = state.stamina - 1

        choices = []
        choices.append((0.5, State(max(HEALTH_VALUES[0],state.health-1), new_arrows, new_stamina)))
        choices.append((0.5, State(state.health, new_arrows, new_stamina)))

    elif action_type == ACTION_RECHARGE:
        choices = []
        choices.append((1, State(state.health, state.arrows, min(STAMINA_VALUES[-1], state.stamina+1))))
        #choices.append((0.2, State(state.health, state.arrows, state.stamina)))

    elif action_type == ACTION_DODGE:
        if state.stamina == 0:
            return None, None

        if state.stamina == 2: #if stamina is 100  
            choices = []
            #choices.append((0.64, State(state.health,min(ARROWS_VALUES[-1], state.arrows+1),state.stamina -1)))
            choices.append((0.8, State(state.health,state.arrows,STAMINA_VALUES[1])))
            choices.append((0.2, State(state.health, state.arrows,STAMINA_VALUES[0])))
            #choices.append((0.16, State(state.health, min(ARROWS_VALUES[-1] ,state.arrows+1), STAMINA_VALUES[0])))

        elif state.stamina == 1: # if stamina is 50
            choices = []
            choices.append((1, State(state.health,state.arrows,0)))
            #choices.append((0.8, State(state.health,min(state.arrows+1,ARROWS_VALUES[-1]),0)))

    cost = 0
    for choice in choices:
        cost += choice[0] * (COST + REWARD[choice[1].show()])
        
    return cost, choices


In [14]:
def show(i, utilities, policies):
    print(f'iteration={i}')

    for state, util in np.ndenumerate(utilities):
        round(util)
        util_str = '{:.3f}'.format(util)
        
        if state[0] == 0:
            print(f'{state}:{-1}=[{util_str}]')
            continue
        
        if policies[state] == ACTION_SHOOT:
            act_str = 'SHOOT'
        elif policies[state] == ACTION_DODGE:
            act_str = 'DODGE'
        elif policies[state] == ACTION_RECHARGE:
            act_str = 'RECHARGE'
        
        print(f'{state}:{act_str}=[{util_str}]')

In [15]:
def value_iteration():
    utilities = np.zeros((HEALTH_RANGE, ARROWS_RANGE, STAMINA_RANGE))
    policies = np.full((HEALTH_RANGE, ARROWS_RANGE, STAMINA_RANGE), -1, dtype='int')

    index = 0
    while True: # one iteration of value iteration
        temp = np.zeros(utilities.shape, dtype='double')
        delta = 0
        
        for state, util in np.ndenumerate(utilities):
            if state[0] == 0:
                continue
            new_util = np.NINF

            for act_index in range(NUM_ACTIONS):
                cost, states = action(act_index, state)
                
                if cost is None:
                    continue

                expected_util = reduce(add, map(lambda x: x[0]*utilities[x[1].show()], states))
                new_util = max(new_util, cost + GAMMA * expected_util)
            
            temp[state] = new_util
            delta = max(delta, abs(util - new_util))
        
        utilities = deepcopy(temp)

        for state, _ in np.ndenumerate(utilities):
            if state[0] == 0:
                continue
            best_util = np.NINF
            best_action = None

            for act_index in range(NUM_ACTIONS):
                states = action(act_index, state)[1]

                if states is None:
                    continue

                action_util = cost+ GAMMA * reduce(add, map(lambda x: x[0]*utilities[x[1].show()], states))
                
                if action_util > best_util:
                    best_action = act_index
                    best_util = action_util

            policies[state] = best_action

        show(index, utilities, policies)
        index +=1
        if delta <= DELTA:
            break
        

In [16]:
value_iteration()

iteration=0
(0, 0, 0):-1=[0.000]
(0, 0, 1):-1=[0.000]
(0, 1, 0):-1=[0.000]
(0, 1, 1):-1=[0.000]
(1, 0, 0):RECHARGE=[-2.500]
(1, 0, 1):RECHARGE=[-2.500]
(1, 1, 0):RECHARGE=[-2.500]
(1, 1, 1):RECHARGE=[2.500]
iteration=1
(0, 0, 0):-1=[0.000]
(0, 0, 1):-1=[0.000]
(0, 1, 0):-1=[0.000]
(0, 1, 1):-1=[0.000]
(1, 0, 0):RECHARGE=[-2.750]
(1, 0, 1):RECHARGE=[-2.750]
(1, 1, 0):RECHARGE=[-2.250]
(1, 1, 1):RECHARGE=[2.375]
iteration=2
(0, 0, 0):-1=[0.000]
(0, 0, 1):-1=[0.000]
(0, 1, 0):-1=[0.000]
(0, 1, 1):-1=[0.000]
(1, 0, 0):RECHARGE=[-2.775]
(1, 0, 1):RECHARGE=[-2.775]
(1, 1, 0):RECHARGE=[-2.263]
(1, 1, 1):RECHARGE=[2.362]
iteration=3
(0, 0, 0):-1=[0.000]
(0, 0, 1):-1=[0.000]
(0, 1, 0):-1=[0.000]
(0, 1, 1):-1=[0.000]
(1, 0, 0):RECHARGE=[-2.777]
(1, 0, 1):RECHARGE=[-2.777]
(1, 1, 0):RECHARGE=[-2.264]
(1, 1, 1):RECHARGE=[2.361]
iteration=4
(0, 0, 0):-1=[0.000]
(0, 0, 1):-1=[0.000]
(0, 1, 0):-1=[0.000]
(0, 1, 1):-1=[0.000]
(1, 0, 0):RECHARGE=[-2.778]
(1, 0, 1):RECHARGE=[-2.778]
(1, 1, 0):RECHARGE=[

In [17]:
state = (2,1,1)
for act_index in range(NUM_ACTIONS):
    cost, states = action(act_index, state)
    print(cost)
    for s in states:
        print(s[0],end='')
        print(s[1].show())


ValueError: 