In [1]:
import cvxpy as cp
import numpy as np
import pandas as pd

In [2]:
STEP_COST = -10
POSITIONS = 5
MAX_MATERIALS = 3         # num_material = 0..2
MAX_ARROWS = 4            # num_arrow = 0..3
MONSTER_STATES = 2
MAX_HEALTH_STATES = 5     # health = (0..4)*25

In [3]:
state_to_idx = np.zeros((POSITIONS, MAX_MATERIALS+1, MAX_ARROWS+1, MONSTER_STATES, MAX_HEALTH_STATES+1))
states = []
state_action_pairs = []

In [4]:
# directional CONSTANTS and UTILITIES

positions = {
    (0,0): "CENTER",
    (-1,0): "WEST",
    (1,0): "EAST",
    (0,1): "NORTH",
    (0,-1): "SOUTH"
}

coordinates = {
    "CENTER": (0,0),
    "WEST": (-1,0),
    "EAST": (1,0),
    "NORTH": (0,1),
    "SOUTH": (0,-1)
}

# actions possible at each location
possible_actions = {
    "CENTER": ["UP", "DOWN", "LEFT", "RIGHT", "STAY", "SHOOT", "HIT"],
    "WEST": ["RIGHT", "STAY", "SHOOT"],
    "EAST": ["LEFT", "STAY", "SHOOT", "HIT"],
    "NORTH": ["DOWN", "STAY", "CRAFT"],
    "SOUTH": ["UP", "STAY", "GATHER"]
}

# eg. the first element of a state tuple is 1 -> state corresponds to direction_tuple[1] = "WEST" location
direction_tuple = ["CENTER", "WEST", "EAST", "NORTH", "SOUTH"]
codes_loc = ["C", "W", "E", "N", "S"]
codes_state = ["D", "R"]

def calc_prob(state, action):
    location = direction_tuple[state[0]]
    
    ret = []
    
    if location == "NORTH":
        if action in ["DOWN", "STAY"]:
            new_state = move(action, state.copy())
            fail_state = state.copy()
            fail_state[0] = 2
            ret = [[0.85, 0.15], [new_state, fail_state]]
        else:
            arrow_probs = [0.5, 0.35, 0.15]
            arrow1 = state.copy()
            arrow2 = state.copy()
            arrow3 = state.copy()
            arrow1[1] -= 1
            arrow2[1] -= 1
            arrow3[1] -= 1
            arrow1[2] = min(3, arrow1[2] + 1)
            arrow2[2] = min(3, arrow2[2] + 2)
            arrow3[2] = min(3, arrow3[2] + 3)
            
            ret = [arrow_probs, [arrow1, arrow2, arrow3]]
            
  
    elif location == "SOUTH":
        if action in ["UP", "STAY"]:
            new_state = move(action, state.copy())
            fail_state = state.copy()
            fail_state[0] = 2
            ret = [[0.85, 0.15], [new_state, fail_state]]
        else:
            gather_probs = [0.75, 0.25]
            new_state = state.copy()
            fail_state = new_state.copy()
            new_state[1] = min(2, new_state[1] + 1)
            ret = [gather_probs, [new_state, fail_state]]
                
                
    elif location == "WEST":
        if action in ["RIGHT", "STAY"]:
            new_state = move(action, state.copy())
            fail_state = state.copy()
            fail_state[0] = 2
            ret = [[1, 0], [new_state, fail_state]]
        else:
            shoot_probs = [0.25, 0.75]
            new_state = state.copy()
            new_state[2] -= 1
            fail_state = new_state.copy()
            new_state[4] -= 1
            ret = [shoot_probs, [new_state, fail_state]]
            
            
    elif location == "EAST":
        if action in ["LEFT", "STAY"]:
            new_state = move(action, state.copy())
            fail_state = state.copy()
            fail_state[0] = 2
            ret = [[1, 0], [new_state, fail_state]]
        elif action == "SHOOT":
            shoot_probs = [0.9, 0.1]
            new_state = state.copy()
            new_state[2] -= 1
            fail_state = new_state.copy()
            new_state[4] -= 1
            ret = [shoot_probs, [new_state, fail_state]]
        else:
            hit_probs = [0.2, 0.8]
            new_state = state.copy()
            fail_state = new_state.copy()
            new_state[4] = max(0, new_state[4] - 2)
            ret = [hit_probs, [new_state, fail_state]]
            
            
    elif location == "CENTER":
        if action in ["UP", "DOWN", "RIGHT", "LEFT", "STAY"]:
            new_state = move(action, state.copy())
            fail_state = state.copy()
            fail_state[0] = 2
            ret = [[0.85, 0.15], [new_state, fail_state]]
        elif action == "SHOOT":
            shoot_probs = [0.5, 0.5]
            new_state = state.copy()
            new_state[2] -= 1
            fail_state = new_state.copy()
            new_state[4] -= 1
            ret = [shoot_probs, [new_state, fail_state]]
        else:
            hit_probs = [0.1, 0.9]
            new_state = state.copy()
            fail_state = new_state.copy()
            new_state[4] = max(0, new_state[4] - 2)
            ret = [hit_probs, [new_state, fail_state]]
            
    # monster slep slep 
    if not state[3]:
        awake_prob = [0.2, 0.8]
        probabilities = ret[0]
        states = ret[1]
        
        new_probs = []
        new_states = []
        
        for i, prob in enumerate(probabilities):
            new_probs.append(prob*awake_prob[0])
            new_probs.append(prob*awake_prob[1])
            awake_state = states[i].copy()
            awake_state[3] = 1
            new_states.append(awake_state)
            new_states.append(states[i].copy())
        
        ret = [new_probs, new_states]

    # monster wakey wakey   
    else:
        probabilities = ret[0]
        states = ret[1]
        
        new_probs = []
        new_states = []
        
        if location == "EAST" or "CENTER":
            attacc_prob = [0.5, 0.5]
            
            for i, prob in enumerate(probabilities):
                new_probs.append(prob*attacc_prob[0])
                new_states.append(states[i].copy())
            new_probs.append(attacc_prob[1])
            
            og_state = state.copy()
            og_state[2] = 0 # arrows
            og_state[4] = min(4, og_state[4] + 1) # helth
            og_state[3] = 0 # dormant state
            new_states.append(og_state)
            
        else:
            awake_prob = [0.5, 0.5]
            probabilities = ret[0]
            states = ret[1]

            for i, prob in enumerate(probabilities):
                new_probs.append(prob*awake_prob[0])
                new_probs.append(prob*awake_prob[1])
                awake_state = states[i].copy()
                awake_state[3] = 0
                new_states.append(awake_state)
                new_states.append(states[i].copy())
        ret = [new_probs, new_states]
            
    return ret

In [5]:
def Reward(curr, prev):
    if ((prev[3] == 1) and(curr[3]==0)):
        if ((curr[0] in [0,3]) and (curr[4]>0)):
            return -40
    return 0

In [14]:
def initialize():
    count = 0
    for state, _ in np.ndenumerate(state_to_idx):
        state_to_idx[state] = count
        states.append(state)

        if state[0] == 0:
            actions = ["UP", "LEFT", "DOWN", "RIGHT", 'STAY', "SHOOT", "HIT", "NONE"]
        elif state[0] == 1:
            actions = ["DOWN", 'STAY', "CRAFT","NONE"]
        elif state[0] == 2:
            actions = ["UP", "GATHER" , 'STAY', "NONE"]
        elif state[0] == 3:
            actions = ["LEFT", "SHOOT", "HIT", "NONE"]
        elif state[0] == 4:
            actions = ["RIGHT", 'STAY', "SHOOT", "NONE"]
        
        if "SHOOT" in actions:
            if state[2] == 0:
                actions.remove("SHOOT")
        if "CRAFT" in actions:
            if state[1] == 0:
                actions.remove("CRAFT")
        
        
        if state[4] == 0:
            actions = ["NONE"]        
        if state[4] > 0 and ("NONE" in actions):
            actions.remove("NONE")
        
        for flem in actions:
            state_action_pairs.append((state, flem))
        count += 1

In [18]:
def calculate_A():
    final = np.zeros((len(states),len(state_action_pairs)))
    for i, state_action in enumerate(state_action_pairs):
        if state_action[1] == "NONE":
            final[int(state_to_idx[state_action[0]])][i] = 1
        else:
            prob, state = calc_prob(list(state_action[0]), state_action)
            for j, p in enumerate(prob):
                final[int(state_to_idx[state_action[0]])][i] += p
                final[int(state_to_idx[tuple(state[j])])][i] -= p
    return final

def calculate_alpha(general, start):
    if general:
        return np.full((len(states),1/600))
    else:
        final = np.zeros((len(states), 1))
        final[state_to_idx[start]][0] = 1
        return final

def calculate_R():
    final = np.zeros(len(state_action_pairs))
    for i , state_action in enumerate(state_action_pairs):
        if state_action[1] != "NONE":
            probablity, state = calc_prob(list(state_action[1]), state_action[1])
            for j, prob in enumerate(probablity):
                final[i] += prob*( Reward(state_action[0], state[j]) + STEPCOST )

In [8]:
def LinProg():
    x = cp.Variable(shape=(len(idx_to_state_action), 1), name="x")
    constraints = [cp.matmul(A, x) == alpha, x >= 0]
    objective = cp.Maximize(cp.matmul(r,x))
    problem = cp.Problem(objective, constraints)
    
    return problem.solve()

In [19]:
start_state = (0,0,0,0,100)
general_case = True
initialize()
print(len(state_action_pairs))
A = calculate_A()
R = calculate_R()
alpha = calculate_alpha(general_case, start_state)
LinProg(A, alpha, R)

11491


TypeError: list indices must be integers or slices, not str