In [134]:
import numpy as np
import pandas as pd # pandas is not required in codebase (only useful for development/visusalisation)

## Functions

In [121]:
# Note: in codebase, use get_world_dimension(world)
# Setup value map
def create_value_map(world_dim=(9,9), OUTER_MAP_VALUES=((-100, -100),(-100, -100))):
    pad_dim = (world_dim[0]-1, world_dim[1]-1)
    map_dim = np.add(world_dim,np.multiply(pad_dim,2))

    # Initialise value map
    world_map = np.zeros(world_dim)
    value_map = np.pad(world_map, (pad_dim,pad_dim), 'constant', constant_values=OUTER_MAP_VALUES)
    reward_value_map = value_map
    rval_offset = pad_dim[0]-1

    # Initialise base map for reward mask matrix creation
    base_map = np.zeros(map_dim)
    
    return reward_value_map, rval_offset
    
# Update value map based on reward entities input
def update_value_map(rval, value_map, rval_offset):
    reward = rval[2]
    reward_discount = reward/abs(reward)
    
    if reward > 0:
        # positive reward value
        for i, value in enumerate(range(0, reward, 1)):
            xstart= rval[1] + rval_offset - i
            xend = rval[1] + rval_offset + 1 + i
            ystart = rval[0] + rval_offset - i
            yend = rval[0] + rval_offset + 1 + i
            
            # Updates reward values in the map matrix.
            value_map[xstart:xend,ystart:yend] = value_map[xstart:xend,ystart:yend] + reward_discount
            
    elif reward < 0:
        # negative reward value
        for i, value in enumerate(range(0, reward, -1)):
            xstart= rval[1] + rval_offset - i
            xend = rval[1] + rval_offset + 1 + i
            ystart = rval[0] + rval_offset - i
            yend = rval[0] + rval_offset + 1 + i

            # Updates reward values in the map matrix.
            value_map[xstart:xend,ystart:yend] = value_map[xstart:xend,ystart:yend] + reward_discount
    else:
        # Reward assigned is 0.
        pass

## Inputs

In [168]:
# Reward entities inputs (x, y, reward value)
reward_entities = {
#     'bomb' : [5,6,-6],
    'ammo' : [1,1,10],
}

reward_entities


{'ammo': [1, 1, 9]}

## Updating the value map

In [169]:
# Initialise value map
reward_value_map, rval_offset = setup_value_map()

# Updates value map based on reward entities input
for rval in reward_entities.values():
    update_value_map(rval, reward_value_map, rval_offset)

## Visualising the value map

In [170]:
# Value map visualisation
df_map = pd.DataFrame(data=reward_value_map)
df_map_visual = df_map.iloc[7:18,7:18]
df_map_visual

Unnamed: 0,7,8,9,10,11,12,13,14,15,16,17
7,-92.0,-92.0,-92.0,-93.0,-94.0,-95.0,-96.0,-97.0,-98.0,-99.0,-100.0
8,-92.0,9.0,8.0,7.0,6.0,5.0,4.0,3.0,2.0,1.0,-100.0
9,-92.0,8.0,8.0,7.0,6.0,5.0,4.0,3.0,2.0,1.0,-100.0
10,-93.0,7.0,7.0,7.0,6.0,5.0,4.0,3.0,2.0,1.0,-100.0
11,-94.0,6.0,6.0,6.0,6.0,5.0,4.0,3.0,2.0,1.0,-100.0
12,-95.0,5.0,5.0,5.0,5.0,5.0,4.0,3.0,2.0,1.0,-100.0
13,-96.0,4.0,4.0,4.0,4.0,4.0,4.0,3.0,2.0,1.0,-100.0
14,-97.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,2.0,1.0,-100.0
15,-98.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,-100.0
16,-99.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-100.0


In [108]:
# reward = rval[2]
# rdiscount = reward/abs(reward)
# for i, value in enumerate(range(0, reward, -1)):
#     xstart= rval[1] + rval_offset - i
#     xend = rval[1] + rval_offset + 1 + i
#     ystart = rval[0] + rval_offset - i
#     yend = rval[0] + rval_offset + 1 + i
    
#     reward_value_map[xstart:xend,ystart:yend] = reward_value_map[xstart:xend,ystart:yend] + rdiscount

In [3]:
a = (2,2)

In [5]:
(a[0]-1,a[1]-1)

(1, 1)