# Setup

## Imports

In [37]:
from typing import \
    Callable, \
    List, \
    NewType, \
    Tuple

import numpy as np

### Temporary

In [2]:
from collections import Counter

## Types

In [38]:
# Domain aliases. NewType only matters to static type checkers; at runtime
# each alias simply returns the value it is called with.

# Scalar aliases
Encounter = NewType('Encounter', int)  # encounter-type code (EMPTY / MONSTER / TREASURE)
Reward = NewType('Reward', int)  # reward attached to an encounter

# Composite aliases
Position = NewType('Position', Tuple[int, int])  # pair of grid indices
Move = NewType('Move', Tuple[Position, Reward])  # a position paired with a reward
History = NewType('History', List[Move])  # list of moves

## Constants

In [54]:
# Game defaults
NUM_MOVES = 10
INIT_POS = Position((0, 0))

# Encounter Types (codes 0, 1, 2)
EMPTY, MONSTER, TREASURE = (Encounter(code) for code in range(3))

# Reward attached to each encounter type
ENCOUNTER_TO_REWARD = {
    EMPTY: 0,
    MONSTER: -10,
    TREASURE: 10,
}

# Canonical ordering of encounter types; follows the insertion order of the
# mapping above, i.e. [EMPTY, MONSTER, TREASURE].
ENCOUNTER_TO_REWARD_ORDERING = list(ENCOUNTER_TO_REWARD)

# Classes

## Cell Class

In [40]:
class Cell:
    """
    A cell that yields a random reward each time it is called.
    """

    def __init__(self, p_empty: float, p_monster: float, p_treasure: float):
        """
        Store the probability of each encounter type
        """
        self._p_empty, self._p_monster, self._p_treasure = p_empty, p_monster, p_treasure

    @property
    def p_empty(self):
        """Probability of an empty encounter."""
        return self._p_empty

    @property
    def p_monster(self):
        """Probability of a monster encounter."""
        return self._p_monster

    @property
    def p_treasure(self):
        """Probability of a treasure encounter."""
        return self._p_treasure

    def __call__(self) -> Reward:
        """
        Sample one encounter and return its reward

        A single uniform draw is compared against the cumulative thresholds
        p_empty and p_empty + p_monster; anything above both is treated as
        treasure, so p_treasure itself is never read here.
        """
        draw = np.random.rand()

        # Probabilities are read through the properties so that subclasses
        # overriding them also change the sampling.
        if draw < self.p_empty:
            encounter = EMPTY
        elif draw < self.p_monster + self.p_empty:
            encounter = MONSTER
        else:
            encounter = TREASURE

        return ENCOUNTER_TO_REWARD[encounter]

    def __str__(self) -> str:
        """
        Three-line summary of the probabilities, two decimals each
        """
        summary_lines = (
            f'Probability of Empty: {self.p_empty:.2f}',
            f'Probability of Monster: {self.p_monster:.2f}',
            f'Probability of Treasure: {self.p_treasure:.2f}',
        )
        return '\n'.join(summary_lines)

### Testing

In [41]:
# Probabilities must sum to 1. The previous third argument (1) only appeared
# to work because Cell.__call__ never reads p_treasure.
cell = Cell(0.2, 0.7, 0.1)

# Empirical reward frequencies should approach 20% / 70% / 10%
# for rewards 0 / -10 / 10.
Counter([cell() for _ in range(100000)])

Counter({0: 20014, -10: 69967, 10: 10019})

## Grid Class

In [42]:
class Grid:
    """
    Two-dimensional board of cells addressed by (i, j) positions, where i
    indexes the outer list (rows) and j the inner lists (columns).
    """

    def __init__(self, cells: List[List[Cell]]):
        """
        Init with cells

        The column count is taken from the first row, so every row is
        expected to have the same length. An empty list of rows gives a
        0 x 0 grid instead of raising IndexError.
        """
        self.cells = cells
        self.num_rows = len(cells)
        self.num_cols = len(cells[0]) if cells else 0

    def valid_next_positions(self, position: Position) -> List[Position]:
        """
        Returns list of valid next cells

        Candidates are (i-1, j), (i+1, j), (i, j-1), (i, j+1), in that order.
        Only candidates inside the grid are returned, so a position that is
        itself off the grid never yields off-grid neighbours.
        """
        i, j = position
        candidates = ((i - 1, j), (i + 1, j), (i, j - 1), (i, j + 1))

        return [
            Position((row, col))
            for row, col in candidates
            if 0 <= row < self.num_rows and 0 <= col < self.num_cols
        ]

    def __getitem__(self, position: Position) -> Cell:
        """
        Return the cell stored at `position`
        """
        i, j = position
        return self.cells[i][j]

### Testing

In [43]:
I, J = 3, 3


def _random_probabilities():
    """Draw three shares that sum to 1 (up to rounding), in shuffled order."""
    first = np.random.rand()
    second = np.random.rand() * (1 - first)
    shares = [first, second, 1 - first - second]
    np.random.shuffle(shares)
    return shares


# I rows of J cells, each with its own random probabilities
cells = [[Cell(*_random_probabilities()) for _ in range(J)] for _ in range(I)]
grid = Grid(cells)

# Neighbours of the centre cell, the centre cell itself, and a check that
# indexing the grid returns the same object as the nested list.
print(grid.valid_next_positions((1,1)))
print(grid[1, 1])
print(grid[1, 2] == cells[1][2])

[(0, 1), (2, 1), (1, 0), (1, 2)]
Probability of Empty: 0.25
Probability of Monster: 0.07
Probability of Treasure: 0.68
True


## ModelCell and ModelGrid Classes

In [69]:
class ModelCell(Cell):
    """
    Cell whose probabilities are estimated from the encounters recorded so far.
    """

    def __init__(self):
        """
        Start with no encounters recorded
        """
        # The base-class probabilities are placeholders: every probability
        # property is overridden below and computed from the counts instead.
        super().__init__(0, 0, 0)

        self.num_encounters = 0
        self.encounters = {kind: 0 for kind in (EMPTY, MONSTER, TREASURE)}

    def _observed_share(self, encounter):
        """Share of recorded encounters equal to `encounter`; 1/3 when none are recorded."""
        if self.num_encounters == 0:
            return 1/3

        return self.encounters[encounter] / self.num_encounters

    @property
    def p_empty(self):
        """Estimated probability of an empty encounter."""
        return self._observed_share(EMPTY)

    @property
    def p_monster(self):
        """Estimated probability of a monster encounter."""
        return self._observed_share(MONSTER)

    @property
    def p_treasure(self):
        """Estimated probability of a treasure encounter."""
        return self._observed_share(TREASURE)

    @property
    def expectation(self):
        """Expected reward under the current probability estimates."""
        probabilities = (self.p_empty, self.p_monster, self.p_treasure)
        rewards = (ENCOUNTER_TO_REWARD[kind] for kind in ENCOUNTER_TO_REWARD_ORDERING)

        return np.sum([p * reward for p, reward in zip(probabilities, rewards)])

    def update(self, encounter: Encounter):
        """
        Record one more observation of `encounter`
        """
        self.num_encounters += 1
        self.encounters[encounter] += 1
        
class ModelGrid(Grid):
    """
    Grid of ModelCell instances that can be updated with observed encounters.
    """

    def __init__(self, cells: List[List[ModelCell]]):
        """
        Init with model cells; storage is handled by Grid
        """
        super().__init__(cells)

    def update(self, position: Position, encounter: Encounter):
        """
        Record `encounter` (e.g. monster) on the cell at `position`
        """
        # NOTE(review): this echo looks like leftover debugging, but the
        # notebook's recorded outputs include it, so it is kept for now.
        print(position)

        observed_cell = self[position]
        observed_cell.update(encounter)

### Testing

In [89]:
cell = ModelCell()

# No observations yet: each reward is sampled with probability 1/3.
print(Counter([cell() for _ in range(10000)]))

# After a single EMPTY observation every sample is the EMPTY reward.
cell.update(EMPTY)
print(Counter([cell() for _ in range(10000)]))

# Second observation is MONSTER or TREASURE, so p_empty drops to 1/2.
cell.update(np.random.choice([MONSTER, TREASURE]))
print(Counter([cell() for _ in range(10000)]))

# One EMPTY plus one MONSTER-or-TREASURE observation gives -5 or +5.
# (Previously a bare expression whose value was silently discarded.)
assert cell.expectation in (-5.0, 5.0)

I, J = 3, 3

model_cells = [[ModelCell() for _ in range(J)] for _ in range(I)]

model_grid = ModelGrid(model_cells)
position = Position((1,1))

# Indexing the grid must return the very object stored in the nested list.
# (Previously a bare comparison whose result was silently discarded.)
assert model_grid[position] is model_cells[1][1]

print(model_grid[position].expectation)
model_grid.update(position, TREASURE)
print(model_grid[position].expectation)

Counter({0: 3424, 10: 3319, -10: 3257})
Counter({0: 10000})
Counter({0: 5087, -10: 4913})
0.0
(1, 1)
10.0


## Game Class

In [59]:
class Game:
    """
    Wraps a Grid and draws it relative to a player's position.
    """

    def __init__(self, grid: Grid):
        """
        Init with grid
        """
        self.grid = grid

    def _render(self, player_pos: Position) -> str:
        """
        Build the board text: 'P' marks the player, ' ' a valid next
        position, 'x' any other cell; each row ends with a newline.
        """
        grid = self.grid
        # The valid next positions depend only on player_pos, so compute
        # them once rather than once per cell.
        reachable = grid.valid_next_positions(player_pos)

        rows = []
        for i in range(grid.num_rows):
            row = []
            for j in range(grid.num_cols):
                cell_pos = Position((i, j))
                if cell_pos == player_pos:
                    marker = 'P'
                elif cell_pos in reachable:
                    marker = ' '
                else:
                    marker = 'x'
                row.append(f'|{marker}| ')
            rows.append(''.join(row) + '\n')

        return ''.join(rows)

    def print(self, player_pos: Position) -> None:
        """
        Pretty print board with player's position

        Writes the board to stdout and returns None.
        """
        print(self._render(player_pos))

### Testing

In [60]:
Game(grid).print(Position((1,2)))

|x| |x| | | 
|x| | | |P| 
|x| |x| | | 



## Player Class

In [61]:
# A strategy takes the grid and the current position and returns the position
# to move to next (the strategies in this notebook return a Position, not a Cell).
StrategyFunction = NewType('StrategyFunction', Callable[[Grid, Position], Position])

class Player:
    """
    Skeleton of a player; none of the methods is implemented yet.
    """

    def __init__(self, game: Game, strategy_fn: StrategyFunction, num_moves: int = NUM_MOVES):
        """
        Init with strategy function, grid model, history
        """
        # TODO: not implemented; the arguments are currently ignored.

    def next_move(self):
        """
        Perform next move according to strategy function
        """
        # TODO: not implemented; currently returns None.

    def play(self) -> History:
        """
        Play until remaining moves expended, then return History
        """
        # TODO: not implemented; currently returns None rather than a History.

# Strategy Functions

## $\varepsilon\text{-greedy}$

In [70]:
def epsilon_greedy_maker(epsilon: float) -> "StrategyFunction":
    """
    Build an epsilon-greedy strategy.

    With probability 1 - epsilon the returned strategy picks the valid next
    position whose cell has the highest `expectation` (the greedy move).
    With probability epsilon it explores, picking uniformly among the other
    valid next positions. When the greedy move is the only valid one it is
    returned regardless.

    Raises ValueError (from `max`) if there is no valid next position.
    """
    def epsilon_greedy(model_grid: "ModelGrid", position: "Position") -> "Position":
        # epsilon is the exploration probability: 0 never explores and 1
        # always explores, because rand() is drawn from [0, 1).
        explore = np.random.rand() < epsilon

        valid_positions = model_grid.valid_next_positions(position)
        greedy_position = max(valid_positions, key=lambda p: model_grid[p].expectation)

        # Exploration excludes the greedy move. Building the list up front
        # avoids retrying forever when there is no alternative.
        alternatives = [p for p in valid_positions if p != greedy_position]

        if not explore or not alternatives:
            return greedy_position

        # Pick by index: np.random.choice rejects a list of tuples because
        # it converts it to a 2-D array.
        return alternatives[np.random.randint(len(alternatives))]

    return epsilon_greedy

### Testing

In [90]:
# Record a TREASURE observation on the centre cell of the model grid.
pos = Position((1, 1))
model_grid.update(pos, TREASURE)

(1, 1)


## $\alpha\text{-index}$, Maximin, Maximax

In [15]:
def alpha_index_maker(alpha: float) -> "StrategyFunction":
    """
    Build an alpha-index strategy for the given alpha.

    The decision rule is not implemented yet: the returned strategy raises
    NotImplementedError when called. The maker now returns that strategy;
    previously it returned None, leaving `maximin` and `maximax` as None.
    """
    def alpha_index(model: "Grid", position: "Position") -> "Position":
        # TODO: implement the alpha-index decision rule.
        raise NotImplementedError(f'alpha_index strategy (alpha={alpha}) is not implemented yet')

    return alpha_index

# alpha = 0 and alpha = 1 are exposed under the names maximin and maximax.
maximin: "StrategyFunction" = alpha_index_maker(0)
maximax: "StrategyFunction" = alpha_index_maker(1)