# Setup

## Imports

In [15]:
from typing import \
    Callable, \
    List, \
    NewType, \
    Tuple


import numpy as np

### Temporary

In [33]:
from collections import Counter

## Types

In [12]:
# Nominal types used in annotations throughout the file. At runtime a NewType
# call simply returns its argument, so Position((0, 0)) is a plain tuple.
# Declaration order matters: Move refers to Position and Reward, History to Move.
Encounter = NewType('Encounter', int)  # integer code for a kind of encounter
Position = NewType('Position', Tuple[int, int])  # pair of ints, unpacked as (i, j) by Grid
Reward = NewType('Reward', int)  # integer reward value
Move = NewType('Move', Tuple[Position, Reward])  # a position paired with a reward
History = NewType('History', List[Move])  # list of moves

## Constants

In [16]:
# Initial position and default number of moves
INIT_POS = Position((0, 0))
NUM_MOVES = 10

# Encounter Types
# (the NewType wrappers are no-ops at runtime; the values are still plain ints)
EMPTY = Encounter(0)
MONSTER = Encounter(1)
TREASURE = Encounter(2)

# Reward associated with each encounter type
ENCOUNTER_TO_REWARD = {
    EMPTY: Reward(0),
    MONSTER: Reward(-10),
    TREASURE: Reward(10),
}

# Classes

## Cell Class

In [66]:
class Cell:
    """
    Grid square that yields a randomly drawn reward each time it is called
    """

    def __init__(self, p_empty: float, p_monster: float, p_treasure: float):
        """
        Store the probability of each encounter type
        """
        self.p_empty, self.p_monster, self.p_treasure = p_empty, p_monster, p_treasure

    def __call__(self) -> Reward:
        """
        Draw one encounter at random and return its reward
        """
        draw = np.random.rand()

        # Cumulative thresholds: below p_empty is empty, below
        # p_monster + p_empty is a monster, everything else is treasure.
        # p_treasure itself is never consulted here.
        if draw < self.p_empty:
            encounter = EMPTY
        elif draw < self.p_monster + self.p_empty:
            encounter = MONSTER
        else:
            encounter = TREASURE

        return ENCOUNTER_TO_REWARD[encounter]

    def __str__(self) -> str:
        """
        Three-line summary of the stored probabilities, two decimals each
        """
        p_empty, p_monster, p_treasure = self.p_empty, self.p_monster, self.p_treasure

        return f'Probability of Empty: {p_empty:.2f}\nProbability of Monster: {p_monster:.2f}\nProbability of Treasure: {p_treasure:.2f}'

### Testing

In [68]:
# Sanity check: the empirical reward frequencies should approach the cell's
# probabilities. The three probabilities must sum to 1 (0.2 + 0.7 + 0.1).
c = Cell(0.2, 0.7, 0.1)

Counter(c() for _ in range(100000))

Counter({10: 10012, -10: 69970, 0: 20018})

## Grid Class

In [69]:
class Grid:
    """
    Two-dimensional board of cells addressed by (row, column) positions
    """

    def __init__(self, cells: List[List[Cell]]):
        """
        Init with cells

        cells is a list of rows, each row a list of Cell. The column count
        is taken from the first row; an empty list gives a 0 x 0 grid.
        """

        self.cells = cells
        self.num_rows = len(cells)
        # Guard the first-row lookup so an empty grid does not raise IndexError
        self.num_cols = len(cells[0]) if cells else 0

    def valid_next_positions(self, position: Position) -> List[Position]:
        """
        Returns list of valid next cells

        These are the in-bounds positions at most one row and one column
        away from position (diagonals included), excluding position itself.
        Rows are listed in the order current, above, below; within each row
        the columns are listed in the order current, left, right.
        """

        i, j = position

        valid_is = [i]
        valid_js = [j]

        if i > 0:
            valid_is.append(i - 1)
        if i < self.num_rows - 1:
            valid_is.append(i + 1)
        if j > 0:
            valid_js.append(j - 1)
        if j < self.num_cols - 1:
            valid_js.append(j + 1)

        # Compare the unpacked indices rather than the position object, so a
        # position passed as a list or array is still excluded correctly
        return [
            Position((next_i, next_j))
            for next_i in valid_is
            for next_j in valid_js
            if (next_i, next_j) != (i, j)
        ]

    def __getitem__(self, position: Position) -> Cell:
        """
        Returns the cell at position (row, column)
        """
        i, j = position
        return self.cells[i][j]

### Testing

In [78]:
I, J = 3, 3


def _random_cell():
    """
    Build a Cell from three random probabilities summing to 1 (up to rounding)
    """
    first = np.random.rand()
    second = np.random.rand() * (1 - first)
    weights = [first, second, 1 - first - second]
    # Shuffle so the three draws are assigned to encounter types at random
    np.random.shuffle(weights)
    return Cell(*weights)


# Cells are created row by row, J cells per row
cells = [[_random_cell() for _ in range(J)] for _ in range(I)]

grid = Grid(cells)

# Neighbours of the corner, then the same cell fetched via the grid and directly
print(grid.valid_next_positions((0,0)))
print(grid[1, 1])
print(cells[1][1])

[(0, 1), (1, 0), (1, 1)]
Probability of Empty: 0.06
Probability of Monster: 0.14
Probability of Treasure: 0.80
Probability of Empty: 0.06
Probability of Monster: 0.14
Probability of Treasure: 0.80


## ModelCell and ModelGrid Classes

In [6]:
class ModelCell(Cell):
    """
    Skeleton subclass of Cell; none of its members are implemented yet

    The three probabilities are read-only properties here, so Cell.__init__
    (which assigns them as attributes) cannot be used on this class as is.
    """

    def __init__(self):
        """
        Init, meant to track num encounters etc (stub: stores nothing)
        """

    @property
    def p_empty(self):
        """
        Stub: currently always None
        """

    @property
    def p_monster(self):
        """
        Stub: currently always None
        """

    @property
    def p_treasure(self):
        """
        Stub: currently always None
        """
        
class ModelGrid(Grid):
    """
    Grid whose cells are ModelCell instances
    """

    def __init__(self, cells: List[List[ModelCell]]):
        """
        Init with model cells
        """
        # Delegate to Grid so cells, num_rows and num_cols are set; without
        # this the inherited valid_next_positions and __getitem__ raise
        # AttributeError.
        super().__init__(cells)

    def update(self, position: Position, encounter: Encounter):
        """
        Update cell in position with encounter e.g. monster

        Not implemented yet: currently does nothing.
        """

## Game Class

In [7]:
class Game:
    """
    Skeleton of the game object; no behaviour is implemented yet
    """

    def __init__(self, grid: Grid):
        """
        Init with grid (stub: the grid is not stored)
        """

    def print(self, pos: Position) -> str:
        """
        Pretty print board with player's position (stub: currently returns None)
        """

## Player Class

In [8]:
# A strategy takes (grid, current position) and returns a Position, matching
# the annotations of the strategy functions defined in this file.
StrategyFunction = NewType('StrategyFunction', Callable[[Grid, Position], Position])

class Player:
    """
    Skeleton of the player; no behaviour is implemented yet
    """

    def __init__(
        self,
        game: Game,
        strategy_fn: StrategyFunction,
        num_moves: int = NUM_MOVES,
    ):
        """
        Init with strategy function, grid model, history (stub: stores nothing)
        """

    def next_move(self):
        """
        Perform next move according to strategy function (stub: does nothing)
        """

    def play(self) -> History:
        """
        Play until remaining moves expended, then return History
        (stub: currently returns None)
        """

# Strategy Functions

## $\varepsilon\text{-greedy}$

In [9]:
def epsilon_greedy_maker(epsilon: float) -> StrategyFunction:
    """
    Build an epsilon-greedy strategy function for the given epsilon

    The strategy itself is not implemented yet: calling the returned
    function raises NotImplementedError.
    """
    def epsilon_greedy(model: Grid, position: Position) -> Position:
        # TODO: pick the next position from model, starting at position
        raise NotImplementedError('epsilon_greedy strategy is not implemented yet')

    # Return the closure so callers receive a callable rather than None
    return StrategyFunction(epsilon_greedy)

## $\alpha\text{-index}$, Maximin, Maximax

In [11]:
def alpha_index_maker(alpha: float) -> StrategyFunction:
    """
    Build an alpha-index strategy function for the given alpha

    The strategy itself is not implemented yet: calling the returned
    function raises NotImplementedError.
    """
    def alpha_index(model: Grid, position: Position) -> Position:
        # TODO: pick the next position from model, starting at position
        raise NotImplementedError('alpha_index strategy is not implemented yet')

    # Return the closure so maximin and maximax below are callables, not None
    return StrategyFunction(alpha_index)


# Maximin and maximax are the alpha-index strategy with alpha = 0 and alpha = 1
maximin: StrategyFunction = alpha_index_maker(0)
maximax: StrategyFunction = alpha_index_maker(1)