<a href="https://colab.research.google.com/github/farhanhubble/discover-drl/blob/master/Rediscovering_RL_Notebook_0_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Symbols used to mark the ball (agent), the opposing players and the goal.
agent, opponent, goal = '⚽', '👕', '🥅'

# 5x4 playing field: the ball starts in the top-left corner and the goal
# sits in the bottom-right corner; a single space marks an empty cell.
arena = [
    [agent, ' ', opponent, ' '],
    [' ', ' ', ' ', opponent],
    [' ', opponent, ' ', ' '],
    [' ', ' ', ' ', opponent],
    [' ', opponent, ' ', goal],
]

In [12]:
import numpy as np

class Foolsball(object):
  """A small grid-world "football" environment.

  The field is a 2-D map of single-character cells. Every cell is identified
  by an integer state id computed in row-major order
  (``state = row * n_cols + col``). The agent moves one cell at a time with
  the actions ``'n'``, ``'e'``, ``'w'`` and ``'s'``.

  Rewards handed out by :meth:`step`:
    * ``-1`` for a move that would leave the field (the agent stays put),
    * ``+5`` for reaching the goal (episode ends),
    * ``-5`` for running into an opponent (episode ends),
    * ``-1`` for any other valid move.
  """

  def __to_state__(self,row,col):
    """Return the row-major state id of the cell at (row, col)."""
    return row*self.n_cols + col

  def __to_indices__(self, state):
    """Return the ``(row, col)`` tuple of integer indices for a state id.

    This is the inverse of ``__to_state__``: both the quotient and the
    remainder are taken with respect to the number of columns.
    """
    return divmod(state, self.n_cols)

  def __deserialize__(self,map:list,agent:str,opponent:str, goal:str):
    """Parse the map and record its dimensions and special cells.

    Args:
      map: list of rows, each row a list of cell symbols. Row lengths are
        taken from the first row.
      agent: symbol marking the agent's starting cell.
      opponent: symbol marking cells occupied by opponents.
      goal: symbol marking the goal cell.

    Returns:
      The state id of the agent's starting cell.

    Raises:
      AssertionError: if the map has no agent, no goal or no opponents.
    """
    self.n_rows = len(map)
    self.n_cols = len(map[0])
    self.n_states = self.n_rows * self.n_cols 
    self.map = np.asarray(map)

    self.init_state = None
    self.goal = None
    self.opponents = []

    for row in range(self.n_rows):
      for col in range(self.n_cols):
        cell = map[row][col]
        if cell == agent:
          self.init_state = self.__to_state__(row,col)

        elif cell == opponent:
          self.opponents.append(self.__to_state__(row,col))

        elif cell == goal:
          self.goal = self.__to_state__(row,col)

    ## The message must be a string expression; print() would evaluate to None.
    assert self.init_state is not None, f"Map {map} does not specify an agent {agent} location"
    assert self.goal is not None, f"Map {map} does not specify a goal {goal} location"
    assert self.opponents, f"Map {map} does not specify any opponents {opponent} location"

    return self.init_state


  def __get_next_state_on_action__(self,state,action):
    """Return the state reached by taking `action` in `state`.

    A move that would leave the field returns `state` unchanged. An action
    other than 'n', 'e', 'w' or 's' raises KeyError.
    """
    row, col = self.__to_indices__(state)
    action_to_index_delta = {'n':[-1,0], 'e':[0,+1], 'w':[0,-1], 's':[+1,0]}

    row_delta, col_delta = action_to_index_delta[action]
    new_row , new_col = row+row_delta, col+col_delta

    ## Return current state if next state is invalid
    if not(0<=new_row<self.n_rows) or not(0<=new_col<self.n_cols):
      return state  
    
    return self.__to_state__(new_row, new_col)

  
  def __init__(self,map,agent,opponent,goal):
    """Build the environment from a map and the symbols used in it."""
    self.state = self.__deserialize__(map,agent,opponent,goal)
    self.done = False
    self.actions = ['n','e','w','s']
    ## Needs self.actions and the map dimensions, so it is built last.
    self.transitions = self.__install_transition_table__()

  
  def __install_transition_table__(self):
    """Return the full transition table.

    The table is a nested dict: ``table[state][action]`` is the next state.
    """
    return {
        s: {a: self.__get_next_state_on_action__(s,a) for a in self.actions}
        for s in range(self.n_states)
    }


  def reset(self):
    """Move the agent back to its starting cell and return that state."""
    self.state = self.init_state
    self.done  = False
    return self.state

  
  def step(self,action):
    """Apply `action` and return ``(next_state, reward, done)``.

    The agent's current state is updated to `next_state`. Once `done` is
    True, `reset()` must be called before stepping again.

    Raises:
      AssertionError: if called after the episode has ended.
      KeyError: if `action` is not one of 'n', 'e', 'w', 's'.
    """
    assert not self.done, (
        f'No actions supported in a terminal state {self.state}.'+
        ' Check the "done" flag before calling step()')
    next_state = self.__get_next_state_on_action__(self.state, action)

    ## Transition rejected due to illegal action (move)
    if next_state == self.state:
      reward = -1
      done = False
    
    ## Goal! (self.goal already holds a state id)
    elif next_state == self.goal:
      reward = +5
      done = self.done = True
    
    ## Ran into opponent. Heavy penalty. (self.opponents holds state ids)
    elif next_state in self.opponents:
      reward = -5
      done = self.done = True

    ## Made a safe and valid move. Penalize to take the shortest route.  
    else:
      reward = -1
      done = False

    ## Record the move so the next step starts from the new cell.
    self.state = next_state
    return next_state, reward, done

In [13]:
# Instantiate the environment from the arena map and its marker symbols.
foolsball = Foolsball(map=arena, agent=agent, opponent=opponent, goal=goal)