In [1]:
import numpy as np

class Grid:
    def __init__(self):
        self.grid = np.array([
            [' ', '*', ' ', ' ', ' '],
            [' ', '*', ' ', ' ', ' '],
            [' ', ' ', ' ', '*', 'O'],
            [' ', '*', 'O', ' ', ' '],
            [' ', ' ', ' ', ' ', '<3'],
            ])
        
    def get(self, *key):
        y, x, = key
        return self.grid[y, x]
        
    def copy(self):
        return np.copy(self.grid)
    

class Game:
    def __init__(self, prnt_rwd=True, fName=None):
        self.prnt_rwd = prnt_rwd
        self.state = (0, 0)
        self.reward_per_time = -1
        self.reward_on_hole = -5
        self.reward_on_love = 5
        self.cum_rew = 25
        self.grid = Grid()
        self.game_over = False
        
    def __repr__(self):
        return "In this game we have to go to "\
    "the heart symbol to find the purpose "\
    "of our life and while going there "\
    "we need to avoid 'O' as they are rabbit holes"
        
    def is_game_finished(self):
        return self.grid.get(*self.state) in ['O', '<3']

    def get_cum_reward(self):
        return self.cum_rew

    def _helper(self):
        if self.grid.get(*self.state) == 'O':
            return '0'
        if self.grid.get(*self.state) == '<3':
            return '<@3'
        if self.grid.get(*self.state) == ' ':
            return '@'

    def render(self):
        prt = self.grid.copy()
        prt[self.state[0], self.state[1]] = self._helper() 
        print(prt)

    def act(self, action):
        if self.game_over:
            print ('The Final Cumulated Reward: ', self.cum_rew)
            print ('Game Over!!')
            return None, None

        cr_reward = self.get_reward()
        movement = self.get_direction(action)
        if not self._is_valid_move(movement):
            print('Not a valid Move')
            next_state = self.state
        else:
            next_state = self.move(movement)
            self.game_over = self.is_game_finished()
        self.cum_rew += cr_reward
        if self.prnt_rwd:
            print ('Current Reward: ', cr_reward)
            print ('Current State: ', next_state)
        return [tuple(next_state), self.grid.grid], cr_reward

    def move(self, movement, statechange=True):
        next_state = [0,0]
        next_state[0] = self.state[0] + movement[0]
        next_state[1] = self.state[1] + movement[1]
        if statechange:
            self.state = next_state
        return next_state

    def get_reward(self):
        if self.grid.get(*self.state) == '<3': # heart
            return self.reward_on_love
        elif self.grid.get(*self.state) == 'O': #hole
            return self.reward_on_hole
        elif self.grid.get(*self.state) == ' ': #empty
            return self.reward_per_time

    def _is_valid_move(self, movement):
        y_hyp = self.state[0] + movement[0] 
        x_hyp = self.state[1] + movement[1]
        if x_hyp < 0 or y_hyp < 0:
            return False

        try:
            if self.grid.get(y_hyp, x_hyp) == '*': # a wall
                return False
            else: # all cool
                return True
        except IndexError: # don't go out of matrix
            return False

    def get_direction(self, action):
        if action == 'north':
            return (-1, 0)
        elif action == 'south':
            return (1, 0)
        elif action == 'west':
            return (0, -1)
        else: # action == 'east'
            return (0, 1)


In [2]:
gg = Game()
gg

In this game we have to go to the heart symbol to find the purpose of our life and while going there we need to avoid 'O' as they are rabbit holes

In [3]:
gg.render()

[['@' '*' ' ' ' ' ' ']
 [' ' '*' ' ' ' ' ' ']
 [' ' ' ' ' ' '*' 'O']
 [' ' '*' 'O' ' ' ' ']
 [' ' ' ' ' ' ' ' '<3']]


In [4]:
next_state, cr_reward = gg.act('north')
gg.render()
gg.get_cum_reward()

Not a valid Move
Current Reward:  -1
Current State:  (0, 0)
[['@' '*' ' ' ' ' ' ']
 [' ' '*' ' ' ' ' ' ']
 [' ' ' ' ' ' '*' 'O']
 [' ' '*' 'O' ' ' ' ']
 [' ' ' ' ' ' ' ' '<3']]


24

In [5]:
next_state, cr_reward = gg.act('south')
gg.render()

Current Reward:  -1
Current State:  [1, 0]
[[' ' '*' ' ' ' ' ' ']
 ['@' '*' ' ' ' ' ' ']
 [' ' ' ' ' ' '*' 'O']
 [' ' '*' 'O' ' ' ' ']
 [' ' ' ' ' ' ' ' '<3']]


In [6]:
next_state, cr_reward = gg.act('west')
gg.render()
gg.get_cum_reward()

Not a valid Move
Current Reward:  -1
Current State:  [1, 0]
[[' ' '*' ' ' ' ' ' ']
 ['@' '*' ' ' ' ' ' ']
 [' ' ' ' ' ' '*' 'O']
 [' ' '*' 'O' ' ' ' ']
 [' ' ' ' ' ' ' ' '<3']]


22

In [7]:
next_state, cr_reward = gg.act('south')
gg.render()
gg.get_cum_reward()

Current Reward:  -1
Current State:  [2, 0]
[[' ' '*' ' ' ' ' ' ']
 [' ' '*' ' ' ' ' ' ']
 ['@' ' ' ' ' '*' 'O']
 [' ' '*' 'O' ' ' ' ']
 [' ' ' ' ' ' ' ' '<3']]


21

In [8]:
next_state, cr_reward = gg.act('east')
gg.render()
gg.get_cum_reward()

Current Reward:  -1
Current State:  [2, 1]
[[' ' '*' ' ' ' ' ' ']
 [' ' '*' ' ' ' ' ' ']
 [' ' '@' ' ' '*' 'O']
 [' ' '*' 'O' ' ' ' ']
 [' ' ' ' ' ' ' ' '<3']]


20

In [9]:
next_state, cr_reward = gg.act('north')
gg.render()
gg.get_cum_reward()

Not a valid Move
Current Reward:  -1
Current State:  [2, 1]
[[' ' '*' ' ' ' ' ' ']
 [' ' '*' ' ' ' ' ' ']
 [' ' '@' ' ' '*' 'O']
 [' ' '*' 'O' ' ' ' ']
 [' ' ' ' ' ' ' ' '<3']]


19

In [10]:
next_state, cr_reward = gg.act('east')
gg.render()
gg.get_cum_reward()

Current Reward:  -1
Current State:  [2, 2]
[[' ' '*' ' ' ' ' ' ']
 [' ' '*' ' ' ' ' ' ']
 [' ' ' ' '@' '*' 'O']
 [' ' '*' 'O' ' ' ' ']
 [' ' ' ' ' ' ' ' '<3']]


18

In [11]:
next_state, cr_reward = gg.act('south')
gg.render()
gg.get_cum_reward()

Current Reward:  -1
Current State:  [3, 2]
[[' ' '*' ' ' ' ' ' ']
 [' ' '*' ' ' ' ' ' ']
 [' ' ' ' ' ' '*' 'O']
 [' ' '*' '0' ' ' ' ']
 [' ' ' ' ' ' ' ' '<3']]


17

In [12]:
next_state, cr_reward = gg.act('east')
gg.render()
gg.get_cum_reward()

The Final Cumulated Reward:  17
Game Over!!
[[' ' '*' ' ' ' ' ' ']
 [' ' '*' ' ' ' ' ' ']
 [' ' ' ' ' ' '*' 'O']
 [' ' '*' '0' ' ' ' ']
 [' ' ' ' ' ' ' ' '<3']]


17

In [13]:
next_state, cr_reward = gg.act('east')
gg.render()
gg.get_cum_reward()

The Final Cumulated Reward:  17
Game Over!!
[[' ' '*' ' ' ' ' ' ']
 [' ' '*' ' ' ' ' ' ']
 [' ' ' ' ' ' '*' 'O']
 [' ' '*' '0' ' ' ' ']
 [' ' ' ' ' ' ' ' '<3']]


17

In [14]:
next_state, cr_reward = gg.act('south')
gg.render()
gg.get_cum_reward()

The Final Cumulated Reward:  17
Game Over!!
[[' ' '*' ' ' ' ' ' ']
 [' ' '*' ' ' ' ' ' ']
 [' ' ' ' ' ' '*' 'O']
 [' ' '*' '0' ' ' ' ']
 [' ' ' ' ' ' ' ' '<3']]


17

In [15]:
gg.get_cum_reward()

17