## EECS 738 - Machine learning 

### Reinforcement learning game - homework 4  

In [1]:
import numpy as np
import random

Building the environment

In [2]:
MONSTER = "m"
PLAYER = "p"
REWARD = "r"
EMPTY = "*"
TREE = "t"
END = "e"

grid = [
            [EMPTY, EMPTY, EMPTY, EMPTY, REWARD, MONSTER, MONSTER],
            [REWARD, EMPTY, EMPTY, EMPTY, EMPTY, EMPTY, EMPTY],
            [EMPTY, EMPTY, EMPTY, EMPTY, EMPTY, EMPTY, PLAYER],
            [EMPTY, EMPTY, EMPTY, EMPTY, EMPTY, EMPTY, EMPTY],
            [EMPTY, EMPTY, EMPTY, EMPTY, TREE, EMPTY, EMPTY],
            [EMPTY, EMPTY, EMPTY, REWARD, TREE, EMPTY, EMPTY],
            [EMPTY, EMPTY, EMPTY, EMPTY, TREE, EMPTY, EMPTY],
            [EMPTY, TREE, EMPTY, EMPTY, EMPTY, EMPTY, MONSTER],
            [EMPTY, TREE, EMPTY, EMPTY, EMPTY, EMPTY, EMPTY],
            [END, EMPTY, EMPTY, MONSTER, TREE, EMPTY, TREE]
        ]


In [3]:
for row in grid:
    print(' '.join(row))

* * * * r m m
r * * * * * *
* * * * * * p
* * * * * * *
* * * * t * *
* * * r t * *
* * * * t * *
* t * * * * m
* t * * * * *
e * * m t * t


Wrap our environment state in a class that holds the current grid and player position

In [4]:
class State:
    
    def __init__(self, grid, player_pos):
        self.grid = grid
        self.player_pos = player_pos
        
    def __eq__(self, other):
        return isinstance(other, State) and self.grid == other.grid and self.player_pos == other.player_pos
    
    def __hash__(self):
        return hash(str(self.grid) + str(self.player_pos))
    
    def __str__(self):
        return f"State(grid={self.grid}, player_pos={self.player_pos})"

Actions and init state

In [5]:
UP = 0
DOWN = 1
LEFT = 2
RIGHT = 3

ACTIONS = [UP, DOWN, LEFT, RIGHT]

start_state = State(grid=grid, player_pos=[0, 0])

Function that takes the current state with an action and returns new state, reward and whether or not the episode has completed


In [6]:
from copy import deepcopy

def act(state, action):
    
    def new_player_pos(state, action):
        p = deepcopy(state.player_pos)
        if action == UP:
            p[0] = max(0, p[0] - 1)
        elif action == DOWN:
            p[0] = min(len(state.grid) - 1, p[0] + 1)
        elif action == LEFT:
            p[1] = max(0, p[1] - 1)
        elif action == RIGHT:
            p[1] = min(len(state.grid[0]) - 1, p[1] + 1)
        else:
            raise ValueError(f"Unknown action {action}")
        return p
            
    p = new_player_pos(state, action)
    grid_item = state.grid[p[0]][p[1]]
    
    new_grid = deepcopy(state.grid)
    
    # Defining the rules and the point system:
    
    # move to a square with monster will end the game (failure)
    if grid_item == MONSTER:
        reward = -1000
        is_done = True
        new_grid[p[0]][p[1]] += PLAYER
        print("MONSTER - Game Over!")
    elif grid_item == REWARD:
        reward = 1000
        is_done = False
        old = state.player_pos
        new_grid[old[0]][old[1]] = EMPTY
        new_grid[p[0]][p[1]] = PLAYER
    elif grid_item == EMPTY:
        reward = -1
        is_done = False
        old = state.player_pos
        new_grid[old[0]][old[1]] = EMPTY
        new_grid[p[0]][p[1]] = PLAYER
    # move to the end square will end the game (success)
    elif grid_item == END:
        reward = 5000
        is_done = True
        new_grid[p[0]][p[1]] += PLAYER
        print("END - Success!")
    # player cannot move to square with a tree
    elif grid_item == TREE:
        reward = -20
        is_done = False
    elif grid_item == PLAYER:
        reward = -1
        is_done = False
    else:
        raise ValueError(f"Unknown grid item {grid_item}")
    
    return State(grid=new_grid, player_pos=p), reward, is_done

Learning 

In [7]:
import numpy as np
import random

random.seed(42) 

N_EPISODES = 1000

MAX_EPISODE_STEPS = 100

MIN_ALPHA = 0.02

alphas = np.linspace(1.0, MIN_ALPHA, N_EPISODES)
gamma = 1.0
eps = 0.2

q_table = dict()

A helper function q that gives the Q value for a state-action pair or for all actions, given a state

In [8]:
def q(state, action=None):
    
    if state not in q_table:
        q_table[state] = np.zeros(len(ACTIONS))
        
    if action is None:
        return q_table[state]
    
    return q_table[state][action]

Act with random action with some small probability or the best action seen so far (using our q_table)

In [9]:
def choose_action(state):
    if random.uniform(0, 1) < eps:
        return random.choice(ACTIONS) 
    else:
        return np.argmax(q(state))

Training agent using the Q-learning algorithm

In [12]:
for e in range(N_EPISODES):
    
    state = start_state
    total_reward = 0
    alpha = alphas[e]
    
    for _ in range(MAX_EPISODE_STEPS):
        action = choose_action(state)
        next_state, reward, done = act(state, action)
        total_reward += reward
        
        q(state)[action] = q(state, action) + \
                alpha * (reward + gamma *  np.max(q(next_state)) - q(state, action))
        state = next_state
        if done:
            break
    print(f"Episode {e + 1}: total reward: -> {total_reward} points")

END - Success!
Episode 1: total reward: -> 7985 points
END - Success!
Episode 2: total reward: -> 7954 points
END - Success!
Episode 3: total reward: -> 7981 points
END - Success!
Episode 4: total reward: -> 7981 points
END - Success!
Episode 5: total reward: -> 7976 points
MONSTER - Game Over!
Episode 6: total reward: -> 1933 points
MONSTER - Game Over!
Episode 7: total reward: -> 993 points
END - Success!
Episode 8: total reward: -> 7933 points
END - Success!
Episode 9: total reward: -> 7983 points
MONSTER - Game Over!
Episode 10: total reward: -> 993 points
END - Success!
Episode 11: total reward: -> 7983 points
END - Success!
Episode 12: total reward: -> 7950 points
END - Success!
Episode 13: total reward: -> 7981 points
END - Success!
Episode 14: total reward: -> 7979 points
END - Success!
Episode 15: total reward: -> 7979 points
END - Success!
Episode 16: total reward: -> 7981 points
END - Success!
Episode 17: total reward: -> 7959 points
END - Success!
Episode 18: total reward: 

END - Success!
Episode 150: total reward: -> 7956 points
MONSTER - Game Over!
Episode 151: total reward: -> 996 points
MONSTER - Game Over!
Episode 152: total reward: -> 1986 points
END - Success!
Episode 153: total reward: -> 7983 points
Episode 154: total reward: -> 2827 points
END - Success!
Episode 155: total reward: -> 7970 points
END - Success!
Episode 156: total reward: -> 7960 points
END - Success!
Episode 157: total reward: -> 7978 points
END - Success!
Episode 158: total reward: -> 7980 points
MONSTER - Game Over!
Episode 159: total reward: -> 996 points
END - Success!
Episode 160: total reward: -> 7956 points
END - Success!
Episode 161: total reward: -> 7971 points
END - Success!
Episode 162: total reward: -> 7975 points
END - Success!
Episode 163: total reward: -> 7979 points
END - Success!
Episode 164: total reward: -> 7973 points
END - Success!
Episode 165: total reward: -> 7981 points
END - Success!
Episode 166: total reward: -> 7981 points
END - Success!
Episode 167: to

END - Success!
Episode 326: total reward: -> 7980 points
Episode 327: total reward: -> 1883 points
END - Success!
Episode 328: total reward: -> 7959 points
END - Success!
Episode 329: total reward: -> 7975 points
END - Success!
Episode 330: total reward: -> 7935 points
END - Success!
Episode 331: total reward: -> 7985 points
END - Success!
Episode 332: total reward: -> 7981 points
MONSTER - Game Over!
Episode 333: total reward: -> 1980 points
END - Success!
Episode 334: total reward: -> 7963 points
END - Success!
Episode 335: total reward: -> 7985 points
END - Success!
Episode 336: total reward: -> 7960 points
END - Success!
Episode 337: total reward: -> 7950 points
END - Success!
Episode 338: total reward: -> 7962 points
END - Success!
Episode 339: total reward: -> 7978 points
END - Success!
Episode 340: total reward: -> 7973 points
END - Success!
Episode 341: total reward: -> 7978 points
MONSTER - Game Over!
Episode 342: total reward: -> 1940 points
END - Success!
Episode 343: total 

END - Success!
Episode 479: total reward: -> 7978 points
MONSTER - Game Over!
Episode 480: total reward: -> 1899 points
END - Success!
Episode 481: total reward: -> 7962 points
END - Success!
Episode 482: total reward: -> 7983 points
END - Success!
Episode 483: total reward: -> 7982 points
MONSTER - Game Over!
Episode 484: total reward: -> 987 points
END - Success!
Episode 485: total reward: -> 7958 points
END - Success!
Episode 486: total reward: -> 7983 points
END - Success!
Episode 487: total reward: -> 7981 points
MONSTER - Game Over!
Episode 488: total reward: -> 1980 points
END - Success!
Episode 489: total reward: -> 7983 points
END - Success!
Episode 490: total reward: -> 7983 points
END - Success!
Episode 491: total reward: -> 7981 points
END - Success!
Episode 492: total reward: -> 7984 points
END - Success!
Episode 493: total reward: -> 7962 points
END - Success!
Episode 494: total reward: -> 7981 points
END - Success!
Episode 495: total reward: -> 7962 points
END - Success!

END - Success!
Episode 621: total reward: -> 7977 points
END - Success!
Episode 622: total reward: -> 7981 points
END - Success!
Episode 623: total reward: -> 7954 points
END - Success!
Episode 624: total reward: -> 7981 points
END - Success!
Episode 625: total reward: -> 7981 points
END - Success!
Episode 626: total reward: -> 7918 points
END - Success!
Episode 627: total reward: -> 7979 points
END - Success!
Episode 628: total reward: -> 7982 points
END - Success!
Episode 629: total reward: -> 7980 points
END - Success!
Episode 630: total reward: -> 7979 points
END - Success!
Episode 631: total reward: -> 7962 points
END - Success!
Episode 632: total reward: -> 7958 points
END - Success!
Episode 633: total reward: -> 7985 points
MONSTER - Game Over!
Episode 634: total reward: -> 1884 points
END - Success!
Episode 635: total reward: -> 7983 points
END - Success!
Episode 636: total reward: -> 7956 points
END - Success!
Episode 637: total reward: -> 7962 points
END - Success!
Episode 63

END - Success!
Episode 797: total reward: -> 7954 points
END - Success!
Episode 798: total reward: -> 7981 points
END - Success!
Episode 799: total reward: -> 7981 points
END - Success!
Episode 800: total reward: -> 7958 points
END - Success!
Episode 801: total reward: -> 7979 points
END - Success!
Episode 802: total reward: -> 7979 points
END - Success!
Episode 803: total reward: -> 7985 points
END - Success!
Episode 804: total reward: -> 7978 points
END - Success!
Episode 805: total reward: -> 7981 points
END - Success!
Episode 806: total reward: -> 7937 points
MONSTER - Game Over!
Episode 807: total reward: -> 1984 points
END - Success!
Episode 808: total reward: -> 7916 points
MONSTER - Game Over!
Episode 809: total reward: -> 1956 points
END - Success!
Episode 810: total reward: -> 7980 points
END - Success!
Episode 811: total reward: -> 7978 points
END - Success!
Episode 812: total reward: -> 7984 points
END - Success!
Episode 813: total reward: -> 7952 points
END - Success!
Epis

END - Success!
Episode 965: total reward: -> 7983 points
MONSTER - Game Over!
Episode 966: total reward: -> 1937 points
END - Success!
Episode 967: total reward: -> 7979 points
END - Success!
Episode 968: total reward: -> 7974 points
END - Success!
Episode 969: total reward: -> 7973 points
END - Success!
Episode 970: total reward: -> 7960 points
END - Success!
Episode 971: total reward: -> 7957 points
Episode 972: total reward: -> 2884 points
END - Success!
Episode 973: total reward: -> 7975 points
END - Success!
Episode 974: total reward: -> 7973 points
MONSTER - Game Over!
Episode 975: total reward: -> 995 points
END - Success!
Episode 976: total reward: -> 7981 points
END - Success!
Episode 977: total reward: -> 7984 points
END - Success!
Episode 978: total reward: -> 7984 points
MONSTER - Game Over!
Episode 979: total reward: -> 1941 points
END - Success!
Episode 980: total reward: -> 7982 points
MONSTER - Game Over!
Episode 981: total reward: -> 1885 points
MONSTER - Game Over!
Ep

The more we train the model, the more points we are able to collect