# Import Libraries

In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

# R-Matrix

In [18]:
# Reward matrix: R[s, s'] is the reward for moving from cell s to cell s'.
# Start with every entry NaN (i.e. "move not allowed"); the cells below
# fill in the permitted moves.
R = np.full((36, 36), np.nan)
R

array([[nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]])

In [19]:
# Reward values, named once here and written into R further down.
r_time = -1                  # cost of every ordinary permitted move
r_pond = -15                 # stepping into (or staying in) a pond cell
r_croissant = r_cogs = 200   # croissant reward and cogs reward share one value
r_work = 15                  # arriving at (or staying in) the work cell

In [27]:
# Build the list of permitted (from_cell, to_cell) moves between adjacent
# cells of the 6x6 grid. Cells are numbered row by row: cell = row*6 + column.
ones = []
for cell in range(36):
    i, j = divmod(cell, 6)  # i = row, j = column
    if j < 5:
        ones.append((cell, cell + 1))  # right, unless already on the right edge
    if i > 0:
        ones.append((cell, cell - 6))  # up, unless in the top row
    if i < 5:
        ones.append((cell, cell + 6))  # down, unless in the bottom row
    if j > 0:
        ones.append((cell, cell - 1))  # left, unless already on the left edge
    ones.append((cell, cell))  # staying put is always an option



In [21]:
# Tube lines: cells 0 <-> 23 and 8 <-> 25 are connected in both directions.
for tube_a, tube_b in ((0, 23), (8, 25)):
    ones.extend([(tube_a, tube_b), (tube_b, tube_a)])

# Transpose the (from, to) pairs into a (from_cells, to_cells) pair of tuples
# so it can index R directly.
# NOTE: `ones` is a tuple after this line, so this cell cannot be re-run
# without first re-running the cell that builds the list.
from_cells, to_cells = zip(*ones)
ones = (from_cells, to_cells)

# Every permitted move costs one unit of time.
R[ones] = r_time
R

array([[-1., -1., nan, ..., nan, nan, nan],
       [-1., -1., -1., ..., nan, nan, nan],
       [nan, -1., -1., ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., -1., -1., nan],
       [nan, nan, nan, ..., -1., -1., -1.],
       [nan, nan, nan, ..., nan, -1., -1.]])

In [29]:
# Disallow moves across walls. Each wall is listed once, in one direction...
nans = [(2, 3), (8, 9), (14, 15), (18, 24), (19, 25), (32, 33), (11, 17)]
# ...and then mirrored so the same wall also blocks the opposite direction.
nans += [(dst, src) for src, dst in nans]
blocked_from, blocked_to = zip(*nans)
R[blocked_from, blocked_to] = np.nan
R

array([[-1., -1., nan, ..., nan, nan, nan],
       [-1., -1., -1., ..., nan, nan, nan],
       [nan, -1., -1., ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., -1., -1., nan],
       [nan, nan, nan, ..., -1., -1., -1.],
       [nan, nan, nan, ..., nan, -1., -1.]])

In [30]:
def move_to(l, cell, width=6, height=6):
    """Append the moves that step into ``cell`` from its grid neighbours.

    Cells are numbered row by row on a ``width`` x ``height`` grid
    (``cell = row * width + column``). For each neighbour that actually
    exists on the grid, the pair ``(neighbour, cell)`` is appended to ``l``
    in the order: above, left, right, below.

    Neighbours that would lie off the grid are skipped. Without this check a
    cell on the left/right edge would pick up a "neighbour" from the adjacent
    row, and a cell in the top/bottom row would produce an index outside the
    grid (negative indices silently wrap around in NumPy).

    Parameters
    ----------
    l : list
        List to extend; it is modified in place.
    cell : int
        Destination cell index.
    width, height : int, optional
        Grid dimensions; default to the 6x6 grid used in this notebook.

    Returns
    -------
    list
        The same list object ``l``, to allow chained calls.
    """
    row, col = divmod(cell, width)
    if row > 0:
        l.append((cell - width, cell))  # from the cell above
    if col > 0:
        l.append((cell - 1, cell))      # from the cell to the left
    if col < width - 1:
        l.append((cell + 1, cell))      # from the cell to the right
    if row < height - 1:
        l.append((cell + width, cell))  # from the cell below
    return l

# Don't fall in the pond! Collect the moves that step into each pond cell
# (16 and 27), printing the list after each pond is added.
ponds = []
for pond_cell in (16, 27):
    ponds = move_to(ponds, pond_cell)
    print(ponds)

ponds

[(10, 16),
 (15, 16),
 (17, 16),
 (22, 16),
 (21, 27),
 (26, 27),
 (28, 27),
 (33, 27)]

In [31]:
# Don't fall in the pond! Gather every move that steps into pond cell 16 or 27.
pond_moves = move_to([], 16)
pond_moves = move_to(pond_moves, 27)
# Staying in the pond is also pretty unpleasant. Brrrr!
pond_moves += [(16, 16), (27, 27)]
# Transpose to a (from_cells, to_cells) pair and apply the pond penalty.
ponds = tuple(zip(*pond_moves))
R[ponds] = r_pond

In [33]:
# Inspect the index pair used for the pond penalty: the first tuple holds the
# "from" cells, the second the matching "to" cells.
ponds

((10, 15, 17, 22, 21, 26, 28, 33, 16, 27),
 (16, 16, 16, 16, 27, 27, 27, 27, 16, 27))

In [41]:
# Nice to eat a croissant before work: reward every move that steps into cell 10.
crois = tuple(zip(*move_to([], 10)))
R[crois] = r_croissant

# The remaining rewards are few enough to index by hand.
# Moves 26 -> 32 and 31 -> 32 earn the cogs reward.
R[[26, 31], 32] = r_cogs

# Moves 29 -> 35 and 34 -> 35, and staying in 35, earn the work reward.
R[[29, 34, 35], 35] = r_work

In [43]:
def display_R(matrix, start=None, end=None):
    """Render ``matrix`` (or a square window of it) as a DataFrame.

    ``start`` and ``end`` are applied to both rows and columns through a
    label-based ``.loc`` slice, so ``end`` itself is included. Leaving both as
    ``None`` shows the whole matrix.

    Side effect: sets the pandas option ``display.max_columns`` to ``None``
    (no column truncation), and the option stays set after the call.
    """
    pd.set_option("display.max_columns", None)
    frame = pd.DataFrame(matrix)
    window = slice(start, end)
    display(frame.loc[window, window])
    
# No start/end given, so the full reward matrix is shown.
display_R(R)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35
0,-1.0,-1.0,,,,,-1.0,,,,,,,,,,,,,,,,,-1.0,,,,,,,,,,,,
1,-1.0,-1.0,-1.0,,,,,-1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,,-1.0,-1.0,,,,,,-1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,,,,-1.0,-1.0,,,,,-1.0,,,,,,,,,,,,,,,,,,,,,,,,,,
4,,,,-1.0,-1.0,-1.0,,,,,200.0,,,,,,,,,,,,,,,,,,,,,,,,,
5,,,,,-1.0,-1.0,,,,,,-1.0,,,,,,,,,,,,,,,,,,,,,,,,
6,-1.0,,,,,,-1.0,-1.0,,,,,-1.0,,,,,,,,,,,,,,,,,,,,,,,
7,,-1.0,,,,,-1.0,-1.0,-1.0,,,,,-1.0,,,,,,,,,,,,,,,,,,,,,,
8,,,-1.0,,,,,-1.0,-1.0,,,,,,-1.0,,,,,,,,,,,-1.0,,,,,,,,,,
9,,,,-1.0,,,,,,-1.0,200.0,,,,,-1.0,,,,,,,,,,,,,,,,,,,,
