In [None]:
%matplotlib widget
%load_ext autoreload
%autoreload 2
import sys
sys.path.insert(0, '..')
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import itertools
from matplotlib import cm
from lmdps import lmdps
import math
from scipy.special import kl_div
import torch
from sklearn.preprocessing import MinMaxScaler
from scipy import sparse
import time

#  Abstract Rooms

<img src="pictures/room.png" alt="" width="250" height="250">

Plot the value functions for TOP

In [2]:
'''Z = T[:-4].reshape(5, 5)
ss = 'TOP Reward'
fig = lmdps.plot_Z_function(ss, N_DIM, Z, np.log(Z))
fig.show()'''

"Z = T[:-4].reshape(5, 5)\nss = 'TOP Reward'\nfig = lmdps.plot_Z_function(ss, N_DIM, Z, np.log(Z))\nfig.show()"

# Room with goal

In [7]:
N_DIM = 5
terminal={(-1, 2): (0, 2), (2, -1): (2, 0), (2, 5): (2, 4), (5, 2): (4, 2)}
P_goal, newmapping = lmdps.create_room_hierarchical(N_DIM, terminal, goals=[8])

In [8]:
# T, L , R, B -> 50, 51, 52, 53
terminal_GOAL = {'GOAL': 8, 'GOAL_TOP': 25, 'GOAL_LEFT': 26, 'GOAL_RIGHT':27, 'GOAL_BOTTOM':28}

P_goal[terminal_GOAL['GOAL'], :] = 0.
P_goal[terminal_GOAL['GOAL'], terminal_GOAL['GOAL']] = 1.

goals = {}

for s in terminal_GOAL:
    q = np.full((P_goal.shape[0], 1), -1)
    q[terminal_GOAL[s], :] = 0
    goals[s] = lmdps.solve_lmdp(P_goal, q)
    
GOAL = goals['GOAL']
GOAL_T = goals['GOAL_TOP']
GOAL_L = goals['GOAL_LEFT']
GOAL_R = goals['GOAL_RIGHT']
GOAL_B = goals['GOAL_BOTTOM']

scl = MinMaxScaler()

scl.fit(GOAL)
GOAL = scl.transform(GOAL)

scl.fit(GOAL_T)
GOAL_T = scl.transform(GOAL_T)

scl.fit(GOAL_L)
GOAL_L = scl.transform(GOAL_L)

scl.fit(GOAL_R)
GOAL_R = scl.transform(GOAL_R)

scl.fit(GOAL_B)
GOAL_B = scl.transform(GOAL_B)

In [9]:
GOAL_ = GOAL[:-4].reshape(5, 5)
ss = 'ROOM with goal @ (1,4)'
fig = lmdps.plot_Z_function(ss, N_DIM, GOAL_, np.log(GOAL_))
fig.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

# High-Level MDP

<img src="pictures/setting.png" alt="" width="250" height="250">

For each equivalence class we have computed $Z_1^*(s),...,Z_k^*(s)$ are known for each state $s$. When computing the values at the top level, the exponenetiated reward in the terminal states $t_1,\dots, t_k$ is given by the current _Z-values_ in those states, i.e. $Z(t_1), \dots, Z(t_k)$. Hence the top-level Bellman equation for state s is given by

\begin{equation}
Z(s) = Z(t_1) Z_1^*(s) + \dots + Z(t_k) Z_k^*(s)
\end{equation}

We can write this in matrix form as $Z = QZ$, where each entry of $Q$ equals $Q_{ij} = 0$ if $s_j$ is not a terminal state in the LMDP for $s_i$, and $Q_{ij} = Z^*_{l}(s_i)$ if $s_j$ is the $l$-th terminal state in the LMDP for $s_i$.

Therefore, in $Q$ we have 

In [10]:
'''#N_terminal = 19
#Q = np.full((N_terminal, N_terminal), 0)
room_1 = (B + R).copy()
room_2 = (L + R + B).copy()
room_3 = (GOAL + GOAL_L + GOAL_B).copy()
room_4 = (T + R + B).copy()
room_5 = (T + L + R + B).copy()
room_6 = (T + L + B).copy()
room_7 = (T + R).copy()
room_8 = (T + L + R).copy()
room_9 = (T + L).copy()'''

'#N_terminal = 19\n#Q = np.full((N_terminal, N_terminal), 0)\nroom_1 = (B + R).copy()\nroom_2 = (L + R + B).copy()\nroom_3 = (GOAL + GOAL_L + GOAL_B).copy()\nroom_4 = (T + R + B).copy()\nroom_5 = (T + L + R + B).copy()\nroom_6 = (T + L + B).copy()\nroom_7 = (T + R).copy()\nroom_8 = (T + L + R).copy()\nroom_9 = (T + L).copy()'

###### start_H = time.time()

for _ in range(10000):
    room_1 = room_2[10] * R + room_4[2] * B
    room_2 = L * room_1[14] + R * room_3[10] + B * room_5[2]
    room_3 = room_2[14] * GOAL_L + room_6[2] * GOAL_B + room_3[8] * GOAL
    room_4 = room_1[22] * T + room_5[10] * R + room_7[2] * B
    room_5 = room_2[22] * T + room_6[10] * R + room_4[14] * L + room_8[2] * B
    room_6 = room_3[22] * T + room_5[14] * L + room_9[2] * B
    room_7 = room_4[22] * T + room_8[10] * R
    room_8 = room_5[22] * T + room_9[10] * R + room_7[14] * L
    room_9 = room_6[22] * T + room_8[14] * L

end_H = time.time()
print('Time hierarchical', end_H-start_H)

In [566]:
Z_OPT = np.zeros((15, 15))
Z_OPT[:5, :5] = np.log(room_1[:-4]).reshape(5, 5)
Z_OPT[:5, 5:10] = np.log(room_2[:-4]).reshape(5, 5)
Z_OPT[:5, 10:] = np.log(room_3[:-4]).reshape(5, 5)

Z_OPT[5:10, :5] = np.log(room_4[:-4]).reshape(5, 5)
Z_OPT[5:10, 5:10] = np.log(room_5[:-4]).reshape(5, 5)
Z_OPT[5:10, 10:] = np.log(room_6[:-4]).reshape(5, 5)

Z_OPT[10:, :5] = np.log(room_7[:-4]).reshape(5, 5)
Z_OPT[10:, 5:10] = np.log(room_8[:-4]).reshape(5, 5)
Z_OPT[10:, 10:] = np.log(room_9[:-4]).reshape(5, 5)

NameError: name 'room_1' is not defined

In [567]:
'''figure = plt.figure(figsize=(9, 9))
axes = figure.add_subplot(111)
axes.set_title('Composed hierarchical MDP')
caxes = axes.matshow(Z_OPT, interpolation ='nearest') 

for (i, j), data in np.ndenumerate(Z_OPT):
  
    axes.text(j, i, '{:0.2f}'.format(data), ha='center', va='center')

plt.show()'''

"figure = plt.figure(figsize=(9, 9))\naxes = figure.add_subplot(111)\naxes.set_title('Composed hierarchical MDP')\ncaxes = axes.matshow(Z_OPT, interpolation ='nearest') \n\nfor (i, j), data in np.ndenumerate(Z_OPT):\n  \n    axes.text(j, i, '{:0.2f}'.format(data), ha='center', va='center')\n\nplt.show()"

# Solving the flat MDP
<img src="pictures/setting.png" alt="" width="200" height="200">


In [28]:
P, G, z = lmdps.create_flat_MDP((2,2), R_DIM, {(2,2):18})

# Sparse
start_B = time.time()
for _ in range(10000):
    z = G * P * z
    
end_B = time.time()

print('Sparse matrices time', end_B - start_B)

Sparse matrices time 3.0230870246887207


In [29]:
Z = z.todense().copy()
V = np.log(Z)[:-8].reshape(10, 10)

figure = plt.figure(figsize=(10, 10))
axes = figure.add_subplot(111)
axes.set_title('Value function (np.log(Z)) for the Flat MDP')
caxes = axes.matshow(V, interpolation ='nearest') 

for (i, j), data in np.ndenumerate(V):
    axes.text(j, i, '{:0.2f}'.format(data), ha='center', va='center')

plt.show()

  


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Measure Differences

## <span style="color: purple;"> Experiments  </span>

In [2]:
experiments = [((2,2), {(0,1):8}),
               ((3,3), {(0,2):8}),
               ((4,4), {(0,3):8}),
               ((5,5), {(0,4):8}),
               ((6,6), {(0,5):8}),
               ((7,7), {(0,6):8}),
               ((8,8), {(0,7):8}),
              ]

In [3]:
R_DIM =  5
N_iter = 10000 # 10k

times_H = {}
times_dense = {}
times_sparse = {}

for (rooms, goals) in experiments:
    
    # Hierarchical execution
    start_H = time.time()
    Z_H = lmdps.hierarchical_solver(rooms, R_DIM, goals, N_iter)
    end_H = time.time()
    time_H = end_H - start_H
    
    # Sparse and dense execution
    # Creation
    start_creation_time = time.time()
    P, G, z = lmdps.create_flat_MDP(rooms, R_DIM, goals)
    end_creation_time = time.time()
    
    creation_time = end_creation_time - start_creation_time
    
    # Sparse
    start_sparse = time.time()
    for _ in range(N_iter):
        z = G * P * z
    end_sparse = time.time()

    # Dense
    start_dense = time.time()
    P_dense = P.copy().todense()
    G_dense = G.copy().todense()
    z_dense = z.copy().todense()

    # Dense
    for _ in range(N_iter):
        z_dense = G_dense * P_dense * z_dense
    end_dense = time.time()
    
    time_sparse = creation_time + (end_sparse - start_sparse)
    time_dense = creation_time + (end_dense - start_dense)
    
    times_H[rooms] = time_H
    times_dense[rooms] = time_dense
    times_sparse[rooms] = time_sparse

  self._set_arrayXarray(i, j, x)
  self._set_intXint(row, col, x.flat[0])
  self._set_arrayXarray(i, j, x)
  self._set_intXint(row, col, x.flat[0])
  self._set_arrayXarray(i, j, x)
  self._set_intXint(row, col, x.flat[0])
  self._set_arrayXarray(i, j, x)
  self._set_intXint(row, col, x.flat[0])
  self._set_arrayXarray(i, j, x)
  self._set_intXint(row, col, x.flat[0])


KeyboardInterrupt: 

In [6]:
V = np.log(Z)

figure = plt.figure(figsize=(10, 10))
axes = figure.add_subplot(111)
axes.set_title('Value function (np.log(Z)) for the Flat MDP')
caxes = axes.matshow(V, interpolation ='nearest') 

for (i, j), data in np.ndenumerate(V):
    axes.text(j, i, '{:0.2f}'.format(data), ha='center', va='center')

plt.show()

NameError: name 'Z' is not defined

In [40]:
Z = lmdps.hierarchical_solver((2,2), 5, (1,3), [(0,1)], 100)

t_isx [2, 10, 14, 22, 8]
(0, 0) [2, 3]


IndexError: index 12 is out of bounds for axis 1 with size 9