In [1]:
import numpy as np
import time
import math
import matplotlib.pyplot as plt
%matplotlib inline

A simple example of runing all the available methods for solving discrete DMDP

# Create random discounted MDP

In [2]:
from Approximate_DMDP.DMDP_class import DMDP, create_random_DMDP

In [3]:
reward_func = lambda s, a: np.random.random()
nb_a = 5
nb_s = 4
gamma = 0.7

mdp = create_random_DMDP(nb_a, nb_s, reward_func, gamma)

# Value iteration

In [4]:
from Approximate_DMDP.value_iteration import run_value_iteration

In [5]:
start_time  = time.time()
pi_VI, V_VI = run_value_iteration(mdp, eps=0.001)

print("Execution time:", time.time() - start_time)
print("Policy:", pi_VI.T)
print("Value function", V_VI.T)

Execution time: 0.014036893844604492
Policy: [[ 2.95792124  3.0527193   2.90378262  3.09574147]]
Value function [[ 4.  0.  1.  2.]]


# Linear programmation

In [6]:
from Approximate_DMDP.linear_programming import LP_solving_DMDP

In [7]:
start_time  = time.time()
v_LP, pi_LP = LP_solving_DMDP(mdp)

print("Execution time:", time.time() - start_time)
print("Policy:", pi_LP.T)
print("Value function", v_LP.T)

Execution time: 0.033588409423828125
Policy: [[ 4.  0.  1.  2.]]
Value function [ 2.9587796  3.0535777  2.904641   3.0965999]


# High Precision Randomized value Iteration

In [8]:
from Approximate_DMDP.high_precision_randomized_VI \
    import high_precision_randomized_VI

In [9]:
start_time  = time.time()
v_HP, pi_HP, _ = high_precision_randomized_VI(mdp, eps=0.1, delta=0.1)

print("Execution time:", time.time() - start_time)
print("Policy:", pi_HP.T)
print("Value function", v_HP.T)

100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:17<00:00,  2.85s/it]


Execution time: 17.100457906723022
Policy: [[ 4.  0.  1.  2.]]
Value function [[ 2.95879124  3.05358203  2.90465058  3.09658432]]


# Sublinear Time Randomized Value Iteration

In [10]:
from Approximate_DMDP.sublinear_radomizedVI \
    import sublinear_time_randomized_VI

In [11]:
start_time  = time.time()
v_sub, pi_sub, _ = sublinear_time_randomized_VI(mdp, eps=0.1, delta=0.1)

print("Execution time:", time.time() - start_time)
print("Policy:", pi_sub.T)
print("Value function", v_sub.T)

100%|███████████████████████████████████████████████████████████████████████████████████| 6/6 [39:21<00:00, 393.50s/it]


Execution time: 2361.0322020053864
Policy: [[ 4.  0.  1.  2.]]
Value function [[ 2.95880815  3.05363095  2.90469072  3.09665238]]


# Monotonic functions

In [4]:
from Approximate_DMDP.monotonic_randomized_VI \
    import sample_randomize_mon_VI, sublinear_random_mon_VI

In [5]:
start_time  = time.time()
v_mon_sub, pi_mon_sub, _ = sublinear_random_mon_VI(mdp, eps=0.1, delta=0.1)

print("Execution time:", time.time() - start_time)
print("Policy:", pi_mon_sub.T)
print("Value function", v_mon_sub.T)

  0%|                                                                                            | 0/6 [00:00<?, ?it/s]

KeyboardInterrupt: 