In [1]:
from lib.util import *
from lib.mp import *

# Markov Processes

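A Markov process is a set of states $S$ together with a state transition probability matrix $P$ whose entries are $P(s, s') = \mathbb{P}[X_{n+1} = s' \mid X_n = s]$. It satisfies the Markov property $\mathbb{P}[X_{n+1} \mid X_n] = \mathbb{P}[X_{n+1} \mid X_1, \dots, X_n]$, i.e. the next state depends only on the current state, not on the earlier history.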

In [4]:
n = 7  # size passed to generate_stochastic_matrix below (number of states in the chain)

In [5]:
# Build the transition matrix for n states and wrap it in a Markov process.
# NOTE(review): generate_stochastic_matrix presumably draws a random matrix and
# no seed is set in this notebook, so the printed values may differ between
# runs — confirm in lib.util and seed if reproducibility matters.
P = generate_stochastic_matrix(n)
mp = MP(P)

# Print the stationary distribution reported by MP for this P.
print(mp.stationary_distribution())

[0.17600115 0.13364374 0.12834294 0.1223978  0.12667507 0.16953311
 0.14340619]


# Markov Reward Processes

A Markov reward process is a set of states $S$, a state transition probability matrix $P$, a reward function $R$ s.t. $R(s) = \mathbb{E}[R_{t+1} \mid S_t = s]$, and a discount factor $\gamma \in [0, 1]$

The state value function $v(s) = \mathbb{E}[G_t \mid S_t = s]$ of an MRP is the expected return starting from state $s$, where $G_t = \sum_{k=0}^\infty \gamma^k R_{t+k+1}$ is the total discounted reward from time $t$

Bellman Equation for MRP:
$$ v(s) = \mathbb{E}[R_{t+1} + \gamma v(S_{t+1}) \mid S_t = s] = R(s) + \gamma \sum_{s' \in S} P(s, s') v(s')$$

Matrix form of the Bellman Equation for MRP:
$$ v = R + \gamma P v$$ 

In [6]:
from lib.mrp import *

In [7]:
n = 7  # size passed to the generate_* helpers below (same value as in the Markov process section)
gamma = 0.8  # discount factor handed to the MRP / MRP_2 constructors

In [8]:
# Build an MRP from a fresh transition matrix, a reward vector and the
# discount factor. Note: this rebinds P from the Markov process section.
# NOTE(review): R is presumably one expected reward per state — confirm in lib.mrp.
P = generate_stochastic_matrix(n)
R = generate_reward_vector(n)
mrp = MRP(P, R, gamma)

# Print the state-value function reported by the MRP.
# NOTE(review): presumably the solution of v = R + gamma * P v — confirm in lib.mrp.
print(mrp.get_value_function())

[1.87663169 1.93972051 2.27620784 2.37125923 1.7056576  2.21496002
 1.8942522 ]


In [9]:
# Second MRP variant: R comes from generate_reward_matrix(n) instead of
# generate_reward_vector(n), and the process is built with MRP_2.
# NOTE(review): R is presumably indexed by (state, next state) — confirm in lib.mrp.
# Note: this rebinds P, R and mrp from the previous cell.
P = generate_stochastic_matrix(n)
R = generate_reward_matrix(n)
mrp = MRP_2(P, R, gamma)

# get_reward_per_state() returns an object; its .reward attribute is printed.
print(mrp.get_reward_per_state().reward)

[0.47040936129183886, 0.462686941947539, 0.324577898172474, 0.5630459991359351, 0.29055892607453154, 0.4428247927150731, 0.21116365506583692]
