In [14]:
import numpy as np

# Markov reward process with four states.
# P[s, s'] is the probability of moving from state s to state s';
# state 3 only transitions to itself and carries zero reward.
P = np.array(
    [
        [0, 1, 0, 0],
        [0.2, 0.2, 0.5, 0.1],
        [0, 0, 0, 1],
        [0, 0, 0, 1],
    ]
)
# Per-state reward, kept as a column vector so V comes out with shape (4, 1).
R = np.array([[-2], [-1], [10], [0]])
gamma = 0.9
# Bellman expectation equation in matrix form: (I - gamma * P) V = R.
# Solve the linear system directly rather than forming the explicit inverse,
# which is both cheaper and numerically better behaved.
V = np.linalg.solve(np.eye(P.shape[0]) - gamma * P, R)
V

array([[ 2.29483283],
       [ 4.77203647],
       [10.        ],
       [ 0.        ]])

In [42]:
from tabulate import tabulate

# Value Iteration
# Transition tensor indexed as P[s, a, s']: probability of landing in s'
# after taking action a in state s. Every state has two actions (index 0, 1).
P = np.array(
    [
        [[0, 1, 0, 0], [0, 0, 0, 1]],  # (Study, Quit)
        [[0.2, 0.2, 0.5, 0.1], [0, 0, 0, 1]],  # (Defend, Quit)
        [[0, 0, 0, 1], [0, 0, 0, 1]],
        [[0, 0, 0, 1], [0, 0, 0, 1]],
    ]
)  # (s, a, s')
# Reward table indexed as R[s, a]. The -inf entries make action 1 lose every
# max/argmax comparison in states 2 and 3, so it is never selected there.
R = np.array([[-2, 0], [-1, 0], [10, -np.inf], [0, -np.inf]])  # (s, a)


def vi(gamma, n_iter=100):
    """Run value iteration on the module-level MDP (``P``, ``R``).

    Starting from an all-zero value vector, applies the Bellman optimality
    backup ``V <- max_a (R + gamma * P @ V)`` for a fixed number of sweeps.

    Args:
        gamma: Discount factor applied to the expected next-state value.
        n_iter: Number of backup sweeps to perform (must be at least 1).
            Defaults to 100.

    Returns:
        A ``(values, policy)`` tuple of strings:
        ``values`` is the final value vector formatted as
        ``'(v0, v1, ...)'`` with three significant digits per entry;
        ``policy`` is ``'Quit'`` when state 0's greedy action is index 1
        (state 1's action is not reported in that case), otherwise
        ``'(Study, Defend)'`` or ``'(Study, Quit)'`` depending on state 1's
        greedy action.

    Raises:
        ValueError: If ``n_iter`` is smaller than 1.
    """
    if n_iter < 1:
        raise ValueError('n_iter must be at least 1')

    values = np.zeros(P.shape[0])
    for _ in range(n_iter):
        # P has shape (s, a, s'), so P @ values yields expected next-state
        # values with shape (s, a), matching R.
        Q = R + gamma * P @ values
        values = np.max(Q, axis=1)
    # The greedy policy is only needed once, from the action values of the
    # final sweep.
    greedy = np.argmax(Q, axis=1)

    values_text = ', '.join([f'{v:.3g}' for v in values])
    values_text = f'({values_text})'

    if greedy[0] == 1:
        policy_text = 'Quit'
    elif greedy[1] == 0:
        policy_text = '(Study, Defend)'
    else:
        policy_text = '(Study, Quit)'
    return values_text, policy_text


# Sweep the discount factor over 0.1, 0.2, ..., 1.0 and print one row per
# value as a pipe-format (Markdown) table.
print(
    tabulate(
        # linspace fixes the number of points explicitly; arange with a float
        # step relies on rounding to decide whether the endpoint is included.
        [(g, *vi(g)) for g in np.linspace(0.1, 1.0, 10)],
        # Raw strings: '\g', '\m' and '\p' are not valid escape sequences and
        # trigger a warning in ordinary string literals.
        headers=[r'$\gamma$', r'$\mathbf{V}^*$', r'$\pi^*$'],
        colalign=('center', 'center', 'center'),
        tablefmt='pipe',
    )
)

|  $\gamma$  |    $\mathbf{V}^*$     |     $\pi^*$     |
|:----------:|:---------------------:|:---------------:|
|    0.1     |     (0, 0, 10, 0)     |      Quit       |
|    0.2     |     (0, 0, 10, 0)     |      Quit       |
|    0.3     |   (0, 0.532, 10, 0)   |      Quit       |
|    0.4     |   (0, 1.09, 10, 0)    |      Quit       |
|    0.5     |   (0, 1.67, 10, 0)    |      Quit       |
|    0.6     |   (0, 2.27, 10, 0)    |      Quit       |
|    0.7     | (0.0394, 2.91, 10, 0) | (Study, Defend) |
|    0.8     |  (1.01, 3.76, 10, 0)  | (Study, Defend) |
|    0.9     |  (2.29, 4.77, 10, 0)  | (Study, Defend) |
|     1      |     (4, 6, 10, 0)     | (Study, Defend) |
