In [11]:
from pig_tools import PigWorld
from pig_tools import value_iteration
from pig_tools import get_decision_space
import plotly.express as px
import plotly.graph_objects as go

# Simple coin game

- Class PigWorld can be used to set up the type of game the optimal strategy should be computed for.
- For example for the simple coin game with winning_score=2:

In [2]:
# Two-sided "coin" variant played to a winning score of 2.
game_type = 'coin'
num_sides = winning_score = 2
env = PigWorld(
    num_sides=num_sides,
    winning_score=winning_score,
    game_type=game_type,
)

08:21:32 building state space
built state space of length 10
08:21:32 finished building env


- We can look at some of the details in the env:
    - the full state space: env.S
    - the possible actions (same for all games): env.A
    - the terminal states: env.terminal_states
    - the non terminal states: env.non_terminal_states
- Note: 
    - states are encoded as tuples (i, j, k) corresponding to (score player 1, score player 2, turn total)

In [9]:
# Show the state space, the action set, and the terminal / non-terminal split of env.
print(f'States Space: {env.S}')
print(f'Actions: {env.A}')
print(f'terminal_states: {env.terminal_states}')
print(f'non_terminal_states: {env.non_terminal_states}')

States Space: ((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 1, 0), (0, 1, 1), (0, 1, 2), (1, 0, 0), (1, 0, 1), (1, 1, 0), (1, 1, 1))
Actions: ('roll', 'hold')
terminal_states: ((0, 0, 2), (0, 1, 2), (1, 0, 1), (1, 1, 1))
non_terminal_states: ((0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (1, 0, 0), (1, 1, 0))


- Then value_iteration can be used to compute the optimal strategy.
- theta defines the stopping rule for the approximation of the value function V
- value_iteration returns the value function V and the q-values
- parameter 'iteration_type' can be left at 'backward_split', which corresponds to the implementation of value iteration most tailor-made for this problem (see Neller, Presser or the comments in the docstring)

In [3]:
# Stopping threshold for the approximation of the value function.
theta = 1e-4
# 'backward_split' selects the iteration type; the call yields the value function and the q-values.
V, q_values = value_iteration('backward_split', env, theta)

08:25:32 Started backward_split value iteration
08:25:32 iterate on score_sum = 2 (1 elements)
08:25:32 iterate on score_sum = 1 (3 elements)
08:25:32 iterate on score_sum = 0 (2 elements)
08:25:32 Finished backward_split value iteration


- We can see below that values for the different states match the results given in Neller, Presser
- Note: terminal states, e.g. (0, 0, 2), do not have any inherent value, only moving into them does

In [4]:
# Display the value function returned by value_iteration (a dict keyed by state tuple).
V

{(0, 0, 0): 0.5714649310897439,
 (0, 0, 1): 0.714267534455128,
 (0, 0, 2): 0,
 (0, 1, 0): 0.4000813812017441,
 (0, 1, 1): 0.6000406891107559,
 (0, 1, 2): 0,
 (1, 0, 0): 0.799959309399128,
 (1, 0, 1): 0,
 (1, 1, 0): 0.66668701171875,
 (1, 1, 1): 0}

- we can also look at q_values, e.g. 'roll' corresponds to p(win | roll) 

In [5]:
# Display the q-values returned by value_iteration: a 'roll' and a 'hold' value per non-terminal state.
q_values

Unnamed: 0,Unnamed: 1,Unnamed: 2,roll,hold
0,0,0,0.571465,0.42862
0,0,1,0.714268,0.599919
0,1,0,0.400081,0.200081
0,1,1,0.600041,0.333313
1,0,0,0.799959,0.599919
1,1,0,0.666687,0.333374


***
# Dice game

- we now compute the optimal strategy for a 6 sided dice game with winning score 50
- we use the function get_decision_space to augment the q-values a bit for plotting decision boundaries and so on

In [10]:
# Six-sided dice game played to a winning score of 50.
num_sides, winning_score, game_type = 6, 50, 'dice'
env = PigWorld(
    num_sides=num_sides,
    winning_score=winning_score,
    game_type=game_type,
)

# Stopping threshold for the value-function approximation.
theta = 1e-5
V, q_values = value_iteration('backward_split', env, theta)

# Optional: persist / reload the q-values instead of recomputing them
# (note: `pd` is not imported in the import cell, so add `import pandas as pd` first).
# filename = f'q_values_{game_type}_{num_sides}_{winning_score}.gz'
# q_values.to_pickle(filename)
# q_values = pd.read_pickle(filename)

# Augment the q-values with the flags used by the plots further down.
decision_space = get_decision_space(q_values)

08:59:51 building state space
built state space of length 76250
08:59:51 finished building env
08:59:51 Started backward_split value iteration
08:59:52 iterate on score_sum = 98 (1 elements)
08:59:52 iterate on score_sum = 97 (2 elements)
08:59:52 iterate on score_sum = 96 (4 elements)
08:59:52 iterate on score_sum = 95 (7 elements)
08:59:52 iterate on score_sum = 94 (11 elements)
08:59:52 iterate on score_sum = 93 (16 elements)
08:59:52 iterate on score_sum = 92 (22 elements)
08:59:52 iterate on score_sum = 91 (29 elements)
08:59:52 iterate on score_sum = 90 (37 elements)
08:59:52 iterate on score_sum = 89 (46 elements)
08:59:52 iterate on score_sum = 88 (56 elements)
08:59:52 iterate on score_sum = 87 (67 elements)
08:59:52 iterate on score_sum = 86 (79 elements)
08:59:52 iterate on score_sum = 85 (92 elements)
08:59:52 iterate on score_sum = 84 (106 elements)
08:59:52 iterate on score_sum = 83 (121 elements)
08:59:52 iterate on score_sum = 82 (137 elements)
08:59:52 iterate on score

- next, a quick look at what the DataFrame decision_space looks like
    - roll_flag = 1 iff roll > hold otherwise = 0
    - boundary_flag = 1 iff roll_flag changes value on consecutive turn_totals (for score_1 and score_2 fixed) (see also plots below)

In [12]:
# Display the decision_space DataFrame produced by get_decision_space(q_values).
decision_space

Unnamed: 0,score_1,score_2,turn_total,roll,hold,roll_flag,boundary_flag
0,0,0,0,0.546124,0.453871,1,0
1,0,0,2,0.554832,0.473742,1,0
2,0,0,3,0.559484,0.483829,1,0
3,0,0,4,0.564346,0.494205,1,0
4,0,0,5,0.569441,0.504858,1,0
...,...,...,...,...,...,...,...
61295,49,45,0,0.870266,0.221595,1,0
61296,49,46,0,0.865385,0.192307,1,0
61297,49,47,0,0.861244,0.167464,1,0
61298,49,48,0,0.857143,0.142857,1,0


### plot p(win | roll) and p(win | hold) along turn_total for score_1 and score_2 fixed:

In [13]:
# Fix both players' scores and keep only the matching rows of decision_space.
score_1 = 10
score_2 = 0
temp_flag = decision_space['score_1'].eq(score_1) & decision_space['score_2'].eq(score_2)
decision_line = decision_space.loc[temp_flag].copy()

# One line per action ('roll' first, then 'hold'), both plotted against the turn total.
fig = go.Figure(data=[
    go.Scatter(x=decision_line['turn_total'],
               y=decision_line[action],
               mode='lines',
               name=action)
    for action in ('roll', 'hold')
])
fig.update_layout(
    title=f'P(win | roll) and P(win | hold) across turn-totals for score_1 = {score_1} and score_2 = {score_2}',
    xaxis_title='turn total',
    yaxis_title='probability of win',
)
fig.show()

### plot decision boundary for score_2 fixed at 0

- each dot in the plot marks the last turn_total before the roll_flag switches value
- note: at turn_total = 0 it is always better to roll, i.e. this defines the start of the decision space
- e.g. for a score of player 1 of 5 (and player 2 of 0 - which is fixed):
    - it is optimal to roll as long as turn_total <= 17
    - if 39 >= turn_total > 17 it is optimal to hold
    - for turn_total > 39 it is optimal to roll
    - note that an optimal player will never reach turn_total > 39, since they will already have held earlier, i.e. only a non-optimal player can reach this state
    - in fact, an optimal player can reach at most turn_total = 17 + 6 = 23
- note that for score_1 > 18 (and fixed score_2 = 0) it is always optimal to roll, regardless of turn_total

In [14]:
# Keep only the boundary rows for the chosen (fixed) score of player 2.
score_2 = 0
temp_flag = decision_space['score_2'].eq(score_2) & decision_space['boundary_flag'].eq(1)
decision_line = decision_space.loc[temp_flag].copy()

fig = px.scatter(decision_line, x='score_1', y='turn_total')
# The title is set via update_layout (not px.scatter) to keep the figure layout unchanged.
fig.update_layout(
    title=f'Decision boundary for the score of player 2 fixed at {score_2}',
    xaxis_title='Score of Player 1',
)
fig.show()

### 3d plot of the full decision boundary:

In [16]:
# All boundary points, across every (score_1, score_2) combination.
temp_flag = decision_space['boundary_flag'].eq(1)
boundary_space = decision_space.loc[temp_flag].copy()

# Points are coloured by score_2, with the colour scale centred on its mean.
# (Optional extras that were tried and left disabled: size='turn_total', opacity=0.7.)
fig = px.scatter_3d(
    boundary_space,
    x='score_1',
    y='score_2',
    z='turn_total',
    color='score_2',
    color_continuous_midpoint=boundary_space['score_2'].mean(),
)

# Camera placement, following https://plotly.com/python/3d-camera-controls/
# Default values: up=(0, 0, 1), center=(0, 0, 0), eye=(1.25, 1.25, 1.25);
# only the eye position is changed here.
camera = {
    'up': {'x': 0, 'y': 0, 'z': 1},
    'center': {'x': 0, 'y': 0, 'z': 0},
    'eye': {'x': 0.7, 'y': -2.5, 'z': 0},
}
fig.update_layout(
    title='Decision boundary',
    autosize=False,
    width=700,
    height=700,
    margin={'l': 50, 'r': 50, 'b': 50, 't': 50},
    scene_camera=camera,
)
fig.show()