In [None]:
import numpy as np

In [None]:
# Integer labels for coin type and toss outcome.
# NOTE(review): none of these four are referenced in the visible code, and
# game()/choose() treat coin == 1 as the fair coin, which disagrees with
# FAIR = 0 / CHEAT = 1 -- confirm the intended encoding before using them.
FAIR, CHEAT = 0, 1
HEAD, TAILS = 0, 1

# Per-toss probability of heads for each kind of coin.
prob_fair, prob_cheat = 0.5, 0.75

# Change applied to the toss budget after a correct / wrong guess (see game()).
win_reward, loss_reward = 15, -30

# Belief that the coin is the cheating one before any toss has been seen.
prior_prior = 0.5
# Threshold passed to rand() when game() draws the hidden coin.
prob_coin = 0.5

# Side length of the (heads, tails) grid tabulated below.
max_state = 5

actions = ['toss', 'fair', 'cheat']
# Every [heads, tails] pair with both counts in range(max_state), heads-major.
states = np.array(
  [[i, j] for i in range(max_state) for j in range(max_state)]
).reshape(-1, 2)

In [None]:
def binomial(state, prob_head):
  """Likelihood of one particular toss sequence with the given counts.

  Args:
    state: indexable whose item 0 is the number of heads and item 1 the
      number of tails.
    prob_head: per-toss probability of heads.

  Returns:
    prob_head**heads * (1 - prob_head)**tails. No binomial coefficient is
    applied.
  """
  heads = state[0]
  tails = state[1]
  return prob_head ** heads * (1 - prob_head) ** tails

In [None]:
def posterior_cheat(state, prior_cheat):
  """Bayes update of the belief that the coin is the cheating one.

  Args:
    state: [heads, tails] counts observed so far.
    prior_cheat: belief that the coin is cheating before those tosses.

  Returns:
    The belief that the coin is cheating after observing `state`.
  """
  # Prior-weighted likelihood of the observations under each hypothesis.
  cheat_term = prior_cheat * binomial(state, prob_cheat)
  fair_term = (1 - prior_cheat) * binomial(state, prob_fair)
  return cheat_term / (cheat_term + fair_term)

In [None]:
def expected(prob_cheat, aversion):
  """Expected payoff of the better of the two possible guesses.

  Args:
    prob_cheat: current belief that the coin is the cheating one. Inside
      this function the name hides the module-level `prob_cheat` constant.
    aversion: multiplier applied to the loss term of each guess.

  Returns:
    max(expected payoff of guessing "cheat", expected payoff of guessing "fair").
  """
  p_fair = 1 - prob_cheat
  payoffs = (
    prob_cheat * win_reward + p_fair * loss_reward * aversion,  # guess "cheat"
    p_fair * win_reward + prob_cheat * loss_reward * aversion,  # guess "fair"
  )
  return max(payoffs)

In [None]:
def next_state(state):
  """Return the two states reachable from `state` with one more toss.

  Args:
    state: [heads, tails] counts, as a list, tuple or numpy array.

  Returns:
    A two-item list of numpy arrays: first the state with one extra tail
    (`state + [0, 1]`), then the state with one extra head (`state + [1, 0]`).
  """
  # Convert first: with a plain list, `state + [0, 1]` would concatenate the
  # two lists instead of adding the counts element-wise.
  counts = np.asarray(state)
  return [counts + [0, 1], counts + [1, 0]]

In [None]:
def prob_head(prob_cheat, head_given_cheat=None, head_given_fair=None):
  """Marginal probability that the next toss lands heads.

  Args:
    prob_cheat: current belief that the coin is the cheating one.
    head_given_cheat: per-toss heads probability of the cheating coin;
      defaults to the module-level `prob_cheat` constant.
    head_given_fair: per-toss heads probability of the fair coin; defaults
      to the module-level `prob_fair` constant.

  Returns:
    prob_cheat * head_given_cheat + (1 - prob_cheat) * head_given_fair.
  """
  if head_given_cheat is None:
    # The parameter `prob_cheat` hides the module constant of the same name,
    # so the constant has to be fetched through globals(); using the bare
    # name here would square the belief instead of weighting by the constant.
    head_given_cheat = globals()['prob_cheat']
  if head_given_fair is None:
    head_given_fair = prob_fair
  return prob_cheat * head_given_cheat + (1 - prob_cheat) * head_given_fair

In [None]:
def expected_next(state, prior_cheat, aversion):
  """Expected payoff of the best guess made after exactly one more toss.

  Args:
    state: not used by the computation; kept so existing callers keep working.
    prior_cheat: current belief that the coin is the cheating one (already
      updated for every toss seen so far).
    aversion: loss multiplier forwarded to `expected`.

  Returns:
    The payoff of the best guess after a head and after a tail, each
    weighted by the probability of that outcome under the current belief.
  """
  expected_head = expected(posterior_cheat([1, 0], prior_cheat), aversion)
  expected_tails = expected(posterior_cheat([0, 1], prior_cheat), aversion)
  # Probability of heads under the current belief. It must be built from
  # `prior_cheat`; `prob_cheat` here is the module constant (per-toss heads
  # probability of the cheating coin), not a belief.
  p_head = prior_cheat * prob_cheat + (1 - prior_cheat) * prob_fair
  return expected_head * p_head + expected_tails * (1 - p_head)

In [None]:
# For every [heads, tails] count on the grid, print the belief that the coin
# cheats, the payoff of guessing right away, the payoff of guessing after one
# more head or one more tail, and the expected payoff after one more toss.
for state in states:
  prob = posterior_cheat(state, prior_prior)
  expected_head, expected_tails = (
    expected(posterior_cheat(outcome, prob), 1) for outcome in ([1, 0], [0, 1])
  )
  reward = expected(prob, 1)
  next_reward = expected_next(state, prob, 1)
  print(';\t '.join([
    f'state {state}:\t prob {prob: 0.2f}',
    f'reward {reward: 0.2f}',
    f'expected_head {expected_head: 0.2f}',
    f'expected_tails {expected_tails: 0.2f}',
    f'next_reward {next_reward: 0.2f}',
  ]))

state [0 0]:	 prob  0.50;	 reward -7.50;	 expected_head -3.00;	 expected_tails  0.00;	 next_reward -2.06
state [0 1]:	 prob  0.33;	 reward  0.00;	 expected_head -4.29;	 expected_tails  6.00;	 next_reward -1.07
state [0 2]:	 prob  0.20;	 reward  6.00;	 expected_head  2.73;	 expected_tails  10.00;	 next_reward  5.00
state [0 3]:	 prob  0.11;	 reward  10.00;	 expected_head  7.89;	 expected_tails  12.35;	 next_reward  9.29
state [0 4]:	 prob  0.06;	 reward  12.35;	 expected_head  11.14;	 expected_tails  13.64;	 next_reward  11.92
state [1 0]:	 prob  0.60;	 reward -3.00;	 expected_head  1.15;	 expected_tails -4.29;	 next_reward -0.55
state [1 1]:	 prob  0.43;	 reward -4.29;	 expected_head -6.18;	 expected_tails  2.73;	 next_reward -3.39
state [1 2]:	 prob  0.27;	 reward  2.73;	 expected_head -1.20;	 expected_tails  7.89;	 next_reward  1.64
state [1 3]:	 prob  0.16;	 reward  7.89;	 expected_head  5.12;	 expected_tails  11.14;	 next_reward  7.00
state [1 4]:	 prob  0.09;	 reward  11.14;	 expe

In [None]:
def rand(prob):
  """Draw a 0/1 value from numpy's global random state.

  Args:
    prob: threshold compared against one uniform draw from np.random.rand().

  Returns:
    1 when the draw is strictly greater than `prob`, otherwise 0.
  """
  draw = np.random.rand()
  return 1 if draw > prob else 0

In [None]:
def choose(state, aversion, biais, no_toss = False):
  """Decide between tossing again and committing to a guess.

  Args:
    state: [heads, tails] counts seen in the current round.
    aversion: loss multiplier forwarded to `expected` / `expected_next`.
    biais: minimum expected payoff required before guessing voluntarily.
    no_toss: when truthy, a guess is returned regardless of the payoffs.

  Returns:
    2 to toss again; otherwise the guess: 1 when guessing "fair" has the
    strictly higher expected payoff, else 0.
  """
  prob = posterior_cheat(state, prior_prior)
  guess_now = expected(prob, aversion)
  # Value of guessing after one more toss, minus 1 (game() deducts 1 from
  # the budget for every toss).
  guess_later = expected_next(state, prob, aversion) - 1

  commit = no_toss or (guess_now > guess_later and guess_now > biais)
  if not commit:
    return 2

  # The guess itself compares the raw payoffs, without the aversion factor.
  payoff_cheat = prob * win_reward + (1 - prob) * loss_reward
  payoff_fair = (1 - prob) * win_reward + prob * loss_reward
  return int(payoff_cheat < payoff_fair)

In [None]:
def game(aversion, biais):
  """Simulate one game and count the correct guesses.

  The player starts with a budget of 100. Each round a hidden coin is drawn,
  the player tosses it (1 budget unit per toss) until `choose` commits to a
  guess, and the budget changes by `win_reward` or `loss_reward`. Rounds
  repeat while the budget is positive.

  Args:
    aversion: loss multiplier forwarded to `choose`.
    biais: guessing threshold forwarded to `choose`.

  Returns:
    The number of rounds in which the guess matched the hidden coin.
  """
  toss = 100
  success = 0

  while toss > 0:
    # Hidden coin for this round: a truthy draw selects the fair coin's heads
    # probability, which matches choose() returning 1 for a "fair" guess.
    coin = rand(prob_coin)
    true_prob = prob_fair if coin else prob_cheat
    state = [0, 0]

    # Toss until the policy commits; once the budget is exhausted the
    # `toss <= 0` flag forces choose() to return a guess.
    choice = choose(state, aversion, biais, toss <= 0)
    while choice == 2:
      toss -= 1
      # rand() returns 0 with probability true_prob, so index 0 counts heads.
      state[rand(true_prob)] += 1
      choice = choose(state, aversion, biais, toss <= 0)

    if coin == choice:
      success += 1
      toss += win_reward
    else:
      toss += loss_reward
  return success

In [None]:
def avg_game(aversion, biais, length):
  """Average the result of `game` over `length` independent runs.

  Args:
    aversion: forwarded to `game`.
    biais: forwarded to `game`.
    length: number of games to play.

  Returns:
    The mean number of correct guesses per game (0 when `length` is 0).
  """
  mean_success = 0
  for _ in range(length):
    # Each game adds its share of the mean directly, so no final division
    # is needed.
    mean_success = mean_success + game(aversion, biais) / length
  return mean_success

In [None]:
# Sweep the loss-aversion factor and the guessing threshold, printing the
# average number of correct guesses over 1000 simulated games per pair.
for aver in np.arange(0.8, 1.3, 0.1):
  for biais in np.arange(2.4, 3.61, 0.2):
    print(
      f'aver {aver}\tbiais {biais}: \t'
      f'{avg_game(aver, biais, 1000):0.2f}'
    )


aver 0.8	biais 2.4: 	121.62
aver 0.8	biais 2.6: 	125.17
aver 0.8	biais 2.8000000000000003: 	126.60
aver 0.8	biais 3.0000000000000004: 	240.47
aver 0.8	biais 3.2000000000000006: 	214.78
aver 0.8	biais 3.400000000000001: 	203.16
aver 0.8	biais 3.600000000000001: 	315.43
aver 0.9	biais 2.4: 	225.19
aver 0.9	biais 2.6: 	360.05
aver 0.9	biais 2.8000000000000003: 	288.94
aver 0.9	biais 3.0000000000000004: 	286.71
aver 0.9	biais 3.2000000000000006: 	426.81
aver 0.9	biais 3.400000000000001: 	419.71
aver 0.9	biais 3.600000000000001: 	669.66
aver 1.0	biais 2.4: 	418.94
aver 1.0	biais 2.6: 	475.70
aver 1.0	biais 2.8000000000000003: 	472.72
aver 1.0	biais 3.0000000000000004: 	536.81
aver 1.0	biais 3.2000000000000006: 	673.14
aver 1.0	biais 3.400000000000001: 	644.07
aver 1.0	biais 3.600000000000001: 	505.63
aver 1.1	biais 2.4: 	625.27
aver 1.1	biais 2.6: 	833.84
aver 1.1	biais 2.8000000000000003: 	567.66
aver 1.1	biais 3.0000000000000004: 	628.06
aver 1.1	biais 3.2000000000000006: 	854.39
aver 1.1