In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys
import time
import random
import numpy as np
import scipy.stats as st
import plotly.express as px

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
from game_simulation import CoinGameSimulation

Cheaters are only biased against heads!

In [4]:
def simulate_strategy(strategy, n_simulations=500):
    # Simulate the game with the given strategy
    # strategy is a function that takes n_heads, n_tails and flips_left and returns an action in ("fair", "cheater", "one_flip", "five_flips")
    g = CoinGameSimulation()
    scores = []
    n_labels_list = []
    n_flips_list = []
    n_flips_per_label_list = []
    rewards = []
    for _ in range(n_simulations):
        n_heads, n_tails, flips_left = g.reset()
        n_labels = 0
        n_flips = 0
        n_flips_per_label = 0
        done = False
        while not done:
            action = strategy(n_heads, n_tails, flips_left)
            (n_heads, n_tails, flips_left), reward, done, _ = g.step(action)
            rewards.append(reward)

            if action == 2:
                n_labels += 1
                n_flips_per_label_list.append(n_flips_per_label)
                n_flips_per_label = 0
            elif action == 3:
                n_labels += 1
                n_flips_per_label_list.append(n_flips_per_label)
                n_flips_per_label = 0
            elif action == 0:
                n_flips_per_label += 1
                n_flips += 1
            elif action == 1:
                n_flips_per_label += 5
                n_flips += 5
            else:
                raise ValueError("Unknown action: {}".format(action))
        scores.append(g.score)
        n_labels_list.append(n_labels)
        n_flips_list.append(n_flips)
    
    for name, l in zip(["score", "n_labels", "n_flips", "n_flips_per_label", "reward"],[scores, n_labels_list, n_flips_list, n_flips_per_label_list, rewards]):
        print(name, np.mean(l), "±", np.std(l))
    return scores

Random choice gives a score of around 7

In [9]:
_ = simulate_strategy(lambda x, y, z: random.choice([2,3]))

score 7.358 ± 7.341242129231265
n_labels 14.752 ± 11.019550626046419
n_flips 0.0 ± 0.0
n_flips_per_label 0.0 ± 0.0
reward -7.554907809110629 ± 22.499933002844667


  logger.warn(


In [14]:
z = st.norm.ppf(.90)
p_fair = 0.5
p_cheater = 0.9

# https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval
def strategy1(n_heads, n_tails, flips_left):
    n = n_heads + n_tails
    if n == 0:
        return 1
    p_hat = n_heads / n

    interval = z * np.sqrt(p_hat * (1 - p_hat) / n)

    #print(n_heads, n_tails, flips_left)
    #print(p_hat - interval, p_hat + interval, 2*interval)


    if p_hat - interval < p_fair < p_cheater + interval:
        return 2
    
    if p_hat - interval < p_cheater < p_cheater + interval:
        return 3
    
    if flips_left <= 0:
        if abs(p_fair-p_hat) > abs(p_cheater-p_hat):
            return 2
        else:
            return 3
    
    return 0

_ = simulate_strategy(strategy1, n_simulations=1000)

score 10.097 ± 9.958593826439554
n_labels 14.758 ± 12.406104787563258
n_flips 133.796 ± 92.10742849520878
n_flips_per_label 9.065998102723947 ± 18.630874397452654
reward -1.3628940372199012 ± 8.669320596592165


In [13]:
# Vibe
def strategy2(n_heads, n_tails, flips_left):
    n_throws = n_heads + n_tails
    diff = (n_heads-n_tails)
    if diff > 5:
        return 3
    elif diff < 0 or (n_throws > 10 and diff < 1):
        return 2
    if flips_left <= 0:
        if diff > 4:
            return 3
        else:
            return 2
    return 0

random.seed(1)
_ = simulate_strategy(strategy2, n_simulations=1000)

score 32.969 ± 42.746555872958936
n_labels 44.387 ± 53.704164000568895
n_flips 267.313 ± 314.43099565882494
n_flips_per_label 6.022326356816185 ± 6.036929618535373
reward -0.3699647096567212 ± 7.582055544536433


In [None]:
#https://en.wikipedia.org/wiki/Checking_whether_a_coin_is_fair