# Heuristic Modelling

Here we first attempt to heuristically come up with probabilistic models of how humans take actions during blackjack. We then use negative log-likelihoods to compare the heuristics and see which one humans are most likely following. We also compare these heuristic models with the model based on the optimal state-action function, and see which is more likely!


In [59]:
from game import *
import random
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib import colors
from mpl_toolkits.mplot3d import Axes3D  
import random
from matplotlib.pyplot import figure
from read_data import *
import math
from scipy.special import comb
from scipy.optimize import fminbound

In [41]:
# bootstrap code
def get_state(game):
    """Summarise ``game`` as a 3-tuple state.

    The tuple holds, in order:
      * ``game.get_sum`` applied to the player's hand,
      * the first element of ``game.get_hand_value`` applied to the
        dealer's hand,
      * the ``game.hasAce`` attribute.
    """
    player_total = game.get_sum(game.get_player_hand())
    dealer_value = game.get_hand_value(game.get_dealer_hand())[0]
    return (player_total, dealer_value, game.hasAce)

# Every state the models cover, as (first, second, ace_flag) tuples in the same
# order get_state produces them: first component 12..21, second component 2..11.
states_with_ace = [(player, dealer, True) for player in range(12, 22) for dealer in range(2, 12)]
states_without_ace = [(player, dealer, False) for player in range(12, 22) for dealer in range(2, 12)]
# Ace states come first, so indices 0-99 are ace states and 100-199 are not.
valid_states = states_with_ace + states_without_ace

def state_to_index(st):
    """Return the position of ``st`` in ``valid_states``, or -1 if it is absent.

    Callers must check for the -1 sentinel before using the result as an
    index: -1 is itself a valid (last-element) index for lists and arrays.
    """
    if st not in valid_states:
        return -1
    return valid_states.index(st)

# A state inside the grid: player sum 20 lies in range(12, 22).
state_to_index((20, 2, True))
# A state outside the grid: 22 is not in range(12, 22), so this yields -1.
state_to_index((22, 2, True))

-1

### Model 1
#### P(Winning | Current State, Hit Once)

This model suggests that humans base their decision (to hit or not) on the probability of winning (the player's sum being less than or equal to 21 and the dealer's sum being less than the player's sum) if they hit once and then stop. We use Monte Carlo sampling to estimate P(winning | current_state, hit_once). Optimally, the player should base this decision on the probability of winning given that they can hit multiple times, but we know that the human mind doesn't always make decisions using a full-width, exact model.

In [42]:
SAMPLES = 500  # default number of simulated games per state


def _simulate_hit_once_win_rates(states, samples):
    """Estimate, for each state, the fraction of simulated games that are won
    when the player hits exactly once and then stands."""
    win_counts = np.zeros(len(states), dtype=float)
    for position, (ps, dc, ace) in enumerate(states):
        for _ in range(samples):
            game = BlackJack()
            game.set_state(ps, dc, ace)
            # A bust on the single hit is a loss; nothing to count.
            if game.hit() == Status.PLAYER_BUST:
                continue
            if game.stand() == Status.PLAYER_WON:
                win_counts[position] += 1
    return win_counts / samples


def winning_based_model(states, alpha, samples=None):
    """Return one probability per state from the "hit once, then stand" model.

    For every state a Monte Carlo estimate of the win rate is computed: the
    game is set to that state, ``hit()`` is called once and, unless the
    result is ``Status.PLAYER_BUST``, ``stand()`` is called and a
    ``Status.PLAYER_WON`` result is counted as a win. The estimate is then
    blended with 0.5: ``(1 - alpha) * win_rate + alpha * 0.5``.

    Args:
        states: sequence of 3-tuples that are unpacked and passed to
            ``BlackJack.set_state``. The result is aligned with this
            sequence, i.e. ``result[i]`` belongs to ``states[i]``.
        alpha: mixing weight; 0 returns the raw win rates, 1 returns 0.5
            for every state.
        samples: number of simulated games per state. Defaults to the
            module-level ``SAMPLES`` (looked up at call time).

    Returns:
        A float numpy array of length ``len(states)``.

    Raises:
        ValueError: if ``samples`` is not positive.

    Note:
        The simulation does not depend on ``alpha`` and is re-run on every
        call, so repeated calls give slightly different values.
    """
    if samples is None:
        samples = SAMPLES
    if samples <= 0:
        raise ValueError("samples must be positive, got {!r}".format(samples))
    # Iterate the states that were passed in, not the global valid_states, so
    # the output always matches the argument in length and order.
    win_rates = _simulate_hit_once_win_rates(states, samples)
    return (1 - alpha) * win_rates + alpha * 0.5

In [60]:
def compute_log_likelihood(T, H, p):
    """Return the binomial log-likelihood of ``H`` successes in ``T`` trials.

    Computes ``log C(T, H) + H*log(p) + (T - H)*log(1 - p)``.

    Args:
        T: total number of trials.
        H: number of successes (``0 <= H <= T``).
        p: success probability; values at or outside the ends of [0, 1] are
            clamped to ``1e-10`` / ``1 - 1e-10`` so both logarithms stay finite.

    Returns:
        The log-likelihood as a float.

    Raises:
        ValueError: if the binomial coefficient is undefined or zero
            (e.g. ``H > T`` or ``H < 0`` for integer inputs).
    """
    eps = 1e-10
    # Keep p strictly inside (0, 1) so neither log below sees zero.
    p = p if p > 0.0 else eps
    p = p if p < 1.0 else 1.0 - eps
    # Work with log C(T, H) directly via log-gamma: evaluating the coefficient
    # itself first overflows to inf once T is large.
    log_binom = math.lgamma(T + 1) - math.lgamma(H + 1) - math.lgamma(T - H + 1)
    return log_binom + (H * math.log(p) + (T - H) * math.log(1.0 - p))
    
def fit_winning_based_model(alpha, human_data):
    """Return the negative log-likelihood of the human data under the
    winning-based model with mixing weight ``alpha``.

    The model's per-state value is used as the probability of hitting in
    that state.

    Args:
        alpha: mixing weight forwarded to ``winning_based_model``.
        human_data: DataFrame whose rows provide ``'State'`` (a member of
            ``valid_states``), ``'Count'`` (number of observations) and
            ``'HIT'`` (number of those observations that were hits).

    Returns:
        The summed negative binomial log-likelihood over all rows.

    Raises:
        ValueError: if a row's state is not one of ``valid_states``.
    """
    probabilities = winning_based_model(valid_states, alpha)
    total = 0
    for _, row in human_data.iterrows():
        position = state_to_index(row['State'])
        if position < 0:
            # state_to_index signals "unknown" with -1; used as an index that
            # would silently pick the last state's probability.
            raise ValueError(
                "human data contains a state outside valid_states: {!r}".format(row['State'])
            )
        total += compute_log_likelihood(row['Count'], row['HIT'], probabilities[position])
    return -1 * total
    
# Load the human decisions; fit_winning_based_model reads the 'State',
# 'Count' and 'HIT' fields of every row.
data = get_human_results()
# Trial evaluation of the objective at alpha = 0.1 (result is not stored).
fit_winning_based_model(0.1, data)
# Search alpha in [0, 1] for the smallest negative log-likelihood.
# NOTE(review): every objective evaluation re-runs the Monte Carlo simulation,
# so the function being minimised is noisy and the result varies between runs.
minimizing_alpha = fminbound(fit_winning_based_model, 0, 1, args=(data,))
# Negative log-likelihood at the fitted alpha (a fresh, again noisy, evaluation).
fit_winning_based_model(minimizing_alpha, data)

288.40595221233787

In [61]:
# Show the alpha returned by fminbound for the winning-based model.
minimizing_alpha

0.8923681894742407

In [65]:
# Spot check: negative log-likelihood of 15 hits in 20 observations when the
# model's hit probability is 0.9.
-1*compute_log_likelihood(20,15,0.9)

3.444479865707077