In [8]:
from blackjack import BlackJack
import numpy as np
import random
import matplotlib.pyplot as plt
import pandas as pd
pd.set_option('display.max_rows', 180)

In [2]:
def choose_action(state, pi, epsilon):
    
    if random.random() < epsilon:
        return "hit" if random.random() > .5 else "stand"
    
    return pi[state]

In [3]:
game = BlackJack()
epochs = 1_000_000

Q = {state : {"hit" : 0, "stand" : 0} for state in game.states}
pi = {state : "hit" if random.random() > .5 else "stand" for state in game.states}
N = {state : 0 for state in game.states}

for _ in range(epochs):
    
    game.start()
    
    visited_states = []
    performed_actions = []
    
    while game.player_hand_sum < 21: # while the player hasn't busted
        
        state = game.get_current_state()
        
        action = choose_action(state, pi, 0)
        
        visited_states.append(state)
        performed_actions.append(action)
        
        if action == "hit":
            game.hit()
        else:
            game.stand() # if we stand, the round ends
            break
    
    reward = game.get_reward()
    
    for s, a in zip(visited_states, performed_actions):
        
        N[s] += 1 # count occurences of states
        
        # update the Q-value
        Q[s][a] += (reward - Q[s][a]) / N[s]
        
    for s in visited_states:
        
        # update the policy
        pi[s] = max(Q[s], key = Q[s].get)
    

Object created


In [4]:
sums = [a for (a,_,_) in pi]
ace = [b for (_,b,_) in pi]
card = [c for (_,_,c) in pi]

df = pd.DataFrame({"sum" : sums, "ace" : ace, "card" : card, "action" : pi.values(), "n" : N.values()})

In [9]:
df.sort_values(["sum", "action", "ace", "card"])

Unnamed: 0,sum,ace,card,action,n
132,12,False,8,hit,8326
146,12,False,10,hit,33205
79,12,True,1,hit,764
154,12,True,2,hit,1203
110,12,True,5,hit,1258
124,12,True,7,hit,1145
139,12,True,9,hit,1205
81,12,True,10,hit,5581
144,12,False,1,stand,8740
34,12,False,2,stand,7475


In [10]:
game.wins

492602

In [11]:
game.loses

533925