In [None]:
import gym
import gym_blackjack
from td_lambda_search import TD_Lambda_Search

<h3>The Agent</h3>

In [None]:
# The agent is designed to be very easy
# to instantiate and begin learning. Hyperparameter
# selection and loading previous models are both
# available, but not required.

# Build the environment by its registered id.
# NOTE(review): 'blackjack-v0' is presumably registered as a side effect of
# `import gym_blackjack` -- confirm.
env = gym.make('blackjack-v0')
# Reset once before handing the environment to the agent; the return value
# of reset() is discarded here.
env.reset()
# Instantiate the agent with only the environment, i.e. no explicit
# hyperparameters or saved model are passed.
agent = TD_Lambda_Search(env)

In [None]:
# Number of games the agent plays in this cell. The value 1 keeps the
# notebook fast to run end to end; raise it to give the agent more games
# to learn from before the evaluation below.
NUM_GAMES = 1

# The loop index is not used, so it gets a throwaway name (`_game` rather
# than `_`, which IPython reserves for the last cell output).
for _game in range(NUM_GAMES):
    agent.one_game()

In [None]:
# Save the agent's critic under the name "test_td_lambda".
# NOTE(review): the on-disk format and location are decided by
# critic.save() -- confirm before relying on the file.
agent.critic.save("test_td_lambda")

<h3>Evaluation</h3>

Blackjack is a solved game with a mathematically proven optimal strategy. We will evaluate the RL agent based on its convergence to this optimal strategy. To do this, we will create a DataFrame containing every possible state and the correct play for that state, then ask the model to predict an action for each state.

In [None]:
from itertools import product

# Enumerate every state the agent is evaluated on, as tuples of
# (player value, dealer upcard, player ace flag, dealer ace flag).
player_totals = range(12, 22)
ace_flags = range(2)

# Dealer upcard 11 is the only upcard paired with the dealer-ace flag set to 1.
states_aces = list(product(player_totals, [11], ace_flags, [1]))

# Upcards 2-10 (dealer-ace flag 0) come first, followed by the dealer-ace states.
states_list = list(product(player_totals, range(2, 11), ace_flags, [0])) + states_aces

print(states_list)

In [None]:
import pandas as pd

# One row per enumerated state, one named column per component of the state tuple.
state_columns = ['Player Value', 'Dealer Upcard', 'Player Ace', 'Dealer Ace']
df = pd.DataFrame(states_list, columns=state_columns)

In [None]:
# Preview the first rows of the state table to check the column layout.
df.head()

In [None]:
# Action encoding used in both columns: 1 = hit, 0 = stay.
# 'Correct' starts as "hit" everywhere; a later cell overwrites the states
# where staying is the right play.
# 'Agent' is only a placeholder value until the agent's own choices are
# written into it.
df['Correct'] = 1
df['Agent'] = 0

In [None]:
# Mark every state in which the correct play is to stay (0) rather than
# hit (1). Doubling down and splitting are not modelled.
# Strategy chart taken from:
# https://www.blackjackapprenticeship.com/blackjack-strategy-charts/

player = df['Player Value']
dealer = df['Dealer Upcard']
no_player_ace = df['Player Ace'] == 0
has_player_ace = df['Player Ace'] == 1

# Hands where the 'Player Ace' flag is 0.
stay_without_ace = no_player_ace & (
    (player >= 17)
    | (player.between(13, 16) & (dealer <= 6))
    | ((player == 12) & dealer.between(4, 6))
)

# Hands where the 'Player Ace' flag is 1.
stay_with_ace = has_player_ace & (
    (player >= 19)
    | ((player == 18) & (dealer <= 8))
)

df.loc[stay_without_ace | stay_with_ace, 'Correct'] = 0

In [None]:
# Get the action that the model greedily selects for every state.
#
# The column is assigned in one step on purpose: the chained form
# `df.iloc[i]['Agent'] = ...` writes into a temporary row object, so the
# DataFrame itself can be left unchanged.
# Row i of df was built from states_list[i], so the list lines up with the rows.
# NOTE(review): assumes agent.greedy() returns a single scalar action
# (0 or 1) per state -- confirm.
df['Agent'] = [agent.greedy(state) for state in states_list]

In [None]:
# Compare the Agent with a Mathematically Perfect Game
# The table holds 200 states, so asking for 400 rows displays all of it:
# compare the 'Agent' column against 'Correct' row by row.

df.head(400)