In [15]:
from blackjack import BlackjackEnv
import pandas as pd
import random

In [25]:
def policy(state, hard_strategy, soft_strategy):
    """Look up the action for ``state`` in a hard or soft strategy table.

    Args:
        state: Mapping with the keys ``"player_sum"``, ``"dealer_card"`` and
            ``"usable_ace"``.
        hard_strategy: Row-major table (sequence of rows) of action codes,
            consulted when the soft table does not apply.
        soft_strategy: Row-major table of action codes, consulted when
            ``usable_ace`` is truthy and ``player_sum`` is at most 21.

    Returns:
        ``"hit"`` or ``"stand"``. Action codes 0 and 2 both map to ``"hit"``,
        code 1 maps to ``"stand"``.

    Table layout implied by the index arithmetic: row 0 is ``player_sum == 4``
    and column 0 is ``dealer_card == 2``; a dealer card of 1 uses column 9.

    NOTE(review): the soft table is indexed with the same row offset (4) as
    the hard table -- confirm the soft CSV really starts at a total of 4.
    NOTE(review): code 2 presumably denotes "double" in the tables and is
    played as a plain hit here -- confirm this is intended.
    """
    total = state["player_sum"]
    upcard = state["dealer_card"]
    has_usable_ace = state["usable_ace"]

    # Column for the dealer's card: 2 -> 0, 3 -> 1, ...; a card encoded as 1
    # is sent to column 9.
    if upcard == 1:
        col = 9
    else:
        col = upcard - 2

    # Pick the table; both tables share the same row offset.
    table = soft_strategy if (has_usable_ace and total <= 21) else hard_strategy
    row = total - 4

    action_names = ("hit", "stand", "hit")

    # Only read the table when both coordinates are inside it. The column
    # bound is taken from the first row and is only evaluated when the row
    # check has already passed (so an empty table never gets indexed).
    row_in_range = 0 <= row < len(table)
    if row_in_range and 0 <= col < len(table[0]):
        return action_names[table[row][col]]

    # Outside the table: stand on 17 or more, otherwise hit.
    return "stand" if total >= 17 else "hit"


In [12]:
def simulate(env, policy_func, num_episodes=10000):
    """Play ``num_episodes`` episodes with ``policy_func`` and summarise them.

    Args:
        env: Environment object providing ``reset()`` (returns a state),
            ``get_legal_actions()`` and ``step(action)`` (returns
            ``(state, reward, done)``).
        policy_func: Callable taking the current state and returning an
            action string.
        num_episodes: Number of episodes to play; must be positive.

    Returns:
        dict with the keys ``"average_return"``, ``"win_percent"``,
        ``"loss_percent"`` and ``"draw_percent"``. Despite the key names the
        last three are fractions of ``num_episodes`` (0..1), not percentages.

    Raises:
        ValueError: if ``num_episodes`` is not positive (the averages would
            otherwise divide by zero or be computed over no episodes).
    """
    if num_episodes <= 0:
        raise ValueError(
            f"num_episodes must be a positive integer, got {num_episodes!r}"
        )

    wins = 0
    losses = 0
    draws = 0
    total_return = 0

    for _ in range(num_episodes):
        state = env.reset()
        done = False
        while not done:
            legal_actions = env.get_legal_actions()
            action_str = policy_func(state)

            # An action the environment does not currently allow is replaced
            # by "hit".
            # NOTE(review): an earlier comment described this fallback as
            # "stand" while the executed fallback is "hit" -- confirm which
            # one is intended.
            if action_str not in legal_actions:
                action_str = "hit"

            state, reward, done = env.step(action_str)

        # Only the reward returned by the episode's final step is counted.
        total_return += reward
        if reward > 0:
            wins += 1
        elif reward < 0:
            losses += 1
        else:
            draws += 1

    return {
        "average_return": total_return / num_episodes,
        "win_percent": wins / num_episodes,
        "loss_percent": losses / num_episodes,
        "draw_percent": draws / num_episodes,
    }


In [4]:
# Keyword options handed to BlackjackEnv unchanged; what each option does is
# defined by the blackjack module, which is not shown in this notebook.
ENV_OPTIONS = {
    "num_decks": 6,
    "dealer_hits_soft_17": False,
    "allow_double": True,
    "allow_split": False,
    "allow_surrender": False,
}

# Single environment instance used by the simulation runs in this notebook.
env = BlackjackEnv(**ENV_OPTIONS)

In [29]:
# Evaluate each stored strategy table, plus a random baseline, on the shared
# environment. Every run uses the same number of episodes.
NUM_EVAL_EPISODES = 100000


def load_strategy_table(csv_path):
    """Read one strategy CSV and return its cells as a list of row lists.

    The first CSV column is used as the DataFrame index, so it is not part of
    the returned rows.
    """
    return pd.read_csv(csv_path, index_col=0).values.tolist()


def evaluate_table_strategy(label, hard_csv, soft_csv, num_episodes=NUM_EVAL_EPISODES):
    """Simulate the table-driven policy built from two CSV files.

    Args:
        label: Name printed in front of the result line.
        hard_csv: Path of the CSV passed to ``policy`` as ``hard_strategy``.
        soft_csv: Path of the CSV passed to ``policy`` as ``soft_strategy``.
        num_episodes: Number of episodes forwarded to ``simulate``.

    Returns:
        The summary dict returned by ``simulate``; it is also printed as
        ``"<label> Results <dict>"``.
    """
    hard_strategy = load_strategy_table(hard_csv)
    soft_strategy = load_strategy_table(soft_csv)
    outcome = simulate(
        env,
        lambda state: policy(state, hard_strategy, soft_strategy),
        num_episodes=num_episodes,
    )
    print(f"{label} Results {outcome}")
    return outcome


# Summary dict per evaluated strategy, keyed by the printed label.
evaluation_results = {}

# Learned strategies share the "<label>_strategy_{hard,soft}.csv" naming.
for label in ("DQN", "MC", "PPO", "RRR"):
    evaluation_results[label] = evaluate_table_strategy(
        label,
        f"strategyTable/{label}_strategy_hard.csv",
        f"strategyTable/{label}_strategy_soft.csv",
    )

# Baseline: choose uniformly between "hit" and "stand", ignoring the state.
# NOTE(review): no RNG seed is set in the visible cells, so these numbers are
# presumably not reproducible run-to-run -- confirm whether BlackjackEnv seeds
# itself.
results = simulate(env, lambda x: random.choice(["hit", "stand"]), num_episodes=NUM_EVAL_EPISODES)
print(f"Random Results {results}")
evaluation_results["Random"] = results

# The expert tables use file names without the "_strategy" infix.
results = evaluate_table_strategy(
    "Expert",
    "strategyTable/Expert_hard.csv",
    "strategyTable/Expert_soft.csv",
)
evaluation_results["Expert"] = results

DQN Results {'average_return': -0.06317, 'win_percent': 0.40905, 'loss_percent': 0.49282, 'draw_percent': 0.09813}
MC Results {'average_return': -0.023835, 'win_percent': 0.43218, 'loss_percent': 0.47679, 'draw_percent': 0.09103}
PPO Results {'average_return': -0.170495, 'win_percent': 0.37862, 'loss_percent': 0.56977, 'draw_percent': 0.05161}
RRR Results {'average_return': -0.0377, 'win_percent': 0.42767, 'loss_percent': 0.48661, 'draw_percent': 0.08572}
Random Results {'average_return': -0.3859, 'win_percent': 0.2804, 'loss_percent': 0.67686, 'draw_percent': 0.04274}
Expert Results {'average_return': -0.0234, 'win_percent': 0.43001, 'loss_percent': 0.47496, 'draw_percent': 0.09503}
