In [None]:
import numpy as np
from numpy.random import default_rng
rng = default_rng()
import matplotlib.pyplot as plt
import seaborn as sns
import ipywidgets as widgets  # interactive display
from scipy.stats import poisson

%config InlineBackend.figure_format = 'retina'
%matplotlib inline
plt.style.use("../JC.mplstyle")


In [None]:
import random
from collections import defaultdict

def deal_card():
    """Draw one card value uniformly from an infinite deck.

    Values 2-9 appear once each, 10 appears four times, and an ace is
    returned as 11, giving 13 equally likely outcomes.

    Returns:
        int: The value of the drawn card, between 2 and 11 inclusive.
    """
    pip_cards = list(range(2, 10))
    ten_valued = [10] * 4
    ace = [11]
    return random.choice(pip_cards + ten_valued + ace)

def generate_episode(policy):
    """Simulate the player's turn of one blackjack hand under ``policy``.

    The starting state is drawn at random: a player total from 12 to 21, a
    dealer showing card from ``deal_card()``, and a coin-flip usable-ace
    flag.

    Args:
        policy: Callable mapping a state ``(player_sum, dealer_card,
            usable_ace)`` to an action; ``0`` means stick, any other value
            means hit.

    Returns:
        list: ``(state, action)`` pairs, one per decision taken.  A bust
        total is never recorded as a state, so when the player busts the
        last entry is the state they hit from (with a non-zero action).
    """
    player_sum = random.randint(12, 21)
    dealer_card = deal_card()
    usable_ace = random.choice([True, False])

    state = (player_sum, dealer_card, usable_ace)
    episode = []

    while True:
        action = policy(state)
        episode.append((state, action))

        if action == 0:
            break

        card = deal_card()
        # A freshly drawn ace only counts as 11 if that does not bust.
        if card == 11 and player_sum + card > 21:
            card = 1

        player_sum += card
        usable_ace = usable_ace or card == 11

        # A soft hand that goes over 21 is not a bust: the ace that was
        # counted as 11 falls back to 1 and the hand becomes hard.
        if player_sum > 21 and usable_ace:
            player_sum -= 10
            usable_ace = False

        if player_sum > 21:
            break

        state = (player_sum, dealer_card, usable_ace)

    return episode

def fixed_policy(state):
    """Stick on 20 or 21, otherwise hit.

    Args:
        state: Tuple ``(player_sum, dealer_card, usable_ace)``; only the
            player's total is consulted.

    Returns:
        int: ``0`` (stick) when the total is at least 20, else ``1`` (hit).
    """
    total, _dealer, _ace = state
    if total >= 20:
        return 0
    return 1

def simulate_blackjack(policy, num_episodes):
    """Estimate state values for ``policy`` by Monte Carlo averaging.

    Each episode's terminal reward (-1 loss, 0 draw, +1 win) is credited to
    every state visited in that episode, and the per-state mean is returned.

    Args:
        policy: Callable mapping a state ``(player_sum, dealer_card,
            usable_ace)`` to an action (``0`` = stick, otherwise hit).
        num_episodes: Number of hands to simulate.

    Returns:
        dict: Maps each visited state to the mean reward observed from it.
    """
    values_sum = defaultdict(float)
    num_visits = defaultdict(int)

    for _ in range(num_episodes):
        episode = generate_episode(policy)
        (player_sum, dealer_card, _), last_action = episode[-1]

        # generate_episode does not record the bust total itself, so a
        # player bust shows up as an episode whose final action was a hit.
        # The explicit > 21 check also covers episodes that do record it.
        player_bust = player_sum > 21 or last_action != 0
        reward = -1 if player_bust else play_dealer(dealer_card, player_sum)

        for state, _ in episode:
            num_visits[state] += 1
            values_sum[state] += reward

    state_values = {state: value_sum / num_visits[state] for state, value_sum in values_sum.items()}
    return state_values

def play_dealer(dealer_card, player_sum=None):
    """Play out the dealer's hand and score it against the player.

    The dealer starts from the showing card and draws until reaching 17 or
    more, counting an ace as 11 whenever that does not bust the hand.

    Args:
        dealer_card: Value of the dealer's showing card (ace is 11).
        player_sum: The player's final, non-bust total.  When omitted the
            hands are not compared and only a dealer bust is reported.

    Returns:
        int: With ``player_sum`` given, ``1`` if the player wins (dealer
        busts or has the lower total), ``0`` on a tie and ``-1`` if the
        dealer's total is higher.  Without it, ``1`` if the dealer busts
        and ``0`` otherwise.
    """
    dealer_sum = dealer_card
    usable_ace = dealer_card == 11

    while dealer_sum < 17:
        card = deal_card()
        # A freshly drawn ace only counts as 11 if that does not bust.
        if card == 11 and dealer_sum + card > 21:
            card = 1

        dealer_sum += card
        usable_ace = usable_ace or card == 11

        # Demote a soft ace from 11 to 1 instead of busting the dealer.
        if dealer_sum > 21 and usable_ace:
            dealer_sum -= 10
            usable_ace = False

    if dealer_sum > 21:
        return 1
    if player_sum is None:
        return 0
    # Sign of the difference: +1 player higher, 0 tie, -1 dealer higher.
    return (player_sum > dealer_sum) - (player_sum < dealer_sum)

if __name__ == "__main__":
    # Evaluate the fixed policy over many simulated hands, then list the
    # estimated value of every visited state in sorted state order.
    num_episodes = 500_000
    state_values = simulate_blackjack(fixed_policy, num_episodes)

    for state in sorted(state_values):
        value = state_values[state]
        print(f"State: {state}, Value: {value:.2f}")
