# 二十一点 Blackjack-v1

In [36]:
%matplotlib inline
import numpy as np
np.random.seed(0)
import matplotlib.pyplot as plt
import gym

In [46]:
def ob2state(observation):
    """Convert a Blackjack observation into an index-friendly state tuple.

    Two input shapes are accepted:

    * a wrapped pair whose first element is the observation tuple, e.g.
      ``((player_sum, dealer_card, usable_ace), info)``;
    * a bare observation ``(player_sum, dealer_card, usable_ace)``.

    Args:
        observation: one of the two shapes described above.

    Returns:
        ``(player_sum, dealer_card, usable_ace)`` where ``usable_ace`` is
        converted to ``int`` (0 or 1) so the tuple can be used directly as a
        NumPy array index.

    Raises:
        ValueError: if the observation matches neither shape.
    """
    # Wrapped form: unwrap so both shapes share the same code path below.
    if isinstance(observation[0], tuple):
        observation = observation[0]

    if not isinstance(observation[0], (int, np.integer)) or len(observation) < 3:
        raise ValueError("Unexpected observation structure")

    player_sum, dealer_card, usable_ace = observation[0], observation[1], observation[2]
    # A bool used as an array index is treated by NumPy as a boolean mask,
    # not as position 0/1, so hand back a real integer.
    return player_sum, dealer_card, int(usable_ace)


def policy(state):
    """Pick an action from the player's current total.

    Args:
        state: sequence whose first element is the player's total.

    Returns:
        ``0`` when the total is 20 or more, otherwise ``1``.
    """
    player_sum = state[0]
    if player_sum >= 20:
        return 0
    return 1


def generate_episode(env):
    """Play one episode with ``policy`` and record the trajectory.

    Args:
        env: environment whose ``step`` returns five values
            ``(observation, reward, terminated, truncated, info)``.
            The result of ``env.reset()`` is handed to ``ob2state``
            unchanged, so it must be in a shape ``ob2state`` accepts.

    Returns:
        List of ``(state, action, reward)`` tuples in the order they occurred.
    """
    episode = []
    observation = env.reset()
    while True:
        state = ob2state(observation)
        action = policy(state)
        next_observation, reward, terminated, truncated, info = env.step(action)
        episode.append((state, action, reward))
        # Either flag ends the episode; stepping past a truncation is invalid.
        if terminated or truncated:
            break
        # Keep the observation in the same (obs, info) pairing throughout the
        # loop so ob2state always sees one shape and reads the dealer card and
        # usable-ace flag from the real observation.
        observation = (next_observation, info)
    return episode



def evaluate_policy(env, episodes=10000):
    """Estimate state values with first-visit Monte Carlo evaluation.

    Episodes are produced by ``generate_episode(env)``. Returns are
    undiscounted sums of the rewards that follow each step.

    Args:
        env: environment passed through to ``generate_episode``.
        episodes: number of episodes to sample.

    Returns:
        Array of shape ``(22, 11, 2)`` holding the running mean return for
        each ``(player_sum, dealer_card, usable_ace)`` index.
    """
    value_table = np.zeros((22, 11, 2))
    returns_count = np.zeros((22, 11, 2))

    for _ in range(episodes):
        episode = generate_episode(env)

        # Plain-int index triples: a bool in an index position would be
        # interpreted by NumPy as a mask rather than as position 0/1.
        keys = [(int(s[0]), int(s[1]), int(s[2])) for s, _, _ in episode]

        # Time step at which each state is first seen in this episode.
        first_visit = {}
        for t, key in enumerate(keys):
            first_visit.setdefault(key, t)

        g = 0
        # Walk backwards so g accumulates the return from step t onward.
        for t in range(len(episode) - 1, -1, -1):
            g += episode[t][2]
            key = keys[t]
            if first_visit[key] != t:
                continue  # not the first visit to this state
            returns_count[key] += 1
            # Incremental mean update.
            value_table[key] += (g - value_table[key]) / returns_count[key]
    return value_table

# Create the Blackjack environment.
env = gym.make('Blackjack-v1')

# Evaluate the fixed policy over 500 sampled episodes.
value_table = evaluate_policy(env, episodes=500)

# Show the resulting state-value table.
print(value_table)


[[[ 0.          0.        ]
  [ 0.          0.        ]
  [ 0.          0.        ]
  [ 0.          0.        ]
  [ 0.          0.        ]
  [ 0.          0.        ]
  [ 0.          0.        ]
  [ 0.          0.        ]
  [ 0.          0.        ]
  [ 0.          0.        ]
  [ 0.          0.        ]]

 [[ 0.          0.        ]
  [ 0.          0.        ]
  [ 0.          0.        ]
  [ 0.          0.        ]
  [ 0.          0.        ]
  [ 0.          0.        ]
  [ 0.          0.        ]
  [ 0.          0.        ]
  [ 0.          0.        ]
  [ 0.          0.        ]
  [ 0.          0.        ]]

 [[ 0.          0.        ]
  [ 0.          0.        ]
  [ 0.          0.        ]
  [ 0.          0.        ]
  [ 0.          0.        ]
  [ 0.          0.        ]
  [ 0.          0.        ]
  [ 0.          0.        ]
  [ 0.          0.        ]
  [ 0.          0.        ]
  [ 0.          0.        ]]

 [[ 0.          0.        ]
  [ 0.          0.        ]
  [ 0.        