# Blackjack
The Planner `__init__` expects a reward and transition matrix P: a nested dictionary, 
in the style of an [OpenAI Gym](https://www.gymlibrary.ml/) discrete environment, where 
P[state][action] is a list of tuples (probability, next state, reward, terminal).

The gym blackjack environment does not include this matrix, so we'll use the BlackjackWrapper class to create a wrapped gym environment, which modifies the observation space and includes P.  

In [None]:
!pip install bettermdptools

In [1]:
import gymnasium as gym
from bettermdptools.envs.blackjack_wrapper import BlackjackWrapper
from bettermdptools.utils.test_env import TestEnv
from bettermdptools.algorithms.planner import Planner
from bettermdptools.algorithms.rl import RL
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Make seed.py importable by its bare module name: the 'bettermdptools'
# directory located one level above the current working directory is put at
# the front of sys.path (only if it is not already listed there).
import os
import sys

# Parent of the directory the notebook is executed from.
root_directory = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
# <parent>/bettermdptools -- presumably the folder that holds seed.py.
inner_bettermdptools_path = os.path.join(root_directory, 'bettermdptools')

if inner_bettermdptools_path not in sys.path:
    # Index 0 so this location wins over any other module named 'seed'.
    sys.path.insert(0, inner_bettermdptools_path)

from seed import set_seed

# Fixed seed (42) for the cells that follow.
set_seed(42)

In [4]:
# Build the Blackjack environment and wrap it; the wrapper supplies the
# transition/reward matrix `P` that Planner consumes below.
base_env = gym.make('Blackjack-v1', render_mode=None)
blackjack = BlackjackWrapper(base_env)

# NOTE: seeding with the commented-out lines below generates the same outputs,
# while set_seed(42) outputs different results.
# seed = 42
# base_env.reset(seed = seed)
# np.random.seed(seed=seed)


def _report_mean_score(policy):
    """Run `policy` through TestEnv on the wrapped env and print the mean score.

    Calls TestEnv.test_env with n_iters=100 and rendering / user input turned
    off, prints np.mean of the returned scores, and returns the scores.
    """
    scores = TestEnv.test_env(
        env=blackjack,
        n_iters=100,
        render=False,
        pi=policy,
        user_input=False,
    )
    print(np.mean(scores))
    return scores


# --- Value iteration on the wrapper's P matrix ---
V, V_track, pi = Planner(blackjack.P).value_iteration()
test_scores = _report_mean_score(pi)

# --- Q-learning on the wrapped environment ---
# V and pi are deliberately rebound here, replacing the value-iteration results.
Q, V, pi, Q_track, pi_track, rewards = RL(blackjack).q_learning()
test_scores = _report_mean_score(pi)

-0.12


                                                                                                                                       

-0.22


