In [1]:
import environments
import numpy as np
from agents.agents import MonteCarloPredictor
import matplotlib.pyplot as plt

In [2]:
# Build the 'hitstand' environment through the project's `environments.make` factory.
env = environments.make('hitstand')

In [3]:
# Show the concrete class of the environment object returned by the factory.
type(env)

environments.blackjack.HitStand

In [4]:
# Display the mapping from action index to its human-readable name.
env.action_space_description

{0: 'stand', 1: 'hit'}

In [5]:
# Display the mapping from reward value to its meaning.
env.reward_space_description

{-1: 'The House beats Jack',
 0: 'Draw/not terminal',
 1: 'Jack beats the House',
 1.5: 'Blackjack for Jack'}

In [6]:
# Display what each index of an observation represents.
env.observation_space_description

{0: "Player's total", 1: "Dealer's card value", 2: 'Player has got usable ace'}

In [7]:
# Display `observation_space_high` (presumably the per-component upper bounds
# of an observation -- confirm against the environment class).
env.observation_space_high

array([30, 26,  1])

In [8]:
# Display `observation_space_low` (presumably the per-component lower bounds
# of an observation -- confirm against the environment class).
env.observation_space_low

array([4, 2, 0])

In [9]:
class RandomPolicyAgent(MonteCarloPredictor):
    """Monte Carlo predictor whose policy ignores the state and acts at random."""

    def follow_policy(self):
        """Return a random action index drawn from ``[0, action_space_len)``.

        The exclusive upper bound is read from
        ``self.environment.action_space_len``.
        """
        n_actions = self.environment.action_space_len
        # With a single argument, randint samples from the half-open range [0, n_actions).
        return np.random.randint(n_actions)

In [10]:
# Instantiate the random-policy agent, passing it the environment created above.
agent = RandomPolicyAgent(env)

In [11]:
# Quick check: ask the agent's policy for a single action.
agent.follow_policy()

1

In [12]:
def run_experiment(env, agent, episodes, show, save=None):
    """Run ``episodes`` episodes of ``agent`` interacting with ``env``.

    For each episode the environment is reset, the initial state is passed to
    ``agent.evaluate_state``, and then the agent repeatedly picks an action via
    ``agent.follow_policy()`` and evaluates the resulting state until the
    environment reports a terminal step.

    Parameters
    ----------
    env :
        Object exposing ``render()``, ``reset()`` and ``step(action)``.
        ``reset`` and ``step`` must return a 4-tuple
        ``(state, reward, terminal, extra)``; the last item is ignored.
    agent :
        Object exposing ``follow_policy()``,
        ``evaluate_state(state, reward, terminal)`` and
        ``save_table(episode_number)``.
    episodes : int
        Number of episodes to run.
    show : int or None
        Every ``show`` episodes (1-based) print the episode number and call
        ``env.render()``. ``0`` or ``None`` disables rendering.
    save : int or None, optional
        Every ``save`` episodes (1-based) call ``agent.save_table`` with the
        episode number. ``None`` (the default) or ``0`` disables saving.
    """
    for episode in range(episodes):
        episode_number = episode + 1

        # Guard the interval so that 0/None means "never" instead of raising.
        if show and episode_number % show == 0:
            print('Episode {0}:'.format(episode_number))
            env.render()

        s0, reward, terminal, _ = env.reset()
        agent.evaluate_state(s0, reward, terminal)
        while not terminal:
            action = agent.follow_policy()
            state, reward, terminal, _ = env.step(action)
            agent.evaluate_state(state, reward, terminal)

        # `save` defaults to None; the modulo must not run in that case
        # (None % int raises TypeError).
        if save and episode_number % save == 0:
            agent.save_table(episode_number)


In [13]:
# Experiment settings: total episodes, render interval, and table-save interval.
EPISODES = 100_000
SHOW_EVERY = 100
SAVE_EVERY = 10_000

run_experiment(
    env,
    agent,
    episodes=EPISODES,
    show=SHOW_EVERY,
    save=SAVE_EVERY,
)

Episode 100:
Jack:
	-Cards: ['QS', 'JH']
	-Value : 20
Dealer:
	-Cards: ['5D']
	-Value : 5
Jack decides to: STAND
	-Cards: ['QS', 'JH']
	-Value : 20
Dealer:
	-Cards: ['5D', '9D', 'AH', '3C']
	-Value : 18
JACK BEATS THE HOUSE
Episode 200:
Jack:
	-Cards: ['KC', '3S']
	-Value : 13
Dealer:
	-Cards: ['7H']
	-Value : 7
Jack decides to: STAND
	-Cards: ['KC', '3S']
	-Value : 13
Dealer:
	-Cards: ['7H', '9S', '3H']
	-Value : 19
THE HOUSE BEATS JACK
Episode 300:
Jack:
	-Cards: ['JD', '10C']
	-Value : 20
Dealer:
	-Cards: ['JD']
	-Value : 10
Jack decides to: STAND
	-Cards: ['JD', '10C']
	-Value : 20
Dealer:
	-Cards: ['JD', '6S', '5D']
	-Value : 21
THE HOUSE BEATS JACK
Episode 400:
Jack:
	-Cards: ['6H', '10H']
	-Value : 16
Dealer:
	-Cards: ['10D']
	-Value : 10
Jack decides to: STAND
	-Cards: ['6H', '10H']
	-Value : 16
Dealer:
	-Cards: ['10D', '5S', 'JD']
	-Value : 25
JACK BEATS THE HOUSE
Episode 500:
Jack:
	-Cards: ['6H', '7H']
	-Value : 13
Dealer:
	-Cards: ['3S']
	-Value : 3
Jack decides to: STAND
	

In [14]:
# Display the episode numbers recorded in `agent.save_at_episodes`.
# NOTE(review): the recorded output lists 1000..10000, which does not match
# SAVE_EVERY = 10_000 over EPISODES = 100_000 -- presumably left over from an
# earlier run; re-run the notebook from a fresh kernel to refresh it.
agent.save_at_episodes

[1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000]

In [15]:
# Show the agent's class name.
type(agent).__name__

'RandomPolicyAgent'