# Evaluating the Agents

First, we build a simulator that plays rounds with an agent, so that the agents can be compared:

In [None]:
from Blackjack.Round import Round
from Blackjack.Dealer import Dealer
from Blackjack.Action import Action
from Blackjack.Agent.MonteCarloOnPolicyAgent import MonteCarloOnPolicyAgent
from Blackjack.Agent.SarsaOnPolicyAgent import SarsaOnPolicyAgent
from Blackjack.Agent.Agent import Agent
from Blackjack.Action import Action

class AgentSimulator:
    """Play Blackjack rounds with a single agent and tally the outcomes.

    The agent keeps learning while it is simulated: SARSA agents are updated
    after every action, Monte Carlo agents once per round.
    """

    def __init__(self, agent: Agent):
        """Store the agent to simulate; no round exists until one is played."""
        self.agent = agent
        self.round = None  # replaced by a fresh Round on every play_round() call

    def play_round(self):
        """Play one round: the agent's turn first, then the dealer's turn.

        Returns:
            The value of ``self.round.get_terminal_state(True)`` once the
            round is over. ``simulate_games`` counts 1 as a win, -1 as a loss
            and anything else as a draw.
        """
        self.round = Round()

        action = Action.HIT

        # Agent's turn: keep asking the agent for an action until it stops
        # hitting, the round becomes terminal, or the sum reaches 21.
        while (
            action == Action.HIT
            and self.round.get_terminal_state() is None
            and self.round.get_sum_for_player(True)[0] != 21
        ):
            action = self.agent.get_policy(self.round)

            if action == Action.HIT:
                self.round.hit()
            elif action == Action.STAND:
                self.round.stand()

            # agents which update after each action (SARSA)
            if isinstance(self.agent, SarsaOnPolicyAgent):
                self.agent.update_agent(
                    self.round,
                    self.agent.get_policy(self.round),
                    self.round.get_terminal_state(),
                )

        # loss, agent sum went over 21
        if self.round.get_terminal_state() is not None:
            # SARSA agents already updated after the action that ended the
            # round, so only Monte Carlo agents still need their update.
            return self._finish_episode(sarsa_already_updated=True)

        # switching turns
        if self.round.get_sum_for_player(True)[0] == 21:
            self.round.stand()  # change turn to dealer

        dealer = Dealer()

        # Dealer's turn: hit for as long as the dealer policy says so and the
        # round has not reached a terminal state.
        while (
            dealer.get_policy(self.round.get_sum_for_player(False)[0]) == Action.HIT
            and self.round.get_terminal_state() is None
        ):
            self.round.hit()

        return self._finish_episode(sarsa_already_updated=False)

    def _finish_episode(self, sarsa_already_updated):
        """Apply the end-of-round agent update, close the episode, return the result.

        Args:
            sarsa_already_updated: True when a SARSA agent has already been
                updated for the action that ended the round, in which case
                only Monte Carlo agents are updated here.

        Returns:
            The value of ``self.round.get_terminal_state(True)``.
        """
        if isinstance(self.agent, MonteCarloOnPolicyAgent):
            # agents which update at the end of the episode (Monte Carlo)
            self.agent.update_agent(self.round)
        elif not sarsa_already_updated and isinstance(self.agent, SarsaOnPolicyAgent):
            self.agent.update_agent(
                self.round, Action.STAND, self.round.get_terminal_state(True)
            )

        self.agent.end_episode()

        return self.round.get_terminal_state(True)

    def simulate_games(self, num_games):
        """Play ``num_games`` rounds and count how they ended.

        Args:
            num_games: Number of rounds to play.

        Returns:
            A dict with the keys ``"wins"``, ``"losses"`` and ``"draws"``.
            A round result of 1 counts as a win, -1 as a loss, and every
            other value as a draw.
        """
        results = {
            "wins": 0,
            "losses": 0,
            "draws": 0,
        }

        for _ in range(num_games):
            result = self.play_round()

            if result == 1:
                results["wins"] += 1
            elif result == -1:
                results["losses"] += 1
            else:
                results["draws"] += 1

        return results

In [22]:
# Monte Carlo on-policy agent 1: exploring starts, epsilon is 1/k.
mcop_agent_1 = MonteCarloOnPolicyAgent(True, 1)
mcop_agent_1_simulator = AgentSimulator(agent=mcop_agent_1)
mcop_agent_1_results = mcop_agent_1_simulator.simulate_games(num_games=100_000)

In [23]:
# Monte Carlo on-policy agent 2: no exploring starts, epsilon is 1/k.
mcop_agent_2 = MonteCarloOnPolicyAgent(False, 1)
mcop_agent_2_simulator = AgentSimulator(agent=mcop_agent_2)
mcop_agent_2_results = mcop_agent_2_simulator.simulate_games(num_games=100_000)

In [24]:
# Monte Carlo on-policy agent 3: no exploring starts, epsilon is e^(-k/1000).
mcop_agent_3 = MonteCarloOnPolicyAgent(False, 2)
mcop_agent_3_simulator = AgentSimulator(agent=mcop_agent_3)
mcop_agent_3_results = mcop_agent_3_simulator.simulate_games(num_games=100_000)

In [25]:
# Monte Carlo on-policy agent 4: no exploring starts, epsilon is e^(-k/10000).
mcop_agent_4 = MonteCarloOnPolicyAgent(False, 3)
mcop_agent_4_simulator = AgentSimulator(agent=mcop_agent_4)
mcop_agent_4_results = mcop_agent_4_simulator.simulate_games(num_games=100_000)

In [26]:
# SARSA on-policy agent 1: epsilon is 0.1.
sarsa_agent_1 = SarsaOnPolicyAgent(1)
sarsa_agent_1_simulator = AgentSimulator(agent=sarsa_agent_1)
sarsa_agent_1_results = sarsa_agent_1_simulator.simulate_games(num_games=100_000)

In [27]:
# SARSA on-policy agent 2: epsilon is 1/k.
sarsa_agent_2 = SarsaOnPolicyAgent(2)
sarsa_agent_2_simulator = AgentSimulator(agent=sarsa_agent_2)
sarsa_agent_2_results = sarsa_agent_2_simulator.simulate_games(num_games=100_000)

In [28]:
# SARSA on-policy agent 3: epsilon is e^(-k/1000).
sarsa_agent_3 = SarsaOnPolicyAgent(3)
sarsa_agent_3_simulator = AgentSimulator(agent=sarsa_agent_3)
sarsa_agent_3_results = sarsa_agent_3_simulator.simulate_games(num_games=100_000)

In [29]:
# SARSA on-policy agent 4: epsilon is e^(-k/10000).
sarsa_agent_4 = SarsaOnPolicyAgent(4)
sarsa_agent_4_simulator = AgentSimulator(agent=sarsa_agent_4)
sarsa_agent_4_results = sarsa_agent_4_simulator.simulate_games(num_games=100_000)

In [30]:
# Outcome tallies of the four Monte Carlo agents, one dict per line.
print(
    mcop_agent_1_results,
    mcop_agent_2_results,
    mcop_agent_3_results,
    mcop_agent_4_results,
    sep="\n",
)

{'wins': 35052, 'losses': 58753, 'draws': 6195}
{'wins': 34121, 'losses': 60141, 'draws': 5738}
{'wins': 42014, 'losses': 49029, 'draws': 8957}
{'wins': 41965, 'losses': 49828, 'draws': 8207}


In [31]:
# Outcome tallies of the four SARSA agents, one dict per line.
print(
    sarsa_agent_1_results,
    sarsa_agent_2_results,
    sarsa_agent_3_results,
    sarsa_agent_4_results,
    sep="\n",
)

{'wins': 41357, 'losses': 51699, 'draws': 6944}
{'wins': 42386, 'losses': 50219, 'draws': 7395}
{'wins': 42046, 'losses': 50654, 'draws': 7300}
{'wins': 41107, 'losses': 52264, 'draws': 6629}
