In [1]:
import numpy as np
import random
from gymnasium.spaces import Box, Space
from poke_env.player import Gen9EnvSinglePlayer, RandomPlayer, Player
from poke_env.environment.abstract_battle import AbstractBattle
from poke_env.teambuilder import Teambuilder

# from poke_env.player

In [2]:
from poke_env import AccountConfiguration, ServerConfiguration
from poke_env.teambuilder.teambuilder import Teambuilder


class QAgent(Player):
    def __init__(self, account_configuration: AccountConfiguration | None = None, *, avatar: str | None = None, battle_format: str = "gen9randombattle", log_level: int | None = None, max_concurrent_battles: int = 1, accept_open_team_sheet: bool = False, save_replays: bool | str = False, server_configuration: ServerConfiguration | None = None, start_timer_on_battle_start: bool = False, start_listening: bool = True, ping_interval: float | None = 20, ping_timeout: float | None = 20, team: str | Teambuilder | None = None):
        super().__init__(account_configuration, avatar=avatar, battle_format=battle_format, log_level=log_level, max_concurrent_battles=max_concurrent_battles, accept_open_team_sheet=accept_open_team_sheet, save_replays=save_replays, server_configuration=server_configuration, start_timer_on_battle_start=start_timer_on_battle_start, start_listening=start_listening, ping_interval=ping_interval, ping_timeout=ping_timeout, team=team)

        self.q_table = {}
        self.epsilon = 0.5
        self.gamma = 0.95
        self.alpha = 0.1
        self.last_state = None
        self.last_action = None
        self.current_state = None
        self.last_hp = 100

    def embed_moves(self, battle: AbstractBattle): # 2 to 13 #sho: added tera type, 3 to 14?
        embedding = []
        for move in battle.available_moves:
            embedding += [move.base_power, move.type, move.category, move.current_pp]

        return embedding

    def list_to_tuple(self, embedding):
        return tuple(embedding)

    def embed_pokemon(self, battle: AbstractBattle): # 0 and 1 #sho: added tera type, 0,1 and 2?
        embedding = []

        # embedding += battle.active_pokemon.base_stats
        embedding += battle.active_pokemon.current_hp
        embedding += battle.active_pokemon.type
        embedding += battle.active_pokemon.tera_type

        return embedding

    def embed_battle(self, battle: AbstractBattle):
        """Return a list containing info about the game state"""
        embedding = (battle.opponent_active_pokemon.base_species, battle.active_pokemon.terastallized,)
        # embedding += self.embed_pokemon(battle)
        # embedding += self.embed_moves(battle)
        # embedding += [battle.opponent_active_pokemon]

        return embedding
    
    # def find_best_move_in_table(self, battle, encoding):
    #     max_score = -float("Inf")
    #     best_move = None
    #     for i in battle.available_moves:
    #         if self.q_table[encoding] > max_score:
    #             max_score = self.q_table[encoding]
    #             best_move = i

    #     return best_move, max_score


    def choose_move(self, battle):
        # print(self.q_table)
        encoding = self.embed_battle(battle)

        if battle.turn == 1:
            self.last_hp = 100

        self.last_state = self.current_state
        self.current_state = encoding
        # print(self.last_hp, battle.opponent_active_pokemon.current_hp, battle.turn)

        if battle.turn > 1:
            self.update_q_table(battle)
        
        if encoding in self.q_table:
            if random.random() < self.epsilon:
                best_move = np.argmax(self.q_table[encoding])

                #best_move, max_score = self.find_best_move_in_table(battle, encoding)

                # if tera_score > best_score:

                # If the best action is Tera (can comment out this whole case)
                if best_move == 4 and battle.can_tera:  
                #if self.q_table[encoding][4] > max_score:
                    self.last_action = best_move
                    move_while_tera = np.argmax(self.q_table[encoding][:4])

                    #print("move while tera pp,", move_while_tera.current_pp)
                    #print("move while tera pp,", battle.available_moves[move_while_tera].current_pp)
                    #print("move while tera pp,", battle.active_pokemon.moves[battle.available_moves[move_while_tera].id].current_pp)
                    #print("move while tera pp,", battle.available_moves[move_while_tera].max_pp)

                    #move_while_tera = self.find_best_move_in_table(battle,)
                    
                    if battle.available_moves[move_while_tera].current_pp > 0:
                        return self.create_order(battle.available_moves[move_while_tera], terastallize=True)
                    else:
                        # Move while Tera is not usable, fallback to the next best move
                        second_best_move = np.argsort(self.q_table[encoding][:4])[-2]  # Select the second best move
                        return self.create_order(battle.available_moves[second_best_move], terastallize=True)
    
                    # turn off tera ver.
                    #return self.create_order(battle.available_moves[best_move], terastallize=True)
                    
                # when best move not tera or after tera
                self.last_action = best_move
                #print("best move,", best_move)
                #print("move pp,", best_move.current_pp)
                #print("move pp,", battle.available_moves[best_move].current_pp)
                #print("move pp,", battle.active_pokemon.moves[battle.available_moves[best_move].id].current_pp)
                #print("move name,", battle.available_moves[best_move])
                #print("move pp,", battle.available_moves[best_move].current_pp)

                if battle.available_moves[best_move].current_pp > 0:
                    return self.create_order(battle.available_moves[best_move])
                else:
                    #The best move is not available, fallback to the next best move
                    second_best_move = np.argsort(self.q_table[encoding][:4])[-2]  # Select the second best move
                    return self.create_order(battle.available_moves[second_best_move])
                
                #return self.create_order(battle.available_moves[best_move])
                
        # Random action selection now includes Tera
        random_move = random.randint(0, 4)  # Includes Tera as an option
        if random_move == 4 and battle.can_tera:
            self.last_action = random_move
            random_move_while_tera = random.randint(0, 3)  # Pick a move after Tera

            #print("random move pp,", random_move_while_tera.current_pp)
            #print("random move while tera pp,", battle.available_moves[random_move_while_tera].current_pp)
            #print("random move while tera pp,", battle.active_pokemon.moves[battle.available_moves[random_move_while_tera].id].current_pp)
            #print("random move while tera pp,", battle.available_moves[random_move_while_tera].max_pp)

            #random_move_while_tera = random.choice(battle.available_moves)
            
            if battle.available_moves[random_move_while_tera].current_pp > 0:
                return self.create_order(battle.available_moves[random_move_while_tera], terastallize=True)
            else:
                available_moves = [i for i in range(len(battle.available_moves)) if battle.available_moves[i].current_pp > 0 and i != random_move]
                # Check if there are any available moves left
                if available_moves:
                    next_random_move = random.choice(available_moves)  # Select a random move from the available moves
                    return self.create_order(battle.available_moves[next_random_move], terastallize=True)
            
        
            #return self.create_order(battle.available_moves[random_move_while_tera], terastallize=True)
            #print(f"Choosing move: {battle.available_moves[random_move].id}")

        random_move = random.randint(0, 3)
        #random_move = random.choice(battle.available_moves)
        self.last_action = random_move

        #print("random move pp,", random_move.current_pp)
        #print("random move pp,", battle.available_moves[random_move].current_pp)
        #print("random move pp,", battle.active_pokemon.moves[battle.available_moves[random_move].id].current_pp)
        #print("random move name,", battle.available_moves[random_move])
        #print("random move pp,", battle.available_moves[random_move].current_pp)

        if battle.available_moves[random_move].current_pp > 0:
            return self.create_order(battle.available_moves[random_move])
        else:
            # The random move selected is not available
            available_moves = [i for i in range(len(battle.available_moves)) if battle.available_moves[i].current_pp > 0 and i != random_move]
            # Check if there are any available moves left
            if available_moves:
                next_random_move = random.choice(available_moves)  # Select a random move from the available moves
                return self.create_order(battle.available_moves[next_random_move])
            
        #return self.create_order(random_move)

    def select_move(self, move, battle):
        return self.create_order(battle.available_moves[move])

    def change_epsilon(self, new):
        self.epsilon = new

    def new_q_table(self, table):
        self.q_table = table

    def calc_reward(self, battle):
        score = 0

        if battle.active_pokemon.fainted:
            score -= 50

        if battle.active_pokemon.fainted:
            score += 50

        hp_diff = (
            self.last_hp - battle.opponent_active_pokemon.current_hp
        )
        self.last_hp = battle.opponent_active_pokemon.current_hp

        our_hp_diff = (
            self.last_hp - battle.active_pokemon.current_hp
        )
        self.last_hp = battle.active_pokemon.current_hp

        return score + hp_diff - our_hp_diff

    def update_q_table(self, battle):
        if self.last_state not in self.q_table:
            #self.q_table[self.last_state] = [0] * 4
            self.q_table[self.last_state] = [0] * 5  # Add space for Tera option

        if self.current_state not in self.q_table:
            # Initialize the current state as well
            self.q_table[self.current_state] = [0] * 5  # Again, adding space for Tera option

        q_old = self.q_table[self.last_state][self.last_action]
        temporal_difference = self.calc_reward(battle) + (self.gamma * max(self.q_table[self.current_state])) - q_old
        q_update = q_old + (self.alpha * temporal_difference)
        # Now put that into our q-table
        self.q_table[self.last_state][self.last_action] = q_update

In [3]:
team_1 = """
Goodra (M) @ Assault Vest
Ability: Sap Sipper
Tera Type: Steel
EVs: 248 HP / 252 SpA / 8 Spe
Modest Nature
IVs: 0 Atk
- Dragon Pulse
- Flamethrower
- Sludge Wave
- Thunderbolt
"""

In [4]:
class RandomTeamFromPool(Teambuilder):
    def __init__(self, teams):
        self.packed_teams = []

        for team in teams:
            parsed_team = self.parse_showdown_team(team)
            packed_team = self.join_team(parsed_team)
            self.packed_teams.append(packed_team)

    def yield_team(self):
        return np.random.choice(self.packed_teams)

In [5]:
team_4 = """
Goodra (M) @ Assault Vest
Ability: Sap Sipper
EVs: 248 HP / 252 SpA / 8 Spe
Modest Nature
IVs: 0 Atk
- Dragon Pulse
- Flamethrower
- Sludge Wave
- Thunderbolt
"""
team_2 = """
Sylveon (M) @ Leftovers
Ability: Pixilate
EVs: 248 HP / 244 Def / 16 SpD
Calm Nature
IVs: 0 Atk
- Hyper Voice
- Quick Attack
- Protect
- Wish
"""
team_3 = """
Blastoise @ Assault Vest
Ability: Torrent 
EVs: 252 HP / 252 SpA / 4 SpD  
Modest Nature  
IVs: 0 Atk  
- Ice Beam  
- Hydro Pump  
- Aura Sphere  
- Surf
"""
team_5 = """
Charizard @ Life Orb
Ability: Blaze
EVs: 252 SpA / 4 SpD / 252 Spe
Timid Nature
IVs: 0 Atk
- Air Slash  
- Flamethrower  
- Sunny Day  
- Dragon Dance
"""

teams = [team_4, team_2, team_3, team_5]
custom_builder = RandomTeamFromPool(teams)

In [6]:
train = QAgent(team=team_1, battle_format="gen9ou")
p2 = RandomPlayer(battle_format="gen9ou", team=custom_builder)



In [7]:
# for i in range(1000):
#     await train.battle_against(p2, n_battles=1)
for i in range(1000):
    await train.battle_against(p2, n_battles=1)

In [8]:
test = QAgent(team=team_1, battle_format="gen9ou")
test.new_q_table(train.q_table.copy())
test.change_epsilon(1)

In [9]:
# for i in range(800):
#     await test.battle_against(p2, n_battles=1)

for i in range(1000):
    await test.battle_against(p2, n_battles=1)

In [10]:
test.n_won_battles, test.n_finished_battles

(878, 1000)

In [11]:
test.q_table

{('charizard', False): [3069.5830762081587,
  3095.8400435880385,
  3330.128925070018,
  3120.118242908041,
  8584.198849788061],
 ('charizard', True): [1458.7643172108499,
  2524.6037764930365,
  8854.248327062694,
  2540.42543188326,
  0],
 ('blastoise', False): [7957.381662327152,
  6672.437556650628,
  6630.707896236835,
  6598.353561577998,
  1548.053509222949],
 ('sylveon', False): [5048.939772786791,
  5030.14486666149,
  7337.780194417824,
  4902.5273931745005,
  4637.17945551317],
 ('goodra', False): [7562.199338785146,
  5552.917708635204,
  5369.05047031562,
  5273.866548668153,
  3683.4204806411517],
 ('goodra', True): [1642.5665220100552,
  2883.169199708864,
  3705.9876574243885,
  3021.8795378244554,
  0],
 ('blastoise', True): [1546.5244095069452,
  1091.3107654966773,
  930.06334944614,
  860.5514408371057,
  0],
 ('sylveon', True): [4487.96008252179,
  4725.873191532706,
  4990.270171271243,
  4493.39194158112,
  0]}