In [7]:
# Team text passed as `team1=` to Pokebot_Gen1_Environment in later cells.
# Six entries, each a species line, an "Ability:" line and four "- move" lines.
# NOTE(review): presumably Pokemon Showdown export format — confirm against the environment's parser.
team_1 = """
Alakazam  
Ability: No Ability  
- Psychic
- Seismic Toss
- Recover 
- Thunder Wave  

Snorlax  
Ability: No Ability  
- Body Slam  
- Hyper Beam  
- Earthquake  
- Self-Destruct  

Tauros  
Ability: No Ability  
- Blizzard  
- Body Slam  
- Earthquake  
- Hyper Beam  

Chansey  
Ability: No Ability  
- Ice Beam  
- Thunderbolt  
- Thunder Wave  
- Soft-Boiled  

Starmie  
Ability: No Ability  
- Thunder Wave  
- Psychic  
- Blizzard  
- Recover  

Exeggutor  
Ability: No Ability  
- Double-Edge  
- Explosion  
- Psychic  
- Sleep Powder  
"""
# Team text passed as `team2=` to Pokebot_Gen1_Environment in later cells.
# NOTE(review): the "Ability:" values here are inconsistent ("none", "None", "Pressure")
# and differ from the "No Ability" spelling used in team_1 — verify the parser tolerates this.
team_2 = """
Gengar  
Ability: none  
- Hypnosis  
- Psychic  
- Thunderbolt  
- Explosion  

Starmie  
Ability: none  
- Surf  
- Thunderbolt  
- Thunder Wave  
- Recover  

Zapdos  
Ability: Pressure  
- Thunderbolt  
- Drill Peck  
- Thunder Wave  
- Agility  

Chansey  
Ability: None
- Ice Beam  
- Thunderbolt  
- Thunder Wave  
- Soft-Boiled  

Snorlax  
Ability: None
- Body Slam  
- Hyper Beam 
- Earthquake  
- Self-Destruct  

Tauros  
Ability: None
- Body Slam  
- Hyper Beam  
- Blizzard  
- Earthquake  
"""

In [8]:
from __future__ import annotations

import glob
import os
import time

import supersuit as ss
from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy
from main import Pokebot_Gen1_Environment
import warnings

import cloudpickle
import gymnasium
from pettingzoo.utils.env import ParallelEnv

# from ss.concat_vec_env import ConcatVecEnv
# from .multiproc_vec import ProcConcatVec


In [9]:
from supersuit.vector.concat_vec_env import ConcatVecEnv 
from supersuit.vector.multiproc_vec import ProcConcatVec
from supersuit.vector.vector_constructors import vec_env_args
from supersuit.vector.constructors import MakeCPUAsyncConstructor

In [10]:
def pokebot_env_fn(**kwargs):
    """Create a new ``Pokebot_Gen1_Environment``.

    Every keyword argument is forwarded to the environment constructor
    unchanged; the freshly built instance is returned.
    """
    environment = Pokebot_Gen1_Environment(**kwargs)
    return environment

In [11]:
def patched_vec_env_args(env_fn, num_envs):
    """Return the positional-argument tuple for the vector-env constructor.

    The result is a 1-tuple whose only element is a list holding ``env_fn``
    repeated ``num_envs`` times (the same object each time, not copies).
    """
    factories = [env_fn for _ in range(num_envs)]
    return (factories,)
def patched_concat_vec_envs_v1(
    vec_env,
    num_vec_envs,
    num_cpus=0,
    base_class="gymnasium",
    observation=None,
    action=None,
):
    """Build a concatenated vector env from a factory and wrap it for ``base_class``.

    Args:
        vec_env: Object replicated ``num_vec_envs`` times by
            ``patched_vec_env_args`` and handed to the SuperSuit constructor.
            The caller in this notebook passes a zero-argument factory.
        num_vec_envs: Number of replicas to concatenate.
        num_cpus: Requested CPU count; capped at ``num_vec_envs``.
        base_class: One of ``"gymnasium"``, ``"stable_baselines"`` or
            ``"stable_baselines3"``; selects the wrapper applied to the result.
        observation: Forwarded to the constructor as ``obs_space``.
        action: Forwarded to the constructor as ``act_space``.

    Returns:
        The constructed vector env, optionally wrapped for Stable-Baselines.

    Raises:
        ValueError: If ``base_class`` is not one of the supported names.
    """
    # Validate before constructing anything: the constructor may create
    # environments (and, presumably, worker processes when num_cpus > 1), which
    # would be left unclosed if the error were only raised afterwards.
    if base_class not in ("gymnasium", "stable_baselines", "stable_baselines3"):
        raise ValueError(
            "supersuit_vec_env only supports 'gymnasium', 'stable_baselines', and 'stable_baselines3' for its base_class"
        )

    num_cpus = min(num_cpus, num_vec_envs)

    constructor = MakeCPUAsyncConstructor(num_cpus)
    vec_env = constructor(
        *patched_vec_env_args(vec_env, num_vec_envs),
        obs_space=observation,
        act_space=action,
    )

    if base_class == "stable_baselines":
        # Imported lazily so the wrapper module is only needed when requested.
        from supersuit.vector.sb_vector_wrapper import SBVecEnvWrapper

        return SBVecEnvWrapper(vec_env)
    if base_class == "stable_baselines3":
        from supersuit.vector.sb3_vector_wrapper import SB3VecEnvWrapper

        return SB3VecEnvWrapper(vec_env)
    # base_class == "gymnasium": return the raw concatenated env.
    return vec_env

In [13]:
from stable_baselines3.common.vec_env import SubprocVecEnv, VecMonitor

def train_pokebot_selfplay(env_fn, steps: int = 10_000, seed: int = 0, **env_kwargs):
    """Build the vectorised self-play environment for PPO, then close it.

    Training itself is not wired in yet (see the TODO below); at the moment
    this only checks that the vectorised environment can be constructed.

    Args:
        env_fn: Callable returning a new environment; called as
            ``env_fn(**env_kwargs)`` for the space probe and for every replica.
        steps: Intended number of training timesteps. Currently unused.
        seed: Intended reset seed. Currently unused.
        **env_kwargs: Keyword arguments forwarded to ``env_fn``.
    """
    # 1. Probe one throwaway environment for the per-agent spaces. The spaces
    #    of the first listed agent are used for the whole vector env.
    probe_env = env_fn(**env_kwargs)
    try:
        first_agent = probe_env.possible_agents[0]
        ob_space = probe_env.observation_space(first_agent)
        ac_space = probe_env.action_space(first_agent)
    finally:
        # The probe is not used again; release it instead of leaking it.
        close_probe = getattr(probe_env, "close", None)
        if callable(close_probe):
            close_probe()

    # 2. Factory producing one wrapped vector env per call. It must honour the
    #    caller's env_fn and kwargs so every replica is configured identically.
    def make_env():
        raw_env = env_fn(**env_kwargs)
        return ss.pettingzoo_env_to_vec_env_v1(raw_env)

    # 3. Concatenate the replicas. Pass the factory itself (do NOT call it here).
    vec_env = patched_concat_vec_envs_v1(
        make_env,
        num_vec_envs=8,
        num_cpus=2,
        base_class="stable_baselines3",
        observation=ob_space,
        action=ac_space,
    )

    # TODO: PPO training is not implemented yet. Sketch kept from the original cell:
    #     model = PPO(MlpPolicy, vec_env, verbose=3, learning_rate=1e-3, batch_size=256)
    #     model.learn(total_timesteps=steps)
    #     model.save(f"{<env name>}_{time.strftime('%Y%m%d-%H%M%S')}")
    vec_env.close()
# Run with the signature defaults; no environment keyword arguments are supplied here.
train_pokebot_selfplay(pokebot_env_fn)

In [6]:
from pettingzoo.utils import parallel_to_aec

def eval_pokebot(env_fn, num_games: int = 100, render_mode: str = None, **env_kwargs):
    """Evaluate the most recently created saved PPO policy and return the mean reward.

    Args:
        env_fn: Callable returning a parallel environment; called as
            ``env_fn(**env_kwargs)`` and converted with ``parallel_to_aec``.
        num_games: Number of episodes to play; episode ``i`` is reset with ``seed=i``.
        render_mode: Accepted for interface compatibility; currently unused.
        **env_kwargs: Keyword arguments forwarded to ``env_fn``.

    Returns:
        The total accumulated reward per agent, averaged over the agents.

    Raises:
        FileNotFoundError: If no ``<env name>*.zip`` checkpoint exists in the
            current working directory.
    """
    parallel_env = env_fn(**env_kwargs)
    env = parallel_to_aec(parallel_env)

    # try/finally so the environment is released on every exit path,
    # including a missing checkpoint or an error during play.
    try:
        env_name = env.metadata["name"]
        print(f"\nStarting evaluation on {str(env_name)}.")

        # Escape the name so glob metacharacters in it are matched literally.
        checkpoints = glob.glob(f"{glob.escape(str(env_name))}*.zip")
        if not checkpoints:
            # Raise instead of exit(0): exiting would kill the notebook kernel
            # and report success for what is actually a failure.
            raise FileNotFoundError(
                f"Policy not found: no '{env_name}*.zip' checkpoint in the working directory."
            )
        latest_policy = max(checkpoints, key=os.path.getctime)

        model = PPO.load(latest_policy)

        rewards = {agent: 0 for agent in env.possible_agents}

        for i in range(num_games):
            env.reset(seed=i)
            for agent in env.agent_iter():
                obs, reward, termination, truncation, info = env.last()
                # Accumulate the env's current per-agent rewards on every turn.
                for a in env.agents:
                    rewards[a] += env.rewards[a]
                if termination or truncation:
                    # The episode ends as soon as any agent is done.
                    break
                act = model.predict(obs, deterministic=True)[0]
                env.step(act)
    finally:
        env.close()

    avg_reward = sum(rewards.values()) / len(rewards.values())
    print("Rewards: ", rewards)
    print(f"Avg reward: {avg_reward}")
    return avg_reward

In [12]:
# Instantiate one environment with both predefined teams and show its space mappings.
env = Pokebot_Gen1_Environment(
    battle_format="gen1ou",
    team1=team_1,
    team2=team_2,
    start_challenging=True,
)
print(env.observation_spaces)
print(env.action_spaces)

{'rl_agent_1_b6fee 1': Box(0, [[ 255    1    1   15   15   15   31 1023 1023 1023 1023 1023 1023 1023
  1023  127   15   15   15   15   15    3  255  127  255  127  255  127
   255  127 1023  511]
 [ 255    1    1   15   15   15   31 1023 1023 1023 1023 1023 1023 1023
  1023  127   15   15   15   15   15    3  255  127  255  127  255  127
   255  127 1023  511]
 [ 255    1    1   15   15   15   31 1023 1023 1023 1023 1023 1023 1023
  1023  127   15   15   15   15   15    3  255  127  255  127  255  127
   255  127 1023  511]
 [ 255    1    1   15   15   15   31 1023 1023 1023 1023 1023 1023 1023
  1023  127   15   15   15   15   15    3  255  127  255  127  255  127
   255  127 1023  511]
 [ 255    1    1   15   15   15   31 1023 1023 1023 1023 1023 1023 1023
  1023  127   15   15   15   15   15    3  255  127  255  127  255  127
   255  127 1023  511]
 [ 255    1    1   15   15   15   31 1023 1023 1023 1023 1023 1023 1023
  1023  127   15   15   15   15   15    3  255  127  255  127  

In [13]:
# Build a single environment and try SuperSuit's stock vectorisation path on it.
# NOTE(review): the output recorded for this cell is a TypeError from
# MakeCPUAsyncConstructor's constructor (missing 'obs_space' and 'act_space'),
# so this cell apparently fails before reset() completes — confirm against the
# installed supersuit version.
env = Pokebot_Gen1_Environment(battle_format="gen1ou", 
    team1= team_1,
    team2= team_2,
    start_challenging=True)

# Wrap AFTER instantiating the env
vec_env = ss.pettingzoo_env_to_vec_env_v1(env)
# Concatenate 8 copies with num_cpus=2, wrapped for Stable-Baselines3.
vec_env = ss.concat_vec_envs_v1(vec_env, 8, num_cpus=2, base_class="stable_baselines3")

vec_env.reset(seed=0)

TypeError: MakeCPUAsyncConstructor.<locals>.constructor() missing 2 required positional arguments: 'obs_space' and 'act_space'

In [8]:

# Self-play run with battle_format="gen1ou" and both predefined teams.
train_pokebot_selfplay(
    pokebot_env_fn,
    steps=50_000,
    battle_format="gen1ou",
    team1=team_1,
    team2=team_2,
    start_challenging=True,
)
# eval_pokebot(pokebot_env_fn, num_games=20)

TypeError: MakeCPUAsyncConstructor.<locals>.constructor() missing 2 required positional arguments: 'obs_space' and 'act_space'

In [None]:
def train_model_supersuit(
    env, steps = 10_000, seed = None,
):
    """Vectorise ``env`` with SuperSuit for Stable-Baselines3, then close it.

    No model is trained yet; the function only builds the vectorised
    environment and releases it again.

    Args:
        env: Environment exposing ``reset(seed=...)`` and ``metadata['name']``;
            it is passed to ``ss.pettingzoo_env_to_vec_env_v1``.
        steps: Intended number of training timesteps. Currently unused.
        seed: Seed forwarded to ``env.reset``.
    """
    env.reset(seed=seed)
    print(f"Starting training on {str(env.metadata['name'])}.")
    vec_env = ss.pettingzoo_env_to_vec_env_v1(env)
    vec_env = ss.concat_vec_envs_v1(vec_env, 8, num_cpus=8, base_class="stable_baselines3")
    # The original read `env.close` without parentheses, which only looked the
    # method up and never ran it; actually close the vector env.
    vec_env.close()

In [8]:
# NOTE(review): this cell appears to redefine team_1 / team_2 with the same text
# as the first cell of the notebook — confirm and consider keeping a single definition.
# Team text passed as `team1=` to Pokebot_Gen1_Environment.
team_1 = """
Alakazam  
Ability: No Ability  
- Psychic
- Seismic Toss
- Recover 
- Thunder Wave  

Snorlax  
Ability: No Ability  
- Body Slam  
- Hyper Beam  
- Earthquake  
- Self-Destruct  

Tauros  
Ability: No Ability  
- Blizzard  
- Body Slam  
- Earthquake  
- Hyper Beam  

Chansey  
Ability: No Ability  
- Ice Beam  
- Thunderbolt  
- Thunder Wave  
- Soft-Boiled  

Starmie  
Ability: No Ability  
- Thunder Wave  
- Psychic  
- Blizzard  
- Recover  

Exeggutor  
Ability: No Ability  
- Double-Edge  
- Explosion  
- Psychic  
- Sleep Powder  
"""
# Team text passed as `team2=` to Pokebot_Gen1_Environment.
# NOTE(review): "Ability:" values are spelled inconsistently here ("none", "None", "Pressure").
team_2 = """
Gengar  
Ability: none  
- Hypnosis  
- Psychic  
- Thunderbolt  
- Explosion  

Starmie  
Ability: none  
- Surf  
- Thunderbolt  
- Thunder Wave  
- Recover  

Zapdos  
Ability: Pressure  
- Thunderbolt  
- Drill Peck  
- Thunder Wave  
- Agility  

Chansey  
Ability: None
- Ice Beam  
- Thunderbolt  
- Thunder Wave  
- Soft-Boiled  

Snorlax  
Ability: None
- Body Slam  
- Hyper Beam 
- Earthquake  
- Self-Destruct  

Tauros  
Ability: None
- Body Slam  
- Hyper Beam  
- Blizzard  
- Earthquake  
"""

In [9]:
# Environment instance handed to train_model_supersuit(...) in a later cell.
test_env = Pokebot_Gen1_Environment(
    battle_format="gen1ou",
    team1=team_1,
    team2=team_2,
    start_challenging=True,
)

In [None]:
### Implement render mode

In [10]:
# Run the SuperSuit vectorisation helper on test_env with its default steps/seed.
# NOTE(review): the output recorded for this cell ends in a TypeError about missing
# 'obs_space' and 'act_space' — confirm against the installed supersuit version.
train_model_supersuit(test_env)

Starting training on pokemon_gen_1_v1.


TypeError: MakeCPUAsyncConstructor.<locals>.constructor() missing 2 required positional arguments: 'obs_space' and 'act_space'