from copy import copy
from gymnasium.spaces import Discrete,MultiDiscrete
from pettingzoo import ParallelEnv

# In [None]:
class CustomEnvironment(ParallelEnv):
    """Two-player PettingZoo parallel environment.

    On every step both players submit an action drawn from
    ``Discrete(len(MOVES))``.  Each action is resolved by ``apply_move`` into
    a ``(move, points)`` pair, and the pair of resulting moves is looked up
    in ``WIN_RULES`` to decide whether either player has won.

    ``MOVES`` and ``WIN_RULES`` are module-level names defined outside this
    class.
    """

    metadata = {
        "name": "custom_environment_v0",
    }

    def __init__(self, maxsteps):
        """Create the environment.

        Args:
            maxsteps: Step budget; the episode is truncated once the number
                of steps taken exceeds this value.
        """
        # Per-episode state; real values are assigned in reset().
        self.move1 = None
        self.point1 = None
        self.move2 = None
        self.point2 = None
        self.timestep = None
        self.maxsteps = maxsteps
        self.possible_agents = ["player1", "player2"]
        self.action_spaces = {
            a: Discrete(len(MOVES)) for a in self.possible_agents
        }
        self.observation_spaces = {
            a: MultiDiscrete([len(MOVES), 20, 20]) for a in self.possible_agents
        }

    def apply_move(self, action, player_points):
        """Resolve one player's action into a ``(move, points)`` pair.

        ``step`` unpacks the return value as ``(move, points)`` and stores it
        on the environment, so any implementation must return a 2-tuple.

        Args:
            action: The action chosen by the player.
            player_points: The player's points before the move.

        Raises:
            NotImplementedError: Always. The original method had no body
                (which made the file unparsable) and the intended game logic
                cannot be recovered from this class alone.
        """
        # TODO: implement the move/points logic and return (move, points).
        raise NotImplementedError(
            "apply_move is not implemented; it must return (move, points)"
        )

    def reset(self, seed=None, options=None):
        """Start a new episode.

        Args:
            seed: Accepted for API compatibility; not used.
            options: Accepted for API compatibility; not used.

        Returns:
            ``(observations, infos)`` dictionaries keyed by agent name.
        """
        self.agents = copy(self.possible_agents)
        self.timestep = 0
        # Both players start on move 1 with zero points.
        self.move1 = 1
        self.point1 = 0
        self.move2 = 1
        self.point2 = 0
        observations = {a: self._observation() for a in self.agents}
        infos = {a: {} for a in self.agents}
        return observations, infos

    def step(self, actions):
        """Advance the game by one simultaneous move.

        Args:
            actions: Mapping with an action for ``"player1"`` and
                ``"player2"``.

        Returns:
            ``(observations, rewards, terminations, truncations, infos)``,
            each a dictionary keyed by agent name.
        """
        self.move1, self.point1 = self.apply_move(actions["player1"], self.point1)
        self.move2, self.point2 = self.apply_move(actions["player2"], self.point2)

        # Defaults for a step in which nobody wins.
        rewards = {a: 0 for a in self.agents}
        terminations = {a: False for a in self.agents}
        truncations = {a: False for a in self.agents}

        # WIN_RULES yields no entry (None) when the move pair decides nothing.
        winner = WIN_RULES.get((self.move1, self.move2))
        if winner == "player1":
            rewards = {"player1": 1, "player2": -1}
            terminations = {a: True for a in self.agents}
            print("P1Win")
            self.point1 = 0
            self.point2 = 0
        elif winner == "player2":
            rewards = {"player1": -1, "player2": 1}
            terminations = {a: True for a in self.agents}
            print("P2Win")
            self.point1 = 0
            self.point2 = 0

        self.timestep += 1
        if self.timestep > self.maxsteps:
            # Step budget exhausted: end the episode without a winner.
            truncations = {"player1": True, "player2": True}

        observations = {a: self._observation() for a in self.agents}
        # Debugging info, identical for every agent.
        infos = {
            a: {
                "p1_move": self.move1,
                "p2_move": self.move2,
                "p1_points": self.point1,
                "p2_points": self.point2,
                "winner": winner if any(terminations.values()) else None,
            }
            for a in self.agents
        }

        # The parallel API expects finished agents to be removed from
        # ``self.agents`` so that ``while env.agents:`` loops terminate.
        if any(terminations.values()) or all(truncations.values()):
            self.agents = []

        return observations, rewards, terminations, truncations, infos

    def _observation(self):
        """Return the observation tuple shared by every agent."""
        # NOTE(review): both agents receive player1's move; confirm that
        # player2 is not meant to observe its own move instead.
        return (self.move1, self.point1, self.point2)

    def render(self):
        """Print each player's points and the name of their current move."""
        print(
            f"P1({self.point1} pts): {MOVES[self.move1]['name']} | "
            f"P2({self.point2} pts): {MOVES[self.move2]['name']}"
        )

    def observation_space(self, agent):
        """Return the observation space registered for ``agent``."""
        return self.observation_spaces[agent]

    def action_space(self, agent):
        """Return the action space registered for ``agent``."""
        return self.action_spaces[agent]