In [3]:
from copy import copy
from gymnasium.spaces import Discrete,MultiDiscrete
from pettingzoo import ParallelEnv
import numpy as np

In [4]:
# Environment hyperparameters: board size in cells. CustomEnvironment reads
# these module-level names (for example when building its action spaces).
gridheight=15
gridwidth=20

In [5]:
class CustomEnvironment(ParallelEnv):
    """Two-player four-in-a-row game on a free-placement grid (parallel API).

    Both agents submit a move on every step. A move is a cell coordinate
    ``(x, y)`` with ``0 <= x < gridwidth`` and ``0 <= y < gridheight``; the
    board is stored column-major, i.e. ``self.grid[x][y]``. Empty cells hold
    ``0``; occupied cells hold the owner's symbol, ``"o"`` or ``"x"``.

    Within one step ``player_o``'s stone is placed before ``player_x``'s, so
    if both agents pick the same empty cell, ``player_x``'s move is the
    invalid one. An invalid move (out of bounds or occupied cell) costs the
    offender ``-1`` and truncates the episode.
    """

    metadata = {
        "name": "custom_environment_v0",
    }

    # Number of aligned stones required to win.
    win_length = 4

    # Board symbol placed by each agent, in per-step placement order.
    _agent_symbols = {"player_o": "o", "player_x": "x"}

    # Integer code of each cell value when the board is exported as an
    # observation (must stay in sync with the observation space size).
    _symbol_codes = {0: 0, "o": 1, "x": 2}

    def __init__(self, maxsteps):
        """Create the environment.

        Args:
            maxsteps: Step budget; the episode is truncated once the number
                of steps taken exceeds this value.
        """
        # Last move of each player; ``column_*`` mirrors the x coordinate.
        # All are None until the player has moved in the current episode.
        self.column_o = None
        self.column_x = None
        self.x_o = self.y_o = None
        self.x_x = self.y_x = None
        self.timestep = None  # set to 0 by reset()
        self.maxsteps = maxsteps
        self.possible_agents = ["player_o", "player_x"]
        self.agents = []
        # The grid must exist before anything that depends on its shape.
        self.grid = self._empty_grid()
        # An action is the pair (x, y) of the targeted cell.
        self.action_spaces = {
            a: MultiDiscrete([gridwidth, gridheight])
            for a in self.possible_agents
        }
        # An observation is the whole board, one integer code per cell.
        self.observation_spaces = {
            a: MultiDiscrete(
                np.full((gridwidth, gridheight), len(self._symbol_codes))
            )
            for a in self.possible_agents
        }

    @staticmethod
    def _empty_grid():
        """Return a fresh board of ``gridwidth`` columns of ``gridheight`` zeros."""
        return [[0] * gridheight for _ in range(gridwidth)]

    @staticmethod
    def _unpack_action(action):
        """Return ``(x, y)`` as ints from a ``{"x":…, "y":…}`` dict or a pair."""
        if isinstance(action, dict):
            return int(action["x"]), int(action["y"])
        x, y = action
        return int(x), int(y)

    def _in_bounds(self, x, y):
        """Return True when ``(x, y)`` addresses a cell of the current grid."""
        return 0 <= x < len(self.grid) and 0 <= y < len(self.grid[x])

    def _run_length(self, x, y, dx, dy, symbol):
        """Count consecutive ``symbol`` cells after ``(x, y)`` along ``(dx, dy)``."""
        count = 0
        cx, cy = x + dx, y + dy
        # Explicit bounds test: negative indices must not wrap around.
        while self._in_bounds(cx, cy) and self.grid[cx][cy] == symbol:
            count += 1
            cx += dx
            cy += dy
        return count

    def _line_complete(self, x, y, dx, dy, symbol):
        """Return 1 if the line through ``(x, y)`` along ``(dx, dy)`` wins, else 0.

        The cell ``(x, y)`` itself is counted as one stone without being
        inspected; the runs on both sides of it are added to that.
        """
        link = (
            1
            + self._run_length(x, y, dx, dy, symbol)
            + self._run_length(x, y, -dx, -dy, symbol)
        )
        return 1 if link >= self.win_length else 0

    def _observation(self):
        """Return the board as a ``(gridwidth, gridheight)`` int64 array."""
        codes = self._symbol_codes
        return np.array(
            [[codes[cell] for cell in column] for column in self.grid],
            dtype=np.int64,
        )

    def drop(self, x, y, symbol):  # symbol is str
        """Place ``symbol`` at ``(x, y)`` if that cell exists and is empty.

        Returns:
            True when the stone was placed, False when the cell is out of
            bounds or already occupied (the board is left unchanged).
        """
        if not self._in_bounds(x, y) or self.grid[x][y] != 0:
            return False
        self.grid[x][y] = symbol
        return True

    def verticalcheck(self, x, y, symbol):
        """Return 1 if ``symbol`` has a winning line through ``(x, y)`` along y, else 0."""
        return self._line_complete(x, y, 0, 1, symbol)

    def horizontalcheck(self, x, y, symbol):
        """Return 1 if ``symbol`` has a winning line through ``(x, y)`` along x, else 0."""
        return self._line_complete(x, y, 1, 0, symbol)

    def diagonalcheck(self, x, y, symbol):
        """Return 1 if ``symbol`` wins on either diagonal through ``(x, y)``, else 0."""
        if self._line_complete(x, y, 1, 1, symbol):
            return 1
        return self._line_complete(x, y, 1, -1, symbol)

    def reset(self, seed=None, options=None):
        """Start a new episode with an empty board.

        Args:
            seed: Accepted for API compatibility; not used.
            options: Accepted for API compatibility; not used.

        Returns:
            ``(observations, infos)``, both dicts keyed by agent name.
        """
        self.agents = copy(self.possible_agents)
        self.timestep = 0
        # No move has been played yet in this episode.
        self.column_o = None
        self.column_x = None
        self.x_o = self.y_o = None
        self.x_x = self.y_x = None
        self.grid = self._empty_grid()
        observations = {a: self._observation() for a in self.agents}
        infos = {a: {} for a in self.agents}
        return observations, infos

    def step(self, actions):
        """Apply both players' moves and advance the game by one step.

        Args:
            actions: Dict keyed by agent name. Each value is either a dict
                with ``"x"`` and ``"y"`` entries or an ``(x, y)`` pair.

        Returns:
            ``(observations, rewards, terminations, truncations, infos)``,
            each a dict keyed by agent name. The winner gets ``+1`` and the
            loser ``-1``. Without a winner, a player whose move was invalid
            gets ``-1`` and everyone else ``0``. ``infos`` reports the last
            ``(x, y)`` move of each player and the winner (or None).
        """
        self.x_o, self.y_o = self._unpack_action(actions["player_o"])
        self.x_x, self.y_x = self._unpack_action(actions["player_x"])
        self.column_o, self.column_x = self.x_o, self.x_x
        moves = {
            "player_o": (self.x_o, self.y_o),
            "player_x": (self.x_x, self.y_x),
        }

        # Place the stones in order; drop() reports whether a move was legal.
        placed = {}
        for agent, symbol in self._agent_symbols.items():
            x, y = moves[agent]
            placed[agent] = self.drop(x, y, symbol)

        # Only a stone that was actually placed can complete a line. If both
        # players complete a line in the same step, player_o takes priority.
        winner = None
        for agent, symbol in self._agent_symbols.items():
            x, y = moves[agent]
            if placed[agent] and (
                self.verticalcheck(x, y, symbol)
                or self.horizontalcheck(x, y, symbol)
                or self.diagonalcheck(x, y, symbol)
            ):
                winner = agent
                break

        if winner is not None:
            rewards = {a: (1 if a == winner else -1) for a in self.agents}
            print("P_oWin" if winner == "player_o" else "P_xWin")
        else:
            rewards = {
                a: (0 if placed.get(a, True) else -1) for a in self.agents
            }

        self.timestep += 1

        # A full board without a winner ends the game as a draw.
        board_full = all(cell != 0 for column in self.grid for cell in column)
        terminated = winner is not None or board_full
        # Stop on any illegal move, or once the step budget is exceeded.
        truncated = (not all(placed.values())) or self.timestep > self.maxsteps

        terminations = {a: terminated for a in self.agents}
        truncations = {a: truncated for a in self.agents}
        observations = {a: self._observation() for a in self.agents}
        # info for debugging, default in most envs
        infos = {
            a: {
                "p_o_move": moves["player_o"],
                "p_x_move": moves["player_x"],
                "winner": winner,
            }
            for a in self.agents
        }

        if terminated or truncated:
            # Finished episodes have no live agents.
            self.agents = []

        return observations, rewards, terminations, truncations, infos

    def render(self):
        """Print each player's last move as ``(y, column)``; None before a move."""
        print(f"{self.y_o,self.column_o}"
              f"{self.y_x,self.column_x}")

    def observation_space(self, agent):
        """Return the observation space of ``agent``."""
        return self.observation_spaces[agent]

    def action_space(self, agent):
        """Return the action space of ``agent``."""
        return self.action_spaces[agent]