In [1]:
from pettingzoo.classic import connect_four_v3
import numpy as np
import matplotlib.pyplot as plt

import os
os.environ["SDL_VIDEODRIVER"] = "dummy"
from IPython.display import clear_output

# Using the PettingZoo environment

This notebook provides small chunks of code to get you started with the Connect4 project. You do not have to use this code in your final file, but you can if you wish.

In [6]:
# Create the Connect Four environment. With render_mode="rgb_array",
# env.render() yields an image that play_game shows through plt.imshow.
env = connect_four_v3.env(render_mode="rgb_array")
# Reset once so the environment starts from a fresh game
# (play_game also resets it at the start of every game).
env.reset()

# Agents

Here are some implementations of trivial agents that you should be able to beat ultimately. 

In [2]:
from agents.play_leftmost import PlayLeftmostLegal
from agents.random import RandomPlayer
from agents.malynx_deep import MalynxDeep, MalynxWithoutBlunder
from agents.q_learner import QLearningAgent
from agents.human import HumanPlayer

We load the pre-trained Q-table for the Q-learning agent.

In [3]:
import pickle

# Build a Q-learning agent and restore its pre-trained Q-table from disk.
# pickle.load can execute arbitrary code, so only load files you trust.
q_learning_agent = QLearningAgent()
with open("training/agent_q_learner.pkl", mode="rb") as q_table_file:
    q_learning_agent.q_table = pickle.load(q_table_file)

In [5]:
# Opponents used by the evaluation loop further down.
random_agent = RandomPlayer()
leftmost_agent = PlayLeftmostLegal()
malynx_deep_agent = MalynxDeep()
malynx_without_blunder_agent = MalynxWithoutBlunder()
# Human-controlled player, given the explicit name "Human"
# (presumably used when play_game prints the winner -- confirm in agents.human).
human_agent = HumanPlayer(name= "Human")

# Let's play!


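The following function runs a full game between two agents, with `agent0` moving first. It returns `0` if `agent0` wins, `1` if `agent1` wins, and `0.5` for a draw.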

In [8]:
def play_game(env, agent0, agent1, display=False):
    """Play one full game between two agents and report the result.

    ``agent0`` moves first. On each turn the agent to move receives the
    observation returned by ``env.last()`` and picks its action greedily
    (``epsilon=0``).

    The result is decided from the reward returned by ``env.last()`` once the
    game is over, not from the action mask. A winning move that also fills
    the board leaves an empty action mask, and checking the mask first would
    misreport that win as a draw.

    Args:
        env: Environment exposing ``reset()``, ``step(action)``, ``last()``
            and ``render()``; this notebook passes a PettingZoo Connect Four
            environment.
        agent0: First player. Must provide ``get_action(obs, epsilon=...)``;
            its ``name`` attribute is read only when ``display`` is true.
        agent1: Second player, same interface as ``agent0``.
        display: When true, redraw the board after every move and print the
            outcome at the end.

    Returns:
        ``0`` if ``agent0`` won, ``1`` if ``agent1`` won, and ``0.5`` if the
        game ended with a zero reward (a draw, or a truncated game).
    """
    players = (agent0, agent1)
    env.reset()
    obs, _, _, _, _ = env.last()
    while True:
        for i, agent in enumerate(players):
            action = agent.get_action(obs, epsilon=0)
            env.step(action)
            if display:
                clear_output(wait=True)
                plt.imshow(env.render())
                plt.show()
            # After the step, last() describes the player who moves next, so
            # the reward is from the point of view of the mover's opponent.
            obs, reward, terminated, truncated, _ = env.last()
            if not (terminated or truncated):
                continue
            if reward == 0:
                if display:
                    print('Draw')
                return 0.5
            # A negative reward for the next player means the mover just won.
            winner = i if reward < 0 else 1 - i
            if display:
                print(f"Player {winner}: {players[winner].name} won")
                print(obs['observation'][:, :, 0]- obs['observation'][:, :, 1])
                print(obs['action_mask'])
            return winner

In [29]:
# Play one game against each opponent, with the Q-learning agent moving first,
# and print who won.
outcome_messages = {0: "Q-Learning Agent won", 1: "Opponent won"}
baseline_opponents = (
    random_agent,
    leftmost_agent,
    malynx_deep_agent,
    malynx_without_blunder_agent,
)
for opponent in baseline_opponents:
    print(f"Playing against {opponent.name}")
    game = play_game(env, q_learning_agent, opponent, display=False)
    # Any result other than 0 or 1 (i.e. 0.5) is reported as a draw.
    print(outcome_messages.get(game, "Draw"))

Playing against Random Player
Q-Learning Agent won
Playing against Left Player
Q-Learning Agent won
Playing against Malynx Deep
Opponent won
Playing against Malynx Avoiding Blunder
Opponent won


In [None]:
# Interactive game: the human player moves first against the trained
# Q-learning agent. The original cell passed an undefined name `agent`, which
# raises NameError on a fresh kernel; swap in any other agent defined above.
# NOTE(review): this cell presumably waits for keyboard input, so skip it when
# running the notebook unattended.
play_game(env, human_agent, q_learning_agent, display=True)