In [None]:
import os, sys, pathlib

# Colab bootstrap: make sure a checkout of the project exists at WORKSPACE
# and is up to date, then switch into the package directory.
REPO_URL = "https://github.com/UmerSR/Connect-Four-RL.git"
WORKSPACE = "/content/Connect-Four-RL"

if not os.path.exists(WORKSPACE):
    # Nothing at WORKSPACE yet: clone the repository there.
    !git clone $REPO_URL $WORKSPACE
else:
    # A checkout already exists: update it, accepting fast-forward merges only.
    %cd $WORKSPACE
    !git pull --ff-only

# Use rl_connect4 as the working directory for the rest of the notebook.
# NOTE(review): the later `envs.*` / `agents.*` imports presumably resolve
# from this directory - confirm against the repository layout.
%cd $WORKSPACE/rl_connect4
# Put the repository root at the front of the import search path.
sys.path.insert(0, WORKSPACE)
print("CWD:", os.getcwd())


In [None]:
!pip install -q stable-baselines3[extra] sb3-contrib gymnasium pygame numpy torch pandas matplotlib seaborn

In [None]:
import os
import random
import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt
import seaborn as sns

from pathlib import Path
from typing import Dict, Tuple

from envs.connect_four_env import ConnectFourEnv
from agents.opponents import get_opponent

# Use the GPU when torch reports one, otherwise stay on the CPU.
DEVICE = "cpu"
if torch.cuda.is_available():
    DEVICE = "cuda"
print("Running on:", DEVICE)

# Locate the artifacts directory: it is either inside the current working
# directory or next to it (on Colab the CWD is /content/Connect-Four-RL/rl_connect4).
HERE = Path.cwd()
if (HERE / 'artifacts').exists():
    ROOT = HERE
else:
    ROOT = HERE.parent
ARTIFACTS = ROOT / 'artifacts'

# Display name -> checkpoint file. Insertion order is the order in which the
# agents are later loaded.
MODEL_PATHS = {
    'PPO': ARTIFACTS.joinpath('ppo', 'ppo.pth'),
    'PPO Pool': ARTIFACTS.joinpath('ppo', 'ppo_pool.pth'),
    'PPO Dense': ARTIFACTS.joinpath('ppo', 'ppo_dense.pth'),
    'DQN': ARTIFACTS.joinpath('dqn', 'dqn_connect4.pth'),
    'REINFORCE Manual': ARTIFACTS.joinpath('reinforce_manual', 'reinforce_connect4.pth'),
    'REINFORCE TS': ARTIFACTS.joinpath('reinforce_tianshou', 'reinforce_connect4.pth'),
}

# Display name -> the `kind` string handed to get_opponent.
KIND_MAP = dict([
    ('PPO', 'ppo'),
    ('PPO Pool', 'ppo_pool'),
    ('PPO Dense', 'ppo_dense'),
    ('DQN', 'dqn'),
    ('REINFORCE Manual', 'reinforce_manual'),
    ('REINFORCE TS', 'reinforce_tianshou'),
])

print("Artifact root:", ARTIFACTS.resolve())

# Report every checkpoint that is not on disk; this only warns, it does not stop the run.
missing = [(name, path) for name, path in MODEL_PATHS.items() if not path.exists()]
if not missing:
    print("All model files found.")
else:
    for name, path in missing:
        print(f"Warning: missing model for {name}: {path}")


In [None]:
def load_agents(device=DEVICE):
    """Build one opponent per entry in MODEL_PATHS.

    Args:
        device: Passed through to ``get_opponent`` as ``device``; defaults to
            the module-level DEVICE.

    Returns:
        dict mapping each display name in MODEL_PATHS (in the same order) to
        the object returned by ``get_opponent`` for that name's kind and
        checkpoint path.
    """
    return {
        label: get_opponent(KIND_MAP[label], model_path=checkpoint, device=device)
        for label, checkpoint in MODEL_PATHS.items()
    }

def resolve_winner(agent_a_is_player1, winner_side):
    """Translate a winning player number into the label of the winning agent.

    Args:
        agent_a_is_player1: Truthy when agent A holds seat 1, falsy when it
            holds seat 2.
        winner_side: Player number of the winner (1 or 2).

    Returns:
        'A' when ``winner_side`` is agent A's seat, otherwise 'B'.
    """
    seat_of_a = 1 if agent_a_is_player1 else 2
    return 'A' if winner_side == seat_of_a else 'B'

def play_single_game(agent_a, agent_b, agent_a_is_player1=True, seed=None):
    """Play one game between two agents and report the outcome from A's side.

    A fresh ``ConnectFourEnv`` is created for the game and closed again before
    returning, even if an agent or the environment raises.

    Args:
        agent_a: Agent exposing ``select_action(env)``.
        agent_b: Agent exposing ``select_action(env)``.
        agent_a_is_player1: When truthy agent A moves as player 1, otherwise
            as player 2.
        seed: Optional seed forwarded to ``env.reset``; when None the
            environment is reset without a seed.

    Returns:
        'A' if agent A won, 'B' if agent B won, 'D' otherwise. A game ending
        on an illegal move (``info['illegal_move']`` truthy) is awarded to the
        agent that did not make the move; a final reward of 1 means the agent
        that just moved won; every other ending is reported as a draw.
    """
    if agent_a_is_player1:
        first_mover, second_mover = agent_a, agent_b
    else:
        first_mover, second_mover = agent_b, agent_a

    env = ConnectFourEnv()
    try:
        if seed is not None:
            env.reset(seed=seed)
        else:
            env.reset()
        done = False
        last_actor = None
        while not done:
            # NOTE(review): assumes env.current_player is 0-based, so +1 gives
            # the player number 1 or 2 - confirm against ConnectFourEnv.
            current_player = env.current_player + 1
            actor_agent = first_mover if current_player == 1 else second_mover
            action = actor_agent.select_action(env)
            _, reward, terminated, truncated, info = env.step(action)
            done = terminated or truncated
            last_actor = current_player
    finally:
        # One environment is created per game, so release it here rather than
        # letting instances pile up over a long tournament.
        close = getattr(env, 'close', None)
        if callable(close):
            close()

    if info.get('illegal_move'):
        # The mover made an illegal move, so the other seat wins.
        winner_side = 2 if last_actor == 1 else 1
        return resolve_winner(agent_a_is_player1, winner_side)
    if reward == 1:
        # The mover completed a winning move.
        return resolve_winner(agent_a_is_player1, last_actor)
    # Explicit draws and any other terminal state count as a draw.
    return 'D'

def play_series(agent_a, agent_b, n_games=50, seed=42):
    """Play a series of games between two agents and summarise the results.

    Agent A is player 1 in even-numbered games (0, 2, ...) and player 2 in
    odd-numbered ones. Each game receives its own seed drawn from a
    ``random.Random(seed)`` stream, so the sequence of per-game seeds is the
    same for a given ``seed``.

    Args:
        agent_a: First agent, passed to ``play_single_game``.
        agent_b: Second agent, passed to ``play_single_game``.
        n_games: Number of games to play.
        seed: Seed for the stream that generates the per-game seeds.

    Returns:
        dict with the counts ``a_wins``, ``b_wins``, ``draws``, ``total``
        (equal to ``n_games``) and the fractions ``a_win_rate``,
        ``b_win_rate``, ``draw_rate``. When no games are played the three
        rates are 0.0 instead of raising ZeroDivisionError.
    """
    rng = random.Random(seed)
    tally = {'A': 0, 'B': 0, 'D': 0}
    for game_idx in range(n_games):
        agent_a_is_p1 = (game_idx % 2 == 0)  # alternate starting side
        outcome = play_single_game(agent_a, agent_b, agent_a_is_p1, seed=rng.randint(0, 1_000_000))
        # Anything that is not an explicit win for A or B counts as a draw.
        tally[outcome if outcome in ('A', 'B') else 'D'] += 1

    # With no games played every count is zero; dividing by 1 yields 0.0 rates.
    denominator = n_games if n_games > 0 else 1
    return {
        'a_wins': tally['A'],
        'b_wins': tally['B'],
        'draws': tally['D'],
        'total': n_games,
        'a_win_rate': tally['A'] / denominator,
        'b_win_rate': tally['B'] / denominator,
        'draw_rate': tally['D'] / denominator,
    }


In [None]:
# Round-robin tournament: load every agent once, then play each ordered
# pairing (so both "X vs Y" and "Y vs X" get their own series).
agents = load_agents()
agent_names = list(agents.keys())

n_games = 40  # per matchup (20 as P1, 20 as P2)
matchups = [
    (first, second)
    for first in agent_names
    for second in agent_names
    if first != second  # an agent never plays itself
]

results = []
for name_a, name_b in matchups:
    stats = play_series(agents[name_a], agents[name_b], n_games=n_games)
    record = {'agent_a': name_a, 'agent_b': name_b}
    record.update(stats)
    results.append(record)
    print(f"{name_a} vs {name_b}: A win {stats['a_win_rate']*100:.1f}%, B win {stats['b_win_rate']*100:.1f}%, Draw {stats['draw_rate']*100:.1f}%")

# One row per ordered matchup; shown as the cell output.
results_df = pd.DataFrame(results)
results_df


In [None]:
# Win-rate and draw-rate matrices (rows = agent A, cols = agent B).
# Cells with no matching row in results_df keep their initial 0.0.
win_matrix = pd.DataFrame(0.0, index=agent_names, columns=agent_names)
draw_matrix = pd.DataFrame(0.0, index=agent_names, columns=agent_names)
for rec in results_df.itertuples(index=False):
    win_matrix.loc[rec.agent_a, rec.agent_b] = rec.a_win_rate
    draw_matrix.loc[rec.agent_a, rec.agent_b] = rec.draw_rate


def _show_rate_heatmap(matrix, title, cmap, cbar_label):
    """Draw one annotated agent-vs-opponent heatmap and display it."""
    plt.figure(figsize=(10, 8))
    sns.heatmap(matrix, annot=True, fmt='.2f', cmap=cmap, cbar_kws={'label': cbar_label})
    plt.title(title)
    plt.xlabel('Opponent')
    plt.ylabel('Agent')
    plt.tight_layout()
    plt.show()


_show_rate_heatmap(win_matrix, 'Head-to-Head Win Rates', 'Blues', 'Win rate (row vs col)')
_show_rate_heatmap(draw_matrix, 'Head-to-Head Draw Rates', 'Greys', 'Draw rate')


In [None]:
# Leaderboard: pool each agent's wins and games over the series in which it
# appears as agent A, then rank by the pooled win rate.
agg = results_df.groupby('agent_a').agg(
    a_wins=('a_wins', 'sum'),
    games=('total', 'sum'),
)
agg = agg.assign(win_rate_overall=agg['a_wins'] / agg['games'])
agg = agg.sort_values('win_rate_overall', ascending=False)

plt.figure(figsize=(8, 5))
plt.barh(agg.index, agg['win_rate_overall'] * 100, color='#4c7bd9')
plt.xlabel('Win rate vs field (%)')
plt.title('Overall Performance vs Field')
# Best agent at the top of the chart.
plt.gca().invert_yaxis()
plt.tight_layout()
plt.show()

agg


In [None]:
# Per-opponent margin: row agent's win rate minus the column agent's win rate
# within the same series. Cells with no matching row in results_df stay 0.0.
margin_matrix = pd.DataFrame(0.0, index=agent_names, columns=agent_names)
for rec in results_df.itertuples(index=False):
    margin_matrix.loc[rec.agent_a, rec.agent_b] = rec.a_win_rate - rec.b_win_rate

plt.figure(figsize=(10, 8))
# Diverging palette centred on 0 so positive and negative margins get opposite colours.
sns.heatmap(
    margin_matrix,
    annot=True,
    fmt='.2f',
    center=0,
    cmap='PiYG',
    cbar_kws={'label': 'Win rate difference'},
)
plt.title('Head-to-Head Win Rate Margin (row - col)')
plt.xlabel('Opponent')
plt.ylabel('Agent')
plt.tight_layout()
plt.show()
