In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from kaggle_environments import make, evaluate
from IPython.display import HTML, display, clear_output
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from typing import List, Dict, Tuple, Optional
import logging
import time
import random
from pathlib import Path

# Configure matplotlib so the Chinese labels used by the plots can render.
# The family list is priority-ordered, so the CJK-capable fonts are listed
# first and DejaVu Sans (which has no CJK glyphs) is kept as the fallback.
plt.rcParams['font.sans-serif'] = ['SimHei', 'Arial Unicode MS', 'DejaVu Sans']
# Render the minus sign with an ASCII hyphen so it does not depend on the font.
plt.rcParams['axes.unicode_minus'] = False

print("导入完成！")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

# Seed every random number generator used in the notebook from one constant.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

In [None]:
class ConnectXNet(nn.Module):
    """Fully connected policy/value network for ConnectX.

    A shared trunk of three Linear -> LayerNorm -> ReLU -> Dropout stages
    feeds two heads: a policy head that returns one raw score per action and
    a value head that returns a single tanh-squashed scalar.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of the first two trunk stages; later layers use
            ``hidden_size // 2`` and ``hidden_size // 4``.
        output_size: Number of policy outputs.
    """

    def __init__(self, input_size=42, hidden_size=512, output_size=7):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        half_width = hidden_size // 2
        quarter_width = hidden_size // 4

        # Shared trunk: (fan_in, fan_out, dropout probability) per stage.
        trunk_spec = (
            (input_size, hidden_size, 0.3),
            (hidden_size, hidden_size, 0.3),
            (hidden_size, half_width, 0.2),
        )
        trunk_layers = []
        for fan_in, fan_out, drop_p in trunk_spec:
            trunk_layers.extend([
                nn.Linear(fan_in, fan_out),
                nn.LayerNorm(fan_out),
                nn.ReLU(),
                nn.Dropout(drop_p),
            ])
        self.feature_layers = nn.Sequential(*trunk_layers)

        # Policy head: raw (un-normalised) scores, one per action.
        self.policy_head = nn.Sequential(
            nn.Linear(half_width, quarter_width),
            nn.ReLU(),
            nn.Linear(quarter_width, output_size),
        )

        # Value head: scalar squashed into [-1, 1] by tanh.
        self.value_head = nn.Sequential(
            nn.Linear(half_width, quarter_width),
            nn.ReLU(),
            nn.Linear(quarter_width, 1),
            nn.Tanh(),
        )

        self._initialize_weights()

    def _initialize_weights(self):
        """Apply Xavier-uniform weights and zero biases to every Linear layer."""
        linear_layers = (m for m in self.modules() if isinstance(m, nn.Linear))
        for layer in linear_layers:
            nn.init.xavier_uniform_(layer.weight)
            if layer.bias is not None:
                nn.init.constant_(layer.bias, 0)

    def forward(self, x):
        """Return ``(policy, value)`` for the input batch ``x``."""
        shared = self.feature_layers(x)
        return self.policy_head(shared), self.value_head(shared)

# Build the model on the best available device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ConnectXNet().to(device)
print(f"模型已创建，使用设备: {device}")
param_count = sum(p.numel() for p in model.parameters())
print(f"模型参数数量: {param_count:,}")

# Forward-pass smoke test on one random 42-feature input
dummy_board = torch.randn(1, 42).to(device)
with torch.no_grad():
    policy_out, value_out = model(dummy_board)
    print(f"策略输出形状: {policy_out.shape}, 价值输出形状: {value_out.shape}")
    print(f"策略输出: {policy_out}")
    print(f"价值输出: {value_out}")

# Drop the temporaries so they do not linger in the kernel namespace
del dummy_board, policy_out, value_out, param_count

In [None]:
class BattleVisualizer:
    """Renders ConnectX boards, replays games and plots battle statistics.

    Attributes are plain containers filled by the caller:
    ``battle_history`` is an initially empty list and ``performance_data``
    maps metric names to parallel lists (one entry per evaluation round).
    """

    def __init__(self):
        self.battle_history = []
        self.performance_data = {
            'episode': [],
            'agent1_wins': [],
            'agent2_wins': [],
            'draws': [],
            'avg_game_length': []
        }

    def render_board(self, board, title="ConnectX Board", rows=6, columns=7):
        """Draw a board as a coloured grid.

        Args:
            board: Flat, row-major sequence of ``rows * columns`` cells
                (0 = empty, 1 = player 1, 2 = player 2).
            title: Figure title.
            rows: Number of board rows.
            columns: Number of board columns.
        """
        board_2d = np.array(board).reshape(rows, columns)

        fig, ax = plt.subplots(figsize=(8, 6))

        # Cell value -> background colour (0 white, 1 red, 2 yellow)
        cmap = plt.matplotlib.colors.ListedColormap(['white', 'red', 'yellow'])
        ax.imshow(board_2d, cmap=cmap, vmin=0, vmax=2)

        # Grid lines on every cell boundary, including the outer edges
        for i in range(columns + 1):
            ax.axvline(x=i - 0.5, color='black', linewidth=2)
        for i in range(rows + 1):
            ax.axhline(y=i - 0.5, color='black', linewidth=2)

        # Draw a disc in every occupied cell
        disc_colors = {1: 'darkred', 2: 'orange'}
        for (row, col), cell in np.ndenumerate(board_2d):
            if cell in disc_colors:
                ax.text(col, row, '●', fontsize=20, ha='center', va='center',
                        color=disc_colors[cell])

        ax.set_title(title, fontsize=16, fontweight='bold')
        ax.set_xticks(range(columns))
        ax.set_xticklabels([f'Col {i}' for i in range(columns)])
        ax.set_yticks(range(rows))
        ax.set_yticklabels([f'Row {i}' for i in range(rows)])
        fig.tight_layout()
        plt.show()
        # A replay renders one figure per move; release each after showing it.
        plt.close(fig)

    @staticmethod
    def _step_board(step):
        """Return the board stored in one replay step.

        Supports per-agent state dicts that keep the board under
        ``step[0]['observation']`` as well as the flat layout where
        ``step[0]`` itself holds ``'board'``.
        """
        first_state = step[0]
        if 'observation' in first_state:
            return first_state['observation']['board']
        return first_state['board']

    @staticmethod
    def _step_move(step, step_idx):
        """Return ``(player, action)`` for the move that produced ``step``.

        For per-agent state dicts the mover is derived from the step index
        (assumes players alternate starting with player 1 -- TODO confirm for
        environments other than ConnectX) and the action is read from that
        agent's ``'action'`` entry. For the flat layout, ``step[0]['mark']``
        and ``step[1]`` are used.
        """
        first_state = step[0]
        if 'observation' in first_state:
            player = 1 if step_idx % 2 == 1 else 2
            return player, step[player - 1]['action']
        return first_state['mark'], step[1]

    def show_game_replay(self, env_steps, agent1_name="Agent1", agent2_name="Agent2"):
        """Print and render every step of a finished game.

        Args:
            env_steps: Sequence of steps; index 0 is the initial state and
                every later entry is the state after one move.
            agent1_name: Display name of player 1.
            agent2_name: Display name of player 2.
        """
        print(f"\n{'='*60}")
        print(f"🎮 对战回放: {agent1_name} vs {agent2_name}")
        print(f"{'='*60}")

        last_idx = len(env_steps) - 1
        for step_idx, step in enumerate(env_steps):
            board = self._step_board(step)
            if step_idx == 0:
                print(f"\n初始状态:")
                self.render_board(board)
                continue

            player, action = self._step_move(step, step_idx)
            player_name = agent1_name if player == 1 else agent2_name
            color = "🔴" if player == 1 else "🟡"

            print(f"\n回合 {step_idx}: {color} {player_name} 在第 {action} 列落子")
            self.render_board(board, f"回合 {step_idx} - {player_name} 的行动")

            # Announce the result once the final step has been shown
            if step_idx == last_idx:
                winner = self.check_winner(board)
                if winner == 1:
                    print(f"\n🏆 {agent1_name} 获胜！")
                elif winner == 2:
                    print(f"\n🏆 {agent2_name} 获胜！")
                else:
                    print(f"\n🤝 平局！")

    def check_winner(self, board, rows=6, columns=7, inarow=4):
        """Return the player (1 or 2) with ``inarow`` aligned discs, else 0.

        Player 1 is checked before player 2. Lines are searched horizontally,
        vertically and along both diagonals.

        Args:
            board: Flat, row-major sequence of ``rows * columns`` cells.
            rows: Number of board rows.
            columns: Number of board columns.
            inarow: Number of aligned discs required to win.
        """
        board_2d = np.array(board).reshape(rows, columns)
        # (row step, column step): right, down, down-right, down-left
        directions = ((0, 1), (1, 0), (1, 1), (1, -1))
        reach = inarow - 1

        for player in (1, 2):
            for d_row, d_col in directions:
                for row in range(rows):
                    for col in range(columns):
                        end_row = row + reach * d_row
                        end_col = col + reach * d_col
                        # Skip starts whose line would leave the board
                        if not (0 <= end_row < rows and 0 <= end_col < columns):
                            continue
                        if all(board_2d[row + i * d_row, col + i * d_col] == player
                               for i in range(inarow)):
                            return player

        return 0  # no winner

    def plot_performance_stats(self):
        """Plot a 2x2 dashboard from ``performance_data``.

        Panels: per-round rates, average game length, running sums of the
        stored per-round values, and a pie chart of the latest round.
        Prints a notice and returns when no data has been recorded.
        """
        if not self.performance_data['episode']:
            print("暂无性能数据")
            return

        fig, axes = plt.subplots(2, 2, figsize=(15, 10))

        episodes = self.performance_data['episode']
        agent1_series = self.performance_data['agent1_wins']
        agent2_series = self.performance_data['agent2_wins']
        draw_series = self.performance_data['draws']

        # Win-rate trend
        axes[0, 0].plot(episodes, agent1_series, 'r-o', label='Agent1 胜率', alpha=0.7)
        axes[0, 0].plot(episodes, agent2_series, 'b-o', label='Agent2 胜率', alpha=0.7)
        axes[0, 0].plot(episodes, draw_series, 'g-o', label='平局率', alpha=0.7)
        axes[0, 0].set_title('胜率趋势')
        axes[0, 0].set_xlabel('Episode')
        axes[0, 0].set_ylabel('胜率')
        axes[0, 0].legend()
        axes[0, 0].grid(True, alpha=0.3)

        # Average game length
        axes[0, 1].plot(episodes, self.performance_data['avg_game_length'], 'purple', marker='s')
        axes[0, 1].set_title('平均游戏长度')
        axes[0, 1].set_xlabel('Episode')
        axes[0, 1].set_ylabel('平均回合数')
        axes[0, 1].grid(True, alpha=0.3)

        # Running sums of the stored per-round values.
        # NOTE(review): this sums whatever the caller appended; if those are
        # rates rather than counts, the "cumulative wins" labels are
        # misleading -- confirm against the caller.
        axes[1, 0].plot(episodes, np.cumsum(agent1_series), 'r-', label='Agent1 累计胜场')
        axes[1, 0].plot(episodes, np.cumsum(agent2_series), 'b-', label='Agent2 累计胜场')
        axes[1, 0].plot(episodes, np.cumsum(draw_series), 'g-', label='累计平局')
        axes[1, 0].set_title('累计胜负统计')
        axes[1, 0].set_xlabel('Episode')
        axes[1, 0].set_ylabel('累计场次')
        axes[1, 0].legend()
        axes[1, 0].grid(True, alpha=0.3)

        # Distribution of the most recent round (0 for any missing series)
        recent_data = [series[-1] if series else 0
                       for series in (agent1_series, agent2_series, draw_series)]
        labels = ['Agent1', 'Agent2', '平局']
        colors = ['red', 'blue', 'green']
        axes[1, 1].pie(recent_data, labels=labels, colors=colors, autopct='%1.1f%%')
        axes[1, 1].set_title('最新胜率分布')

        plt.tight_layout()
        plt.show()

# Create the visualizer instance
visualizer = BattleVisualizer()
print("对战可视化器已创建！")

In [None]:
class ConnectXAgent:
    """Base class for ConnectX agents.

    Holds a display name and win/loss/draw counters; subclasses provide the
    move-selection logic by overriding ``get_action``.
    """

    def __init__(self, name="Agent"):
        self.name = name
        self.wins, self.losses, self.draws = 0, 0, 0

    def get_action(self, observation, configuration):
        """Return the next move; must be implemented by subclasses."""
        raise NotImplementedError

    def reset_stats(self):
        """Set the win, loss and draw counters back to zero."""
        self.wins = 0
        self.losses = 0
        self.draws = 0

class NeuralNetworkAgent(ConnectXAgent):
    """Agent that picks moves from the policy output of a neural network.

    Args:
        model: Module whose forward pass returns ``(policy, value)``; only
            the policy output is used here.
        name: Display name of the agent.
        use_exploration: Enables epsilon-greedy exploration when True.
        epsilon: Probability of playing a random valid move when
            exploration is enabled.
    """

    def __init__(self, model, name="NeuralAgent", use_exploration=True, epsilon=0.1):
        super().__init__(name)
        self.model = model
        # Inputs are created on the same device as the model parameters.
        self.device = next(model.parameters()).device
        self.use_exploration = use_exploration
        self.epsilon = epsilon

    def board_to_input(self, board, mark):
        """Encode a flat board as a ``(1, len(board))`` float tensor.

        Empty cells become 0.0, cells equal to ``mark`` become 1.0 and all
        other cells become -1.0.
        """
        encoded = [0.0 if cell == 0 else (1.0 if cell == mark else -1.0)
                   for cell in board]
        return torch.tensor(encoded, dtype=torch.float32, device=self.device).unsqueeze(0)

    def get_valid_actions(self, board):
        """Return the columns (0-6) whose first-row cell ``board[col]`` is empty."""
        return [col for col in range(7) if board[col] == 0]

    def get_action(self, observation, configuration):
        """Choose a column for the current position.

        With exploration enabled, a uniformly random valid column is played
        with probability ``epsilon``; otherwise the valid column with the
        highest policy probability is played (lowest column on ties).

        Args:
            observation: Object exposing ``board`` and ``mark`` attributes.
            configuration: Unused; accepted for agent-interface compatibility.
        """
        board = observation.board
        mark = observation.mark

        valid_actions = self.get_valid_actions(board)
        if not valid_actions:
            return random.choice(range(7))  # should not happen

        # Explore: no forward pass is needed for a random move.
        if self.use_exploration and random.random() < self.epsilon:
            return random.choice(valid_actions)

        # Exploit: run inference in eval mode so Dropout is disabled and the
        # greedy choice is deterministic, then restore the previous mode.
        was_training = self.model.training
        self.model.eval()
        try:
            with torch.no_grad():
                board_input = self.board_to_input(board, mark)
                policy, _ = self.model(board_input)
                action_probs = F.softmax(policy, dim=-1).cpu().numpy()[0]
        finally:
            self.model.train(was_training)

        # max() keeps the first maximum, i.e. the lowest column on ties.
        return max(valid_actions, key=lambda action: action_probs[action])

class DataBasedAgent(ConnectXAgent):
    """Agent that looks up per-action values in a pre-computed table.

    Positions missing from the table fall back to a centre-first heuristic.

    Args:
        data_file: Path of the state/action-value text file.
        name: Display name of the agent.
    """

    def __init__(self, data_file="connectx-state-action-value.txt", name="DataAgent"):
        super().__init__(name)
        self.state_values = {}
        self.load_data(data_file)

    def load_data(self, data_file, max_lines=100000):
        """Load ``state -> [7 action values]`` entries into ``state_values``.

        Each usable line has 8 comma-separated fields: the state string
        followed by 7 action values. Lines with a different field count are
        ignored. An empty value field is stored as ``-inf`` (presumably it
        marks an unplayable column -- TODO confirm against the dataset).
        Lines with unparsable values are skipped individually so one bad
        line does not discard everything loaded so far.

        A missing file or any other error leaves ``state_values`` empty and
        prints a message instead of raising.

        Args:
            data_file: Path of the text file to read.
            max_lines: Maximum number of lines read from the file.
        """
        skipped = 0
        try:
            print(f"正在加载数据文件: {data_file}")
            with open(data_file, 'r', encoding='utf-8') as f:
                for line_num, line in enumerate(f, 1):
                    if line_num > max_lines:  # cap the amount of data
                        break

                    parts = line.strip().split(',')
                    if len(parts) != 8:  # state string + 7 action values
                        continue

                    try:
                        values = [float(v) if v.strip() else float('-inf')
                                  for v in parts[1:]]
                    except ValueError:
                        skipped += 1
                        continue

                    self.state_values[parts[0]] = values

                    if line_num % 10000 == 0:
                        print(f"已加载 {line_num} 行数据")

            print(f"数据加载完成！总共加载了 {len(self.state_values)} 个状态")
            if skipped:
                print(f"跳过了 {skipped} 行格式错误的数据")

        except FileNotFoundError:
            print(f"警告：数据文件 {data_file} 未找到，将使用随机策略")
            self.state_values = {}
        except Exception as e:
            print(f"加载数据时出错: {e}")
            self.state_values = {}

    def board_to_state_string(self, board, mark):
        """Encode a board as a string: '0' empty, '1' own cell, '2' other."""
        return ''.join(
            '0' if cell == 0 else ('1' if cell == mark else '2')
            for cell in board
        )

    def get_valid_actions(self, board):
        """Return the columns (0-6) whose first-row cell ``board[col]`` is empty."""
        return [col for col in range(7) if board[col] == 0]

    def get_action(self, observation, configuration):
        """Return the valid column with the highest stored value.

        Falls back to ``heuristic_action`` when the position is not in the
        table, and to a random valid column when no valid column has a
        usable value.

        Args:
            observation: Object exposing ``board`` and ``mark`` attributes.
            configuration: Unused; accepted for agent-interface compatibility.
        """
        board = observation.board
        mark = observation.mark

        valid_actions = self.get_valid_actions(board)
        if not valid_actions:
            return random.choice(range(7))

        state_str = self.board_to_state_string(board, mark)
        action_values = self.state_values.get(state_str)
        if action_values is None:
            # Position not in the table: use the heuristic rule
            return self.heuristic_action(board, mark, valid_actions)

        # Strict '>' keeps the first (lowest) column on ties and never
        # selects a column whose value is -inf.
        best_action = None
        best_value = float('-inf')
        for action in valid_actions:
            if action_values[action] > best_value:
                best_value = action_values[action]
                best_action = action

        return best_action if best_action is not None else random.choice(valid_actions)

    def heuristic_action(self, board, mark, valid_actions):
        """Pick the valid column closest to the centre of the board."""
        center_preference = [3, 2, 4, 1, 5, 0, 6]
        for col in center_preference:
            if col in valid_actions:
                return col
        return random.choice(valid_actions)

# Create the different kinds of agents used for testing
print("智能体类定义完成！")

# Create the dataset-based agent
data_agent = DataBasedAgent(name="DataAgent")
print(f"数据代理创建完成，加载了 {len(data_agent.state_values)} 个状态")

In [None]:
def run_battle_with_visualization(agent1, agent2, visualizer, show_replay=True):
    """Play one ConnectX game between two agents and report the outcome.

    Args:
        agent1: Agent playing first; needs ``get_action`` and ``name``.
        agent2: Agent playing second; needs ``get_action`` and ``name``.
        visualizer: Object providing ``show_game_replay`` and ``check_winner``.
        show_replay: When True, the full game is replayed via the visualizer.

    Returns:
        ``(winner, game_length)`` where ``winner`` is 1, 2 or 0 (no line of
        four on the final board) and ``game_length`` is the number of steps
        after the initial state. ``(0, 0)`` when the environment returned no
        steps.
    """
    env = make("connectx", debug=False)

    # Wrap the agent objects in plain callables for the environment
    def agent1_func(observation, configuration):
        return agent1.get_action(observation, configuration)

    def agent2_func(observation, configuration):
        return agent2.get_action(observation, configuration)

    env_steps = env.run([agent1_func, agent2_func])

    if show_replay:
        visualizer.show_game_replay(env_steps, agent1.name, agent2.name)

    if len(env_steps) == 0:
        return 0, 0

    # Each step is a list of per-agent states; the board lives in the first
    # agent's observation. A flat state holding 'board' directly is accepted
    # too.
    first_state = env_steps[-1][0]
    if 'observation' in first_state:
        final_board = first_state['observation']['board']
    else:
        final_board = first_state['board']

    winner = visualizer.check_winner(final_board)
    game_length = len(env_steps) - 1  # exclude the initial state

    return winner, game_length

def run_training_with_battles(agent1, agent2, visualizer, total_episodes=500,
                             battle_interval=50, games_per_battle=10,
                             wait_for_input=True):
    """Run periodic evaluation rounds between two agents and report results.

    For every block of ``battle_interval`` episodes this plays
    ``games_per_battle`` silent games, prints the round statistics, appends
    them to ``visualizer.performance_data``, replays one additional game in
    detail (not counted in the statistics) and plots the performance charts.
    No model parameters are updated inside this function.

    Args:
        agent1: Agent playing first; its ``wins``/``losses``/``draws`` are updated.
        agent2: Agent playing second; its counters are updated likewise.
        visualizer: Object providing ``performance_data`` and
            ``plot_performance_stats``; also passed on to
            ``run_battle_with_visualization``.
        total_episodes: Upper bound of the episode counter.
        battle_interval: Episode step between two evaluation rounds.
        games_per_battle: Number of games counted per round.
        wait_for_input: When True, wait for Enter and clear the cell output
            between rounds; set to False for unattended runs.

    Returns:
        Dict with ``agent1_total_wins``, ``agent2_total_wins``,
        ``total_draws`` and ``total_games``.

    Raises:
        ValueError: If ``battle_interval`` or ``games_per_battle`` is not positive.
    """
    if battle_interval <= 0:
        raise ValueError("battle_interval must be a positive integer")
    if games_per_battle <= 0:
        raise ValueError("games_per_battle must be a positive integer")

    def _percent(count, total):
        # Guard against a run in which no game was played at all.
        return count / total * 100 if total else 0.0

    print(f"\n{'='*80}")
    print(f"🚀 开始训练！总共 {total_episodes} episodes")
    print(f"📊 每 {battle_interval} episodes 进行一次对战展示 (每次 {games_per_battle} 场游戏)")
    print(f"{'='*80}")

    stats = {
        'agent1_total_wins': 0,
        'agent2_total_wins': 0,
        'total_draws': 0,
        'total_games': 0
    }

    for episode in range(0, total_episodes, battle_interval):
        print(f"\n{'='*60}")
        print(f"📍 Episode {episode}-{min(episode + battle_interval - 1, total_episodes - 1)}")
        print(f"{'='*60}")

        # Per-round counters
        episode_stats = {
            'agent1_wins': 0,
            'agent2_wins': 0,
            'draws': 0,
            'game_lengths': []
        }

        # Silent batch of games used for the statistics
        print(f"\n🎯 正在进行 {games_per_battle} 场快速对战...")
        for game in range(games_per_battle):
            winner, game_length = run_battle_with_visualization(
                agent1, agent2, visualizer, show_replay=False
            )

            episode_stats['game_lengths'].append(game_length)

            if winner == 1:
                episode_stats['agent1_wins'] += 1
                agent1.wins += 1
                agent2.losses += 1
            elif winner == 2:
                episode_stats['agent2_wins'] += 1
                agent2.wins += 1
                agent1.losses += 1
            else:
                episode_stats['draws'] += 1
                agent1.draws += 1
                agent2.draws += 1

        # Round statistics (percentages)
        agent1_winrate = _percent(episode_stats['agent1_wins'], games_per_battle)
        agent2_winrate = _percent(episode_stats['agent2_wins'], games_per_battle)
        draw_rate = _percent(episode_stats['draws'], games_per_battle)
        avg_game_length = np.mean(episode_stats['game_lengths'])

        # Running totals
        stats['agent1_total_wins'] += episode_stats['agent1_wins']
        stats['agent2_total_wins'] += episode_stats['agent2_wins']
        stats['total_draws'] += episode_stats['draws']
        stats['total_games'] += games_per_battle

        print(f"\n📈 本轮统计结果:")
        print(f"  🔴 {agent1.name}: {episode_stats['agent1_wins']}/{games_per_battle} 胜 ({agent1_winrate:.1f}%)")
        print(f"  🔵 {agent2.name}: {episode_stats['agent2_wins']}/{games_per_battle} 胜 ({agent2_winrate:.1f}%)")
        print(f"  🤝 平局: {episode_stats['draws']}/{games_per_battle} 场 ({draw_rate:.1f}%)")
        print(f"  ⏱️  平均游戏长度: {avg_game_length:.1f} 回合")

        # Store the round as fractions (0-1) for plotting
        visualizer.performance_data['episode'].append(episode + battle_interval)
        visualizer.performance_data['agent1_wins'].append(agent1_winrate / 100)
        visualizer.performance_data['agent2_wins'].append(agent2_winrate / 100)
        visualizer.performance_data['draws'].append(draw_rate / 100)
        visualizer.performance_data['avg_game_length'].append(avg_game_length)

        # Replay one extra game in detail (not counted in the statistics)
        print(f"\n🎮 展示一场详细对战过程:")
        run_battle_with_visualization(agent1, agent2, visualizer, show_replay=True)

        # Cumulative statistics
        total_winrate1 = _percent(stats['agent1_total_wins'], stats['total_games'])
        total_winrate2 = _percent(stats['agent2_total_wins'], stats['total_games'])
        total_drawrate = _percent(stats['total_draws'], stats['total_games'])

        print(f"\n📊 累计统计 (总共 {stats['total_games']} 场游戏):")
        print(f"  🔴 {agent1.name}: {stats['agent1_total_wins']} 胜 ({total_winrate1:.1f}%)")
        print(f"  🔵 {agent2.name}: {stats['agent2_total_wins']} 胜 ({total_winrate2:.1f}%)")
        print(f"  🤝 平局: {stats['total_draws']} 场 ({total_drawrate:.1f}%)")

        # Performance charts
        print(f"\n📈 绘制性能趋势图...")
        visualizer.plot_performance_stats()

        # Pause between rounds (skipped after the last one)
        if wait_for_input and episode + battle_interval < total_episodes:
            input(f"\n⏸️  按 Enter 键继续下一轮训练...")
            clear_output(wait=True)  # keep the cell output tidy

    final_winrate1 = _percent(stats['agent1_total_wins'], stats['total_games'])
    final_winrate2 = _percent(stats['agent2_total_wins'], stats['total_games'])
    final_drawrate = _percent(stats['total_draws'], stats['total_games'])

    print(f"\n{'='*80}")
    print(f"🏁 训练完成！")
    print(f"📊 最终统计 (总共 {stats['total_games']} 场游戏):")
    print(f"  🔴 {agent1.name}: {stats['agent1_total_wins']} 胜 ({final_winrate1:.1f}%)")
    print(f"  🔵 {agent2.name}: {stats['agent2_total_wins']} 胜 ({final_winrate2:.1f}%)")
    print(f"  🤝 平局: {stats['total_draws']} 场 ({final_drawrate:.1f}%)")
    print(f"{'='*80}")

    return stats

print("训练和对战功能已准备完成！")

In [None]:
# 🎮 Set up the agents for the training / battle showcase.
# The showcase displays one battle round every 50 episodes.

# Create the neural-network agent
neural_agent = NeuralNetworkAgent(model, name="NeuralAgent", epsilon=0.2)

# Available opponents (data agent, random agent, ...)
opponent_options = {
    "data": data_agent,
    # Placeholder: the base class does not implement get_action, so this
    # entry cannot play until a random agent is implemented.
    "random": ConnectXAgent("RandomAgent")
}

print("可选择的对手:")
for key, agent in opponent_options.items():
    print(f"  {key}: {agent.name}")

# Select the opponent (the data agent by default)
opponent = data_agent

print(f"\n🤖 选择的智能体组合:")
print(f"  玩家1: {neural_agent.name} (神经网络)")
print(f"  玩家2: {opponent.name} (基于数据集)")

# Show how to start the training run
print(f"\n准备开始训练... 你可以修改以下参数:")
print(f"  - total_episodes: 总的训练轮数")
print(f"  - battle_interval: 每隔多少episode展示一次对战")
print(f"  - games_per_battle: 每次展示时运行多少场游戏进行统计")
print(f"\n运行下面的代码开始训练:")
print(f"final_stats = run_training_with_battles(")
print(f"    agent1=neural_agent,")
print(f"    agent2=opponent,")
print(f"    visualizer=visualizer,")
print(f"    total_episodes=200,    # 总共200轮")
print(f"    battle_interval=50,    # 每50轮展示一次")
print(f"    games_per_battle=10    # 每次展示10场游戏")
print(f")")

In [None]:
# 🚀 Run the training!
# Executing this cell starts the actual run.

try:
    final_stats = run_training_with_battles(
        agent1=neural_agent,
        agent2=opponent,
        visualizer=visualizer,
        total_episodes=200,     # 200 episodes in total
        battle_interval=50,     # show a battle round every 50 episodes
        games_per_battle=10     # 10 games per round for the statistics
    )

    print("\n🎉 训练完成！")
    print(f"最终统计结果: {final_stats}")

except KeyboardInterrupt:
    print("\n⏹️ 训练被用户中断")
except Exception as e:
    # Report the failure with its full traceback instead of hiding it
    print(f"\n❌ 训练过程中出现错误: {e}")
    import traceback
    traceback.print_exc()

In [None]:
# 🔍 Extra tests and analysis

# 1. Play a single game on its own
print("=" * 50)
print("🎯 单场对战测试")
print("=" * 50)

# Run one game and show the detailed replay
winner, game_length = run_battle_with_visualization(neural_agent, data_agent, visualizer)

if winner == 1:
    print(f"\n🏆 {neural_agent.name} 获胜！游戏长度: {game_length} 回合")
elif winner == 2:
    print(f"\n🏆 {data_agent.name} 获胜！游戏长度: {game_length} 回合")
else:
    print(f"\n🤝 平局！游戏长度: {game_length} 回合")

print("\n" + "=" * 50)

In [None]:
# 📊 Performance analysis and final summary

# Show the final performance charts
if visualizer.performance_data['episode']:
    print("📈 显示完整的性能趋势图...")
    visualizer.plot_performance_stats()
else:
    print("⚠️ 还没有性能数据，请先运行训练")

# Per-agent counters
print("\n🤖 智能体统计信息:")
print(f"  {neural_agent.name}:")
print(f"    胜: {neural_agent.wins}, 负: {neural_agent.losses}, 平: {neural_agent.draws}")
print(f"  {data_agent.name}:")
print(f"    胜: {data_agent.wins}, 负: {data_agent.losses}, 平: {data_agent.draws}")

# Usage notes
print("\n" + "="*80)
print("📖 使用说明")
print("="*80)
print("""
🎯 这个notebook的主要功能:
1. 创建基于神经网络的ConnectX智能体
2. 创建基于数据集的ConnectX智能体  
3. 每50个episode展示一次详细的对战过程
4. 实时显示性能统计图表
5. 可视化游戏板和对战过程

🔧 可以调整的参数:
- total_episodes: 总的训练轮数
- battle_interval: 每隔多少episode展示对战
- games_per_battle: 每次统计的游戏场数
- epsilon: 神经网络智能体的探索率

🎮 支持的智能体类型:
- NeuralNetworkAgent: 基于PyTorch神经网络
- DataBasedAgent: 基于connectx-state-action-value.txt数据集
- 可以轻松扩展其他类型的智能体

📊 可视化功能:
- 实时显示游戏板状态
- 对战过程回放
- 胜率趋势图
- 游戏长度统计
- 累计性能分析

💡 提示:
- 每次对战展示后会暂停，按Enter继续
- 可以随时中断训练（Ctrl+C）
- 所有统计数据都会保存在visualizer中
""")
print("="*80)

print("\n🎉 ConnectX训练和对战可视化系统已准备就绪！")
print("现在可以运行训练cell来开始训练，或者单独运行测试cell来观看对战。")