In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class GMKnn(nn.Module):
    """
    Two-branch MLP that maps a pair of flattened boards to one scalar score.

    Each input is projected from 15*15 = 225 features to 128 by its own linear
    layer; the two 128-d embeddings are concatenated and passed through a
    256 -> 128 -> 32 -> 1 head.
    """

    def __init__(self):
        super().__init__()
        n_cells = 15 * 15
        # Attribute names and creation order are kept stable: they define the
        # state_dict keys and the order in which random init is consumed.
        self.myin = nn.Linear(n_cells, 128)
        self.oppin = nn.Linear(n_cells, 128)
        self.fc1 = nn.Linear(256, 128)
        self.fc2 = nn.Linear(128, 32)
        self.q = nn.Linear(32, 1)

    def forward(self, board1, board2):
        """
        Score a batch of positions.

        :param board1: tensor of shape (batch, 225) fed to the ``myin`` branch
        :param board2: tensor of shape (batch, 225) fed to the ``oppin`` branch
        :return: tensor of shape (batch, 1)
        """
        mine = F.relu(self.myin(board1))
        theirs = F.relu(self.oppin(board2))
        hidden = torch.cat((mine, theirs), dim=1)
        for layer in (self.fc1, self.fc2):
            hidden = F.relu(layer(hidden))
        return self.q(hidden)

## 让mlp学t+0优势函数

In [2]:
import pickle
import os
import numpy as np

# Global hash table: (board_bitmap_tuple, my_player) -> advantage value
# board_bitmap_tuple = (black_bitmap_int, white_bitmap_int)
board_hash_table = {}

def board_to_bitmap(board):
    """
    Encode a board as two Python-int bitmaps.

    Cell values: 1 = black stone, 2 = white stone, 0 = empty.
    Bit ``k`` of each bitmap corresponds to flat (row-major) index ``k`` of the
    board, i.e. the same layout that ``board_to_bitmap_fast`` produces and
    ``bitmap_to_board`` decodes, so the three functions round-trip.

    :param board: array-like of stone codes (e.g. a 15x15 numpy array)
    :return: ``(black_bitmap, white_bitmap)`` as plain Python ints
    """
    flat = np.asarray(board).ravel()

    def _pack(mask):
        # packbits(bitorder='little') stores mask[k] in bit k % 8 of byte
        # k // 8; reading those bytes little-endian puts mask[k] at bit k of
        # the integer. An empty mask yields b'' and therefore 0.
        packed = np.packbits(mask, bitorder='little')
        return int.from_bytes(packed.tobytes(), 'little')

    return (_pack(flat == 1), _pack(flat == 2))

def board_to_bitmap_fast(board):
    """
    Encode a board as ``(black_bitmap, white_bitmap)``.

    Bit ``k`` of a bitmap is set when the cell at flat (row-major) index ``k``
    holds that colour (1 = black, 2 = white). Indices are converted to Python
    ints before shifting so the bitmaps are arbitrary-precision ints.
    """
    cells = board.flatten()
    bitmaps = []
    for stone in (1, 2):
        bits = 0
        # .tolist() turns numpy integers into Python ints, so the shift below
        # cannot overflow a fixed-width numpy integer.
        for idx in np.flatnonzero(cells == stone).tolist():
            bits |= 1 << idx
        bitmaps.append(bits)
    return tuple(bitmaps)

def bitmap_to_board(black_bitmap, white_bitmap, board_size=15):
    """
    Decode two bitmaps back into a ``board_size`` x ``board_size`` int8 board.

    Bit ``k`` maps to flat (row-major) index ``k``. A cell becomes 1 when the
    black bit is set, otherwise 2 when the white bit is set, otherwise 0.
    """
    n_cells = board_size * board_size
    flat = np.zeros(n_cells, dtype=np.int8)

    for idx in range(n_cells):
        bit = 1 << idx
        if black_bitmap & bit:
            flat[idx] = 1
            continue
        if white_bitmap & bit:
            flat[idx] = 2

    return flat.reshape(board_size, board_size)

def board_to_hash(board, my_player):
    """Build the cache key ``((black_bitmap, white_bitmap), my_player)`` for a position."""
    return (board_to_bitmap_fast(board), my_player)

def store_board_advantage(board, my_player, adv_f):
    """Record ``adv_f`` in the global cache under the (bitmaps, player) key of ``board``."""
    cache_key = (board_to_bitmap_fast(board), my_player)
    board_hash_table[cache_key] = adv_f

def get_board_advantage(board, my_player):
    """Return the cached advantage for (board, player), or ``None`` when it is not cached."""
    cache_key = (board_to_bitmap_fast(board), my_player)
    return board_hash_table.get(cache_key)

def has_board_in_cache(board, my_player):
    """Tell whether the (board, player) pair already has an entry in the global cache."""
    cache_key = (board_to_bitmap_fast(board), my_player)
    return cache_key in board_hash_table

def save_board_hash_table(filename='board_hash_table.pkl'):
    """
    Pickle the global board cache to ``filename`` and report what was written.

    Prints the number of stored positions and the resulting file size in
    B / KB / MB.
    """
    with open(filename, 'wb') as out_file:
        pickle.dump(board_hash_table, out_file)
    print(f"已保存 {len(board_hash_table)} 个棋盘状态到 {filename}")

    # Report the on-disk size in a human-friendly unit.
    n_bytes = os.path.getsize(filename)
    if n_bytes >= 1024 * 1024:
        size_str = f"{n_bytes / (1024 * 1024):.2f} MB"
    elif n_bytes >= 1024:
        size_str = f"{n_bytes / 1024:.2f} KB"
    else:
        size_str = f"{n_bytes} B"
    print(f"文件大小: {size_str}")

def load_board_hash_table(filename='board_hash_table.pkl'):
    """
    Replace the global board cache with the pickled dict stored in ``filename``.

    Prints a notice and leaves the cache untouched when the file is missing.
    NOTE: ``pickle.load`` can execute arbitrary code; only load files that
    were produced by this notebook.
    """
    global board_hash_table

    if not os.path.exists(filename):
        print(f"文件 {filename} 不存在")
        return

    with open(filename, 'rb') as in_file:
        board_hash_table = pickle.load(in_file)
    print(f"已从 {filename} 加载 {len(board_hash_table)} 个棋盘状态")

    # Report the on-disk size in a human-friendly unit.
    n_bytes = os.path.getsize(filename)
    if n_bytes >= 1024 * 1024:
        size_str = f"{n_bytes / (1024 * 1024):.2f} MB"
    elif n_bytes >= 1024:
        size_str = f"{n_bytes / 1024:.2f} KB"
    else:
        size_str = f"{n_bytes} B"
    print(f"文件大小: {size_str}")

def clear_board_hash_table():
    """Rebind the global board cache to a brand-new empty dict and print a confirmation."""
    global board_hash_table
    # Rebinding (rather than .clear()) leaves any previously handed-out
    # reference to the old dict untouched.
    board_hash_table = dict()
    print("已清空棋盘哈希表")

def get_memory_usage():
    """
    Return a rough, human-readable estimate of the cache's memory footprint.

    The estimate is simply 122 bytes per cached entry, formatted as B / KB / MB;
    an empty cache reports "0 B".
    """
    if not board_hash_table:
        return "0 B"

    approx_bytes = 122 * len(board_hash_table)

    if approx_bytes >= 1024 * 1024:
        return f"{approx_bytes / (1024 * 1024):.2f} MB"
    if approx_bytes >= 1024:
        return f"{approx_bytes / 1024:.2f} KB"
    return f"{approx_bytes} B"

In [None]:
from gamec import *
from display import GomokuUI
from copy import deepcopy
import pygame
import numpy as np
import numpy as np
import copy
import time

# Sentinel used as "infinity" for win/loss scores and alpha-beta bounds
INF = 999999999

def advantage_f(game,l3, l4, my_player):
    """
    Static evaluation of a position from ``my_player``'s point of view.

    The score is 20 * (my l4 count - opponent l4 count)
    plus 1 * (my l3 count - opponent l3 count), where ``l3`` / ``l4`` are
    dicts keyed by player id (missing players count as 0). As a side effect
    the score is written to the global board cache for ``game.board``.
    """
    rival = 3 - my_player
    # The l4 pattern is weighted far above the l3 pattern.
    four_margin = l4.get(my_player, 0) - l4.get(rival, 0)
    three_margin = l3.get(my_player, 0) - l3.get(rival, 0)
    score = four_margin * 20 + three_margin * 1
    store_board_advantage(game.board, my_player, score)
    # Original author's note here read "-25 +25" (presumably the typical score range; unverified).
    return score


def basic_ai_move(game:GomokuCore, my_player, depth=3, epsilon=0.3, top_k=5):
    """
    Pick a move with minimax search plus epsilon-greedy exploration.

    Every candidate from ``game.recommand_positions()`` is played on a deep
    copy of the game. A move that wins immediately is returned at once.
    Otherwise the move is scored with ``minimax`` and, with probability
    ``epsilon``, one of the ``top_k`` best-scored moves is chosen uniformly at
    random; else the best-scored move is chosen.

    :param game: current game state (not modified)
    :param my_player: id of the player to move (1 or 2)
    :param depth: total search depth in plies; values below 1 are treated as 1
    :param epsilon: exploration probability in [0, 1]
    :param top_k: number of best moves eligible for exploration (at least 1 is used)
    :return: ``(row, col)`` of the selected move
    :raises ValueError: if no candidate move could be placed
    """
    # minimax only stops on game over or when its depth reaches 0, so never
    # hand it a negative remaining depth.
    remaining_depth = max(depth - 1, 0)

    scored_moves = []  # entries: (score, (r, c))
    for r, c in game.recommand_positions():
        next_game = copy.deepcopy(game)
        if not next_game.place_stone(r, c):
            continue
        # An immediate win needs no further search or exploration.
        if next_game.winner == my_player:
            return (r, c)
        score = minimax(next_game, remaining_depth, -INF, INF, False, my_player)
        scored_moves.append((score, (r, c)))

    if not scored_moves:
        # Previously this surfaced as an opaque IndexError / numpy ValueError.
        raise ValueError("basic_ai_move: no legal candidate move available")

    # Stable sort: ties keep the candidate generator's order.
    scored_moves.sort(key=lambda x: x[0], reverse=True)

    if np.random.random() < epsilon:
        # Explore: uniform choice among the best-scored moves.
        top_candidates = scored_moves[:max(top_k, 1)]
        _, best_move = top_candidates[np.random.randint(len(top_candidates))]
    else:
        # Exploit: take the best-scored move.
        _, best_move = scored_moves[0]

    return best_move


def _terminal_value(game, my_player):
    """Value of a finished game for ``my_player``: +INF win, -INF loss, 0 otherwise."""
    if game.winner == my_player:
        return INF
    if game.winner == (3 - my_player):
        return -INF
    return 0


def minimax(game:GomokuCore, depth, alpha, beta, is_maximizing, my_player, top_k=15):
    """
    Alpha-beta minimax restricted to the ``top_k`` most promising children.

    At every node each candidate move is played on a deep copy and given an
    ordering score (terminal value, or ``advantage_f``, which also caches the
    position). Children are sorted best-first for the side to move, truncated
    to ``top_k`` and searched recursively with alpha-beta pruning.

    :param game: position to evaluate (not modified)
    :param depth: remaining plies; the static evaluation is returned at <= 0
    :param alpha: best value the maximizer can already guarantee
    :param beta: best value the minimizer can already guarantee
    :param is_maximizing: True when ``my_player`` is to move at this node
    :param my_player: player whose perspective all scores use
    :param top_k: beam width per node
    :return: value of the position for ``my_player``
    """
    # 1. Finished game: exact value.
    if game.game_over:
        return _terminal_value(game, my_player)

    # 2. Leaf. `<=` instead of `==` so a negative depth cannot search without a depth limit.
    if depth <= 0:
        return advantage_f(game,game.l3_count, game.l4_count, my_player)

    # 3. Expand and pre-score every child (the main cost of this node).
    scored_moves = []  # entries: (ordering_score, child_game)
    for r, c in game.recommand_positions():
        # The copy is required: the child must be scored without touching `game`.
        next_game = copy.deepcopy(game)
        if not next_game.place_stone(r, c):
            continue
        if next_game.game_over:
            # Same scoring as the terminal check above, so a draw is ordered
            # as 0 rather than being mistaken for a loss.
            current_score = _terminal_value(next_game, my_player)
        else:
            current_score = advantage_f(next_game,next_game.l3_count, next_game.l4_count, my_player)
        scored_moves.append((current_score, next_game))

    # No placeable move: score as a draw.
    if not scored_moves:
        return 0

    # 4. Order best-first for the side to move and keep the beam.
    #    Max node: highest first. Min node: lowest first.
    scored_moves.sort(key=lambda x: x[0], reverse=is_maximizing)
    best_moves = scored_moves[:top_k]

    best_eval = -INF if is_maximizing else INF
    for _, next_game_state in best_moves:
        # Children are already independent copies; no further deepcopy needed.
        eval_score = minimax(next_game_state, depth - 1, alpha, beta,
                             not is_maximizing, my_player, top_k)
        if is_maximizing:
            best_eval = max(best_eval, eval_score)
            alpha = max(alpha, eval_score)
        else:
            best_eval = min(best_eval, eval_score)
            beta = min(beta, eval_score)
        if beta <= alpha:
            break
    return best_eval

pygame 2.6.1 (SDL 2.28.4, Python 3.11.11)
Hello from the pygame community. https://www.pygame.org/contribute.html


  from pkg_resources import resource_stream, resource_exists


In [9]:
def main_ai_vs_ai(epsilon1=0.1, epsilon2=0.1, depth1=4, depth2=4, show_ui=True, max_games=100):
    """
    Run a series of AI-vs-AI self-play games and collect win statistics.

    :param epsilon1: exploration rate of AI1 (player 1, black)
    :param epsilon2: exploration rate of AI2 (player 2, white)
    :param depth1: search depth of AI1
    :param depth2: search depth of AI2
    :param show_ui: render every move with the pygame UI when True
    :param max_games: maximum number of games to play
    :return: dict ``{1: wins of AI1, 2: wins of AI2, 0: draws}``; returned
             early with the current totals if the pygame window is closed
    """
    # Wins of player 1, wins of player 2, draws.
    stats = {1: 0, 2: 0, 0: 0}

    if show_ui:
        ui = GomokuUI(board_size=15, cell_size=40)
        clock = pygame.time.Clock()

    for game_num in range(max_games):
        game = GomokuCore(board_size=15)
        move_count = 0

        print(f"\n========== 第 {game_num + 1} 局 ==========")

        while not game.game_over:
            if show_ui:
                # Only the window-close event is handled.
                for event in pygame.event.get():
                    if event.type == pygame.QUIT:
                        pygame.quit()
                        print(f"\n总战绩: AI1胜{stats[1]}局, AI2胜{stats[2]}局, 平局{stats[0]}局")
                        return stats

            # Let the side to move choose and play its stone.
            current_player = game.current_player
            if current_player == 1:
                row, col = basic_ai_move(game, my_player=1, depth=depth1, epsilon=epsilon1)
                print(f"AI1 (黑方) 落子: ({row}, {col})")
            else:
                row, col = basic_ai_move(game, my_player=2, depth=depth2, epsilon=epsilon2)
                print(f"AI2 (白方) 落子: ({row}, {col})")

            game.place_stone(row, col)
            move_count += 1

            if show_ui:
                ui.draw(
                    board_array=game.get_board(),
                    current_player=game.current_player,
                    game_over=game.game_over,
                    winner=game.winner,
                    l3_count=game.l3_count,
                    l4_count=game.l4_count,
                    last_move=game.last_move
                )
                clock.tick(5)  # at most 5 moves per second so the game can be followed

        # Per-game bookkeeping.
        if game.game_over:
            # NOTE(review): assumes game.winner is 0 for a draw; any other
            # draw marker (e.g. None) would raise KeyError here. Confirm
            # against GomokuCore.
            stats[game.winner] += 1
            if game.winner == 1:
                print(f"AI1 (黑方) 获胜! 用时 {move_count} 步")
            elif game.winner == 2:
                print(f"AI2 (白方) 获胜! 用时 {move_count} 步")
            else:
                print(f"平局! 用时 {move_count} 步")

            print(f"当前战绩: AI1胜{stats[1]}局, AI2胜{stats[2]}局, 平局{stats[0]}局")
            print(f"缓存大小: {len(board_hash_table)} 个状态 占用内存: {get_memory_usage()}")
            if show_ui:
                time.sleep(0.2)

    if show_ui:
        pygame.quit()

    # Divide by the games actually recorded so max_games=0 cannot raise
    # ZeroDivisionError; after a complete run this equals max_games.
    games_played = sum(stats.values())
    win_rate = stats[1] / games_played * 100 if games_played else 0.0

    print(f"\n========== 对弈结束 ==========")
    print(f"总战绩: AI1胜{stats[1]}局, AI2胜{stats[2]}局, 平局{stats[0]}局")
    print(f"AI1胜率: {win_rate:.1f}%")
    print(f"缓存大小: {len(board_hash_table)} 个状态")

    return stats

# Self-play run that fills the global board cache: up to 100 games, both AIs
# searching 2 plies with ~30% exploration, rendered in the pygame window.
main_ai_vs_ai(epsilon1=0.3, epsilon2=0.31, depth1=2, depth2=2, show_ui=True, max_games=100)


AI1 (黑方) 落子: (7, 7)
AI2 (白方) 落子: (7, 8)
AI1 (黑方) 落子: (8, 8)
AI2 (白方) 落子: (9, 9)
AI1 (黑方) 落子: (9, 7)
AI2 (白方) 落子: (8, 7)
AI1 (黑方) 落子: (7, 9)
AI2 (白方) 落子: (6, 9)
AI1 (黑方) 落子: (6, 10)
AI2 (白方) 落子: (9, 8)
AI1 (黑方) 落子: (10, 6)
AI1 (黑方) 获胜! 用时 11 步
当前战绩: AI1胜1局, AI2胜0局, 平局0局
缓存大小: 98362 个状态 占用内存: 11.44 MB

AI1 (黑方) 落子: (7, 6)
AI2 (白方) 落子: (7, 5)
AI1 (黑方) 落子: (8, 5)
AI2 (白方) 落子: (9, 4)
AI1 (黑方) 落子: (8, 6)
AI2 (白方) 落子: (8, 4)
AI1 (黑方) 落子: (7, 4)
AI2 (白方) 落子: (9, 6)
AI1 (黑方) 落子: (9, 3)
AI2 (白方) 落子: (6, 5)
AI1 (黑方) 落子: (9, 5)
AI2 (白方) 落子: (10, 4)
AI1 (黑方) 落子: (7, 3)
AI2 (白方) 落子: (8, 3)
AI1 (黑方) 落子: (6, 4)
AI2 (白方) 落子: (10, 5)
AI1 (黑方) 落子: (8, 2)
AI2 (白方) 落子: (11, 4)
AI1 (黑方) 落子: (6, 3)
AI2 (白方) 落子: (12, 4)
AI2 (白方) 获胜! 用时 20 步
当前战绩: AI1胜1局, AI2胜1局, 平局0局
缓存大小: 157340 个状态 占用内存: 18.31 MB

AI1 (黑方) 落子: (6, 7)
AI2 (白方) 落子: (7, 7)
AI1 (黑方) 落子: (6, 8)
AI2 (白方) 落子: (5, 8)
AI1 (黑方) 落子: (5, 7)
AI2 (白方) 落子: (7, 8)
AI1 (黑方) 落子: (6, 9)
AI2 (白方) 落子: (7, 9)
AI1 (黑方) 落子: (6, 6)
AI2 (白方) 落子: (5, 9)
AI1 (黑方) 落子:

{1: 24, 2: 7, 0: 0}

In [10]:
import pickle
import os


# Persist the board cache; this file is the dataset the training cell reads.
save_board_hash_table("boombianv1.pkl")

已保存 2018545 个棋盘状态到 boombianv1.pkl
文件大小: 104.31 MB


In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pickle
import os
from tqdm import tqdm

# 假设你的网络定义在这里，如果是在其他文件请 import
# class GMKnn(nn.Module): ... 

class GomokuFullDataset(Dataset):
    """
    Fully in-memory dataset built from the board cache.

    All bitmaps are expanded to 225-element float vectors once, at
    construction time, and kept in RAM as CPU tensors.
    Pro: per-item access during training is a plain tensor index.
    Con: slow start-up (preprocessing) and high memory use.
    """

    _N_CELLS = 15 * 15                  # cells on a 15x15 board
    _N_BYTES = (_N_CELLS + 7) // 8      # bytes needed to hold one bit per cell

    def __init__(self, source_data):
        """
        :param source_data: path (str) of a pickled cache, or an already
            loaded dict ``{((black_bitmap, white_bitmap), my_player): score}``
        """
        # 1. Load the raw cache.
        if isinstance(source_data, str):
            print(f"正在读取文件: {source_data} ...")
            # NOTE: pickle.load can execute arbitrary code; only open files
            # that were produced by this notebook.
            with open(source_data, 'rb') as f:
                self.hash_table = pickle.load(f)
        else:
            self.hash_table = source_data

        total_len = len(self.hash_table)
        print(f"数据集共 {total_len} 条，正在预处理到内存...")

        # 2. Pre-allocate the output arrays (cheaper than growing lists).
        self.my_boards = np.zeros((total_len, self._N_CELLS), dtype=np.float32)
        self.opp_boards = np.zeros((total_len, self._N_CELLS), dtype=np.float32)
        self.targets = np.zeros((total_len, 1), dtype=np.float32)

        # 3. Expand every cache entry into one row.
        keys = list(self.hash_table.keys())

        for idx, key in enumerate(tqdm(keys, desc="预处理数据")):
            bitmap_tuple, my_player = key
            black_bitmap, white_bitmap = bitmap_tuple
            adv_score = self.hash_table[key]

            black_vec = self._bitmap_to_vector_fast(black_bitmap)
            white_vec = self._bitmap_to_vector_fast(white_bitmap)

            # "my" board is the colour of the player the score was computed for.
            if my_player == 1:
                self.my_boards[idx] = black_vec
                self.opp_boards[idx] = white_vec
            else:
                self.my_boards[idx] = white_vec
                self.opp_boards[idx] = black_vec

            self.targets[idx] = adv_score

        # 4. Wrap as CPU tensors; batches are moved to the GPU by the trainer.
        self.my_boards = torch.from_numpy(self.my_boards)
        self.opp_boards = torch.from_numpy(self.opp_boards)
        self.targets = torch.from_numpy(self.targets)

        print("数据预处理完成，已全部加载至内存。")

    def _bitmap_to_vector_fast(self, bitmap):
        """
        Expand an integer bitmap into a float32 vector of length 225.

        Element ``k`` is 1.0 when bit ``k`` of ``bitmap`` is set, else 0.0;
        bits at position 225 and above are ignored.
        """
        # Mask to the board bits so to_bytes() cannot overflow, then let numpy
        # unpack all bits at once instead of testing 225 bits in a Python loop.
        masked = int(bitmap) & ((1 << self._N_CELLS) - 1)
        raw = np.frombuffer(masked.to_bytes(self._N_BYTES, 'little'), dtype=np.uint8)
        # bitorder='little' emits each byte LSB-first, so output[k] is bit k.
        bits = np.unpackbits(raw, bitorder='little')[:self._N_CELLS]
        return bits.astype(np.float32)

    def __len__(self):
        return len(self.my_boards)

    def __getitem__(self, idx):
        # Plain tensor indexing; returns (my_board, opp_board, target).
        return self.my_boards[idx], self.opp_boards[idx], self.targets[idx]


def train_model_in_memory(source_data, epochs=100, batch_size=4096, lr=0.001):
    """
    Train a ``GMKnn`` regressor on the fully in-memory dataset.

    The data is split 80/20 into train/validation parts, optimised with Adam
    on an MSE loss, and the weights with the lowest validation loss so far are
    written to 'best_model.pth' after each improving epoch.

    :param source_data: pickle path or dict accepted by ``GomokuFullDataset``
    :param epochs: number of passes over the training split
    :param batch_size: batch size for both loaders
    :param lr: Adam learning rate
    :return: the model as it stands after the last epoch
    """
    # 1. Build the dataset once (slow) and split it 8:2.
    full_set = GomokuFullDataset(source_data)
    n_train = int(0.8 * len(full_set))
    n_val = len(full_set) - n_train
    train_part, val_part = torch.utils.data.random_split(full_set, [n_train, n_val])

    # Data already lives in RAM, so no worker processes; pinned memory is
    # requested for the host-to-device copies.
    loader_opts = dict(batch_size=batch_size, num_workers=0, pin_memory=True)
    train_loader = DataLoader(train_part, shuffle=True, **loader_opts)
    val_loader = DataLoader(val_part, shuffle=False, **loader_opts)

    # 2. Model, optimiser, loss.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"使用设备: {device}")

    model = GMKnn().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    # 3. Epoch loop.
    best_val_loss = float('inf')

    for epoch in range(epochs):
        # ---------- training pass ----------
        model.train()
        loss_sum, rows_seen = 0.0, 0

        progress = tqdm(
            train_loader,
            desc=f"Epoch {epoch+1}/{epochs} [Train]",
            leave=True,
            ncols=100
        )

        for mine, theirs, label in progress:
            mine = mine.to(device, non_blocking=True)
            theirs = theirs.to(device, non_blocking=True)
            label = label.to(device, non_blocking=True)

            optimizer.zero_grad()
            step = criterion(model(mine, theirs), label)
            step.backward()
            optimizer.step()

            # Weight each batch mean by its row count so the epoch figure is
            # a true per-sample average.
            step_loss = step.item()
            n_rows = mine.size(0)
            loss_sum += step_loss * n_rows
            rows_seen += n_rows

            progress.set_postfix({'loss': f'{step_loss:.4f}'})

        train_loss = loss_sum / rows_seen

        # ---------- validation pass (no gradients) ----------
        model.eval()
        val_sum, val_rows = 0.0, 0

        with torch.no_grad():
            for mine, theirs, label in val_loader:
                mine = mine.to(device, non_blocking=True)
                theirs = theirs.to(device, non_blocking=True)
                label = label.to(device, non_blocking=True)

                n_rows = mine.size(0)
                val_sum += criterion(model(mine, theirs), label).item() * n_rows
                val_rows += n_rows

        val_loss = val_sum / val_rows

        # Checkpoint whenever validation improves.
        improved = val_loss < best_val_loss
        if improved:
            best_val_loss = val_loss
            torch.save(model.state_dict(), 'best_model.pth')
        saved_msg = " [Saved Best]" if improved else ""

        print(f"Epoch {epoch+1}: Train Loss={train_loss:.5f}, Val Loss={val_loss:.5f}{saved_msg}")

    print(f"\n训练完成! 最佳验证损失: {best_val_loss:.5f}")
    return model

# ============ 使用方法 ============

if __name__ == '__main__':
    # GMKnn must already be defined (it is in the first cell of this notebook).

    data_path = "boombianv1.pkl"

    if not os.path.exists(data_path):
        print(f"找不到文件 {data_path}")
    else:
        # Single entry point: loads, preprocesses and trains.
        model = train_model_in_memory(data_path, epochs=100, batch_size=32768, lr=0.001)

正在读取文件: boombianv1.pkl ...
数据集共 2018545 条，正在预处理到内存...


预处理数据: 100%|██████████| 2018545/2018545 [01:15<00:00, 26759.88it/s]


数据预处理完成，已全部加载至内存。
使用设备: cuda


Epoch 1/100 [Train]: 100%|████████████████████████████| 50/50 [00:23<00:00,  2.12it/s, loss=67.3151]


Epoch 1: Train Loss=78.56491, Val Loss=66.91256 [Saved Best]


Epoch 2/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.07it/s, loss=42.2140]


Epoch 2: Train Loss=54.16328, Val Loss=42.04474 [Saved Best]


Epoch 3/100 [Train]: 100%|████████████████████████████| 50/50 [00:23<00:00,  2.11it/s, loss=25.4324]


Epoch 3: Train Loss=32.77551, Val Loss=24.90668 [Saved Best]


Epoch 4/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.08it/s, loss=17.8170]


Epoch 4: Train Loss=21.01022, Val Loss=18.19727 [Saved Best]


Epoch 5/100 [Train]: 100%|████████████████████████████| 50/50 [00:23<00:00,  2.09it/s, loss=15.5333]


Epoch 5: Train Loss=16.44383, Val Loss=15.36955 [Saved Best]


Epoch 6/100 [Train]: 100%|████████████████████████████| 50/50 [00:23<00:00,  2.10it/s, loss=12.4162]


Epoch 6: Train Loss=13.92848, Val Loss=13.06320 [Saved Best]


Epoch 7/100 [Train]: 100%|████████████████████████████| 50/50 [00:23<00:00,  2.11it/s, loss=11.7969]


Epoch 7: Train Loss=12.16396, Val Loss=11.59672 [Saved Best]


Epoch 8/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.05it/s, loss=10.1747]


Epoch 8: Train Loss=10.86454, Val Loss=10.37467 [Saved Best]


Epoch 9/100 [Train]: 100%|█████████████████████████████| 50/50 [00:24<00:00,  2.08it/s, loss=8.9761]


Epoch 9: Train Loss=9.80364, Val Loss=9.59408 [Saved Best]


Epoch 10/100 [Train]: 100%|████████████████████████████| 50/50 [00:23<00:00,  2.09it/s, loss=8.4997]


Epoch 10: Train Loss=9.28725, Val Loss=8.96507 [Saved Best]


Epoch 11/100 [Train]: 100%|████████████████████████████| 50/50 [00:23<00:00,  2.13it/s, loss=8.3477]


Epoch 11: Train Loss=8.36169, Val Loss=8.20713 [Saved Best]


Epoch 12/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.05it/s, loss=7.1651]


Epoch 12: Train Loss=7.82718, Val Loss=7.81403 [Saved Best]


Epoch 13/100 [Train]: 100%|████████████████████████████| 50/50 [00:23<00:00,  2.14it/s, loss=7.1164]


Epoch 13: Train Loss=7.41888, Val Loss=7.25892 [Saved Best]


Epoch 14/100 [Train]: 100%|████████████████████████████| 50/50 [00:23<00:00,  2.12it/s, loss=6.9053]


Epoch 14: Train Loss=6.92369, Val Loss=7.12117 [Saved Best]


Epoch 15/100 [Train]: 100%|████████████████████████████| 50/50 [00:23<00:00,  2.15it/s, loss=6.7281]


Epoch 15: Train Loss=6.52523, Val Loss=6.52380 [Saved Best]


Epoch 16/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.07it/s, loss=5.8206]


Epoch 16: Train Loss=6.31690, Val Loss=6.13713 [Saved Best]


Epoch 17/100 [Train]: 100%|████████████████████████████| 50/50 [00:23<00:00,  2.13it/s, loss=6.0211]


Epoch 17: Train Loss=5.87979, Val Loss=5.93348 [Saved Best]


Epoch 18/100 [Train]: 100%|████████████████████████████| 50/50 [00:23<00:00,  2.11it/s, loss=6.2249]


Epoch 18: Train Loss=5.72068, Val Loss=5.60627 [Saved Best]


Epoch 19/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.02it/s, loss=5.3069]


Epoch 19: Train Loss=5.32507, Val Loss=5.35452 [Saved Best]


Epoch 20/100 [Train]: 100%|████████████████████████████| 50/50 [00:23<00:00,  2.08it/s, loss=4.7012]


Epoch 20: Train Loss=5.14106, Val Loss=5.11763 [Saved Best]


Epoch 21/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.08it/s, loss=4.8927]


Epoch 21: Train Loss=5.24192, Val Loss=4.90256 [Saved Best]


Epoch 22/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.04it/s, loss=4.0741]


Epoch 22: Train Loss=4.64429, Val Loss=4.86642 [Saved Best]


Epoch 23/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.07it/s, loss=4.1835]


Epoch 23: Train Loss=4.49165, Val Loss=4.54540 [Saved Best]


Epoch 24/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.06it/s, loss=4.4304]


Epoch 24: Train Loss=4.31059, Val Loss=4.34428 [Saved Best]


Epoch 25/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.06it/s, loss=4.0232]


Epoch 25: Train Loss=4.20810, Val Loss=4.42883


Epoch 26/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.03it/s, loss=3.5496]


Epoch 26: Train Loss=4.11320, Val Loss=4.07344 [Saved Best]


Epoch 27/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.06it/s, loss=3.8232]


Epoch 27: Train Loss=3.85751, Val Loss=3.88247 [Saved Best]


Epoch 28/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.05it/s, loss=4.1411]


Epoch 28: Train Loss=3.75306, Val Loss=3.88635


Epoch 29/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.07it/s, loss=6.6513]


Epoch 29: Train Loss=6.60326, Val Loss=6.00243


Epoch 30/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.08it/s, loss=3.6389]


Epoch 30: Train Loss=4.40655, Val Loss=3.79070 [Saved Best]


Epoch 31/100 [Train]: 100%|████████████████████████████| 50/50 [00:23<00:00,  2.09it/s, loss=3.5550]


Epoch 31: Train Loss=3.53996, Val Loss=3.58719 [Saved Best]


Epoch 32/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.04it/s, loss=3.4886]


Epoch 32: Train Loss=3.39232, Val Loss=3.45866 [Saved Best]


Epoch 33/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.07it/s, loss=3.1652]


Epoch 33: Train Loss=3.29727, Val Loss=3.36661 [Saved Best]


Epoch 34/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.08it/s, loss=3.3964]


Epoch 34: Train Loss=3.21640, Val Loss=3.29223 [Saved Best]


Epoch 35/100 [Train]: 100%|████████████████████████████| 50/50 [00:23<00:00,  2.08it/s, loss=2.6068]


Epoch 35: Train Loss=3.14709, Val Loss=3.22519 [Saved Best]


Epoch 36/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.08it/s, loss=3.2015]


Epoch 36: Train Loss=3.06985, Val Loss=3.14022 [Saved Best]


Epoch 37/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.05it/s, loss=2.9384]


Epoch 37: Train Loss=2.99381, Val Loss=3.06751 [Saved Best]


Epoch 38/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.07it/s, loss=2.8302]


Epoch 38: Train Loss=2.92439, Val Loss=2.99695 [Saved Best]


Epoch 39/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.05it/s, loss=2.9503]


Epoch 39: Train Loss=2.86372, Val Loss=2.92559 [Saved Best]


Epoch 40/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.06it/s, loss=2.9214]


Epoch 40: Train Loss=2.78456, Val Loss=2.91554 [Saved Best]


Epoch 41/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.07it/s, loss=2.5616]


Epoch 41: Train Loss=2.72982, Val Loss=2.79824 [Saved Best]


Epoch 42/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.06it/s, loss=2.4026]


Epoch 42: Train Loss=2.65321, Val Loss=2.73029 [Saved Best]


Epoch 43/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.06it/s, loss=2.2304]


Epoch 43: Train Loss=2.59392, Val Loss=2.66048 [Saved Best]


Epoch 44/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.07it/s, loss=2.5844]


Epoch 44: Train Loss=2.54373, Val Loss=2.77199


Epoch 45/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.04it/s, loss=2.2031]


Epoch 45: Train Loss=2.50690, Val Loss=2.57284 [Saved Best]


Epoch 46/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.03it/s, loss=2.1322]


Epoch 46: Train Loss=2.41497, Val Loss=2.47763 [Saved Best]


Epoch 47/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.03it/s, loss=9.9459]


Epoch 47: Train Loss=20.12816, Val Loss=9.83564


Epoch 48/100 [Train]: 100%|████████████████████████████| 50/50 [00:24<00:00,  2.05it/s, loss=5.6010]


Epoch 48: Train Loss=7.20210, Val Loss=5.86133


Epoch 49/100 [Train]: 100%|████████████████████████████| 50/50 [00:23<00:00,  2.10it/s, loss=4.3870]


Epoch 49: Train Loss=5.16383, Val Loss=4.78029


Epoch 50/100 [Train]: 100%|████████████████████████████| 50/50 [00:23<00:00,  2.11it/s, loss=4.0874]


Epoch 50: Train Loss=4.39850, Val Loss=4.24029


Epoch 51/100 [Train]: 100%|████████████████████████████| 50/50 [00:23<00:00,  2.10it/s, loss=3.7613]


Epoch 51: Train Loss=3.95638, Val Loss=3.86935


Epoch 52/100 [Train]: 100%|████████████████████████████| 50/50 [00:23<00:00,  2.10it/s, loss=3.6469]


Epoch 52: Train Loss=3.63519, Val Loss=3.59031


Epoch 53/100 [Train]: 100%|████████████████████████████| 50/50 [00:22<00:00,  2.21it/s, loss=3.7216]


Epoch 53: Train Loss=3.39047, Val Loss=3.38753


Epoch 54/100 [Train]:  10%|██▉                          | 5/50 [00:02<00:21,  2.05it/s, loss=3.3573]


KeyboardInterrupt: 

In [6]:
import random
model = GMKnn()  # fresh network with the architecture used for training
# map_location='cpu': the checkpoint was saved from a CUDA model, so without it
# loading fails on a machine without a GPU. The model is moved to the
# evaluation device further down in this cell.
model.load_state_dict(torch.load('best_model.pth', map_location='cpu'))
dataset = GomokuFullDataset("boombianv1.pkl")  # rebuild the in-memory dataset
def evaluate_random_samples(model, dataset, count=15, device='cuda'):
    """
    Print predictions next to labels for a random sample of the dataset.

    :param model: callable ``model(my_board, opp_board)`` returning one score
        per row; it must already live on ``device``
    :param dataset: indexable collection whose items are
        ``(my_board, opp_board, target)`` tensors
    :param count: number of distinct samples to draw; clamped to
        ``[0, len(dataset)]`` so an oversized request no longer raises
    :param device: device the inputs are moved to before the forward pass
    :return: None (results are printed)
    """
    model.eval()
    total_len = len(dataset)

    # random.sample raises if asked for more items than exist (or fewer than 0).
    n_samples = max(0, min(count, total_len))
    random_indices = random.sample(range(total_len), n_samples)

    print(f"\n{'='*20} 随机抽样评估 (共 {n_samples} 条) {'='*20}")
    print(f"{'索引':<8} | {'预测分数 (Pred)':<15} | {'真实分数 (Label)':<15} | {'误差 (Diff)':<15}")
    print("-" * 65)

    squared_error_sum = 0.0

    with torch.no_grad():
        for idx in random_indices:
            my_board, opp_board, target = dataset[idx]

            # Add the batch dimension: (225,) -> (1, 225).
            my_input = my_board.unsqueeze(0).to(device)
            opp_input = opp_board.unsqueeze(0).to(device)
            target_val = target.item()

            pred_val = model(my_input, opp_input).item()

            diff = abs(pred_val - target_val)
            squared_error_sum += diff ** 2

            print(f"{idx:<8} | {pred_val:<15.4f} | {target_val:<15.4f} | {diff:<15.4f}")

    # With nothing sampled there is no mean to report; show nan instead of
    # dividing by zero.
    avg_mse = squared_error_sum / n_samples if n_samples else float('nan')
    print("-" * 65)
    print(f"抽样平均误差 (MSE): {avg_mse:.6f}")
    print(f"{'='*60}\n")

# ============ Usage example ============

# Assumes training has just finished and both `model` and `dataset` are in memory.
# Without a dataset, re-create it: dataset = GomokuFullDataset("boombianv1.pkl")

# Make sure the model is on the right device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# If the model is loaded from a file instead:
# model.load_state_dict(torch.load('best_model.pth'))

# Run the evaluation
evaluate_random_samples(model, dataset, count=15, device=device)

正在读取文件: boombianv1.pkl ...
数据集共 2018545 条，正在预处理到内存...


预处理数据: 100%|██████████| 2018545/2018545 [01:11<00:00, 28216.50it/s]


数据预处理完成，已全部加载至内存。

索引       | 预测分数 (Pred)     | 真实分数 (Label)    | 误差 (Diff)      
-----------------------------------------------------------------
871686   | -1.0264         | 0.0000          | 1.0264         
693869   | 0.1447          | 0.0000          | 0.1447         
1225103  | -19.9871        | -20.0000        | 0.0129         
1556179  | -0.0949         | 0.0000          | 0.0949         
1329479  | -0.7310         | -1.0000         | 0.2690         
703037   | -0.1127         | 0.0000          | 0.1127         
1677977  | -0.0321         | 0.0000          | 0.0321         
380001   | -19.9245        | -20.0000        | 0.0755         
215180   | 0.7819          | 0.0000          | 0.7819         
103131   | 0.9337          | 0.0000          | 0.9337         
486863   | -0.2126         | 0.0000          | 0.2126         
1241893  | 0.6019          | -1.0000         | 1.6019         
1047971  | -0.9436         | -1.0000         | 0.0564         
848235   | 0.3536          | 0.00