In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class GMKnn(nn.Module):
    """Two-branch MLP that regresses one scalar from a pair of flattened boards.

    Each 225-element (15*15) input passes through its own 128-unit linear
    layer; the two embeddings are concatenated and reduced 256 -> 128 -> 32 -> 1.
    """

    def __init__(self):
        super().__init__()
        board_cells = 15 * 15
        # Layers are created in the original order so seeded initialisation is unchanged.
        self.myin = nn.Linear(board_cells, 128)
        self.oppin = nn.Linear(board_cells, 128)
        self.fc1 = nn.Linear(256, 128)
        self.fc2 = nn.Linear(128, 32)
        self.q = nn.Linear(32, 1)

    def forward(self, board1, board2):
        """Map two (batch, 225) tensors to a (batch, 1) tensor.

        `board1` feeds the `myin` branch and `board2` feeds the `oppin` branch.
        """
        own_feat = F.relu(self.myin(board1))
        opp_feat = F.relu(self.oppin(board2))
        hidden = torch.cat([own_feat, opp_feat], dim=1)
        for layer in (self.fc1, self.fc2):
            hidden = F.relu(layer(hidden))
        return self.q(hidden)

## 让mlp学t+0优势函数

In [2]:
import pickle
import os
import numpy as np

# Global position cache: (board_bitmap_tuple, my_player) -> advantage score
# board_bitmap_tuple = (black_bitmap_int, white_bitmap_int)
board_hash_table = {}

def board_to_bitmap(board):
    """Encode a board as two Python-int bitmaps, one per stone colour.

    Cell values: 1 = black stone, 2 = white stone, anything else = empty.
    Bit ``pos`` of a bitmap corresponds to flattened (row-major) cell ``pos``,
    which is the same layout produced by ``board_to_bitmap_fast`` and consumed
    by ``bitmap_to_board``. (The previous string-join implementation put cell 0
    in the most significant bit, so its output could not be decoded by
    ``bitmap_to_board``.)

    Args:
        board: array-like of stone codes (15x15 in this notebook).

    Returns:
        (black_bitmap, white_bitmap) as arbitrary-precision Python ints.
    """
    cells = np.asarray(board)

    def _pack(mask):
        # Little-endian bit order inside each byte plus little-endian byte
        # order means flattened index i lands on bit i of the resulting int.
        packed = np.packbits(mask.ravel(), bitorder='little')
        return int.from_bytes(packed.tobytes(), 'little')

    return (_pack(cells == 1), _pack(cells == 2))

def board_to_bitmap_fast(board):
    """Encode a board as (black_bitmap, white_bitmap) Python ints.

    Bit ``pos`` of each bitmap is set when the flattened cell ``pos`` holds
    that colour (1 = black, 2 = white).
    """
    cells = board.flatten()

    def _bits_for(stone):
        mask = 0
        for idx in np.flatnonzero(cells == stone):
            # int() keeps the shift in arbitrary-precision Python ints
            # instead of fixed-width NumPy integers.
            mask |= 1 << int(idx)
        return mask

    return (_bits_for(1), _bits_for(2))

def bitmap_to_board(black_bitmap, white_bitmap, board_size=15):
    """Decode two bitmaps back into a (board_size, board_size) int8 board.

    Bit ``pos`` maps to flattened cell ``pos``. A cell becomes 1 when the black
    bit is set, otherwise 2 when the white bit is set, otherwise 0. The cells
    are visited one by one in a Python loop.
    """
    n_cells = board_size * board_size
    cells = np.zeros(n_cells, dtype=np.int8)

    for idx in range(n_cells):
        bit = 1 << idx
        # Black takes precedence if both bitmaps claim the same cell.
        cells[idx] = 1 if black_bitmap & bit else (2 if white_bitmap & bit else 0)

    return cells.reshape(board_size, board_size)

def board_to_hash(board, my_player):
    """Build the cache key ((black_bitmap, white_bitmap), my_player) for a board."""
    return (board_to_bitmap_fast(board), my_player)

def store_board_advantage(board, my_player, adv_f):
    """Record `adv_f` in the global cache under the (bitmaps, my_player) key."""
    cache_key = (board_to_bitmap_fast(board), my_player)
    board_hash_table[cache_key] = adv_f

def get_board_advantage(board, my_player):
    """Look up the cached advantage for this board/player; None when absent."""
    cache_key = (board_to_bitmap_fast(board), my_player)
    return board_hash_table.get(cache_key)

def has_board_in_cache(board, my_player):
    """Return True when this board/player pair already has a cached value."""
    cache_key = (board_to_bitmap_fast(board), my_player)
    return cache_key in board_hash_table

def save_board_hash_table(filename='board_hash_table.pkl'):
    """Pickle the global cache to `filename`, then print entry count and file size."""
    with open(filename, 'wb') as out_file:
        pickle.dump(board_hash_table, out_file)
    print(f"已保存 {len(board_hash_table)} 个棋盘状态到 {filename}")

    # Report the on-disk size in the largest unit that applies.
    n_bytes = os.path.getsize(filename)
    kib = 1024
    mib = 1024 * 1024
    if n_bytes >= mib:
        size_str = f"{n_bytes / mib:.2f} MB"
    elif n_bytes >= kib:
        size_str = f"{n_bytes / kib:.2f} KB"
    else:
        size_str = f"{n_bytes} B"
    print(f"文件大小: {size_str}")

def load_board_hash_table(filename='board_hash_table.pkl'):
    """Replace the global cache with the pickled table stored in `filename`.

    Prints a notice and leaves the cache untouched when the file is missing.
    """
    global board_hash_table

    if not os.path.exists(filename):
        print(f"文件 {filename} 不存在")
        return

    # NOTE(review): pickle.load can execute arbitrary code; only load files you created.
    with open(filename, 'rb') as in_file:
        board_hash_table = pickle.load(in_file)
    print(f"已从 {filename} 加载 {len(board_hash_table)} 个棋盘状态")

    # Report the on-disk size in the largest unit that applies.
    n_bytes = os.path.getsize(filename)
    kib = 1024
    mib = 1024 * 1024
    if n_bytes >= mib:
        size_str = f"{n_bytes / mib:.2f} MB"
    elif n_bytes >= kib:
        size_str = f"{n_bytes / kib:.2f} KB"
    else:
        size_str = f"{n_bytes} B"
    print(f"文件大小: {size_str}")

def clear_board_hash_table():
    """Rebind the global cache to a fresh empty dict and print a confirmation.

    The previous dict object is not mutated, so other references to it keep
    their contents.
    """
    global board_hash_table
    board_hash_table = dict()
    print("已清空棋盘哈希表")

def get_memory_usage():
    """Return a rough, human-readable size estimate for the global cache.

    The estimate is simply 122 bytes per entry, formatted as B / KB / MB.
    """
    entry_count = len(board_hash_table)
    if entry_count == 0:
        return "0 B"

    approx_bytes = entry_count * 122
    kib = 1024
    mib = 1024 * 1024

    if approx_bytes >= mib:
        return f"{approx_bytes / mib:.2f} MB"
    if approx_bytes >= kib:
        return f"{approx_bytes / kib:.2f} KB"
    return f"{approx_bytes} B"

In [3]:
from gamec import *
from display import GomokuUI
from copy import deepcopy
import pygame
import numpy as np
import numpy as np
import copy
import time

# Stand-in for infinity: minimax returns +/-INF for won/lost positions and
# basic_ai_move passes -INF/INF as the initial alpha/beta bounds.
INF = 999999999

def advantage_f(game,l3, l4, my_player):
    """Heuristic position score from `my_player`'s point of view.

    The score is 20 * (own l4 count - opponent l4 count)
               +  1 * (own l3 count - opponent l3 count),
    where `l3` / `l4` are dicts keyed by player id (missing keys count as 0).
    The l4 term is weighted heavily on purpose; the original notes suggest
    raising it even further.

    Side effect: the score is also written to the global cache for
    `game.board` via `store_board_advantage`.
    """
    rival = 3 - my_player
    four_diff = l4.get(my_player, 0) - l4.get(rival, 0)
    three_diff = l3.get(my_player, 0) - l3.get(rival, 0)

    score = four_diff * 20 + three_diff * 1
    store_board_advantage(game.board, my_player, score)
    return score


def basic_ai_move(game: "GomokuCore", my_player, depth=3, epsilon=0.3, top_k=5):
    """Pick a move for `my_player` using minimax scores plus epsilon-greedy exploration.

    Every position from `game.recommand_positions()` is tried on a deep copy of
    the game. A move that wins immediately is returned at once. Otherwise each
    move is scored with `minimax(..., depth - 1, ...)` from the opponent's turn.

    Args:
        game: current game state (not modified; moves are tried on copies).
        my_player: id of the player to move (1 or 2).
        depth: search depth; the root move consumes one ply.
        epsilon: probability in [0, 1] of exploring instead of playing the best move.
        top_k: when exploring, choose uniformly among the `top_k` best-scored
            moves. Values below 1 are treated as 1.

    Returns:
        (row, col) of the chosen move.

    Raises:
        ValueError: if no candidate position can be played. Previously this
            surfaced as a bare IndexError/ValueError from list indexing.
    """
    scored_moves = []  # entries: (score, (r, c))

    for r, c in game.recommand_positions():
        next_game = copy.deepcopy(game)
        if not next_game.place_stone(r, c):
            continue
        # An immediate win needs no search and no exploration.
        if next_game.winner == my_player:
            return (r, c)
        score = minimax(next_game, depth - 1, -INF, INF, False, my_player)
        scored_moves.append((score, (r, c)))

    if not scored_moves:
        raise ValueError("basic_ai_move: no playable candidate move is available")

    # Best score first; the sort is stable, so ties keep candidate order.
    scored_moves.sort(key=lambda x: x[0], reverse=True)

    if np.random.random() < epsilon:
        # Explore: uniform choice among the best few moves (at least one).
        pool = scored_moves[:max(1, top_k)]
        _, best_move = pool[np.random.randint(len(pool))]
    else:
        # Exploit: play the top-scored move.
        _, best_move = scored_moves[0]

    return best_move


def minimax(game: "GomokuCore", depth, alpha, beta, is_maximizing, my_player, top_k=15):
    """Alpha-beta minimax restricted to the `top_k` best-ordered moves per node.

    All scores are from `my_player`'s point of view. Every call to
    `advantage_f` (leaf evaluation and move ordering) also stores the
    evaluated board in the global cache.

    Args:
        game: position to search (not modified; children are deep copies).
        depth: remaining plies; 0 means evaluate statically.
        alpha, beta: current alpha-beta window.
        is_maximizing: True when the side to move is `my_player`.
        my_player: id (1 or 2) of the player the score is computed for.
        top_k: beam width, i.e. how many ordered children are searched.

    Returns:
        INF / -INF for a win / loss of `my_player`, 0 for a draw or when no
        move can be played, otherwise the backed-up heuristic score.
    """
    opponent = 3 - my_player

    # 1. Terminal position.
    if game.game_over:
        if game.winner == my_player:
            return INF
        elif game.winner == opponent:
            return -INF
        else:
            return 0

    # 2. Depth limit reached: static evaluation.
    if depth == 0:
        return advantage_f(game, game.l3_count, game.l4_count, my_player)

    # 3. Expand every candidate and give it an ordering score.
    scored_moves = []  # entries: (ordering_score, child_game)
    for r, c in game.recommand_positions():
        # A copy per child is needed because place_stone mutates the game.
        next_game = copy.deepcopy(game)
        if not next_game.place_stone(r, c):
            continue

        if next_game.game_over:
            # Order finished games exactly as the terminal check above scores
            # them. A draw used to be ordered as -INF (an opponent win), which
            # could push it out of, or to the front of, the top_k beam.
            if next_game.winner == my_player:
                current_score = INF
            elif next_game.winner == opponent:
                current_score = -INF
            else:
                current_score = 0
        else:
            current_score = advantage_f(next_game, next_game.l3_count, next_game.l4_count, my_player)

        scored_moves.append((current_score, next_game))

    # 4. No playable move: treat as a draw.
    if not scored_moves:
        return 0

    if is_maximizing:
        # Max node: most promising (highest) scores first.
        scored_moves.sort(key=lambda x: x[0], reverse=True)
        best_moves = scored_moves[:top_k]

        max_eval = -INF
        for _, next_game_state in best_moves:
            # The stored children are already independent copies.
            eval_score = minimax(next_game_state, depth - 1, alpha, beta, False, my_player, top_k)
            max_eval = max(max_eval, eval_score)
            alpha = max(alpha, eval_score)
            if beta <= alpha:
                break
        return max_eval

    else:
        # Min node: the opponent prefers the lowest scores, so ascending order.
        scored_moves.sort(key=lambda x: x[0], reverse=False)
        best_moves = scored_moves[:top_k]

        min_eval = INF
        for _, next_game_state in best_moves:
            eval_score = minimax(next_game_state, depth - 1, alpha, beta, True, my_player, top_k)
            min_eval = min(min_eval, eval_score)
            beta = min(beta, eval_score)
            if beta <= alpha:
                break
        return min_eval

pygame 2.6.1 (SDL 2.28.4, Python 3.11.11)
Hello from the pygame community. https://www.pygame.org/contribute.html


  from pkg_resources import resource_stream, resource_exists


In [9]:
def main_ai_vs_ai(epsilon1=0.1, epsilon2=0.1, depth1=4, depth2=4, show_ui=True, max_games=100):
    """Play AI-vs-AI games and return the win/draw tally.

    Args:
        epsilon1: exploration rate of AI1 (player 1, black).
        epsilon2: exploration rate of AI2 (player 2, white).
        depth1: search depth of AI1.
        depth2: search depth of AI2.
        show_ui: render each move with pygame; closing the window stops early.
        max_games: number of games to play.

    Returns:
        dict {1: AI1 wins, 2: AI2 wins, 0: draws}. Any finished game whose
        winner is neither 1 nor 2 is counted as a draw, matching the printed
        message.
    """
    stats = {1: 0, 2: 0, 0: 0}  # player-1 wins, player-2 wins, draws

    ui = None
    clock = None

    try:
        if show_ui:
            ui = GomokuUI(board_size=15, cell_size=40)
            clock = pygame.time.Clock()

        for game_num in range(max_games):
            game = GomokuCore(board_size=15)
            move_count = 0

            print(f"\n========== 第 {game_num + 1} 局 ==========")

            while not game.game_over:
                if show_ui:
                    # Only the window-close event is handled.
                    for event in pygame.event.get():
                        if event.type == pygame.QUIT:
                            print(f"\n总战绩: AI1胜{stats[1]}局, AI2胜{stats[2]}局, 平局{stats[0]}局")
                            return stats  # pygame is shut down in `finally`

                # Let the side to move choose.
                current_player = game.current_player
                if current_player == 1:
                    row, col = basic_ai_move(game, my_player=1, depth=depth1, epsilon=epsilon1)
                    print(f"AI1 (黑方) 落子: ({row}, {col})")
                else:
                    row, col = basic_ai_move(game, my_player=2, depth=depth2, epsilon=epsilon2)
                    print(f"AI2 (白方) 落子: ({row}, {col})")

                game.place_stone(row, col)
                move_count += 1

                if show_ui:
                    ui.draw(
                        board_array=game.get_board(),
                        current_player=game.current_player,
                        game_over=game.game_over,
                        winner=game.winner,
                        l3_count=game.l3_count,
                        l4_count=game.l4_count,
                        last_move=game.last_move
                    )
                    clock.tick(5)  # cap at 5 moves per second so the game can be followed

            # The loop above only exits once the game is over.
            # Anything other than a win for 1 or 2 is tallied as a draw, so an
            # unexpected draw marker (e.g. None) cannot raise KeyError.
            outcome = game.winner if game.winner in (1, 2) else 0
            stats[outcome] += 1
            if outcome == 1:
                print(f"AI1 (黑方) 获胜! 用时 {move_count} 步")
            elif outcome == 2:
                print(f"AI2 (白方) 获胜! 用时 {move_count} 步")
            else:
                print(f"平局! 用时 {move_count} 步")

            print(f"当前战绩: AI1胜{stats[1]}局, AI2胜{stats[2]}局, 平局{stats[0]}局")
            print(f"缓存大小: {len(board_hash_table)} 个状态 占用内存: {get_memory_usage()}")
            if show_ui:
                time.sleep(0.2)
    finally:
        # Always release the window, even on early exit or an exception.
        if show_ui:
            pygame.quit()

    games_played = sum(stats.values())
    # Guard the division so max_games=0 reports 0.0% instead of crashing.
    win_rate = stats[1] / games_played * 100 if games_played else 0.0

    print(f"\n========== 对弈结束 ==========")
    print(f"总战绩: AI1胜{stats[1]}局, AI2胜{stats[2]}局, 平局{stats[0]}局")
    print(f"AI1胜率: {win_rate:.1f}%")
    print(f"缓存大小: {len(board_hash_table)} 个状态")

    return stats

# Run the self-play session; every board scored by advantage_f during the
# search is added to board_hash_table as a side effect.
main_ai_vs_ai(epsilon1=0.3, epsilon2=0.31, depth1=2, depth2=2, show_ui=True, max_games=100)


AI1 (黑方) 落子: (7, 7)
AI2 (白方) 落子: (7, 8)
AI1 (黑方) 落子: (8, 8)
AI2 (白方) 落子: (9, 9)
AI1 (黑方) 落子: (9, 7)
AI2 (白方) 落子: (8, 7)
AI1 (黑方) 落子: (7, 9)
AI2 (白方) 落子: (6, 9)
AI1 (黑方) 落子: (6, 10)
AI2 (白方) 落子: (9, 8)
AI1 (黑方) 落子: (10, 6)
AI1 (黑方) 获胜! 用时 11 步
当前战绩: AI1胜1局, AI2胜0局, 平局0局
缓存大小: 98362 个状态 占用内存: 11.44 MB

AI1 (黑方) 落子: (7, 6)
AI2 (白方) 落子: (7, 5)
AI1 (黑方) 落子: (8, 5)
AI2 (白方) 落子: (9, 4)
AI1 (黑方) 落子: (8, 6)
AI2 (白方) 落子: (8, 4)
AI1 (黑方) 落子: (7, 4)
AI2 (白方) 落子: (9, 6)
AI1 (黑方) 落子: (9, 3)
AI2 (白方) 落子: (6, 5)
AI1 (黑方) 落子: (9, 5)
AI2 (白方) 落子: (10, 4)
AI1 (黑方) 落子: (7, 3)
AI2 (白方) 落子: (8, 3)
AI1 (黑方) 落子: (6, 4)
AI2 (白方) 落子: (10, 5)
AI1 (黑方) 落子: (8, 2)
AI2 (白方) 落子: (11, 4)
AI1 (黑方) 落子: (6, 3)
AI2 (白方) 落子: (12, 4)
AI2 (白方) 获胜! 用时 20 步
当前战绩: AI1胜1局, AI2胜1局, 平局0局
缓存大小: 157340 个状态 占用内存: 18.31 MB

AI1 (黑方) 落子: (6, 7)
AI2 (白方) 落子: (7, 7)
AI1 (黑方) 落子: (6, 8)
AI2 (白方) 落子: (5, 8)
AI1 (黑方) 落子: (5, 7)
AI2 (白方) 落子: (7, 8)
AI1 (黑方) 落子: (6, 9)
AI2 (白方) 落子: (7, 9)
AI1 (黑方) 落子: (6, 6)
AI2 (白方) 落子: (5, 9)
AI1 (黑方) 落子:

{1: 24, 2: 7, 0: 0}

In [10]:
import pickle
import os


# Persist the positions collected during self-play for the training cell below.
save_board_hash_table("boombianv1.pkl")

已保存 2018545 个棋盘状态到 boombianv1.pkl
文件大小: 104.31 MB


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, IterableDataset
import numpy as np
import pickle
import os


class GomokuLazyDataset(Dataset):
    """Map-style dataset over the board cache that decodes samples on demand.

    Only the list of keys is materialised up front; each bitmap is expanded to
    a 225-element float vector inside ``__getitem__``.

    Each item is ``(my_board, opp_board, target)``:
        my_board / opp_board: float32 tensors of shape (225,), 1.0 where the
            respective side has a stone.
        target: float32 tensor of shape (1,) holding the cached score.
    """

    # 15 * 15 board cells, stored in the low 225 bits of each bitmap.
    _N_CELLS = 225
    # Bytes needed to hold 225 bits (ceil(225 / 8)).
    _N_BYTES = 29
    _CELL_MASK = (1 << 225) - 1

    def __init__(self, board_hash_table):
        # Keep keys only; vectors are built lazily.
        self.keys = list(board_hash_table.keys())
        self.hash_table = board_hash_table
        print(f"数据集大小: {len(self.keys)} 条 (懒加载模式)")

    def _bitmap_to_vector(self, bitmap):
        """Expand a bitmap int into a 225-element float32 vector (bit i -> index i).

        Uses np.unpackbits instead of a 225-step Python loop; the result is
        identical. Bits above index 224 are ignored.
        """
        raw = (int(bitmap) & self._CELL_MASK).to_bytes(self._N_BYTES, 'little')
        bits = np.unpackbits(
            np.frombuffer(raw, dtype=np.uint8),
            count=self._N_CELLS,
            bitorder='little',
        )
        # astype returns a fresh writable array, which torch.from_numpy needs.
        return bits.astype(np.float32)

    def __len__(self):
        return len(self.keys)

    def __getitem__(self, idx):
        key = self.keys[idx]
        bitmap_tuple, my_player = key
        black_bitmap, white_bitmap = bitmap_tuple
        adv_score = self.hash_table[key]

        black_vec = self._bitmap_to_vector(black_bitmap)
        white_vec = self._bitmap_to_vector(white_bitmap)

        # Orient the pair so the first vector is always the scored player's stones.
        if my_player == 1:
            my_board, opp_board = black_vec, white_vec
        else:
            my_board, opp_board = white_vec, black_vec

        return (
            torch.from_numpy(my_board),
            torch.from_numpy(opp_board),
            torch.tensor([adv_score], dtype=torch.float32)
        )


class GomokuStreamDataset(IterableDataset):
    """Iterable dataset that re-reads a pickled board cache on every pass.

    The whole pickle is loaded into memory once in ``__init__`` (to count the
    entries) and again at the start of each iteration, so this is not a true
    streaming reader. ``shuffle_buffer_size`` is stored but currently unused;
    all keys are shuffled at once.

    Each yielded item is ``(my_board, opp_board, target)`` with float32 tensors
    of shape (225,), (225,) and (1,).

    NOTE(review): pickle.load can execute arbitrary code; only point this at
    files you created yourself.
    """

    # 15 * 15 board cells, stored in the low 225 bits of each bitmap.
    _N_CELLS = 225
    # Bytes needed to hold 225 bits (ceil(225 / 8)).
    _N_BYTES = 29
    _CELL_MASK = (1 << 225) - 1

    def __init__(self, filename, shuffle_buffer_size=10000):
        self.filename = filename
        self.shuffle_buffer_size = shuffle_buffer_size

        # Count the entries up front so len() and progress bars work.
        with open(filename, 'rb') as f:
            self.length = len(pickle.load(f))
        print(f"数据集大小: {self.length} 条 (流式加载模式)")

    def _bitmap_to_vector(self, bitmap):
        """Expand a bitmap int into a 225-element float32 vector (bit i -> index i).

        Uses np.unpackbits instead of a 225-step Python loop; the result is
        identical. Bits above index 224 are ignored.
        """
        raw = (int(bitmap) & self._CELL_MASK).to_bytes(self._N_BYTES, 'little')
        bits = np.unpackbits(
            np.frombuffer(raw, dtype=np.uint8),
            count=self._N_CELLS,
            bitorder='little',
        )
        # astype returns a fresh writable array, which torch.from_numpy needs.
        return bits.astype(np.float32)

    def __iter__(self):
        # The file is reloaded on every pass.
        with open(self.filename, 'rb') as f:
            data = pickle.load(f)

        keys = list(data.keys())

        # With DataLoader workers each worker runs its own __iter__; give every
        # worker a disjoint slice (taken before shuffling, while the order is
        # still identical in all workers) so samples are not duplicated.
        worker = torch.utils.data.get_worker_info()
        if worker is not None:
            keys = keys[worker.id::worker.num_workers]

        np.random.shuffle(keys)

        for key in keys:
            bitmap_tuple, my_player = key
            black_bitmap, white_bitmap = bitmap_tuple
            adv_score = data[key]

            black_vec = self._bitmap_to_vector(black_bitmap)
            white_vec = self._bitmap_to_vector(white_bitmap)

            # Orient the pair so the first vector is always the scored player's stones.
            if my_player == 1:
                my_board, opp_board = black_vec, white_vec
            else:
                my_board, opp_board = white_vec, black_vec

            yield (
                torch.from_numpy(my_board),
                torch.from_numpy(opp_board),
                torch.tensor([adv_score], dtype=torch.float32)
            )

    def __len__(self):
        return self.length

from tqdm import tqdm

def _run_lazy_epoch(model, loader, criterion, device, desc, optimizer=None):
    """Run one pass over `loader`: train when `optimizer` is given, else evaluate.

    Returns the sample-weighted mean loss of the pass.
    """
    training = optimizer is not None
    model.train(training)  # model.train(False) is equivalent to model.eval()

    total_loss = 0.0
    seen = 0
    pbar = tqdm(loader, desc=desc, leave=True, ncols=120)

    with torch.set_grad_enabled(training):
        for my_board, opp_board, target in pbar:
            my_board = my_board.to(device, non_blocking=True)
            opp_board = opp_board.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)

            if training:
                optimizer.zero_grad()
            output = model(my_board, opp_board)
            loss = criterion(output, target)
            if training:
                loss.backward()
                optimizer.step()

            batch_loss = loss.item()
            # Weight by batch size so a short final batch does not skew the mean.
            total_loss += batch_loss * my_board.size(0)
            seen += my_board.size(0)

            pbar.set_postfix({
                'batch_loss': f'{batch_loss:.4f}',
                'avg_loss': f'{total_loss / seen:.4f}'
            })

    return total_loss / seen if seen else float('nan')


def train_model_lazy(board_hash_table, epochs=100, batch_size=4096, lr=0.001, num_workers=4):
    """Train GMKnn on the in-memory board cache with an 80/20 train/validation split.

    The state dict with the lowest validation loss is written to
    'best_model.pth' whenever it improves.

    Args:
        board_hash_table: dict {((black_bitmap, white_bitmap), my_player): score}.
        epochs: number of passes over the training split.
        batch_size: mini-batch size for both loaders.
        lr: Adam learning rate.
        num_workers: DataLoader worker processes for both loaders.

    Returns:
        The model as it stands after the final epoch (not necessarily the best
        checkpoint).

    Raises:
        ValueError: if the table is too small to leave any training samples.
    """
    # 1. Data (decoded lazily per sample).
    dataset = GomokuLazyDataset(board_hash_table)

    # 80/20 split.
    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size
    if train_size == 0:
        raise ValueError(
            f"train_model_lazy: {len(dataset)} sample(s) leave an empty training split"
        )
    train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"使用设备: {device}")

    loader_kwargs = dict(
        batch_size=batch_size,
        num_workers=num_workers,
        # Pinned host memory only helps host-to-GPU copies.
        pin_memory=(device.type == 'cuda'),
        # Keep workers alive between epochs for both loaders instead of
        # respawning the validation workers every epoch.
        persistent_workers=(num_workers > 0),
    )
    train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
    val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)

    # 2. Model, optimiser, loss.
    model = GMKnn().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    # 3. Training loop.
    best_val_loss = float('inf')

    for epoch in range(epochs):
        train_loss = _run_lazy_epoch(
            model, train_loader, criterion, device,
            desc=f"Epoch {epoch+1}/{epochs} [Train]",
            optimizer=optimizer,
        )
        val_loss = _run_lazy_epoch(
            model, val_loader, criterion, device,
            desc=f"Epoch {epoch+1}/{epochs} [Val]  ",
        )

        # Checkpoint whenever validation improves.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), 'best_model.pth')
            print(f"  ✓ 保存最佳模型 (val_loss: {val_loss:.4f})")

        print(f"  Epoch {epoch+1} 完成: Train Loss={train_loss:.4f}, Val Loss={val_loss:.4f}, Best={best_val_loss:.4f}\n")

    print(f"\n训练完成! 最佳验证损失: {best_val_loss:.4f}")
    return model


def train_model_stream(filename, epochs=100, batch_size=4096, lr=0.001):
    """Train GMKnn from a pickled board cache read through GomokuStreamDataset.

    The model weights are written to 'gomoku_nn.pth' after every epoch, so an
    interrupted run still leaves the latest weights on disk, and once more
    when training finishes.

    Args:
        filename: path of the pickled board cache.
        epochs: number of passes over the data.
        batch_size: mini-batch size.
        lr: Adam learning rate.

    Returns:
        The trained model.

    Raises:
        ValueError: if the dataset contains no samples.
    """
    dataset = GomokuStreamDataset(filename)
    if len(dataset) == 0:
        raise ValueError(f"train_model_stream: {filename} contains no samples")
    train_loader = DataLoader(dataset, batch_size=batch_size)

    # Ceiling division: number of batches including a short final one.
    batches_per_epoch = -(-len(dataset) // batch_size)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"使用设备: {device}")

    model = GMKnn().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        train_count = 0

        pbar = tqdm(
            train_loader,
            desc=f"Epoch {epoch+1}/{epochs}",
            total=batches_per_epoch,
            leave=True,
            ncols=120
        )

        for my_board, opp_board, target in pbar:
            my_board = my_board.to(device)
            opp_board = opp_board.to(device)
            target = target.to(device)

            optimizer.zero_grad()
            output = model(my_board, opp_board)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            # Weight by batch size so the short final batch does not skew the mean.
            train_loss += batch_loss * my_board.size(0)
            train_count += my_board.size(0)

            avg_loss = train_loss / train_count
            pbar.set_postfix({
                'batch_loss': f'{batch_loss:.4f}',
                'avg_loss': f'{avg_loss:.4f}'
            })

        train_loss /= train_count
        print(f"  Epoch {epoch+1} 完成: Train Loss={train_loss:.4f}\n")

        # Checkpoint every epoch: a full run takes hours and may be interrupted.
        torch.save(model.state_dict(), 'gomoku_nn.pth')

    torch.save(model.state_dict(), 'gomoku_nn.pth')
    print("模型已保存")
    return model

# ============ Usage ============

# Option 1: lazy loading (recommended; includes a validation split)
# load_board_hash_table("boombianv1.pkl")
# model = train_model_lazy(board_hash_table, epochs=100, batch_size=32768, lr=0.001, num_workers=os.cpu_count()-1)

# Option 2: "stream" loading from the pickle file.
# NOTE(review): GomokuStreamDataset still loads the whole pickle into memory on
# every pass, so this does not actually avoid holding the data in RAM.
model = train_model_stream("boombianv1.pkl", epochs=100, batch_size=40960, lr=0.001)

# train_model_stream already writes 'gomoku_nn.pth'; this repeats the save.
torch.save(model.state_dict(), 'gomoku_nn.pth')
print("模型已保存到 gomoku_nn.pth")

数据集大小: 2018545 条 (流式加载模式)
使用设备: cuda


Epoch 1/100: 100%|████████████████████████████████| 50/50 [02:04<00:00,  2.49s/it, batch_loss=61.3289, avg_loss=74.3755]


  Epoch 1 完成: Train Loss=74.3755



Epoch 2/100: 100%|████████████████████████████████| 50/50 [02:02<00:00,  2.45s/it, batch_loss=35.4882, avg_loss=47.3869]


  Epoch 2 完成: Train Loss=47.3869



Epoch 3/100: 100%|████████████████████████████████| 50/50 [02:00<00:00,  2.41s/it, batch_loss=23.1802, avg_loss=28.3957]


  Epoch 3 完成: Train Loss=28.3957



Epoch 4/100: 100%|████████████████████████████████| 50/50 [02:01<00:00,  2.42s/it, batch_loss=16.7041, avg_loss=19.3470]


  Epoch 4 完成: Train Loss=19.3470



Epoch 5/100: 100%|████████████████████████████████| 50/50 [02:00<00:00,  2.41s/it, batch_loss=13.9025, avg_loss=15.7018]


  Epoch 5 完成: Train Loss=15.7018



Epoch 6/100: 100%|████████████████████████████████| 50/50 [02:01<00:00,  2.44s/it, batch_loss=13.1158, avg_loss=13.5510]


  Epoch 6 完成: Train Loss=13.5510



Epoch 7/100: 100%|████████████████████████████████| 50/50 [02:01<00:00,  2.44s/it, batch_loss=10.9456, avg_loss=12.0464]


  Epoch 7 完成: Train Loss=12.0464



Epoch 8/100: 100%|█████████████████████████████████| 50/50 [02:01<00:00,  2.43s/it, batch_loss=9.9122, avg_loss=10.8385]


  Epoch 8 完成: Train Loss=10.8385



Epoch 9/100: 100%|██████████████████████████████████| 50/50 [02:01<00:00,  2.44s/it, batch_loss=9.5672, avg_loss=9.8748]


  Epoch 9 完成: Train Loss=9.8748



Epoch 10/100: 100%|█████████████████████████████████| 50/50 [02:01<00:00,  2.44s/it, batch_loss=8.5586, avg_loss=9.0947]


  Epoch 10 完成: Train Loss=9.0947



Epoch 11/100: 100%|█████████████████████████████████| 50/50 [02:01<00:00,  2.43s/it, batch_loss=7.8468, avg_loss=8.5435]


  Epoch 11 完成: Train Loss=8.5435



Epoch 12/100: 100%|█████████████████████████████████| 50/50 [02:02<00:00,  2.46s/it, batch_loss=8.6001, avg_loss=7.9847]


  Epoch 12 完成: Train Loss=7.9847



Epoch 13/100: 100%|█████████████████████████████████| 50/50 [02:03<00:00,  2.47s/it, batch_loss=7.2389, avg_loss=7.4372]


  Epoch 13 完成: Train Loss=7.4372



Epoch 14/100: 100%|█████████████████████████████████| 50/50 [02:02<00:00,  2.45s/it, batch_loss=6.9096, avg_loss=6.9557]


  Epoch 14 完成: Train Loss=6.9557



Epoch 15/100: 100%|█████████████████████████████████| 50/50 [02:02<00:00,  2.46s/it, batch_loss=6.4692, avg_loss=7.6567]


  Epoch 15 完成: Train Loss=7.6567



Epoch 16/100: 100%|█████████████████████████████████| 50/50 [02:02<00:00,  2.46s/it, batch_loss=5.6327, avg_loss=6.3696]


  Epoch 16 完成: Train Loss=6.3696



Epoch 17/100: 100%|█████████████████████████████████| 50/50 [02:03<00:00,  2.47s/it, batch_loss=6.0287, avg_loss=6.1122]


  Epoch 17 完成: Train Loss=6.1122



Epoch 18/100: 100%|█████████████████████████████████| 50/50 [02:02<00:00,  2.45s/it, batch_loss=5.5524, avg_loss=5.8756]


  Epoch 18 完成: Train Loss=5.8756



Epoch 19/100: 100%|█████████████████████████████████| 50/50 [02:02<00:00,  2.45s/it, batch_loss=6.1609, avg_loss=5.6751]


  Epoch 19 完成: Train Loss=5.6751



Epoch 20/100: 100%|█████████████████████████████████| 50/50 [02:02<00:00,  2.44s/it, batch_loss=5.8866, avg_loss=5.5232]


  Epoch 20 完成: Train Loss=5.5232



Epoch 21/100: 100%|█████████████████████████████████| 50/50 [02:01<00:00,  2.44s/it, batch_loss=6.2174, avg_loss=5.3768]


  Epoch 21 完成: Train Loss=5.3768



Epoch 22/100: 100%|█████████████████████████████████| 50/50 [02:02<00:00,  2.45s/it, batch_loss=5.2570, avg_loss=5.2190]


  Epoch 22 完成: Train Loss=5.2190



Epoch 23/100: 100%|█████████████████████████████████| 50/50 [02:02<00:00,  2.45s/it, batch_loss=4.6617, avg_loss=5.0601]


  Epoch 23 完成: Train Loss=5.0601



Epoch 24/100: 100%|█████████████████████████████████| 50/50 [02:02<00:00,  2.46s/it, batch_loss=4.7986, avg_loss=4.8607]


  Epoch 24 完成: Train Loss=4.8607



Epoch 25/100: 100%|█████████████████████████████████| 50/50 [02:03<00:00,  2.47s/it, batch_loss=4.3829, avg_loss=4.8992]


  Epoch 25 完成: Train Loss=4.8992



Epoch 26/100: 100%|█████████████████████████████████| 50/50 [02:03<00:00,  2.47s/it, batch_loss=4.8727, avg_loss=4.4648]


  Epoch 26 完成: Train Loss=4.4648



Epoch 27/100: 100%|█████████████████████████████████| 50/50 [02:02<00:00,  2.45s/it, batch_loss=5.0640, avg_loss=4.4318]


  Epoch 27 完成: Train Loss=4.4318



Epoch 28/100: 100%|█████████████████████████████████| 50/50 [02:03<00:00,  2.47s/it, batch_loss=3.8687, avg_loss=4.2848]


  Epoch 28 完成: Train Loss=4.2848



Epoch 29/100: 100%|█████████████████████████████████| 50/50 [02:03<00:00,  2.47s/it, batch_loss=3.9359, avg_loss=4.2488]


  Epoch 29 完成: Train Loss=4.2488



Epoch 30/100:  20%|██████▌                          | 10/50 [00:26<01:38,  2.47s/it, batch_loss=4.0242, avg_loss=4.0331]