In [None]:
# [CELL 1] Setup Environment
!pip install python-chess tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import chess
import chess.pgn
import os
import pickle
import shutil
import time
from tqdm import tqdm

# Check for a GPU: use CUDA when torch reports it as available, otherwise fall back to CPU.
# `device` is a notebook-level global that later cells read.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"‚úÖ Device: {device}")
# Warn when running on CPU (the message recommends switching the runtime to a GPU).
if device.type == 'cpu':
    print("‚ö†Ô∏è Warning: B·∫°n ƒëang ch·∫°y tr√™n CPU. H√£y b·∫≠t GPU T4 trong Runtime > Change runtime type.")

Collecting python-chess
  Downloading python_chess-1.999-py3-none-any.whl.metadata (776 bytes)
Collecting chess<2,>=1 (from python-chess)
  Downloading chess-1.11.2.tar.gz (6.1 MB)
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m6.1/6.1 MB[0m [31m62.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading python_chess-1.999-py3-none-any.whl (1.4 kB)
Building wheels for collected packages: chess
  Building wheel for chess (setup.py) ... [?25l[?25hdone
  Created wheel for chess: filename=chess-1.11.2-py3-none-any.whl size=147775 sha256=9e8011aedc9cbd8a930cfa692f6e9480267280a673a4739fd3076e4ba3195c42
  Stored in directory: /root/.cache/pip/wheels/83/1f/4e/8f4300f7dd554eb8de70ddfed96e94d3d030ace10c5b53d447
Successfully built chess
Installing collected packages: chess, python-chess
Successfully installed chess-1.11.2 python-chess-1.999
‚úÖ Device:

In [None]:
# Mount Google Drive at /content/drive; the path constants defined later point below this mount.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# [CELL 2] Mount Drive & Setup Directories


# Define the working paths on Drive.
# You can rename the project folder as you like.
BASE_PATH = '/content/drive/MyDrive/TriÃÅ tueÃ£ÃÇ nhaÃÇn taÃ£o/BTL_2'
DATA_PATH = os.path.join(BASE_PATH, 'data_chess')
# DATA_PATH is re-assigned on purpose: its final value is <BASE_PATH>/data_chess/data_2000.
DATA_PATH = os.path.join(DATA_PATH, 'data_2000')
MODEL_PATH = os.path.join(BASE_PATH, 'weights')

# Create the directories if they do not exist yet.
#os.makedirs(os.path.join(DATA_PATH, 'raw_pgn'), exist_ok=True)
os.makedirs(os.path.join(DATA_PATH, 'processed'), exist_ok=True)
os.makedirs(MODEL_PATH, exist_ok=True)

print(f"üìÇ Working directory created at: {BASE_PATH}")

üìÇ Working directory created at: /content/drive/MyDrive/TriÃÅ tueÃ£ÃÇ nhaÃÇn taÃ£o/BTL_2


In [None]:
# [CELL 3] Architecture: SmallResNet
class ResidualBlock(nn.Module):
    """Residual unit: two 3x3 conv + batch-norm stages with an identity skip.

    Both convolutions use stride 1 and padding 1 and keep ``num_channels``
    channels, so the output has the same shape as the input.
    """

    def __init__(self, num_channels):
        super().__init__()
        conv_options = dict(kernel_size=3, padding=1, stride=1, bias=False)
        self.conv1 = nn.Conv2d(num_channels, num_channels, **conv_options)
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.conv2 = nn.Conv2d(num_channels, num_channels, **conv_options)
        self.bn2 = nn.BatchNorm2d(num_channels)

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + x)``."""
        hidden = self.conv1(x)
        hidden = F.relu(self.bn1(hidden))
        hidden = self.conv2(hidden)
        hidden = self.bn2(hidden)
        # Skip connection: add the block input back before the final activation.
        hidden += x
        return F.relu(hidden)

class SmallResNet(nn.Module):
    """Small residual network with a policy head and a value head.

    Args:
        num_res_blocks: number of ``ResidualBlock`` modules in the tower.
        num_channels: channel width of the stem and the tower.
        action_size: length of the policy vector.

    ``forward`` takes a batch of 32-plane 8x8 inputs and returns
    ``(policy, value)``: ``policy`` is a softmax distribution of shape
    ``(batch, action_size)`` and ``value`` is a tanh output of shape
    ``(batch, 1)``.
    """

    def __init__(self, num_res_blocks=6, num_channels=64, action_size=4672):
        super().__init__()
        # Stem: lift the 32 input planes to `num_channels` feature maps.
        self.conv_input = nn.Conv2d(32, num_channels, kernel_size=3, padding=1, stride=1, bias=False)
        self.bn_input = nn.BatchNorm2d(num_channels)

        # Backbone: residual tower.
        tower = []
        for _ in range(num_res_blocks):
            tower.append(ResidualBlock(num_channels))
        self.res_blocks = nn.ModuleList(tower)

        # Policy head (actor): 1x1 conv to 32 planes, then a linear layer over the flattened board.
        self.policy_conv = nn.Conv2d(num_channels, 32, kernel_size=1, stride=1, bias=False)
        self.policy_bn = nn.BatchNorm2d(32)
        self.policy_fc = nn.Linear(32 * 8 * 8, action_size)

        # Value head (critic): 1x1 conv to 3 planes, then two linear layers down to a scalar.
        self.value_conv = nn.Conv2d(num_channels, 3, kernel_size=1, stride=1, bias=False)
        self.value_bn = nn.BatchNorm2d(3)
        self.value_fc1 = nn.Linear(3 * 8 * 8, 64)
        self.value_fc2 = nn.Linear(64, 1)

    def forward(self, x):
        features = self.bn_input(self.conv_input(x))
        features = F.relu(features)
        for res_block in self.res_blocks:
            features = res_block(features)

        # Policy: probabilities over the action vector (softmax along dim 1).
        policy_planes = F.relu(self.policy_bn(self.policy_conv(features)))
        policy_logits = self.policy_fc(policy_planes.view(-1, 32 * 8 * 8))
        policy_out = F.softmax(policy_logits, dim=1)

        # Value: scalar in [-1, 1] via tanh.
        value_planes = F.relu(self.value_bn(self.value_conv(features)))
        value_hidden = F.relu(self.value_fc1(value_planes.view(-1, 3 * 8 * 8)))
        value_out = torch.tanh(self.value_fc2(value_hidden))

        return policy_out, value_out

In [None]:
# [CELL 4] Logic Core: ChessConverter
class ChessConverter:
    """Encodes chess positions as 32x8x8 tensors and UCI moves as integer indices.

    The move vocabulary is built lazily: the first time a UCI string is seen it
    receives the next free index. The mapping can be saved to / loaded from a
    pickle file so that indices stay stable across processing and training runs.

    Args:
        max_actions: upper bound on the number of distinct move indices. It
            defaults to 4672, the value previously hard-coded in
            ``encode_move``; moves first seen after the vocabulary is full are
            rejected (``encode_move`` returns ``None``).
    """

    def __init__(self, max_actions=4672):
        # 12 channels for pieces (6 types * 2 colors)
        self.piece_map = {
            'P': 0, 'N': 1, 'B': 2, 'R': 3, 'Q': 4, 'K': 5,
            'p': 6, 'n': 7, 'b': 8, 'r': 9, 'q': 10, 'k': 11
        }
        self.move_to_idx = {}
        self.idx_to_move = {}
        self.next_idx = 0
        self.max_actions = max_actions

    # Load/save the map so indices stay consistent across repeated training runs.
    def load_moves_map(self, path):
        """Load a previously saved move map from ``path``; do nothing if it is missing."""
        if not os.path.exists(path):
            return
        # SECURITY NOTE: pickle.load can execute arbitrary code. Only load map
        # files that this notebook produced itself.
        with open(path, 'rb') as f:
            d = pickle.load(f)
        self.move_to_idx = d['move_to_idx']
        self.idx_to_move = d['idx_to_move']
        # Continue after the highest stored index. For a gap-free map this equals
        # len(move_to_idx); if the map has gaps it avoids re-issuing a used index.
        self.next_idx = max(self.move_to_idx.values(), default=-1) + 1
        print(f"üìñ Loaded Move Map: {self.next_idx} moves.")

    def save_moves_map(self, path):
        """Pickle both direction dictionaries to ``path``."""
        with open(path, 'wb') as f:
            pickle.dump({'move_to_idx': self.move_to_idx, 'idx_to_move': self.idx_to_move}, f)

    def encode_move(self, move_uci):
        """Return the index of ``move_uci``, assigning a new one on first sight.

        Returns ``None`` when the move is unknown and the vocabulary already
        holds ``max_actions`` entries.
        """
        idx = self.move_to_idx.get(move_uci)
        if idx is None:
            if self.next_idx >= self.max_actions:
                return None  # output size limit reached
            idx = self.next_idx
            self.move_to_idx[move_uci] = idx
            self.idx_to_move[idx] = move_uci
            self.next_idx += 1
        return idx

    def board_to_tensor(self, board, prev_board=None):
        """Build the float32 (32, 8, 8) input tensor for ``board``.

        Planes 0-11 hold the current pieces, planes 12-23 the pieces of
        ``prev_board`` (left at zero when it is ``None``), planes 24-31 the
        auxiliary features (turn, castling rights, en-passant square,
        repetition flags). Squares are indexed ``[rank][file]``.
        """
        tensor = np.zeros((32, 8, 8), dtype=np.float32)

        # Channels 0-11: current pieces.
        for sq, pc in board.piece_map().items():
            tensor[self.piece_map[pc.symbol()]][chess.square_rank(sq)][chess.square_file(sq)] = 1

        # Channels 12-23: pieces of the previous position (history T-1).
        if prev_board is not None:
            for sq, pc in prev_board.piece_map().items():
                tensor[self.piece_map[pc.symbol()]+12][chess.square_rank(sq)][chess.square_file(sq)] = 1

        # Channels 24-31: auxiliary planes (turn, castling, en-passant, repetition).
        if board.turn == chess.WHITE: tensor[24,:,:] = 1
        if board.has_kingside_castling_rights(chess.WHITE): tensor[25,:,:] = 1
        if board.has_queenside_castling_rights(chess.WHITE): tensor[26,:,:] = 1
        if board.has_kingside_castling_rights(chess.BLACK): tensor[27,:,:] = 1
        if board.has_queenside_castling_rights(chess.BLACK): tensor[28,:,:] = 1
        # Explicit None check so the test does not depend on the square number's truthiness.
        if board.ep_square is not None:
            tensor[29][chess.square_rank(board.ep_square)][chess.square_file(board.ep_square)] = 1
        # NOTE(review): python-chess's is_repetition(1) looks like it is true for
        # every position (the current occurrence counts), which would make plane
        # 30 constant. Confirm before changing: existing datasets and checkpoints
        # were produced with this exact encoding.
        if board.is_repetition(1): tensor[30,:,:] = 1
        if board.is_repetition(2): tensor[31,:,:] = 1

        return tensor

In [None]:
# [CELL 5] Data Processing Pipeline
def process_data_pipeline(max_games=5000):
    """Convert PGN games found in ``DATA_PATH`` into a compressed training set.

    For every position of every accepted game this stores the encoded board
    tensor, the index of the move that was played, and the game result
    (1.0 for "1-0", -1.0 for "0-1", 0.0 for "1/2-1/2"). Games with any other
    ``Result`` header are skipped and do not count towards ``max_games``.

    The dataset is written to ``processed/combined_data_2000.npz`` under
    ``DATA_PATH`` and the (possibly extended) move map is saved back to
    ``move_map.pkl`` under ``MODEL_PATH``.

    Args:
        max_games: maximum number of accepted games across all PGN files.

    Returns:
        None. Returns early, without writing anything, when no ``.pgn`` file
        is found.
    """
    #RAW_DIR = os.path.join(DATA_PATH, 'raw_pgn')
    OUTPUT_FILE = os.path.join(DATA_PATH, 'processed/combined_data_2000.npz')
    MAP_FILE = os.path.join(MODEL_PATH, 'move_map.pkl')

    # 1. Setup Converter
    converter = ChessConverter()
    converter.load_moves_map(MAP_FILE)  # reuse the existing map if there is one

    states, policies, values = [], [], []
    # Sorted so that the games selected under the `max_games` cap do not depend
    # on the arbitrary order returned by os.listdir.
    pgn_files = sorted(f for f in os.listdir(DATA_PATH) if f.endswith('.pgn'))

    if not pgn_files:
        print("‚ùå Kh√¥ng t√¨m th·∫•y file .pgn n√†o trong folder data/raw_pgn!")
        print("üëâ H√£y upload file PGN l√™n Drive tr∆∞·ªõc.")
        return

    print(f"üöÄ Found {len(pgn_files)} files. Processing max {max_games} games...")

    total_games = 0
    for pgn_file in pgn_files:
        # Once the cap is reached there is no reason to open the remaining files.
        if total_games >= max_games:
            break

        path = os.path.join(DATA_PATH, pgn_file)
        # Context manager: the handle is closed even if parsing fails part-way.
        with open(path) as pgn:
            while total_games < max_games:
                try:
                    game = chess.pgn.read_game(pgn)
                except Exception as exc:
                    # Give up on this file but report why. A bare `except` would
                    # also swallow KeyboardInterrupt and hide the cause.
                    print(f"Stopped reading {pgn_file}: {exc!r}")
                    break
                if game is None: break

                # Only keep games with a decisive or drawn result.
                res = game.headers.get("Result", "*")
                if res not in ["1-0", "0-1", "1/2-1/2"]: continue
                val = 1.0 if res == "1-0" else (-1.0 if res == "0-1" else 0.0)

                board = game.board()
                prev_board = None

                for move in game.mainline_moves():
                    s = converter.board_to_tensor(board, prev_board)
                    m = converter.encode_move(move.uci())  # move -> policy index

                    # encode_move returns None when the move vocabulary is full;
                    # such positions are dropped from the dataset.
                    if m is not None:
                        states.append(s)
                        policies.append(m)
                        values.append(val)

                    prev_board = board.copy()
                    board.push(move)

                total_games += 1
                if total_games % 100 == 0: print(f"Processing... {total_games} games done.")

    # Save Data
    print("üì¶ Saving dataset...")
    np.savez_compressed(OUTPUT_FILE, states=np.array(states), policy_targets=np.array(policies), value_targets=np.array(values))
    converter.save_moves_map(MAP_FILE)
    print(f"‚úÖ DONE! Saved to {OUTPUT_FILE}")

# Run the processing step (only needs to be run once, when new data is available).
process_data_pipeline(max_games=8000)

üìñ Loaded Move Map: 1908 moves.
üöÄ Found 24 files. Processing max 8000 games...
Processing... 100 games done.
Processing... 200 games done.
Processing... 300 games done.
Processing... 400 games done.
Processing... 500 games done.
Processing... 600 games done.
Processing... 700 games done.
Processing... 800 games done.
Processing... 900 games done.
Processing... 1000 games done.
Processing... 1100 games done.
Processing... 1200 games done.
Processing... 1300 games done.
Processing... 1400 games done.
Processing... 1500 games done.
Processing... 1600 games done.
Processing... 1700 games done.
Processing... 1800 games done.
Processing... 1900 games done.
Processing... 2000 games done.
Processing... 2100 games done.
Processing... 2200 games done.
Processing... 2300 games done.
Processing... 2400 games done.
Processing... 2500 games done.
Processing... 2600 games done.
Processing... 2700 games done.
Processing... 2800 games done.
Processing... 2900 games done.
Processing... 3000 games d

In [None]:
# [CELL 6] High-Speed Dataset (RAM & Local SSD)
class InMemoryDataset(Dataset):
    """Dataset that holds the whole ``.npz`` training set in memory as tensors.

    The archive at ``drive_path`` is first copied to ``local_path`` and then
    loaded completely, so that training reads from memory instead of the Drive
    mount. If ``drive_path`` does not exist, random dummy data is generated
    instead so the pipeline can still be smoke-tested.

    Args:
        drive_path: path of the ``.npz`` archive holding the arrays
            ``states``, ``policy_targets`` and ``value_targets``.
        local_path: where the local copy is written. Defaults to the location
            that was previously hard-coded.
    """

    def __init__(self, drive_path, local_path='/content/temp_data.npz'):
        # 1. Copy from Drive to local disk for much faster reads.
        if os.path.exists(drive_path):
            print(f"‚è≥ Copying data from Drive to Local Disk... (Speed Boost)")
            # shutil.copy raises SameFileError when source and destination coincide.
            if os.path.abspath(drive_path) != os.path.abspath(local_path):
                shutil.copy(drive_path, local_path)
        else:
            print(f"‚ùå File not found: {drive_path}")
            # Fall back to fake data so the rest of the pipeline can be tested.
            self.generate_dummy()
            return

        # 2. Load everything into RAM.
        print("‚è≥ Loading into RAM...")
        # Context manager: closes the archive's file handle once the arrays
        # have been read out of it.
        with np.load(local_path) as data:
            self.states = torch.from_numpy(data['states'])
            self.p_targets = torch.from_numpy(data['policy_targets']).long()
            self.v_targets = torch.from_numpy(data['value_targets']).float()
        print(f"‚úÖ Loaded {len(self.states)} samples ready for training.")

    def generate_dummy(self):
        """Fill the dataset with 1000 random samples (for testing only)."""
        print("‚ö†Ô∏è Generating DUMMY data for testing.")
        self.states = torch.randn(1000, 32, 8, 8)
        self.p_targets = torch.randint(0, 4672, (1000,)).long()
        self.v_targets = torch.randint(-1, 2, (1000,)).float()

    def __len__(self):
        return len(self.states)

    def __getitem__(self, idx):
        """Return one sample as a dict with keys ``state``, ``p_target``, ``v_target``."""
        return {
            'state': self.states[idx],
            'p_target': self.p_targets[idx],
            'v_target': self.v_targets[idx]
        }

In [None]:
# [CELL 7] Training Loop
class AlphaZeroLoss(nn.Module):
    """Combined loss: MSE on the value head plus negative log-likelihood on the policy head.

    ``forward`` returns ``(total, value_loss, policy_loss)`` where
    ``total = value_loss + policy_loss``.
    """

    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
        self.nll = nn.NLLLoss()  # applied to log-probabilities, i.e. cross-entropy on the policy

    def forward(self, p_pred, v_pred, p_target, v_target):
        """Compute the loss.

        Args:
            p_pred: policy probabilities of shape (batch, actions); the log is
                taken here, so this must already be a probability distribution.
            v_pred: value predictions with one element per sample.
            p_target: integer class index of the played move, shape (batch,).
            v_target: target values with one element per sample.
        """
        # Loss = (z - v)^2 - pi^T * log(p)
        # Flatten both sides explicitly. A bare `squeeze()` turns a batch of one
        # into a 0-d tensor (shape-mismatch warning against the 1-d target), and
        # a (batch, 1) target would silently broadcast to (batch, batch).
        v_loss = self.mse(v_pred.reshape(-1), v_target.reshape(-1))
        # The 1e-8 offset keeps log() finite when a probability is exactly 0.
        p_loss = self.nll(torch.log(p_pred + 1e-8), p_target)
        return v_loss + p_loss, v_loss, p_loss

def train(epochs=20, batch_size=256):
    """Train a new SmallResNet on the processed dataset, checkpointing every epoch.

    After each epoch the weights are written to ``model_latest_2000.pth``; when
    the epoch's average training loss is the lowest so far they are also
    written to ``model_best_2000.pth`` (both under ``MODEL_PATH``).

    Args:
        epochs: number of passes over the dataset.
        batch_size: mini-batch size for the DataLoader.
    """
    DATA_FILE = os.path.join(DATA_PATH, 'processed/combined_data_2000.npz')
    MAP_FILE = os.path.join(MODEL_PATH, 'move_map.pkl')
    latest_ckpt = os.path.join(MODEL_PATH, 'model_latest_2000.pth')
    best_ckpt = os.path.join(MODEL_PATH, 'model_best_2000.pth')

    # The move map determines the size of the policy output (never below 4672).
    move_converter = ChessConverter()
    move_converter.load_moves_map(MAP_FILE)
    action_size = max(move_converter.next_idx, 4672)

    # The dataset is already in memory, so no worker processes are used.
    dataset = InMemoryDataset(DATA_FILE)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0)

    model = SmallResNet(action_size=action_size).to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)
    criterion = AlphaZeroLoss()

    best_loss = float('inf')

    print("üöÄ START TRAINING...")
    for epoch in range(epochs):
        model.train()
        running_loss = 0
        loop = tqdm(loader, desc=f"Epoch {epoch+1}/{epochs}")

        for batch in loop:
            states = batch['state'].to(device)
            p_t = batch['p_target'].to(device)
            v_t = batch['v_target'].to(device)

            optimizer.zero_grad()
            p_pred, v_pred = model(states)

            loss, _v_loss, _p_loss = criterion(p_pred, v_pred, p_t, v_t)
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            running_loss += batch_loss
            loop.set_postfix(loss=batch_loss)

        avg_loss = running_loss / len(loader)
        print(f"Epoch {epoch+1} finished. Avg Loss: {avg_loss:.4f}")

        # Always refresh the "latest" checkpoint; refresh "best" only on improvement.
        torch.save(model.state_dict(), latest_ckpt)
        if avg_loss < best_loss:
            best_loss = avg_loss
            torch.save(model.state_dict(), best_ckpt)
            print("üåü Saved new BEST model.")

# Run the initial training with 20 epochs (batch size keeps its default of 256).
train(epochs=20)

In [None]:
# [CELL FINE-TUNE 1] Process NEW 8000 Games
# Use a different output file name so this batch is kept apart from the first round.
NEW_DATA_FILE = os.path.join(DATA_PATH, 'processed/batch_4_data.npz')

def process_new_batch(max_games=8000):
    """Convert the PGN files under ``<DATA_PATH>/dt`` into a fine-tuning dataset.

    The existing move map is mandatory: new moves are appended to it so that
    indices already used by a trained model keep their meaning. The samples
    are written to ``NEW_DATA_FILE`` and the extended map is saved back to
    ``move_map.pkl`` under ``MODEL_PATH``.

    Args:
        max_games: maximum number of accepted games across all PGN files.
            Games whose ``Result`` header is not "1-0", "0-1" or "1/2-1/2"
            are skipped and do not count.

    Returns:
        None. Returns early, without writing anything, when the move map file
        does not exist.
    """
    RAW_DIR = os.path.join(DATA_PATH, 'dt')
    MAP_FILE = os.path.join(MODEL_PATH, 'move_map.pkl')  # the OLD map must be reused

    # 1. Load the converter with the old map.
    converter = ChessConverter()
    if os.path.exists(MAP_FILE):
        converter.load_moves_map(MAP_FILE)
        print(f"üìñ Loaded existing map with {converter.next_idx} moves. Ready to extend.")
    else:
        print("‚ö†Ô∏è CRITICAL WARNING: Kh√¥ng t√¨m th·∫•y move_map.pkl c≈©! Model s·∫Ω b·ªã l·ªói index.")
        return

    states, policies, values = [], [], []
    # Sorted so that the games selected under the `max_games` cap do not depend
    # on the arbitrary order returned by os.listdir.
    pgn_files = sorted(f for f in os.listdir(RAW_DIR) if f.endswith('.pgn'))

    print(f"üöÄ Processing new batch from: {pgn_files}")

    total_games = 0
    for pgn_file in pgn_files:
        # Once the cap is reached there is no reason to open the remaining files.
        if total_games >= max_games:
            break

        path = os.path.join(RAW_DIR, pgn_file)
        # Context manager: the handle is closed even if parsing fails part-way.
        with open(path) as pgn:
            while total_games < max_games:
                try:
                    game = chess.pgn.read_game(pgn)
                except Exception as exc:
                    # Give up on this file but report why. A bare `except` would
                    # also swallow KeyboardInterrupt and hide the cause.
                    print(f"Stopped reading {pgn_file}: {exc!r}")
                    break
                if game is None: break

                res = game.headers.get("Result", "*")
                if res not in ["1-0", "0-1", "1/2-1/2"]: continue
                val = 1.0 if res == "1-0" else (-1.0 if res == "0-1" else 0.0)

                board = game.board()
                prev_board = None

                for move in game.mainline_moves():
                    s = converter.board_to_tensor(board, prev_board)
                    m = converter.encode_move(move.uci())

                    # encode_move returns None when the move vocabulary is full;
                    # such positions are dropped from the dataset.
                    if m is not None:
                        states.append(s)
                        policies.append(m)
                        values.append(val)
                    prev_board = board.copy()
                    board.push(move)

                total_games += 1
                if total_games % 500 == 0: print(f"  -> Processed {total_games} games...")

    # Save the data of this batch.
    print(f"üì¶ Saving Batch 2 dataset ({len(states)} samples)...")
    np.savez_compressed(NEW_DATA_FILE, states=np.array(states), policy_targets=np.array(policies), value_targets=np.array(values))

    # Write the map back in case previously unseen moves appeared in these games.
    converter.save_moves_map(MAP_FILE)
    print(f"‚úÖ DONE! Saved to {NEW_DATA_FILE}")

# Run the processing of the new batch.
process_new_batch(max_games=8000)

üìñ Loaded Move Map: 1905 moves.
üìñ Loaded existing map with 1905 moves. Ready to extend.
üöÄ Processing new batch from: ['ficsgamesdb_2012_standard_nomovetimes_925737.pgn', 'ficsgamesdb_2011_standard_nomovetimes_925886.pgn']
  -> Processed 500 games...
  -> Processed 1000 games...
  -> Processed 1500 games...
  -> Processed 2000 games...
  -> Processed 2500 games...
  -> Processed 3000 games...
  -> Processed 3500 games...
  -> Processed 4000 games...
  -> Processed 4500 games...
  -> Processed 5000 games...
  -> Processed 5500 games...
  -> Processed 6000 games...
  -> Processed 6500 games...
  -> Processed 7000 games...
  -> Processed 7500 games...
  -> Processed 8000 games...
üì¶ Saving Batch 2 dataset (502346 samples)...
‚úÖ DONE! Saved to /content/drive/MyDrive/TriÃÅ tueÃ£ÃÇ nhaÃÇn taÃ£o/BTL_2/data_chess/processed/batch_4_data.npz


In [None]:
# [CELL FINE-TUNE 2] Load Weights & Train
def fine_tune_model(epochs=10, batch_size=256):
    """Continue training from ``model_best.pth`` on the new batch of games.

    Loads the existing checkpoint (if present), trains with a lower learning
    rate than the initial run, writes ``model_latest.pth`` after every epoch
    and overwrites ``model_best.pth`` whenever the epoch's average training
    loss is the lowest seen in this call.

    If the checkpoint exists but cannot be loaded, fine-tuning is aborted:
    continuing would train a (partly) uninitialised network and overwrite the
    existing ``model_best.pth`` after the first epoch. If no checkpoint exists
    at all, training starts from scratch.

    Args:
        epochs: number of passes over the new dataset.
        batch_size: mini-batch size for the DataLoader.
    """
    # Path of the newly created batch file.
    NEW_DATA_FILE = os.path.join(DATA_PATH, 'processed/batch_4_data.npz')
    MAP_FILE = os.path.join(MODEL_PATH, 'move_map.pkl')
    # NOTE(review): train() in this notebook writes 'model_best_2000.pth', while
    # this function reads and writes 'model_best.pth'. Confirm which checkpoint
    # is meant to be fine-tuned.
    PREV_MODEL_PATH = os.path.join(MODEL_PATH, 'model_best.pth')  # first-round model

    # 1. Setup Resources
    converter = ChessConverter()
    converter.load_moves_map(MAP_FILE)
    action_size = max(converter.next_idx, 4672)  # keep the output size in line with the map

    # 2. Load New Dataset
    dataset = InMemoryDataset(NEW_DATA_FILE)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0)

    # 3. Initialize Model
    model = SmallResNet(action_size=action_size).to(device)

    # --- MOST IMPORTANT PART: LOAD WEIGHTS ---
    if os.path.exists(PREV_MODEL_PATH):
        print(f"üîÑ Loading weights from previous training: {PREV_MODEL_PATH}")
        # strict=False only tolerates missing or unexpected keys. A tensor whose
        # shape differs (for example a different policy output size) still
        # raises, which is handled below.
        try:
            model.load_state_dict(torch.load(PREV_MODEL_PATH, map_location=device), strict=False)
            print("‚úÖ Weights loaded successfully! Fine-tuning started.")
        except Exception as e:
            print(f"‚ö†Ô∏è Warning loading weights: {e}")
            print("Tip: N·∫øu l·ªói dimension, c√≥ th·ªÉ do move_map b·ªã reset. H√£y ki·ªÉm tra l·∫°i.")
            # Abort instead of training an unloaded network and then overwriting
            # the checkpoint that failed to load.
            print("Aborting fine-tuning so the existing checkpoint is not overwritten.")
            return
    else:
        print("‚ö†Ô∏è Kh√¥ng t√¨m th·∫•y model c≈©. S·∫Ω train t·ª´ ƒë·∫ßu (Scratch).")

    # 4. Optimizer with a LOWER learning rate:
    # 1e-4 instead of the 1e-3 used initially, for gentler updates.
    optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)
    criterion = AlphaZeroLoss()

    best_loss = float('inf')

    # 5. Training loop (same as the initial training).
    print("üöÄ START FINE-TUNING...")
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        loop = tqdm(loader, desc=f"Fine-tune Epoch {epoch+1}/{epochs}")

        for batch in loop:
            states = batch['state'].to(device)
            p_t = batch['p_target'].to(device)
            v_t = batch['v_target'].to(device)

            optimizer.zero_grad()
            p_pred, v_pred = model(states)

            loss, v_l, p_l = criterion(p_pred, v_pred, p_t, v_t)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            loop.set_postfix(loss=loss.item())

        avg_loss = total_loss / len(loader)
        print(f"Epoch {epoch+1} finished. Avg Loss: {avg_loss:.4f}")

        # best_loss starts at infinity, so the first epoch always replaces
        # model_best.pth; later epochs replace it only when the loss improves.
        torch.save(model.state_dict(), os.path.join(MODEL_PATH, 'model_latest.pth'))
        if avg_loss < best_loss:
            best_loss = avg_loss
            # Save under another name such as 'model_finetuned.pth' to keep the old file.
            torch.save(model.state_dict(), os.path.join(MODEL_PATH, 'model_best.pth'))
            print("üåü Checkpoint updated.")

# Run the fine-tuning.
fine_tune_model(epochs=10) # 10 epochs are enough for fine-tuning

üìñ Loaded Move Map: 1908 moves.
‚è≥ Copying data from Drive to Local Disk... (Speed Boost)
‚è≥ Loading into RAM...
‚úÖ Loaded 502346 samples ready for training.
üîÑ Loading weights from previous training: /content/drive/MyDrive/TriÃÅ tueÃ£ÃÇ nhaÃÇn taÃ£o/BTL_2/weights/model_best.pth
‚úÖ Weights loaded successfully! Fine-tuning started.
üöÄ START FINE-TUNING...


Fine-tune Epoch 1/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1963/1963 [00:53<00:00, 36.72it/s, loss=3.36]


Epoch 1 finished. Avg Loss: 3.8943
üåü Checkpoint updated.


Fine-tune Epoch 2/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1963/1963 [00:52<00:00, 37.39it/s, loss=2.86]


Epoch 2 finished. Avg Loss: 3.1142
üåü Checkpoint updated.


Fine-tune Epoch 3/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1963/1963 [00:52<00:00, 37.34it/s, loss=2.76]


Epoch 3 finished. Avg Loss: 2.8376
üåü Checkpoint updated.


Fine-tune Epoch 4/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1963/1963 [00:52<00:00, 37.69it/s, loss=2.73]


Epoch 4 finished. Avg Loss: 2.5981
üåü Checkpoint updated.


Fine-tune Epoch 5/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1963/1963 [00:52<00:00, 37.71it/s, loss=2.48]


Epoch 5 finished. Avg Loss: 2.3905
üåü Checkpoint updated.


Fine-tune Epoch 6/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1963/1963 [00:51<00:00, 37.90it/s, loss=1.88]


Epoch 6 finished. Avg Loss: 2.2074
üåü Checkpoint updated.


Fine-tune Epoch 7/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1963/1963 [00:51<00:00, 37.92it/s, loss=1.68]


Epoch 7 finished. Avg Loss: 2.0475
üåü Checkpoint updated.


Fine-tune Epoch 8/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1963/1963 [00:51<00:00, 37.86it/s, loss=1.85]


Epoch 8 finished. Avg Loss: 1.9058
üåü Checkpoint updated.


Fine-tune Epoch 9/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1963/1963 [00:52<00:00, 37.74it/s, loss=1.77]


Epoch 9 finished. Avg Loss: 1.7794
üåü Checkpoint updated.


Fine-tune Epoch 10/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1963/1963 [00:52<00:00, 37.66it/s, loss=1.82]


Epoch 10 finished. Avg Loss: 1.6660
üåü Checkpoint updated.


In [None]:
import chess
import chess.svg
import torch
import numpy as np
import random
import time
import os
from IPython.display import display, clear_output

# --- 1. AGENT DEFINITIONS ---

class RandomAgent:
    """Baseline player that picks uniformly among the legal moves."""

    def select_move(self, board):
        """Return a random legal move of ``board``, or ``None`` if there is none."""
        candidates = list(board.legal_moves)
        if not candidates:
            return None
        return random.choice(candidates)

class NeuralAgent:
    """Player that picks the legal move with the highest policy probability.

    Args:
        model_path: path of the ``state_dict`` checkpoint to load.
        map_path: path of the pickled move map used by ``ChessConverter``.
        device: torch device on which the network is evaluated.
    """

    def __init__(self, model_path, map_path, device):
        self.device = device

        # Load Converter & Map
        self.converter = ChessConverter()
        self.converter.load_moves_map(map_path)

        # Load Model
        action_size = max(self.converter.next_idx, 4672)
        self.model = SmallResNet(action_size=action_size).to(device)
        # strict=False tolerates missing or unexpected keys in the checkpoint.
        self.model.load_state_dict(torch.load(model_path, map_location=device), strict=False)
        self.model.eval()

    def select_move(self, board):
        """Return the best legal move for ``board`` according to the policy head.

        Falls back to a random legal move when none of the legal moves is in
        the move map; returns ``None`` when there is no legal move at all.
        """
        # 1. Convert the board to a tensor. The training data fills the history
        #    planes from the previous position, so rebuild that position from
        #    the move stack here; otherwise those planes are always zero at
        #    inference time, unlike anything the network saw during training.
        prev_board = None
        if board.move_stack:
            prev_board = board.copy(stack=1)  # copying only the last move is enough to undo it
            prev_board.pop()
        state = self.converter.board_to_tensor(board, prev_board)
        state_t = torch.tensor(state).unsqueeze(0).to(self.device) # [1, 32, 8, 8]

        # 2. Predict
        with torch.no_grad():
            policy, value = self.model(state_t)

        # 3. Restrict the choice to legal moves (masking).
        legal_moves = list(board.legal_moves)
        best_move = None
        best_prob = -1.0

        policy_np = policy.cpu().numpy()[0] # probability vector

        # Scan the legal moves for the one the network rates highest.
        for move in legal_moves:
            move_uci = move.uci()
            # Look up the policy index of this move.
            idx = self.converter.move_to_idx.get(move_uci, None)

            if idx is not None and idx < len(policy_np):
                prob = policy_np[idx]
                if prob > best_prob:
                    best_prob = prob
                    best_move = move

        # Fallback: no legal move is in the map (rare), so pick at random.
        if best_move is None and legal_moves:
            print("‚ö†Ô∏è Model confused (Move not in map). Random pick.")
            return random.choice(legal_moves)

        return best_move

# --- 2. MATCH RUNNER (ARENA) ---

def play_match(model_path, map_path, num_games=10):
    """Play ``num_games`` games between the neural agent and a random agent.

    Colours alternate every game. Each game runs until the position is over,
    with claimable draws (``claim_draw=True``) counted as game end. Per-game
    results and a final tally are printed.

    Args:
        model_path: checkpoint path passed to ``NeuralAgent``.
        map_path: move-map path passed to ``NeuralAgent``.
        num_games: number of games to play.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"üèüÔ∏è Starting Arena on {device}...")

    # Create the players.
    neural_player = NeuralAgent(model_path, map_path, device)
    random_player = RandomAgent()

    results = {"Neural_Win": 0, "Random_Win": 0, "Draw": 0}

    for i in range(num_games):
        board = chess.Board()

        # Swap colours after every game for fairness:
        # even games - Neural plays White; odd games - Neural plays Black.
        neural_is_white = (i % 2 == 0)

        print(f"\n--- Game {i+1}/{num_games} ---")
        print(f"Neural plays {'White' if neural_is_white else 'Black'}")

        move_count = 0
        while not board.is_game_over(claim_draw=True):
            # Work out whose turn it is.
            if board.turn == chess.WHITE:
                current_player = neural_player if neural_is_white else random_player
            else:
                current_player = random_player if neural_is_white else neural_player

            # The agent picks its move.
            move = current_player.select_move(board)
            board.push(move)
            move_count += 1

            # (Optional) print a progress dot every 10 moves to keep the log short.
            # if move_count % 10 == 0: print(".", end="")

        # Game finished. Use the same claim_draw setting as the loop condition:
        # without it a game that ended by a claimable draw is reported as "*"
        # rather than "1/2-1/2".
        res = board.result(claim_draw=True)
        print(f"\nGame Over: {res} (Moves: {move_count})")

        # Decide the winner.
        if res == "1-0":
            winner = "Neural" if neural_is_white else "Random"
        elif res == "0-1":
            winner = "Random" if neural_is_white else "Neural"
        else:
            winner = "Draw"

        if winner == "Neural": results["Neural_Win"] += 1
        elif winner == "Random": results["Random_Win"] += 1
        else: results["Draw"] += 1

        print(f"Winner: {winner}")

    print("\n================ RESULT ================")
    print(f"Neural Win: {results['Neural_Win']}")
    print(f"Random Win: {results['Random_Win']}")
    print(f"Draw:       {results['Draw']}")
    print("========================================")

# --- 3. EXECUTION ---
# Make sure these paths are correct.
MODEL_FILE = os.path.join(MODEL_PATH, 'model_best_2000.pth')
MAP_FILE = os.path.join(MODEL_PATH, 'move_map.pkl')

# Only start the arena when the checkpoint exists; otherwise print a hint to train first.
if os.path.exists(MODEL_FILE):
    play_match(MODEL_FILE, MAP_FILE, num_games=10)
else:
    print("‚ùå Ch∆∞a c√≥ file model. H√£y train tr∆∞·ªõc!")

In [None]:
import chess
import torch
import numpy as np
import os
import random
from tqdm import tqdm  # Thanh ti·∫øn tr√¨nh
import time

# --- CONFIGURATION ---
NUM_GAMES = 1000          # Number of games to play
MAX_MOVES_PER_GAME = 150  # Cap on pushed moves per game to avoid hanging (counted as a draw if exceeded)
MODEL_FILE = os.path.join(MODEL_PATH, 'model_best_2000.pth')
MAP_FILE = os.path.join(MODEL_PATH, 'move_map.pkl')

# --- AGENTS DEFINED AGAIN (to keep this cell self-consistent) ---
class RandomAgent:
    """Opponent that plays a uniformly random legal move."""

    def select_move(self, board):
        """Pick one of ``board.legal_moves`` at random; ``None`` when the list is empty."""
        options = list(board.legal_moves)
        if options:
            return random.choice(options)
        return None

class NeuralAgent:
    """Player that picks the legal move with the highest policy probability.

    Args:
        model_path: path of the ``state_dict`` checkpoint to load.
        map_path: path of the pickled move map used by ``ChessConverter``.
        device: torch device on which the network is evaluated.

    Raises:
        FileNotFoundError: if ``model_path`` does not exist.
    """

    def __init__(self, model_path, map_path, device):
        self.device = device
        self.converter = ChessConverter()
        self.converter.load_moves_map(map_path)

        # Load Model
        action_size = max(self.converter.next_idx, 4672)
        self.model = SmallResNet(action_size=action_size).to(device)

        if os.path.exists(model_path):
            # strict=False tolerates missing or unexpected keys in the checkpoint.
            self.model.load_state_dict(torch.load(model_path, map_location=device), strict=False)
            self.model.eval()
        else:
            raise FileNotFoundError("‚ùå Kh√¥ng t√¨m th·∫•y file model!")

    def select_move(self, board):
        """Return the best legal move for ``board`` according to the policy head.

        Falls back to a random legal move when none of the legal moves is in
        the move map; returns ``None`` when there is no legal move at all.
        """
        # Encode the board. The training data fills the history planes from the
        # previous position, so rebuild that position from the move stack here;
        # otherwise those planes are always zero at inference time.
        prev_board = None
        if board.move_stack:
            prev_board = board.copy(stack=1)  # copying only the last move is enough to undo it
            prev_board.pop()
        state = self.converter.board_to_tensor(board, prev_board)
        state_t = torch.tensor(state).unsqueeze(0).to(self.device)

        with torch.no_grad():
            policy, _ = self.model(state_t)

        legal_moves = list(board.legal_moves)
        policy_np = policy.cpu().numpy()[0]

        best_move = None
        best_prob = -1.0

        # Keep the highest-rated move among the legal ones.
        for move in legal_moves:
            idx = self.converter.move_to_idx.get(move.uci(), None)
            if idx is not None and idx < len(policy_np):
                if policy_np[idx] > best_prob:
                    best_prob = policy_np[idx]
                    best_move = move

        if best_move is not None:
            return best_move
        # Random fallback when no legal move is in the map; guard the empty case
        # because random.choice raises IndexError on an empty list.
        return random.choice(legal_moves) if legal_moves else None

# --- BENCHMARK RUNNER ---
def run_benchmark():
    """Play ``NUM_GAMES`` games of neural agent vs. random agent and print a report.

    Colours alternate every game. A game that reaches ``MAX_MOVES_PER_GAME``
    pushed moves is stopped and scored as a draw. Wins, losses and draws are
    counted from the neural agent's point of view. If the agents cannot be
    created, the error is printed and the function returns.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"üöÄ Starting Benchmark: {NUM_GAMES} games on {device}...")

    try:
        neural_bot = NeuralAgent(MODEL_FILE, MAP_FILE, device)
        random_bot = RandomAgent()
    except Exception as e:
        print(f"‚ùå Error loading agent: {e}")
        return

    stats = {"Win": 0, "Loss": 0, "Draw": 0}
    start_time = time.time()

    # tqdm renders the progress bar.
    for game_index in tqdm(range(NUM_GAMES), desc="Simulating"):
        board = chess.Board()

        # Even index: Neural plays White | odd index: Neural plays Black.
        neural_is_white = (game_index % 2 == 0)
        if neural_is_white:
            white_player, black_player = neural_bot, random_bot
        else:
            white_player, black_player = random_bot, neural_bot

        plies_played = 0
        timed_out = False

        # Game loop.
        while not board.is_game_over():
            if plies_played >= MAX_MOVES_PER_GAME:
                timed_out = True  # too long: scored as a draw below
                break

            mover = white_player if board.turn == chess.WHITE else black_player
            board.push(mover.select_move(board))
            plies_played += 1

        # Determine the result string: "1-0", "0-1" or "1/2-1/2".
        if timed_out:
            res = "1/2-1/2"  # a timeout counts as a draw
        else:
            res = board.result()

        # Score the game for the neural network.
        if res in ("1-0", "0-1"):
            white_won = (res == "1-0")
            if white_won == neural_is_white:
                stats["Win"] += 1
            else:
                stats["Loss"] += 1
        else:
            stats["Draw"] += 1

    total_time = time.time() - start_time

    # --- PRINT THE REPORT ---
    print("\n" + "="*40)
    print(f"üìä BENCHMARK REPORT ({NUM_GAMES} Games)")
    print("="*40)
    print(f"üèÜ WINS:   {stats['Win']} ({stats['Win']/NUM_GAMES*100:.2f}%)")
    print(f"‚ùå LOSSES: {stats['Loss']} ({stats['Loss']/NUM_GAMES*100:.2f}%)")
    print(f"ü§ù DRAWS:  {stats['Draw']} ({stats['Draw']/NUM_GAMES*100:.2f}%)")
    print("-" * 40)
    print(f"‚è±Ô∏è Total Time: {total_time:.1f}s")
    print(f"‚ö° Speed: {NUM_GAMES/total_time:.2f} games/sec")
    print("="*40)

# Run the benchmark right away.
run_benchmark()

üöÄ Starting Benchmark: 1000 games on cuda...
üìñ Loaded Move Map: 1914 moves.


Simulating: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [01:40<00:00,  9.92it/s]


üìä BENCHMARK REPORT (1000 Games)
üèÜ WINS:   576 (57.60%)
‚ùå LOSSES: 1 (0.10%)
ü§ù DRAWS:  423 (42.30%)
----------------------------------------
‚è±Ô∏è Total Time: 100.8s
‚ö° Speed: 9.92 games/sec



