# gen_with_unsloth_top5.ipynb
Local Unsloth-based generator that saves Top-5 engine candidates + XML answer per sample.

In [None]:
%%capture
!pip install chess
!pip install unsloth

In [None]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Generate chess SFT data with Unsloth and SAVE Top-5 candidates per sample.

import os
import json
import random
from pathlib import Path
from typing import List, Dict

import argparse
import chess
import chess.pgn
import chess.engine
from tqdm import tqdm


In [None]:
%%capture
!apt-get install stockfish

In [None]:
import chess
import chess.engine
from datasets import load_dataset, Dataset
from typing import List, Dict
from tqdm import tqdm
import torch , gc

# Configuration
# Path of the UCI engine executable. It is handed to process_dataset(), which
# forwards it to get_best_moves() where the engine process is launched.
# NOTE(review): presumably where `apt-get install stockfish` places the binary — confirm on your machine.
engine_path = "/usr/games/stockfish"  # Update this path if Stockfish lives elsewhere
# System message sent with every request in generate_answer(); it names the
# <reasoning> and <answer> tags that generate_answer() later checks for.
SYSTEM_PROMPT = (
    "You are a chess expert using reasoning. Use <reasoning> </reasoning> for your reasoning and <answer> </answer> tags."
)

def generate_prompt(fen, candidates, best_move):
  """Build the user prompt asking the model to compare candidate moves.

  Args:
    fen: Position in FEN notation; inserted verbatim into the prompt.
    candidates: Candidate moves in SAN. Either an already formatted string
      (e.g. "e4, d4, Nf3") or an iterable of moves, which is joined with ", ".
    best_move: The move the model is told is best; inserted verbatim.

  Returns:
    The full prompt text, including the required XML response format,
    the rules and a worked example.
  """
  # A string must be used as-is: slicing it (as an earlier version did with
  # [1:3]) cuts characters, not moves, and leaves a meaningless fragment in
  # the prompt. Any other iterable is rendered as a comma-separated list.
  if isinstance(candidates, str):
    candidates_text = candidates
  else:
    candidates_text = ", ".join(str(move) for move in candidates)

  prompt = f"""Analyze the following chess position and determine the single best move.

For each candidate move, give one concise sentence explaining its key strength or weakness.
Then, in 1–2 sentences, justify which move is objectively the best and why it is superior.

Position (FEN): {fen}
Candidate moves (SAN): {candidates_text}
Best move: {best_move}

Respond strictly in this XML format:
<reasoning>
[Concise comparison and explanation of all moves, followed by why the best move is superior.]
</reasoning>
<answer>
[Best move in SAN notation]
</answer>

Rules:
- Only include the provided candidate moves in your reasoning.
- The <answer> tag must contain exactly one SAN move — the best move.
- Do NOT include any text outside the XML tags.

Example:
<reasoning>
d4 controls the center and opens lines for the light-squared bishop.
Nc3 develops a piece but doesn't contest the center as directly.
Nf3 is solid but less ambitious. h3 and c3 are slow moves that fail to fight for space.
Therefore, d4 is best because it develops control and supports classical central strategy.
</reasoning>
<answer>
d4
</answer>
  """
  return prompt

def get_best_moves(fen: str, engine_path: str, num_moves: int = 5, depth: int = 18) -> List[str]:
    """Return the engine's top moves for a position, best first, in SAN.

    Args:
        fen: Position to analyse, in FEN notation.
        engine_path: Path of the UCI engine executable to launch.
        num_moves: Number of principal variations requested (MultiPV).
        depth: Search depth limit passed to the engine.

    Returns:
        A list of SAN strings, one per variation the engine reported. It may
        hold fewer than ``num_moves`` entries and is empty when the position
        has no legal move.
    """
    board = chess.Board(fen)

    # Nothing to rank when the side to move has no legal move; skip the cost
    # of spawning an engine process.
    if not any(board.legal_moves):
        return []

    engine = chess.engine.SimpleEngine.popen_uci(engine_path)
    try:
        infos = engine.analyse(
            board,
            chess.engine.Limit(depth=depth),
            multipv=num_moves,
        )
        moves_data = []
        for info in infos:
            # An info entry is not guaranteed to carry a principal variation;
            # ignore those instead of failing the whole position.
            pv = info.get("pv")
            if pv:
                moves_data.append(board.san(pv[0]))
        return moves_data
    finally:
        # Always terminate the engine subprocess, even if analysis raised.
        engine.quit()
def load_model(
    model_name: str = "unsloth/Qwen2.5-Coder-7B-Instruct",
    max_seq_length: int = 2048,
    load_in_4bit: bool = True,
):
    """Load a model/tokenizer pair through Unsloth and switch it to inference mode.

    Called without arguments it loads the same checkpoint with the same
    settings as before; the parameters only make those values overridable.

    Args:
        model_name: Checkpoint identifier passed to ``from_pretrained``.
        max_seq_length: Maximum sequence length passed to ``from_pretrained``.
        load_in_4bit: Whether to request 4-bit loading.

    Returns:
        ``(model, tokenizer)`` as returned by ``FastLanguageModel.from_pretrained``.
    """
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_name,
        max_seq_length=max_seq_length,
        dtype=None,  # left to Unsloth; presumably auto-detected — confirm in Unsloth docs
        load_in_4bit=load_in_4bit,
    )
    FastLanguageModel.for_inference(model)
    return model, tokenizer
def generate_answer(model, tokenizer, prompt: str, max_retries=3) -> str:
    """Sample a completion for ``prompt``, retrying until it has the expected tags.

    A completion is accepted when it contains both a ``<reasoning>...</reasoning>``
    and an ``<answer>...</answer>`` pair (case-insensitive substring check).
    Each retry samples again with the temperature raised by 0.1, starting at 0.7.

    Args:
        model: Model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer exposing ``apply_chat_template``, ``decode`` and
            the pad/eos token ids.
        prompt: User message content.
        max_retries: Maximum number of sampling attempts; must be at least 1.

    Returns:
        The stripped completion text. If no attempt produced all tags, the
        text of the last attempt is returned anyway.

    Raises:
        ValueError: If ``max_retries`` is less than 1 (previously this fell
            through to an unbound local variable).
    """
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    # The prompt does not change between attempts, so format and tokenize once.
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": prompt},
    ]
    formatted_prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs['input_ids'].shape[1]

    answer = ""
    for attempt in range(max_retries):
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=512,
                temperature=0.7 + (attempt * 0.1),  # Increase temp on retries
                do_sample=True,
                top_p=0.9,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
            )

        # Drop the echoed prompt tokens and keep only the newly generated text.
        answer = tokenizer.decode(
            outputs[0][prompt_length:],
            skip_special_tokens=True
        ).strip()

        # Check if tags exist
        lowered = answer.lower()
        has_reasoning = "<reasoning>" in lowered and "</reasoning>" in lowered
        has_answer = "<answer>" in lowered and "</answer>" in lowered

        if has_reasoning and has_answer:
            print(f"✓ Valid answer generated (attempt {attempt + 1})")
            print(answer)
            return answer

        print(f"✗ Invalid answer on attempt {attempt + 1} - missing tags")
        if not has_reasoning:
            print("  Missing: <reasoning> tags")
        if not has_answer:
            print("  Missing: <answer> tags")

        if attempt < max_retries - 1:
            print(f"  Retrying...")

    # Every attempt failed validation: hand back the last completion as-is.
    print(f"  Failed after {max_retries} attempts. Returning anyway.")
    print(answer)
    return answer

In [None]:
# Load the train split of the position dataset from the Hugging Face Hub and
# display the first row; each row exposes a 'FEN' and an 'Evaluation' field.
# Note: the generator cell further down loads the same split again itself.
dataset = load_dataset("czovekboti/chessdata", split="train")
dataset[0]

Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).


chessData.csv:   0%|          | 0.00/795M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/12958035 [00:00<?, ? examples/s]

{'FEN': 'rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR b KQkq - 0 1',
 'Evaluation': '-10'}

In [None]:
#torch.cuda.empty_cache()
#gc.collect()

77

In [None]:
def process_dataset(dataset, engine_path: str, model, tokenizer, max_examples: int = None):
    """Turn dataset positions into training records.

    For every row: read its FEN, ask the engine for the top 5 moves, build the
    prompt, generate the model answer and collect a record. A row that raises
    any exception is reported on stdout and skipped; rows for which the engine
    returns no move are skipped silently.

    Args:
        dataset: Iterable of rows indexable by ``"FEN"`` (a string, or a list
            whose first element is used). Needs ``select`` and ``len`` only
            when ``max_examples`` is given.
        engine_path: Path of the UCI engine executable.
        model: Model forwarded to ``generate_answer``.
        tokenizer: Tokenizer forwarded to ``generate_answer``.
        max_examples: Process at most this many leading rows; ``None`` means all.

    Returns:
        A list of dicts with keys ``"fen"``, ``"top_5_moves"``, ``"prompt"``
        and ``"answer"``.
    """
    results = []

    # Limit dataset if needed
    if max_examples is None:
        dataset_slice = dataset
    else:
        dataset_slice = dataset.select(range(min(max_examples, len(dataset))))

    for idx, data in enumerate(dataset_slice):
        try:
            fen = data["FEN"]
            fen = fen[0] if isinstance(fen, list) else fen
            top_5_moves = get_best_moves(fen, engine_path, num_moves=5)

            if not top_5_moves:
                continue

            # The engine lists its variations best-first.
            best_move = top_5_moves[0]
            candidates = ", ".join(top_5_moves)

            prompt = generate_prompt(fen, candidates, best_move)

            print(f"\n{'='*60}")
            print(f"Example {idx}")
            print(f"{'='*60}")
            print(prompt)
            print(f"{'='*60}\n")

            answer = generate_answer(model, tokenizer, prompt)
            results.append({
                "fen": fen,
                "top_5_moves": top_5_moves,
                # Stored so each answer can be traced back to the exact prompt
                # that produced it; the driver's example printout reads it.
                "prompt": prompt,
                "answer": answer,
            })

        except Exception as e:
            # Best effort: one bad position must not abort the whole run.
            print(f"\nError at example {idx}: {e}")
            continue

    return results


In [None]:

from unsloth import FastLanguageModel
def save_results(results: List[Dict], output_name: str = "chess_training_data"):
    """Persist the generated records in two formats.

    Writes a Hugging Face dataset directory at ``./<output_name>`` and a
    pretty-printed JSON file ``<output_name>.json``, then reports the count.

    Args:
        results: Records to store.
        output_name: Base name shared by the dataset directory and JSON file.
    """
    hf_directory = f"./{output_name}"
    json_file = f"{output_name}.json"

    # Hugging Face dataset on disk
    Dataset.from_list(results).save_to_disk(hf_directory)

    # Human-readable JSON copy
    serialized = json.dumps(results, indent=2)
    with open(json_file, "w") as handle:
        handle.write(serialized)

    print(f"Saved {len(results)} examples to {output_name}")

if __name__ == "__main__":
    # End-to-end driver: load the model, load the positions, generate the
    # records, save them, then print the first record as a sanity check.
    print("="*60)
    print("CHESS TRAINING DATA GENERATOR")
    print("="*60)
    # Load model
    print("\n[1/4] Loading AI model...")
    model, tokenizer = load_model()
    # Load dataset
    print("\n[2/4] Loading chess dataset...")
    dataset = load_dataset("czovekboti/chessdata", split="train")
    print(f"Loaded {len(dataset)} positions")

    # Process dataset
    print("\n[3/4] Generating training data...")
    results = process_dataset(
        dataset,
        engine_path,
        model,
        tokenizer,
        #============================================================
        # None processes every row of the split; set an integer to cap the run.
        max_examples=None  # < num of board examples
        #===========================================================
    )

    # Save final results
    print("\n[4/4] Saving final results...")
    save_results(results, "chess_training_data_final")

    print("\n" + "="*60)
    print(f"COMPLETE! Generated {len(results)} training examples")
    print("="*60)

    # Display example
    if results:
        example = results[0]
        print("\nEXAMPLE OUTPUT:")
        print("-"*60)
        print(f"FEN: {example['fen']}")
        print(f"Top 5 Moves: {example['top_5_moves']}")
        # Records are not guaranteed to carry the prompt; indexing it
        # directly raised KeyError after the whole run had finished.
        prompt_text = example.get("prompt")
        if prompt_text is not None:
            print(f"\nPrompt:\n{prompt_text[:200]}...")
        print(f"\nAnswer:\n{example['answer']}")
        print("-"*60)

NotImplementedError: Unsloth currently only works on NVIDIA GPUs and Intel GPUs.