# Train a Chess LLM on xLAN Datasets/Validate Model

In [1]:
import torch
from transformers import AutoModelForCausalLM
from src.train import ChessTrainer
from peft import LoraConfig, get_peft_model

## Dataset

- Use the datasets in `./data/training` to train your model.
- Use `DownloadUpload.ipynb` to download a dataset from Hugging Face.
- Use `dataPreProcessing.ipynb` to create your own dataset.

### CUDA
Check if CUDA is available.

In [2]:
# Ask PyTorch whether a CUDA device is usable, then echo the answer.
cuda_available = torch.cuda.is_available()
print(cuda_available)

True


### Config

### Datasets

In [3]:
# Tokenized training file handed to ChessTrainer as `input_file`.
dataset = "data/tokens/kasparov_max_768_bos.tok"

## HYPERPARAMETERS
SEED = 42  # seeds torch's RNG before the LoRA adapter is created, so the adapter init is repeatable
BATCH_SIZE = 16  # use the largest batch size that fits on your GPU
SAVE_STEPS = 2000  # how often to save a checkpoint
LOGGING_STEPS = 50  # how often to validate model and publish it to Weights & Biases
EPOCHS = 5  # how many epochs to train for - how many times to go through the dataset
LEARNING_RATE = 0.0001  # learning rate - how fast the model should learn
SKIP_VALIDATION = True  # skip validation and only save model checkpoints
WEIGHTS_AND_BIASES_ENABLED = True  # enable logging to Weights & Biases
USE_FP16 = True  # enable mixed precision training (GPU only)
XLANPLUS_ENABLED = True  # use xLanPlus tokenizer

PEFT_BASE_MODEL = "Leon-LLM/Leon-Chess-350k-Plus"  # base model to be loaded (from hugging face) for fine-tuning

## CONFIG FOR FINE-TUNING
R = 128  # LoRA rank
LORA_ALPHA = 32  # LoRA scaling numerator; per the PEFT docs the update is scaled by lora_alpha / r (0.25 here)
LORA_DROPOUT = 0.1  # dropout applied inside the LoRA layers

peft_config = LoraConfig(  # https://huggingface.co/docs/peft/v0.10.0/en/package_reference/lora#peft.LoraConfig
    task_type="CAUSAL_LM",  # This does not need to be changed for our use case
    inference_mode=False,  # don't change this for training, only later for inference
    r=R,  # lower means faster training, but might underfit because of less complexity (experiments don't show that training time increases, which is rather weird)
    lora_alpha=LORA_ALPHA,  # scaling factor that adjusts the magnitude of the combined result (balances the pretrained model's knowledge and the new task-specific adaptation)
    lora_dropout=LORA_DROPOUT,
    # use_rslora=True, # might work better (not tried yet)
)

# Seed right before the adapter is built: the base weights are loaded from the
# checkpoint, but the freshly added LoRA weights are randomly initialised.
torch.manual_seed(SEED)
peft_model = get_peft_model(AutoModelForCausalLM.from_pretrained(PEFT_BASE_MODEL), peft_config)

## MODEL NAME
output_dir = "models/"
# Use the last path segment of the base model id. Indexing with [1] raised
# IndexError for ids without a namespace and picked the wrong segment for
# nested local paths such as "./models/foo/checkpoint-1".
base_model_name = PEFT_BASE_MODEL.rstrip("/").split("/")[-1]
model_name = f"{base_model_name}_LoRA_kasparov_{EPOCHS}E_{LEARNING_RATE}LR"

## SAVING MODEL
output_dir = f"{output_dir}{model_name}"

print(f"Training {model_name} on {dataset} with batch size = {BATCH_SIZE} for {EPOCHS} epochs")
print(f"Saving model to {output_dir}")



Training Leon-Chess-350k-Plus_LoRA_kasparov_5E_0.0001LR on data/tokens/kasparov_max_768_bos.tok with batch size = 16 for 5 epochs
Saving model to models/Leon-Chess-350k-Plus_LoRA_kasparov_5E_0.0001LR




### Train

In [4]:
import os

# Sanity check: show where the kernel is running and what sits next to it,
# since the dataset/model paths in this notebook are relative.
working_dir = os.getcwd()
print(working_dir)
print(os.listdir(working_dir))

/teamspace/studios/this_studio/ChessOps
['.git', '.gitignore', 'README.md', 'add_bos.py', 'backup', 'checkDuplicates.py', 'create_tokens.py', 'data', 'dataPreProcessing.ipynb', 'env.yaml', 'finetune.ipynb', 'finetune.py', 'inference.ipynb', 'output', 'pgn_to_xlan.py', 'pipeline.ipynb', 'pipeline.png', 'pipeline.yaml', 'remove_lines.py', 'requirements.txt', 'src', 'statistics', 'streamlit', 'wandb', 'models']


In [5]:
# Resolve the notation label from the tokenizer flag set in the config cell.
if XLANPLUS_ENABLED:
    notation = "xLANplus"
else:
    notation = "xLAN"

# Collect every ChessTrainer argument in one place so the run setup can be
# inspected before the (long-running) training call.
trainer_kwargs = {
    "batch_size": BATCH_SIZE,
    "learning_rate": LEARNING_RATE,
    "epochs": EPOCHS,
    "input_file": dataset,
    "output_dir": output_dir,
    "save_steps": SAVE_STEPS,
    "logging_steps": LOGGING_STEPS,
    "skip_validation": SKIP_VALIDATION,
    "weight_and_biases": WEIGHTS_AND_BIASES_ENABLED,
    "use_FP16": USE_FP16,
    "notation": notation,
    "peft": peft_model,
}
trainer = ChessTrainer(**trainer_kwargs)

trainer.train()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mschmila7[0m ([33mleon-llm[0m). Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
50,3.3313
100,2.1396
150,1.9056
200,1.7637
250,1.6706
300,1.5721
350,1.4075
400,1.2604
450,1.1861
500,1.1595




VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train/epoch,▁▂▂▃▃▄▄▅▆▆▇▇██
train/global_step,▁▂▂▃▃▄▄▅▆▆▇▇██
train/grad_norm,▇▁▅▁▄▂▆▆▇▆▇█▇
train/learning_rate,█▇▇▆▆▅▄▄▃▃▂▂▁
train/loss,█▄▃▃▃▂▂▁▁▁▁▁▁

0,1
total_flos,4383443753994240.0
train/epoch,5.0
train/global_step,665.0
train/grad_norm,0.31985
train/learning_rate,0.0
train/loss,1.1146
train_loss,1.58668
train_runtime,381.3096
train_samples_per_second,27.825
train_steps_per_second,1.744


# Push Model to Hugging Face

In [6]:
# Display the model name built in the config cell before pushing;
# the push cell below passes it to push_to_hub as the repository name.

model_name

'Leon-Chess-350k-Plus_LoRA_kasparov_5E_0.0001LR'

In [7]:
from huggingface_hub import notebook_login

# Uncomment to authenticate if this session has no cached Hugging Face token:
# notebook_login()

# Upload the adapter under the name built in the config cell and show the
# returned commit info as the cell output.
push_result = peft_model.push_to_hub(model_name)
push_result



adapter_model.safetensors:   0%|          | 0.00/18.9M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/larscarl/Leon-Chess-350k-Plus_LoRA_kasparov_5E_0.0001LR/commit/b09751d5b499ec45e698d36b11c59bb7d832bb66', commit_message='Upload model', commit_description='', oid='b09751d5b499ec45e698d36b11c59bb7d832bb66', pr_url=None, pr_revision=None, pr_num=None)

# Load Model from Disk (fine-tuned LoRA model)

In [None]:
# Load a base checkpoint from disk and attach a saved adapter to it.
# Reference: https://huggingface.co/docs/transformers/main/en/peft
model_dir = "./Leon-LLM-Models/V45_GPT2_19k_20E_xLANplus/checkpoint-24000"  # base model checkpoint directory
# Location of the saved adapter — presumably a directory with the adapter config and weights; verify.
peft_model_id = "./Leon-LLM-Models/V55_V45_GPT2_19k_20E_xLANplus_19k_1E_r128/V55_V45_GPT2_19k_20E_xLANplus_19k_1E_r128"
loaded_model = AutoModelForCausalLM.from_pretrained(model_dir)
# Attaches the adapter to loaded_model; the inference cell below uses loaded_model.
loaded_model.load_adapter(peft_model_id)


In [None]:
from src.generate_prediction import generate_prediction

# Put the model into evaluation mode and move it to the GPU when one is available.
# The former `loaded_model.inference_mode = True` line was dropped: it only set
# an ad-hoc attribute on the model object, while `inference_mode` is a
# LoraConfig field, so it looked like a mode switch without being one.
loaded_model.eval()
loaded_model.to("cuda" if torch.cuda.is_available() else "cpu")

# Opening moves used as the prompt. Named `prompt` rather than `input` so the
# builtin input() is not shadowed for the rest of the kernel session.
prompt = "Pd2d4 Pd7d5 Pc2c4 Pc7c6"

# NOTE(review): the checkpoint path mentions xLANplus while token_path points at
# xlan_tokens.json — confirm this is the intended token file.
generate_prediction(input=prompt, num_tokens_to_generate=3, model=loaded_model, token_path="./src/tokenizer/xlan_tokens.json")[0]

# Inference (for fine-tuned LoRA model)

In [None]:
# load model from hugging face

from peft import PeftModel
from src.generate_prediction import generate_prediction

# Hub repository holding the fine-tuned adapter. The "larscarl" namespace is
# taken from the commit URL printed by the push_to_hub cell; change it if the
# adapter was pushed under a different user or organisation.
HUB_ADAPTER_REPO = f"larscarl/{model_name}"

# Load the base model, then the *trained* adapter on top of it. The previous
# get_peft_model(...) call wrapped the base model in a brand-new, untrained
# adapter, so this cell never evaluated the fine-tuned weights.
base_model = AutoModelForCausalLM.from_pretrained(PEFT_BASE_MODEL)
peft_model = PeftModel.from_pretrained(base_model, HUB_ADAPTER_REPO)

# Evaluation mode, and onto the GPU when one is available. The former
# `peft_model.inference_mode = True` line only set an ad-hoc attribute on the
# model object, so it was removed.
peft_model.eval()
peft_model.to("cuda" if torch.cuda.is_available() else "cpu")

# Opening moves used as the prompt. Named `prompt` rather than `input` so the
# builtin input() is not shadowed for the rest of the kernel session.
prompt = "Pd2d4 Pd7d5 Pc2c4 Pc7c6"

# NOTE(review): XLANPLUS_ENABLED is True in the config cell while token_path
# points at xlan_tokens.json — confirm this is the intended token file.
generate_prediction(input=prompt, num_tokens_to_generate=3, model=peft_model, token_path="./src/tokenizer/xlan_tokens.json")[0]