In [1]:
%%capture
# Installs Unsloth, Xformers (Flash Attention) and all other packages!
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps xformers trl peft accelerate bitsandbytes


In [2]:
from unsloth import FastLanguageModel
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from datasets import Dataset
import pandas as pd

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
df = pd.read_csv('/content/drive/MyDrive/PreProces/2100_elo.csv')

In [None]:
df

Unnamed: 0,AN,ECO,moves_list
0,1. d4 d5 2. Nf3 Nf6 3. Bf4 c6 4. e3 Bg4 5. Be2...,D02,"'1.d4', 'd5', '2.Nf3', 'Nf6', '3.Bf4', 'c5', '..."
1,1. e4 { [%eval 0.31] } 1... c5 { [%eval 0.37] ...,B21,"'1.e4', 'c5', '2.d4', 'cxd4', '3.c3', 'dxc3', ..."
2,1. Nc3 d5 2. b3 d4 3. Ne4 f5 4. Ng3 f4 5. Ne4 ...,A00,"'1.b4', 'Nf6', '2.Bb2', 'g6', '3.c4', 'Bg7', '..."
3,1. e4 e6 2. d4 d5 3. Nd2 Nf6 4. Bd3 c5 5. exd5...,C05,"'1.e4', 'e6', '2.d4', 'd5', '3.Nd2', 'Nf6', '4..."
4,1. e3 { [%eval 0.1] } 1... g6 { [%eval 0.31] }...,A00,"'1.b4', 'Nf6', '2.Bb2', 'g6', '3.c4', 'Bg7', '..."
...,...,...,...
237086,1. d4 Nf6 2. c4 e6 3. Nc3 b6 4. Qc2 d6 5. e4 N...,E20,"'1.d4', 'Nf6', '2.c4', 'e6', '3.Nc3', 'Bb4', '..."
237087,1. d4 d5 2. Nf3 Nf6 3. g3 c5 4. Bg2 Nc6 5. O-O...,D02,"'1.d4', 'd5', '2.Nf3', 'Nf6', '3.Bf4', 'c5', '..."
237088,1. e4 { [%eval 0.18] } 1... c5 { [%eval 0.33] ...,B46,"'1.e4', 'c5', '2.Nf3', 'e6', '3.d4', 'cxd4', '..."
237089,1. e4 c5 2. d4 cxd4 3. c3 dxc3 4. Nxc3 e6 5. N...,B21,"'1.e4', 'c5', '2.d4', 'cxd4', '3.c3', 'dxc3', ..."


In [None]:
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

In [None]:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/llama-3-8b-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

model = FastLanguageModel.get_peft_model(
    model,
    r = 64, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)
EOS_TOKEN = tokenizer.eos_token

==((====))==  Unsloth: Fast Llama patching release 2024.6
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.3.0+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. Xformers = 0.0.26.post1. FA = False.
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Unsloth 2024.6 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [None]:
chess_prompt = """Analyze the following chess game played between [Player 1] (White) and [Player 2] (Black).


### Opening Code (ECO):
{}
### Opening variants in Game Notation (PGN)
{}

** explain the key strategic decisions made by both players. Discuss the turning points and missed opportunities.

**Use what you learned from the opening, identify:**

* **Opening Principles:** How well did the opening choices follow sound opening principles?
* **Tactical Opportunities:** Were there any tactical opportunities missed during the game?
* **Positional Advantages:** How did the players maneuver their pieces to gain positional advantages?
* **Endgame Technique:** How effectively did the players convert their advantages in the endgame?
### For this game in PGN Notation
{}
**Finally, based on your analysis, what are the key takeaways for improving one's chess game?**
"""
def formatting_prompts_func(examples):
    game_notations = examples['AN']
    eco_codes = examples['ECO']
    openings = examples['moves_list']
    texts = []
    for  eco_code,opening,game_notation in zip(eco_codes,openings,game_notations):
        text = chess_prompt.format(eco_code,opening,game_notation) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts }



In [None]:
dataset = Dataset.from_pandas(df)
dataset = dataset.map(formatting_prompts_func, batched=True)
del df

Map:   0%|          | 0/237091 [00:00<?, ? examples/s]

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 1,  # Reduced number of processor cores for preprocessing
    packing = False,  # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 1,  # Reduced batch size to save memory
        gradient_accumulation_steps = 8,  # Increased accumulation steps to compensate for smaller batch size
        warmup_steps = 5,
        max_steps = 45,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",  # Using memory-efficient optimizer
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)

trainer_stats = trainer.train()




Map:   0%|          | 0/237091 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 237,091 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 1 | Gradient Accumulation steps = 8
\        /    Total batch size = 8 | Total steps = 45
 "-____-"     Number of trainable parameters = 167,772,160


Step,Training Loss
1,1.0256
2,1.2104
3,1.096
4,0.8599
5,0.966
6,0.8816
7,0.7933
8,0.7198
9,0.6232
10,0.5453


In [7]:
# Push the model to the Hugging Face Hub
model.push_to_hub("acunamartin1426/llama3-chess-finetune", use_auth_token="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")

# Push the tokenizer to the same repository
tokenizer.push_to_hub("acunamartin1426/llama3-chess-finetune", use_auth_token="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")



Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.65G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.05G [00:00<?, ?B/s]



README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/acunamartin1426/llama3-chess-finetune/commit/74a2c7f3d22e9be42e0bc8a59dc9ad5525970eba', commit_message='Upload tokenizer', commit_description='', oid='74a2c7f3d22e9be42e0bc8a59dc9ad5525970eba', pr_url=None, pr_revision=None, pr_num=None)

In [8]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("acunamartin1426/llama3-chess-finetune")
model = AutoModelForCausalLM.from_pretrained("acunamartin1426/llama3-chess-finetune")

tokenizer_config.json:   0%|          | 0.00/50.7k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/464 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


config.json:   0%|          | 0.00/1.23k [00:00<?, ?B/s]

`low_cpu_mem_usage` was None, now set to True since model is quantized.


model.safetensors.index.json:   0%|          | 0.00/132k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.65G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.05G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/172 [00:00<?, ?B/s]

In [9]:
# Define your input PGN string
pgn = "1. e4 e5 2. Nf3 Nc6 3. Bb5 a6"

# Tokenize the input PGN string
inputs = tokenizer(pgn, return_tensors="pt")

# Generate the next move using the model
outputs = model.generate(inputs["input_ids"], max_length=100)

# Decode the generated move
generated_move = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Print the generated move
print("Generated move:", generated_move)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated move: 1. e4 e5 2. Nf3 Nc6 3. Bb5 a6 4. Ba4 Nf6 5. O-O Bc5 6. Re1 d6 7. c3 O-O 8. h3 Ne8 9. Nbd2 c5 10. Nf1 Ng6 11. Ne3 Ne8 12. d4 cxd4 13. cxd4 Bb
