In [15]:
# Install the fine-tuning dependencies into the environment of the running kernel.
# NOTE(review): the four PyPI packages below are not version-pinned, so a fresh
# install may resolve to different releases than the ones this notebook ran with.
%pip install einops
%pip install peft
%pip install trl
%pip install tensorboard
# bitsandbytes comes from a prebuilt Windows (win_amd64) wheel, fixed at version 0.41.1.
%pip install -q -U https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl

Note: you may need to restart the kernel to use updated packages.




In [78]:
import os
from dataclasses import dataclass, field
from typing import Optional
import json

import torch
from datasets import load_dataset
from peft import LoraConfig
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    AutoTokenizer,
    TrainingArguments,
)
from tqdm.notebook import tqdm

from trl import SFTTrainer

In [127]:
# Read the records stored under the top-level "data" key of ./train.json into a
# single Dataset (split='all' returns everything as one split).
dataset = load_dataset("json", data_files="./train.json", field='data', split='all')

# Hold out 10% of the rows for evaluation. The seed is fixed so that the
# train/test membership is identical after every kernel restart; without it the
# shuffle differs per run and evaluation losses are not comparable between runs.
dataset = dataset.train_test_split(test_size=0.1, seed=42)


Downloading and preparing dataset json/default to C:/Users/danm/.cache/huggingface/datasets/json/default-9ad94e224e55e78c/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset json downloaded and prepared to C:/Users/danm/.cache/huggingface/datasets/json/default-9ad94e224e55e78c/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. Subsequent calls will reuse this data.


In [128]:
# Display the DatasetDict summary (split names, feature columns, row counts).
dataset

DatasetDict({
    train: Dataset({
        features: ['text'],
        num_rows: 717166
    })
    test: Dataset({
        features: ['text'],
        num_rows: 79686
    })
})

In [129]:
# Load the tokenizer from the local phi-2 folder; trust_remote_code=True allows
# custom tokenizer code shipped in that folder to be executed.
tokenizer = AutoTokenizer.from_pretrained("../models/phi-2", trust_remote_code=True)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [130]:
# Extend the vocabulary with the "<|im_start|>" marker and a dedicated "<PAD>"
# token, then make "<PAD>" the padding token.
tokenizer.add_tokens(["<|im_start|>", "<PAD>"])
tokenizer.pad_token = "<PAD>"

# Register "<|im_end|>" as the end-of-sequence special token. The call's return
# value (number of tokens added) is the cell output.
tokenizer.add_special_tokens({"eos_token": "<|im_end|>"})

1

In [131]:
# 4-bit NF4 quantization settings used when loading the base model.
# The compute dtype is bfloat16 so that de-quantized matmuls run in the same
# precision as the rest of the pipeline: TrainingArguments enables bf16=True and
# the merge/inference cells load the model with torch.bfloat16. The previous
# 'float16' value mixed two half-precision formats within one training run.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)

In [132]:
# Load the base model from the local phi-2 folder with the 4-bit quantization
# config, letting device_map='auto' decide device placement.
# The deprecated use_auth_token=True argument was dropped: the checkpoint is
# read from a local directory, so no Hugging Face Hub credentials are involved.
model = AutoModelForCausalLM.from_pretrained(
        "../models/phi-2", 
        quantization_config=bnb_config, 
        device_map = 'auto',
        trust_remote_code=True,
    )
# Keep the model's end-of-sequence id in sync with the tokenizer's eos token.
model.config.eos_token_id = tokenizer.eos_token_id

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [133]:
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model

# Ready the quantized model for k-bit training, with gradient checkpointing on.
model = prepare_model_for_kbit_training(
    model,
    use_gradient_checkpointing=True,
)

# LoRA hyper-parameters: rank-32 adapters on the attention projections and the
# "dense" layers, with "lm_head" and "embed_tokens" listed in modules_to_save.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=32,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "dense"],
    modules_to_save=["lm_head", "embed_tokens"],
)

# Wrap the model with the adapter configuration.
model = get_peft_model(model, lora_config)

# Turn off the generation KV cache on the model config for training.
model.config.use_cache = False

In [119]:
# Alternative LoRA configuration: same rank as `lora_config` but with
# lora_alpha=64, dropout 0.05, and the fc1/fc2 layers added to the targets.
# NOTE(review): no visible cell reads the `peft_config` variable (SFTTrainer is
# given `lora_config`), so this looks like a leftover experiment — confirm and
# remove, or wire it into the trainer.
peft_config = LoraConfig(
    r=32,
    lora_alpha=64,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['q_proj', 'k_proj', 'v_proj','dense','fc1','fc2',]
)

In [None]:
# Pick the GPU when CUDA is available, otherwise fall back to the CPU.
# NOTE(review): `torch_device` is not referenced by any other visible cell; the
# inference cell passes the literal "cuda" instead — confirm whether it should use this.
torch_device = "cuda" if torch.cuda.is_available() else "cpu"

In [141]:
from transformers import TrainingArguments, Trainer

# dataset-specific parameters
bs=1     # per-device batch size for training
bs_eval=16    # per-device batch size for evaluation
ga_steps=16  # gradient accumulation steps
lr=0.00002  # learning rate
epochs=2

# Optimizer steps in one pass over the training split: each step consumes
# bs * ga_steps examples.
steps_per_epoch=len(dataset["train"])//(bs*ga_steps)

# Training configuration. The evaluation and checkpoint intervals are derived
# from steps_per_epoch and clamped to at least 1, because the integer divisions
# evaluate to 0 when the training split is small, which is not a usable interval.
args = TrainingArguments(
    output_dir="../models/phi-2-mlb",
    per_device_train_batch_size=bs,
    per_device_eval_batch_size=bs_eval,
    evaluation_strategy="steps",
    logging_steps=1,        # log the training loss after every optimizer step
    eval_steps=max(1, steps_per_epoch//2),    # 2 evals per epoch
    save_steps=max(1, steps_per_epoch//100),  # roughly 100 checkpoints per epoch
    save_total_limit=3,     # keep at most 3 checkpoints on disk
    gradient_accumulation_steps=ga_steps,
    num_train_epochs=epochs,
    lr_scheduler_type="constant",
    optim="paged_adamw_32bit",      # val_loss will go nan with paged_adamw_8bit
    learning_rate=lr,
    group_by_length=False,
    bf16=True,
    ddp_find_unused_parameters=False,
)



In [146]:
# Assemble the supervised fine-tuning trainer: the model, tokenizer and
# training arguments first, then the data splits and how to read them.
# Each example is taken from the "text" column, capped at 2048 tokens, and
# examples are not packed together.
trainer = SFTTrainer(
    model=model,
    args=args,
    tokenizer=tokenizer,
    peft_config=lora_config,
    train_dataset=dataset['train'],
    eval_dataset=dataset['test'],
    dataset_text_field="text",
    max_seq_length=2048,
    packing=False,
)

Loading cached processed dataset at C:\Users\danm\.cache\huggingface\datasets\json\default-9ad94e224e55e78c\0.0.0\0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51\cache-7fe9337e62cd197f.arrow
Loading cached processed dataset at C:\Users\danm\.cache\huggingface\datasets\json\default-9ad94e224e55e78c\0.0.0\0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51\cache-d37a561bab1eb45d.arrow


In [147]:
# Start fine-tuning; with logging_steps=1 a loss line is emitted for every optimizer step.
trainer.train()

  0%|          | 0/89644 [00:00<?, ?it/s]

{'loss': 1.8227, 'learning_rate': 2e-05, 'epoch': 0.0}
{'loss': 1.7298, 'learning_rate': 2e-05, 'epoch': 0.0}
{'loss': 1.7161, 'learning_rate': 2e-05, 'epoch': 0.0}
{'loss': 1.7308, 'learning_rate': 2e-05, 'epoch': 0.0}
{'loss': 1.7457, 'learning_rate': 2e-05, 'epoch': 0.0}
{'loss': 1.7392, 'learning_rate': 2e-05, 'epoch': 0.0}
{'loss': 1.6812, 'learning_rate': 2e-05, 'epoch': 0.0}
{'loss': 1.6583, 'learning_rate': 2e-05, 'epoch': 0.0}
{'loss': 1.6329, 'learning_rate': 2e-05, 'epoch': 0.0}
{'loss': 1.5237, 'learning_rate': 2e-05, 'epoch': 0.0}
{'loss': 1.6116, 'learning_rate': 2e-05, 'epoch': 0.0}
{'loss': 1.5893, 'learning_rate': 2e-05, 'epoch': 0.0}
{'loss': 1.5667, 'learning_rate': 2e-05, 'epoch': 0.0}
{'loss': 1.5725, 'learning_rate': 2e-05, 'epoch': 0.0}
{'loss': 1.5247, 'learning_rate': 2e-05, 'epoch': 0.0}
{'loss': 1.5128, 'learning_rate': 2e-05, 'epoch': 0.0}
{'loss': 1.4649, 'learning_rate': 2e-05, 'epoch': 0.0}
{'loss': 1.4071, 'learning_rate': 2e-05, 'epoch': 0.0}
{'loss': 1



{'loss': 0.4009, 'learning_rate': 2e-05, 'epoch': 0.01}
{'loss': 0.373, 'learning_rate': 2e-05, 'epoch': 0.01}
{'loss': 0.405, 'learning_rate': 2e-05, 'epoch': 0.01}
{'loss': 0.3918, 'learning_rate': 2e-05, 'epoch': 0.01}
{'loss': 0.4515, 'learning_rate': 2e-05, 'epoch': 0.01}
{'loss': 0.4158, 'learning_rate': 2e-05, 'epoch': 0.01}
{'loss': 0.4069, 'learning_rate': 2e-05, 'epoch': 0.01}
{'loss': 0.3862, 'learning_rate': 2e-05, 'epoch': 0.01}
{'loss': 0.3694, 'learning_rate': 2e-05, 'epoch': 0.01}
{'loss': 0.4079, 'learning_rate': 2e-05, 'epoch': 0.01}
{'loss': 0.3902, 'learning_rate': 2e-05, 'epoch': 0.01}
{'loss': 0.4093, 'learning_rate': 2e-05, 'epoch': 0.01}
{'loss': 0.4045, 'learning_rate': 2e-05, 'epoch': 0.01}
{'loss': 0.3966, 'learning_rate': 2e-05, 'epoch': 0.01}
{'loss': 0.3875, 'learning_rate': 2e-05, 'epoch': 0.01}
{'loss': 0.4038, 'learning_rate': 2e-05, 'epoch': 0.01}
{'loss': 0.4133, 'learning_rate': 2e-05, 'epoch': 0.01}
{'loss': 0.3973, 'learning_rate': 2e-05, 'epoch': 

In [77]:
# Write the trainer's current model to the training output folder.
# NOTE(review): this cell's execution count (77) is lower than that of the cell
# that builds `trainer` (146), so it was last run against an earlier trainer
# object — re-run in order before relying on the saved files.
trainer.save_model("../models/phi-2-mlb/")

In [None]:
# Continue an interrupted run. Passing True makes the Trainer pick up the most
# recent checkpoint-N folder inside args.output_dir, restoring optimizer and
# step state along with the weights. The previous value pointed at the output
# folder itself, which is not a checkpoint folder written by the Trainer, so
# the step counter and optimizer state could not be restored from it.
trainer.train(resume_from_checkpoint=True)

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
from peft import PeftModel
import torch

# ---- Locations -------------------------------------------------------------
base_path="../models/phi-2"                           # base model
adapter_path="../models/phi-2-mlb/checkpoint-448"     # folder holding adapter_model.safetensors
# NOTE(review): save_to is the same folder that training writes into, so the
# merged weights end up next to adapter/checkpoint files — confirm this is intended.
save_to="../models/phi-2-mlb/"                        # destination for the merged model

# ---- Tokenizer -------------------------------------------------------------
# Apply the same vocabulary additions that were made before training.
tokenizer = AutoTokenizer.from_pretrained(base_path)
tokenizer.add_tokens(["<|im_start|>", "<PAD>"])
tokenizer.pad_token = "<PAD>"
tokenizer.add_special_tokens({"eos_token": "<|im_end|>"})

# ---- Base model ------------------------------------------------------------
base_model = AutoModelForCausalLM.from_pretrained(
    base_path,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
base_model.config.eos_token_id = tokenizer.eos_token_id

# ---- Default generation settings ("Llama precise" preset) -------------------
generation_config = GenerationConfig(
    do_sample=True,
    temperature=0.7,
    top_p=0.1,
    top_k=40,
    repetition_penalty=1.18,
    max_new_tokens=100,
    pad_token_id=tokenizer.pad_token_id,
    eos_token_id=tokenizer.eos_token_id,
)

# ---- Attach the LoRA adapter and fold it into the base weights --------------
model = PeftModel.from_pretrained(base_model, adapter_path).merge_and_unload()

# ---- Persist merged model, tokenizer and generation defaults ----------------
model.save_pretrained(save_to, safe_serialization=True, max_shard_size='4GB')
tokenizer.save_pretrained(save_to)
generation_config.save_pretrained(save_to)

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
from peft import PeftModel
import torch

# Folder containing the merged model, its tokenizer and generation config.
model_path="../models/phi-2-mlb/"   

# Sample prompt: an "Instruct:" prefix followed by a JSON-encoded game situation.
prompt="Instruct: {\"input\": {\"pitcher\": {\"id\": 460024, \"name\": \"luke hochevar\"}, \"batter\": {\"id\": 110029, \"name\": \"bobby abreu\"}, \"p_throws\": \"R\", \"stand\": \"L\", \"inning_topbot\": \"Top\", \"inning\": 1, \"outs_when_up\": 1, \"on_1b\": \"\", \"on_2b\": {\"id\": 435062, \"name\": \"howie kendrick\"}, \"on_3b\": \"\", \"home_score\": 0, \"away_score\": 0}}? \n"

model = AutoModelForCausalLM.from_pretrained(
        model_path,    
        torch_dtype=torch.bfloat16,
        device_map="auto"
    )
tokenizer = AutoTokenizer.from_pretrained(model_path) 

# Move the encoded prompt to the device the model was actually placed on,
# instead of assuming a CUDA device is present.
input_tokens = tokenizer(prompt, return_tensors="pt").to(model.device)
output_tokens = model.generate(**input_tokens)

# Number of prompt tokens, read from the input_ids tensor (batch, sequence).
# Indexing the encoding with an integer only works for fast tokenizers.
prompt_length = input_tokens["input_ids"].shape[1]

# Decode only the newly generated tokens, i.e. everything after the prompt.
output = tokenizer.decode(
    output_tokens[0][prompt_length:],
    skip_special_tokens=True
    )               

print(output)