In [None]:
%%capture
# Install the fine-tuning stack into the kernel's environment. The %%capture
# magic above swallows this cell's output, so pip errors will not be visible.
# NOTE(review): none of these packages are version-pinned, so a fresh run may
# resolve different releases than the ones that produced the saved outputs.
%pip install accelerate peft bitsandbytes transformers trl mlflow
# Force-reinstall bitsandbytes from a prebuilt "multi-backend-refactor" wheel.
# NOTE(review): the wheel filename is tagged `win_amd64`, while a later cell
# imports `google.colab` -- confirm this wheel is installable on the intended
# runtime; a failure here would be hidden by %%capture.
%pip install --force-reinstall 'https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_multi-backend-refactor/bitsandbytes-0.44.1.dev0-py3-none-win_amd64.whl'


In [None]:
import gc
import os

import mlflow
import torch
from datasets import load_dataset
from peft import LoraConfig
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
    logging,
)
from trl import SFTConfig, SFTTrainer

In [None]:
# Hub id of the checkpoint that is fine-tuned and later reloaded for its tokenizer.
base_model = "meta-llama/Llama-3.2-1B-Instruct"

# Only the "train" split of the olympiad dataset is loaded.
dataset = load_dataset(
    path="midnightdove-dev/olimpiada",
    split="train",
)

README.md:   0%|          | 0.00/24.0 [00:00<?, ?B/s]

output_converted.csv:   0%|          | 0.00/1.08M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/155 [00:00<?, ? examples/s]

In [None]:
from google.colab import userdata

# Hugging Face token is read from the Colab secret store (never hard-coded).
access_token = userdata.get("HF_TOKEN")

# 4-bit NF4 weight quantization with float16 compute; double quantization is off.
compute_dtype = torch.float16
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False,
)

# Load the quantized base model. The device map assigns the root module ("")
# to device index 0 -- presumably the single Colab GPU.
model_load_kwargs = {
    "token": access_token,
    "quantization_config": quant_config,
    "device_map": {"": 0},
}
model = AutoModelForCausalLM.from_pretrained(base_model, **model_load_kwargs)

# Config tweaks applied before training: no generation cache, tensor-parallel
# degree recorded as 1.
model.config.use_cache = False
model.config.pretraining_tp = 1

config.json:   0%|          | 0.00/877 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

In [None]:
# Tokenizer for the base checkpoint, fetched with the same Hub token as the model.
tokenizer = AutoTokenizer.from_pretrained(
    base_model,
    token=access_token,
    trust_remote_code=True,
)

# Training-time padding setup: pad on the right and reuse EOS as the pad token.
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

In [None]:
# LoRA adapter settings: rank 64, alpha 16, 10% dropout, no bias terms trained.
peft_params = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
)

# Runs produced by the trainer below are logged under this MLflow experiment.
mlflow.set_experiment("MLflow PEFT Tutorial")

# SFTConfig carries both the generic training arguments and the SFT-specific
# options. Passing `dataset_text_field` / `packing` / `max_seq_length` directly
# to SFTTrainer is deprecated (see the warning in this cell's saved output), so
# they live here instead.
training_params = SFTConfig(
    output_dir="./results",
    num_train_epochs=15,
    # 2 sequences per step x 2 accumulation steps = 4 sequences per optimizer update.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    save_steps=25,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,  # -1: run length is governed by num_train_epochs
    # NOTE(review): with lr_scheduler_type="constant" the warmup ratio likely has
    # no effect ("constant_with_warmup" is the variant that warms up) -- confirm.
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
    run_name="llama-3.2-1B-Instruct-olympiads",
    report_to="mlflow",
    # SFT-specific options (previously passed to SFTTrainer directly).
    # NOTE(review): only the `response` column is used as training text; the
    # dataset also has a `prompt` column (used for the MLflow signature later)
    # -- confirm that prompts are intentionally excluded from training.
    dataset_text_field="response",
    packing=False,
    # The maximum sequence length was previously passed as None, i.e. the
    # library default; it is therefore left unset here.
)

# Drop unreachable Python objects first so that the CUDA cache flush can
# actually return their memory before the trainer allocates.
gc.collect()
torch.cuda.empty_cache()

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_params,
    # `processing_class` is the current name of the former `tokenizer` argument.
    processing_class=tokenizer,
    args=training_params,
)

trainer.train()

2024/11/10 01:59:15 INFO mlflow.tracking.fluent: Experiment with name 'MLflow PEFT Tutorial' does not exist. Creating a new experiment.

Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/155 [00:00<?, ? examples/s]

Step,Training Loss
25,2.7509
50,2.5613
75,2.4491
100,2.3644
125,2.334
150,2.2642
175,2.2783
200,2.1892
225,2.1538
250,2.1678


TrainOutput(global_step=585, training_loss=2.0982474107008713, metrics={'train_runtime': 3706.9736, 'train_samples_per_second': 0.627, 'train_steps_per_second': 0.158, 'total_flos': 1.3912491030700032e+16, 'train_loss': 2.0982474107008713, 'epoch': 15.0})

In [None]:
# Local directory that receives the trained adapter and the tokenizer files.
new_model = "Llama-3.2-1B-Instruct-olympiads"
trainer.model.save_pretrained(new_model)

# Save through the notebook-level tokenizer (the object handed to the trainer)
# rather than the deprecated `trainer.tokenizer` accessor, which emits a
# deprecation warning in this cell's saved output.
tokenizer.save_pretrained(new_model)

# NOTE(review): the saved output shows this pushing to a Hub repo named
# "results" (matching output_dir="./results"), not to `new_model` -- confirm
# that this is the intended repository name.
trainer.push_to_hub()

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/27.3M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.56k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/midnightdove-dev/results/commit/f4d399008f5334847d6196c8025b752813b1897b', commit_message='End of training', commit_description='', oid='f4d399008f5334847d6196c8025b752813b1897b', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
# mlflow is already installed by the first cell and imported with the other
# top-level imports, so no in-cell `%pip install` / re-import is needed here.
from mlflow.models import infer_signature

# One dataset row serves as the example input/output for the model signature.
sample = dataset[1]

# MLflow infers schema from the provided sample input/output/params
signature = infer_signature(
    model_input=sample["prompt"],
    model_output=sample["response"],
    # Parameters are saved with default values if specified
    params={"max_new_tokens": 256, "repetition_penalty": 1.15, "return_full_text": False},
)

# Get the ID of the MLflow Run that was automatically created above
last_run_id = mlflow.last_active_run().info.run_id

# Save a tokenizer without padding because it is only needed for training.
# The Hub token is passed for consistency with the training tokenizer load.
tokenizer_no_pad = AutoTokenizer.from_pretrained(
    base_model,
    token=access_token,
    add_bos_token=True,
)

# If you interrupt the training, uncomment the following line to stop the MLflow run
# mlflow.end_run()

# NOTE(review): this template is defined but not referenced by any cell shown
# here (the prediction cell uses a hard-coded prompt) -- confirm whether it
# should be used or dropped.
prompt_template = """Generează un subiect de olimpiadă de {subject}, clasa a {grade}-a, nivel de faza {stage}!"""

# Re-open the training run so the LoRA parameters and the model artifact are
# attached to it rather than to a new run.
with mlflow.start_run(run_id=last_run_id):
    mlflow.log_params(peft_params.to_dict())
    mlflow.transformers.log_model(
        transformers_model={"model": trainer.model, "tokenizer": tokenizer_no_pad},
        signature=signature,
        artifact_path="model",  # This is a relative path to save model files within MLflow run
    )




2024/11/10 03:01:18 INFO mlflow.transformers: Overriding save_pretrained to False for PEFT models, following the Transformers behavior. The PEFT adaptor and config will be saved, but the base model weights will not and reference to the HuggingFace Hub repository will be logged instead.
2024/11/10 03:01:19 INFO mlflow.transformers: Skipping saving pretrained model weights to disk as the save_pretrained argumentis set to False. The reference to the HuggingFace Hub repository meta-llama/Llama-3.2-1B-Instruct will be logged instead.


README.md:   0%|          | 0.00/41.7k [00:00<?, ?B/s]

LICENSE.txt:   0%|          | 0.00/7.71k [00:00<?, ?B/s]

2024/11/10 03:01:20 INFO mlflow.transformers: A local checkpoint path or PEFT model is given as the `transformers_model`. To avoid loading the full model into memory, we don't infer the pip requirement for the model. Instead, we will use the default requirements, but it may not capture all required pip libraries for the model. Consider providing the pip requirements explicitly.


In [None]:
# Reload the model that was logged to the training run, via MLflow's pyfunc API.
model_uri = f"runs:/{last_run_id}/model"
mlflow_model = mlflow.pyfunc.load_model(model_uri)

# Smoke-test generation with a single hard-coded prompt; keep the first result.
test_prompt = "Generează un subiect de olimpiadă de Biologie, clasa a 9-a, nivel de faza locala"
generations = mlflow_model.predict(test_prompt)
ans = generations[0]
ans


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


'.\n\nSursa situdinare:\n\nLa 86% din sursele situdinare se găsesc în terenuri agricole. Peste 70% sunt râpeşti (floare), peste 20% pește (brânză, salată), peste 5% plante (morcănele, prigipanele), peste 4% animale (porci, măcaroapile).  \nO altă orice leguminoasă este: urda, cais, cozonac; fructe dulce (prună, mure, ciuperci); arin, soia, migdalia, cîrnaț; epretă, iardhie, mămăsina, porumb; grâu, mărgărescă, păluni, soia, tarhiu; sorghe, burtă, câmpulungiu, mămăsina, vinară. \n\nSubstanţele organice:\n1) Saturant - sustena care ajută la reducerea concentrului de oxigen în'