<a href="https://colab.research.google.com/github/bhuvana-ak/uplimit_projects/blob/main/uplimit_open_source_llms_week_2_Reference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install datasets -qq

In [None]:
!pip install -U git+https://github.com/huggingface/transformers
!pip install -U git+https://github.com/huggingface/trl.git

Collecting git+https://github.com/huggingface/transformers
  Cloning https://github.com/huggingface/transformers to /tmp/pip-req-build-affjwohk
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers /tmp/pip-req-build-affjwohk
  Resolved https://github.com/huggingface/transformers to commit f0b3ef9e2e6a76bd22091502899091b47ce7e930
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting git+https://github.com/huggingface/trl.git
  Cloning https://github.com/huggingface/trl.git to /tmp/pip-req-build-3d9rytd8
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/trl.git /tmp/pip-req-build-3d9rytd8
  Resolved https://github.com/huggingface/trl.git to commit 99225bb6d6997e88312159d65bce8373659fb1fc
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... 

In [None]:
# Install required libraries
!pip install -U peft
!pip install accelerate
!pip install bitsandbytes



In [None]:
from datasets import load_dataset

# Fetch the preference dataset (chosen / rejected conversations) by its Hub id.
dataset = load_dataset(path='mlabonne/orpo-dpo-mix-40k')

In [None]:
# Inspect one raw training record to see the dataset's fields and how the
# 'chosen' conversation is laid out (a list of {'content', 'role'} messages).
dataset["train"][0]

{'source': 'Airoboros',
 'chosen': [{'content': 'The setting is an otherworldly, yet eerily familiar, metropolis known as "Zephyria." It\'s a city suspended in the ether, floating amidst nebulous clouds of cosmic dust. The architecture here is surreal and alien, with buildings that twist and spiral like strands of DNA, reaching towards the cosmos. Streets are paved with luminescent cobblestones, casting soft hues of blues and purples, reflecting off iridescent structures. Strange vegetation, vibrant and bioluminescent, thrives everywhere, creating a stark contrast against the deep indigo sky.\n\nNow, immerse me in this ethereal journey through Zephyria.',
   'role': 'user'},
  {'content': "As you step onto the teleportation platform, there's a momentary sense of disorientation before your surroundings change abruptly. You find yourself standing on the outskirts of Zephyria, gazing at the sprawling metropolis that glows softly under the starlit canvas above. A gentle breeze, carrying hi

In [None]:
# Replace the 'train' split with a seeded shuffle of itself, cut to its first 100 rows
dataset['train'] = (
    dataset['train']
    .shuffle(seed=42)
    .select(range(100))
)



In [None]:
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from peft import LoraConfig, get_peft_model, PeftModel
from trl import ORPOTrainer, ORPOConfig, setup_chat_format


# Choose a base model
model_name = "meta-llama/Llama-3.2-1B-Instruct"  # or any other suitable model like "EleutherAI/gpt-neo-1.3B"


# Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Set padding token: fall back to the EOS token when the tokenizer defines none
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    # Take the id from the tokenizer rather than from model.config.eos_token_id:
    # the config value may be a list of several EOS ids (as in Llama 3.x Instruct
    # checkpoints), whereas pad_token_id has to be a single integer.
    model.config.pad_token_id = tokenizer.pad_token_id

# Define LoRA configuration
lora_config = LoraConfig(
    r=16,               # rank of the low-rank update matrices
    lora_alpha=32,      # scaling factor applied to the LoRA update
    lora_dropout=0.05,
    bias="none",        # leave bias terms untouched
    task_type="CAUSAL_LM"

)

# Apply LoRA to the model
model = get_peft_model(model, lora_config)

In [None]:
# Define max length
max_length = 2048  # or whatever your model's maximum is

def preprocess_dataset(example):
    """
    Reduce one raw preference record to plain-text prompt / chosen / rejected.

    Args:
        example (dict): A dataset row with a 'prompt' string and 'chosen' /
            'rejected' conversations, each a list of {'content', 'role'}
            messages (possibly empty).

    Returns:
        dict: 'prompt', 'chosen' and 'rejected' strings, each limited to at
        most `max_length` tokens. An empty conversation yields "".
    """
    tokenizer.truncation_side = "left"  # or "right", depending on your preference

    def clip(text):
        # Round-trip through the tokenizer purely to enforce the token budget.
        # add_special_tokens=False keeps BOS/EOS markers out of the decoded
        # text; otherwise they would be baked into the strings and show up
        # again (mid-sequence for the responses) when the trainer tokenizes.
        token_ids = tokenizer.encode(
            text,
            truncation=True,
            max_length=max_length,
            add_special_tokens=False,
        )
        return tokenizer.decode(token_ids)

    def final_reply(messages):
        # Conversations start with the user turn, so index 0 is the prompt and
        # identical for chosen and rejected. The response being compared is
        # the last message.
        return messages[-1]['content'] if messages else ""

    return {
        "prompt": clip(example['prompt']),
        "chosen": clip(final_reply(example['chosen'])),
        "rejected": clip(final_reply(example['rejected'])),
    }

# Run preprocess_dataset over every row of the training split
preprocessed_dataset = dataset['train'].map(function=preprocess_dataset)

In [None]:
# Define ORPO configuration
orpo_config = ORPOConfig(
    learning_rate=1e-5,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,   # 4 x 4 = 16 examples per optimizer step per device
    max_steps=100,                   # overrides num_train_epochs
    warmup_steps=10,
    gradient_checkpointing=True,     # trade compute for lower activation memory
    fp16=True,
    logging_steps=10,
    output_dir="./orpo_output",
    optim="adamw_torch",
    remove_unused_columns=False,
    max_length=max_length,
    max_prompt_length=512,
    report_to="none",
)

# Initialize ORPOTrainer.
# `model` has already been wrapped with LoRA via get_peft_model, so peft_config
# is deliberately not passed a second time. Handing the trainer both a PeftModel
# and a peft_config makes it rebuild the adapter wrapper, which leaves the
# notebook's `model` variable pointing at a different object than the one trained.
trainer = ORPOTrainer(
    model=model,
    args=orpo_config,
    train_dataset=preprocessed_dataset,
    processing_class=tokenizer,
)

# Train the model
trainer.train()

# Save the final model through the trainer so exactly the weights it trained
# are written, then store the tokenizer next to them.
trainer.save_model("./final_model")
tokenizer.save_pretrained("./final_model")

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
max_steps is given, it will override any value given in num_train_epochs
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
10,7.9495
20,8.0885
30,7.8981
40,8.1057
50,7.8291
60,8.0915
70,7.7815
80,7.5871
90,7.8207
100,7.9229


('./final_model/tokenizer_config.json',
 './final_model/special_tokens_map.json',
 './final_model/tokenizer.json')

In [None]:
# Merge base model with LoRA
def merge_lora_with_base_model(
    base_model_name: str,
    lora_model_path: str,
    output_path: str
):
    """
    Fold a trained LoRA adapter into its base model and write the result to disk.

    The base model's tokenizer is saved into the same directory so the output
    can be loaded on its own.

    Args:
        base_model_name (str): Name or path of the base model
        lora_model_path (str): Path to the trained LoRA model
        output_path (str): Where to save the merged model

    Returns:
        The merged model, as returned by `merge_and_unload()`.
    """
    # Both loads share the same options: half precision, automatic device placement.
    load_options = {"torch_dtype": torch.float16, "device_map": "auto"}

    print(f"Loading base model: {base_model_name}")
    backbone = AutoModelForCausalLM.from_pretrained(base_model_name, **load_options)

    print(f"Loading LoRA model from: {lora_model_path}")
    adapted = PeftModel.from_pretrained(backbone, lora_model_path, **load_options)

    print("Merging model weights...")
    merged = adapted.merge_and_unload()

    print(f"Saving merged model to: {output_path}")
    merged.save_pretrained(output_path)

    # The tokenizer comes from the base model and is stored with the merged weights
    AutoTokenizer.from_pretrained(base_model_name).save_pretrained(output_path)

    print("Model merging complete!")
    return merged

# Direct execution for Colab
# Using the same model name from your training code
BASE_MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"

# This should be the path where your LoRA model was saved
# In your training code, this was "./orpo_output"
LORA_PATH = "./orpo_output/checkpoint-100"

# Where to save the merged model
OUTPUT_PATH = "./merged_model"

try:
    merged_model = merge_lora_with_base_model(
        base_model_name=BASE_MODEL_NAME,
        lora_model_path=LORA_PATH,
        output_path=OUTPUT_PATH
    )
    print("\nModel merged successfully!")

    # Optional: Test the merged model
    tokenizer = AutoTokenizer.from_pretrained(OUTPUT_PATH)
    test_input = "Once Upon a time, there was a frog ..."
    inputs = tokenizer(test_input, return_tensors="pt").to(merged_model.device)

    with torch.no_grad():
        outputs = merged_model.generate(
            **inputs,
            max_length=50,           # total length, prompt tokens included
            num_return_sequences=1,
            do_sample=True,          # temperature only takes effect when sampling
            temperature=0.7,
            # Explicit pad id avoids the "Setting `pad_token_id` to `eos_token_id`" fallback
            pad_token_id=tokenizer.eos_token_id,
        )

    print("\nTest output:")
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))

except Exception as e:
    print(f"An error occurred during model merging: {str(e)}")
    # Re-raise so the cell visibly fails; later cells depend on ./merged_model
    # existing and would otherwise break with a less helpful error.
    raise

Loading base model: meta-llama/Llama-3.2-1B-Instruct
Loading LoRA model from: ./orpo_output/checkpoint-100
Merging model weights...
Saving merged model to: ./merged_model
Model merging complete!

Model merged successfully!


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



Test output:
Once Upon a time, there was a frog... who was not a frog. He was a curious and adventurous young man named Jack, who lived in a small village surrounded by lush green forests and sparkling rivers. Jack was a bit of an outcast among his peers, as he was always fascinated by


In [None]:
%%bash
# Stop at the first failing command instead of silently carrying on.
set -euo pipefail

# Clone only when the checkout is missing, so re-running this cell does not
# fail on "destination path already exists".
if [ ! -d lm-evaluation-harness ]; then
    git clone https://github.com/EleutherAI/lm-evaluation-harness
fi
cd lm-evaluation-harness
pip install -e .

Obtaining file:///content/lm-evaluation-harness
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Checking if build backend supports build_editable: started
  Checking if build backend supports build_editable: finished with status 'done'
  Getting requirements to build editable: started
  Getting requirements to build editable: finished with status 'done'
  Preparing editable metadata (pyproject.toml): started
  Preparing editable metadata (pyproject.toml): finished with status 'done'
Collecting evaluate (from lm_eval==0.4.5)
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting jsonlines (from lm_eval==0.4.5)
  Downloading jsonlines-4.0.0-py3-none-any.whl.metadata (1.6 kB)
Collecting pybind11>=2.6.2 (from lm_eval==0.4.5)
  Downloading pybind11-2.13.6-py3-none-any.whl.metadata (9.5 kB)
Collecting pytablewriter (from lm_eval==0.4.5)
  Downloading pytablewriter-1.2.0-py3-none-any.whl.metadata (37 kB)
Collecting ro

Cloning into 'lm-evaluation-harness'...


In [None]:
%%bash
# Score the merged model on HellaSwag with lm-evaluation-harness (Hugging Face
# backend, float32 weights, auto-detected batch size); results are written
# under hellaswag_test.
lm_eval \
    --model hf \
    --model_args pretrained=./merged_model,dtype=float \
    --tasks hellaswag \
    --batch_size auto:4 \
    --device cuda \
    --output_path hellaswag_test

Passed argument batch_size = auto:4.0. Detecting largest batch size
Determined largest batch size: 64
Passed argument batch_size = auto:4.0. Detecting largest batch size
Determined largest batch size: 64
hf (pretrained=./merged_model,dtype=float), gen_kwargs: (None), limit: None, num_fewshot: None, batch_size: auto:4 (64,64,64,64,64)
|  Tasks  |Version|Filter|n-shot| Metric |   |Value |   |Stderr|
|---------|------:|------|-----:|--------|---|-----:|---|-----:|
|hellaswag|      1|none  |     0|acc     |↑  |0.4500|±  |0.0050|
|         |       |none  |     0|acc_norm|↑  |0.6068|±  |0.0049|



2024-10-24 11:20:56.148489: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-10-24 11:20:56.169632: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-10-24 11:20:56.175979: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-10-24:11:20:59,248 INFO     [__main__.py:279] Verbosity set to INFO
2024-10-24:11:21:04,884 INFO     [__init__.py:459] The tag 'arc_ca' is already registered as a group, this tag will not be registered. This may affect tasks you want to call.
2024-10-24:11:21:04,904 INFO     [__init__.py:459] The tag 'arc_ca' is already registered as a group, this tag will n

In [None]:
from huggingface_hub import create_repo

# exist_ok=True makes this cell safe to re-run: without it a second run fails
# because the repository already exists.
create_repo(repo_id="uonyeka-llama-3.2.Instruct", repo_type="model", exist_ok=True)

RepoUrl('https://huggingface.co/uonyeka/uonyeka-llama-3.2.Instruct', endpoint='https://huggingface.co', repo_type='model', repo_id='uonyeka/uonyeka-llama-3.2.Instruct')

In [None]:
from google.colab import userdata

# torch_dtype="auto" keeps the dtype the checkpoint was saved in (the merge step
# loads and saves in float16). The default load upcasts to float32, which doubles
# the size of the upload without adding any precision.
model = AutoModelForCausalLM.from_pretrained("./merged_model", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained("./merged_model")

# Upload the tokenizer as well so the Hub repository can be loaded on its own;
# the model push stays last so its CommitInfo remains the cell output.
tokenizer.push_to_hub("uonyeka-llama-3.2.Instruct", token=userdata.get('HF_TOKEN'))
model.push_to_hub("uonyeka-llama-3.2.Instruct", token=userdata.get('HF_TOKEN'))

model.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/uonyeka/uonyeka-llama-3.2.Instruct/commit/da3c52a7a0bb9a2e936bd7882c0fa95d32214897', commit_message='Upload LlamaForCausalLM', commit_description='', oid='da3c52a7a0bb9a2e936bd7882c0fa95d32214897', pr_url=None, pr_revision=None, pr_num=None)