<a href="https://colab.research.google.com/github/robbarto2/GenAI-Foundations/blob/main/Before_Instruction_Fine_Tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install transformers
!pip install peft
!pip install torch
!pip install datasets


Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (

Import the necessary libraries

In [2]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
from datasets import Dataset, DatasetDict

import os
# Set the WANDB_DISABLED flag for this process.
# NOTE(review): presumably this keeps the Hugging Face Trainer from reporting to
# Weights & Biases — confirm the installed transformers version still honors it.
os.environ["WANDB_DISABLED"] = "true"

Log in to Hugging Face with a secure token

In [3]:
from google.colab import userdata
from huggingface_hub import login

# Read the access token from the Colab secret store
# (change "HF_TOKEN" if your secret is stored under a different name).
hf_token = userdata.get("HF_TOKEN")

# Authenticate only when a non-empty token came back; otherwise stop right here
# with an explicit error instead of failing later during a download.
if hf_token:
    login(token=hf_token)
    print("Successfully logged into Hugging Face!")
else:
    raise ValueError("Hugging Face token not found in Colab secrets.")

Successfully logged into Hugging Face!


Load the model and tokenizer

In [10]:
from transformers import AutoTokenizer

model_name = "meta-llama/Llama-3.2-1B-Instruct"  # Adjust to an available model

# The login cell reads the access token from Colab secrets into `hf_token`; no
# visible cell exports it to the process environment, so os.getenv("HF_TOKEN")
# on its own is typically None here. Prefer the environment variable when it is
# set and otherwise reuse the token that was already used to log in.
auth_token = os.getenv("HF_TOKEN") or hf_token

# Download (or load from cache) the causal LM weights and the matching tokenizer.
model = AutoModelForCausalLM.from_pretrained(model_name, token=auth_token)
tokenizer = AutoTokenizer.from_pretrained(model_name, token=auth_token)

print("Tokenizer loaded successfully!")

config.json:   0%|          | 0.00/877 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

Tokenizer loaded successfully!


Inference function

In [11]:
def generate_response(model, tokenizer, instruction, input_text="", max_new_tokens=100):
    """
    Generate a response from a causal language model for an instruction prompt.

    The prompt is laid out as "### Instruction: ... / ### Input: ... / ### Response:"
    and only the text produced after the prompt is returned.

    Args:
        model: Model exposing `.device` and `.generate(...)`
        tokenizer: Tokenizer that can be called on the prompt and exposes
            `decode`, `pad_token_id` and `eos_token_id`
        instruction: The instruction for the model
        input_text: Optional input text
        max_new_tokens: Upper bound on the number of generated tokens
            (defaults to 100, the value previously hard-coded)

    Returns:
        str: The generated response with surrounding whitespace removed. If the
        model starts another "### Response:" section, only the text before it
        is returned.
    """
    response_marker = "### Response:"

    # Format input to match training format
    prompt = f"### Instruction: {instruction}\n### Input: {input_text}\n### Response:"

    # Tokenize the prompt
    inputs = tokenizer(prompt,
                      return_tensors="pt",
                      truncation=True,
                      max_length=512,
                      add_special_tokens=True)

    # Move inputs to the same device as model
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    # Remember how many tokens belong to the prompt so they can be dropped from
    # the output instead of being located again by string matching.
    prompt_length = inputs["input_ids"].shape[-1]

    # Some tokenizers define no pad token; passing None makes `generate` warn and
    # pick a fallback on every call, so choose the EOS token explicitly.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    # Generate response
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,  # Adjust based on desired response length
        num_return_sequences=1,
        temperature=0.1,     # Adjust for response creativity (0.0-1.0)
        do_sample=True,
        top_p=0.95,         # Nucleus sampling
        top_k=50,           # Top-k sampling
        pad_token_id=pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

    # Decode only the newly generated tokens. Splitting the full decoded text on
    # the marker picks the wrong segment whenever the instruction or input text
    # itself contains "### Response:".
    generated_ids = outputs[0][prompt_length:]
    response = tokenizer.decode(generated_ids, skip_special_tokens=True)

    # Keep only the first answer if the model goes on to write another section.
    return response.split(response_marker)[0].strip()


In [12]:
def post_process_response(response):
    """
    Make sure a model response mentions every section of the reply template.

    The response is trimmed of surrounding whitespace. For each required section
    name that does not occur anywhere in the text, a placeholder line of the form
    "<section>: [Details Missing]" is appended on a new line.

    Args:
        response (str): The raw response from the model.

    Returns:
        str: The trimmed response followed by a placeholder line for every
        section name that was absent.
    """
    required_headings = (
        "Order Status",
        "Reference",
        "Action Taken",
        "Need immediate assistance?",
    )

    text = response.strip()
    for heading in required_headings:
        # Plain substring match: a section counts as present if its name appears anywhere.
        if heading in text:
            continue
        text = text + f"\n{heading}: [Details Missing]"

    return text



In [13]:
# Sample customer inquiry used to exercise the model
instruction = "Respond to this customer inquiry following our format"
input_text = "I haven't received my order #12345 yet. It's been a week."

# Run inference, then append a placeholder for any template section the answer lacks
raw_response = generate_response(model, tokenizer, instruction, input_text)
final_response = post_process_response(raw_response)

# Show each labelled part: the heading on one line, its content on the next
for heading, body in (
    ("\nINSTRUCTION:", instruction),
    ("\nINPUT:", input_text),
    ("\nFINAL RESPONSE:", final_response),
):
    print(heading)
    print(body)

Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.



INSTRUCTION:
Respond to this customer inquiry following our format

INPUT:
I haven't received my order #12345 yet. It's been a week.

FINAL RESPONSE:
We apologize for the delay in your order. We are currently processing it and expect to ship it out within the next 3-5 business days. You can track the status of your order by logging into your account on our website. If you have any further questions or concerns, please don't hesitate to contact us. Thank you for your patience and understanding.

### Example Response:

Dear [Customer Name],

We apologize for the delay in your order. We are currently processing it and expect to ship it
Order Status: [Details Missing]
Reference: [Details Missing]
Action Taken: [Details Missing]
Need immediate assistance?: [Details Missing]


In [None]:
# Check training logs/loss
# No visible cell in this notebook creates `trainer`, so guard the lookup: on a
# fresh kernel this cell would otherwise stop "Run All" with a NameError.
if "trainer" in globals():
    print("Training logs from the last few steps:")
    print(trainer.state.log_history[-5:])  # Shows the last 5 training logs
else:
    print("No `trainer` is defined - run a fine-tuning cell before inspecting training logs.")

Training logs from the last few steps:
[{'train_runtime': 9.3961, 'train_samples_per_second': 4.257, 'train_steps_per_second': 1.064, 'total_flos': 119789573898240.0, 'train_loss': 2.6833833694458007, 'epoch': 10.0, 'step': 10}]
