In [3]:
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from peft import LoraConfig, get_peft_model
import ollama
import json

In [5]:
# Pick the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Hardware details are only reported for the first CUDA device (index 0)
if device.type == "cuda":
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
    total_memory_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"Available GPU memory: {total_memory_gb:.2f} GB")


Using device: cpu


In [4]:
# 2. Load the LLaMA 2 7B model and tokenizer using Ollama
model_name = "llama2:7b"
ollama.pull(model_name)  # Ensure the model is downloaded

# Get model details from Ollama
model_details = ollama.show(model_name)
# default=str keeps the dump from failing on values json cannot encode natively
# (for example timestamps or typed response objects).
print("Model details:", json.dumps(model_details, indent=2, default=str))

# Recover the model location from the Modelfile's FROM instruction.
# The line is split on its first whitespace run only, so a path that itself
# contains spaces is kept whole, and the keyword is matched case-insensitively.
model_path = None
modelfile_text = model_details['modelfile'] if 'modelfile' in model_details else ''
for line in (modelfile_text or '').splitlines():
    parts = line.split(None, 1)
    if len(parts) == 2 and parts[0].upper() == 'FROM':
        model_path = parts[1].strip()
        break

# Fail early with a clear message instead of passing None to the loaders later on
if not model_path:
    raise ValueError("Could not determine model path from Ollama details")

print(f"Using model path: {model_path}")

Model details: {
  "license": "LLAMA 2 COMMUNITY LICENSE AGREEMENT\t\nLlama 2 Version Release Date: July 18, 2023\n\n\"Agreement\" means the terms and conditions for use, reproduction, distribution and \nmodification of the Llama Materials set forth herein.\n\n\"Documentation\" means the specifications, manuals and documentation \naccompanying Llama 2 distributed by Meta at ai.meta.com/resources/models-and-\nlibraries/llama-downloads/.\n\n\"Licensee\" or \"you\" means you, or your employer or any other person or entity (if \nyou are entering into this Agreement on such person or entity's behalf), of the age \nrequired under applicable laws, rules or regulations to provide legal consent and that \nhas legal authority to bind your employer or such other person or entity if you are \nentering in this Agreement on their behalf.\n\n\"Llama 2\" means the foundational large language models and software and \nalgorithms, including machine-learning model code, trained model weights, \ninference

In [8]:
# Display the raw response returned by ollama.show for manual inspection
model_details

{'license': 'LLAMA 2 COMMUNITY LICENSE AGREEMENT\t\nLlama 2 Version Release Date: July 18, 2023\n\n"Agreement" means the terms and conditions for use, reproduction, distribution and \nmodification of the Llama Materials set forth herein.\n\n"Documentation" means the specifications, manuals and documentation \naccompanying Llama 2 distributed by Meta at ai.meta.com/resources/models-and-\nlibraries/llama-downloads/.\n\n"Licensee" or "you" means you, or your employer or any other person or entity (if \nyou are entering into this Agreement on such person or entity\'s behalf), of the age \nrequired under applicable laws, rules or regulations to provide legal consent and that \nhas legal authority to bind your employer or such other person or entity if you are \nentering in this Agreement on their behalf.\n\n"Llama 2" means the foundational large language models and software and \nalgorithms, including machine-learning model code, trained model weights, \ninference-enabling code, training-en

In [None]:
# Load the model and tokenizer using the local path
# NOTE(review): model_path is taken from the FROM line of the Ollama Modelfile;
# presumably that is a GGUF blob rather than a Hugging Face checkpoint
# directory. Confirm from_pretrained can actually read it.
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)

# LLaMA tokenizers ship without a padding token, so any call that pads
# (e.g. padding="max_length" during tokenization) would raise. Fall back to
# the end-of-sequence token when no pad token is defined.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Half precision is only used on GPU; CPU runs stay in float32.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32
)


In [None]:
# Ensure the model is on the correct device.
# The model is loaded with device_map="auto", which already places the weights
# and may spread them over several devices. Moving such a dispatched model
# conflicts with that placement, so only call .to() when there is no placement
# map or the map names a single device.
placement = getattr(model, "hf_device_map", None)
if placement is None or len(set(placement.values())) <= 1:
    model = model.to(device)


In [None]:
# Prepare your dataset
dataset = load_dataset("csv", data_files="your_dataset.csv")

# A single CSV file is loaded as one "train" split, but the Trainer is given
# tokenized_datasets["validation"] as its eval set. Carve out a hold-out split
# when none exists; the 10% size is an arbitrary default to tune, and the fixed
# seed keeps the split reproducible across kernel restarts.
if "validation" not in dataset:
    holdout_split = dataset["train"].train_test_split(test_size=0.1, seed=42)
    dataset["train"] = holdout_split["train"]
    dataset["validation"] = holdout_split["test"]

def tokenize_function(examples):
    """Tokenize the "text" column of a batch.

    Every example is truncated or padded to exactly 512 tokens.
    """
    return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)

# Drop the original columns so only the tokenizer outputs remain
tokenized_datasets = dataset.map(tokenize_function, batched=True, remove_columns=dataset["train"].column_names)


In [None]:
# Set up LoRA configuration: adapters are attached to the q_proj and v_proj modules
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    # Adapter hyperparameters
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the base model with the LoRA adapters.
# Note: this rebinds `model`, so re-running the cell wraps the already-wrapped model.
model = get_peft_model(model, lora_config)


In [None]:
# 6. Set up training arguments
training_args = TrainingArguments(
    # Output locations and logging cadence
    output_dir="./results",
    logging_dir="./logs",
    logging_steps=10,
    # Checkpoint and evaluate once per epoch
    save_strategy="epoch",
    evaluation_strategy="epoch",
    # Optimisation schedule
    num_train_epochs=3,
    learning_rate=1e-4,
    warmup_steps=500,
    weight_decay=0.01,
    # Batching: 4 sequences per device, accumulated over 4 steps before each update
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=4,  # Adjust based on your GPU memory
    # Mixed precision is enabled only when running on GPU
    fp16=device.type == "cuda",
)


In [None]:

# 7. Create the Trainer
# The collator is built with mlm=False, i.e. without masked-language-model masking
lm_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=lm_collator,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
)


In [None]:

# 8. Start the fine-tuning process
# Runs training with the Trainer configured in the previous cell.
trainer.train()



In [None]:
# 9. Save the fine-tuned LoRA model
# The model and the tokenizer are written to the same directory
lora_output_dir = "./fine_tuned_llama_2_7b_lora"
model.save_pretrained(lora_output_dir)
tokenizer.save_pretrained(lora_output_dir)

# Report how much GPU memory is allocated at the end (GPU runs only)
if device.type == "cuda":
    allocated_gb = torch.cuda.memory_allocated() / 1e9
    print(f"Final GPU memory usage: {allocated_gb:.2f} GB")