#### Start

In [2]:
# Mount Google Drive at /content/drive; later cells read the HF token file and
# the training data from paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [8]:
# Read the Hugging Face access token from a text file on Drive so the secret
# is never written into the notebook; surrounding whitespace is stripped.
import os
with open('/content/drive/MyDrive/hf_token.txt') as token_file:
    raw_token = token_file.read()
    hf_token = raw_token.strip()

### Finetuning

In [None]:
!pip install datasets bitsandbytes peft

In [6]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, Trainer, TrainingArguments
from datasets import load_dataset, Dataset
import torch
import bitsandbytes as bnb
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
# Working directory on the mounted Drive: the training text file is read from
# here, and the training results and fine-tuned model are written beneath it.
folder_path = "/content/drive/MyDrive/data_argi_llm/kvk_pop/LAKSHADWEEP/lakshadweep/lakshadweep"

In [9]:
# Run configuration
# - new_model:    directory name (under folder_path) for the fine-tuned output
# - model_name:   Hub id of the base checkpoint that gets fine-tuned
# - dataset_path: plain-text training file inside folder_path
new_model = "Llama-2-7b-chat-ft"
model_name = "NousResearch/Llama-2-7b-chat-hf"
dataset_path = os.path.join(folder_path, "data_ft.txt")

In [None]:
# Load the tokenizer for the base checkpoint, authenticating with the Hub token.
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
# The dataset is tokenized with padding='max_length', which requires a pad
# token. Fall back to EOS only when the checkpoint's tokenizer defines none.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# 4-bit NF4 quantization with float16 compute and no double quantization.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False
)

# Load the quantized base model. `token=` replaces the deprecated
# `use_auth_token=` and matches the tokenizer call above.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    token=hf_token
)
# Prepare the quantized model for k-bit training before attaching adapters.
model = prepare_model_for_kbit_training(model)

# Configure LoRA parameters and apply LoRA to the model
lora_config = LoraConfig(
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)
# The KV cache is switched off for training (the run uses gradient checkpointing).
model.config.use_cache = False


In [None]:
# Read the training file with the "text" loader and take its "train" split;
# each record exposes a 'text' field that format_dataset consumes.
dataset = load_dataset(
    "text",
    data_files=dataset_path,
    split="train",
)

# Function to format the dataset for training
def format_dataset(example, max_length=512, tok=None):
    """Tokenize one raw text record into fixed-length causal-LM features.

    The text is tokenized once, padded/truncated to ``max_length``. Labels are
    a copy of the input ids, except that positions the attention mask marks as
    padding are set to -100 so the loss ignores them.

    Args:
        example: Dataset row containing a 'text' string.
        max_length: Length to pad/truncate to (512 by default, as before).
        tok: Tokenizer callable to use; defaults to the notebook-level
            ``tokenizer``. Must return 'input_ids' and 'attention_mask'.

    Returns:
        dict with 'input_ids', 'attention_mask' and 'labels' lists.
    """
    tok = tokenizer if tok is None else tok
    encoded = tok(
        example['text'],
        padding='max_length',
        truncation=True,
        max_length=max_length,
    )
    input_ids = list(encoded['input_ids'])
    attention_mask = list(encoded['attention_mask'])
    # Mask padding out of the loss; real tokens keep their id as the target.
    labels = [
        token_id if is_real else -100
        for token_id, is_real in zip(input_ids, attention_mask)
    ]
    return {
        'input_ids': input_ids,
        'attention_mask': attention_mask,
        'labels': labels,
    }

# Tokenize every record; the raw 'text' column is dropped so only the fields
# returned by format_dataset remain.
dataset = dataset.map(
    format_dataset,
    remove_columns=["text"],
)

In [None]:
# Hyperparameters for the fine-tuning run, grouped by concern.
training_args = TrainingArguments(
    # Where results are written and how progress is reported
    output_dir=os.path.join(folder_path, "results"),
    report_to="tensorboard",
    logging_steps=25,
    # NOTE(review): save_steps=0 presumably disables periodic checkpoints;
    # confirm against the installed transformers version.
    save_steps=0,
    # Schedule and batch geometry
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    gradient_checkpointing=True,
    # Mixed precision is switched off for both half-precision formats
    fp16=False,
    bf16=False,
    # Optimizer settings
    learning_rate=2e-4,
    weight_decay=0.001,
    max_grad_norm=0.3,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
)

# Wire the LoRA-wrapped model, the tokenized dataset and the tokenizer together.
trainer = Trainer(model=model, args=training_args, train_dataset=dataset, tokenizer=tokenizer)

# Run the fine-tuning loop; the returned training summary is the cell output.
trainer.train()


In [None]:
# Persist the trained (PEFT-wrapped) model and its tokenizer side by side in
# <folder_path>/<new_model>, so the directory can be reloaded on its own.
# NOTE(review): presumably only the LoRA adapter weights are written - confirm.
output_dir = os.path.join(folder_path, new_model)
trainer.model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

### Inference with the fine-tuned model

In [None]:
!pip install transformers pipeline

In [15]:
import logging

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

In [None]:
# Load the fine-tuned model and tokenizer with quantization
# NOTE: this rebinds `model_name` from the base Hub id to the local fine-tuned
# directory; re-run the configuration cell before re-running the training cells.
model_name = os.path.join(folder_path, new_model)  # Ensure this path is correct and contains required files

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Use the same 4-bit settings as the training cell (NF4, float16 compute).
# The bare `load_in_4bit=True` kwarg is deprecated and falls back to the
# default quantization type, which would not match the weights the adapter
# was trained against.
inference_bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False
)

# Load model with 4-bit quantization
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=inference_bnb_config,
    device_map="auto",  # same placement strategy as the training cell
    torch_dtype=torch.float16  # dtype for the non-quantized parts of the model
)

# Run text generation pipeline with our fine-tuned model
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)

# Function to generate response
def generate_response(prompt):
    """Generate text for ``prompt`` with the notebook-level ``pipe``.

    The prompt is wrapped in the "<s>[INST] ... [/INST]" template before it is
    sent to the pipeline. The 'generated_text' field of the first result is
    returned unchanged.
    """
    wrapped_prompt = f"<s>[INST] {prompt} [/INST]"
    generations = pipe(wrapped_prompt)
    first_generation = generations[0]
    return first_generation['generated_text']

# Example usage: send one sample question through generate_response and print
# the text it returns.
prompt = "What is the bio bin made of?"
output = generate_response(prompt)
print(output)
