<a href="https://colab.research.google.com/github/apoorvapu/data_science/blob/main/Build_ChatBot_TrainedOnBook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Fine-tuning an LLM**

In [None]:
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
import gradio as gr

# Load the "en-he" configuration of the Helsinki-NLP/opus-100 dataset.
# Records are consumed below through example["translation"]["en"] and
# example["translation"]["he"].
# NOTE(review): this step was labelled "Bible dataset", but the identifier
# points at OPUS-100 English/Hebrew translation pairs - confirm this is the
# intended corpus, or swap the dataset id here to train on a different text.
dataset = load_dataset("Helsinki-NLP/opus-100", "en-he")

# Process the dataset into a fine-tuning format
def preprocess_data(example):
    """Flatten one translation record into an input/output text pair.

    Args:
        example: Mapping with a "translation" entry that itself holds the
            "en" and "he" texts.

    Returns:
        A dict with the "en" text under "input" and the "he" text under
        "output".
    """
    pair = example["translation"]
    source_text = pair["en"]
    target_text = pair["he"]
    return {"input": source_text, "output": target_text}

# Replace the nested "translation" column with flat "input"/"output" text columns.
dataset = dataset.map(preprocess_data, remove_columns=["translation"])

# Load the LLaMA 2 7B tokenizer and model. The weights are loaded in 8-bit and
# placed automatically across the available devices to reduce memory usage.
model_name = "meta-llama/Llama-2-7b-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Batches must be padded to a common length; fall back to EOS when the
# tokenizer does not define a padding token of its own.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(model_name, load_in_8bit=True, device_map="auto")

# Longest sequence (in tokens) kept per training example; longer pairs are truncated.
MAX_SEQ_LENGTH = 256


def tokenize_example(example):
    """Convert one {"input", "output"} text pair into causal-LM training features.

    The two texts are joined with a newline and terminated with the tokenizer's
    EOS token, then tokenized with truncation to MAX_SEQ_LENGTH. ``labels`` is a
    copy of ``input_ids`` so the Trainer can compute a language-modelling loss.

    Args:
        example: Mapping with "input" and "output" strings.

    Returns:
        The tokenizer output extended with a "labels" entry.
    """
    text = f"{example['input']}\n{example['output']}{tokenizer.eos_token}"
    encoded = tokenizer(text, truncation=True, max_length=MAX_SEQ_LENGTH)
    encoded["labels"] = list(encoded["input_ids"])
    return encoded


# The Trainer needs token ids, not raw strings: tokenize every split and drop
# the text columns. The untokenized `dataset` is left untouched.
tokenized_dataset = dataset.map(tokenize_example, remove_columns=["input", "output"])

# Pads input_ids/attention_mask with the pad token and labels with -100 so
# padded positions are ignored by the loss.
data_collator = transformers.DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True)

# Attach LoRA adapters to the attention query/value projections of the 8-bit
# base model; only the adapter weights are trained. (This is LoRA on an 8-bit
# model, not 4-bit QLoRA.)
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj"],
    task_type="CAUSAL_LM",  # wrap the model as a causal-LM PEFT model
)
model = get_peft_model(model, lora_config)

# Define Training Arguments
training_args = TrainingArguments(
    output_dir="./bible_llama2",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=4,  # effective batch of 8 sequences per device
    evaluation_strategy="epoch",
    save_strategy="epoch",  # must match evaluation_strategy for load_best_model_at_end
    logging_dir="./logs",
    num_train_epochs=3,
    save_total_limit=2,
    load_best_model_at_end=True,
)

# Trainer Setup
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    data_collator=data_collator,
)

# Fine-tune the model
trainer.train()

# Save the fine-tuned model and the tokenizer (including its padding-token
# setting) next to each other so they can be reloaded together.
model.save_pretrained("./bible_llama2")
tokenizer.save_pretrained("./bible_llama2")

# Load Fine-tuned Model for Chatbot
# Rebinds the module-level `model` and `tokenizer` to what was saved under
# ./bible_llama2; the chatbot functions below read these globals.
# NOTE(review): the model saved above is PEFT-wrapped, so the directory
# presumably holds only the LoRA adapter - confirm the installed
# transformers/peft versions resolve the base weights from it.
# NOTE(review): unlike the training-time load, no load_in_8bit is passed here -
# confirm the memory budget for inference.
model = AutoModelForCausalLM.from_pretrained("./bible_llama2", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("./bible_llama2")

def chatbot_response(prompt):
    """Generate a reply to *prompt* with the module-level model and tokenizer.

    Args:
        prompt: User text to feed to the model.

    Returns:
        The first generated sequence decoded to a string, with special tokens
        stripped.
    """
    # Keep the whole encoding (ids and attention mask) and move it to the
    # model's device; generate() should not have to guess the mask.
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    # max_new_tokens budgets the reply itself. max_length would also count the
    # prompt tokens, so a long prompt would leave little or no room to answer.
    output = model.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_new_tokens=150,
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Create a ChatGPT-style Interface
def chat_interface(user_input):
    """Gradio callback: pass the user's text to chatbot_response and return its reply."""
    reply = chatbot_response(user_input)
    return reply

# Gradio front end: one text input, one text output, served by chat_interface.
gui = gr.Interface(
    fn=chat_interface,
    inputs="text",
    outputs="text",
    title="Bible Chatbot",
    description="Ask anything about the Bible and get responses!",
)

# Start serving the interface.
gui.launch()
