<a href="https://colab.research.google.com/github/himani26/CaseStudy/blob/main/Trinity_fine_tune.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

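# **Trinity ChatQnA**

Fine-tune TinyLlama with LoRA on question–answer pairs that Claude generates from an uploaded PDF/TXT document, then serve the tuned model as a Streamlit chatbot exposed through an ngrok tunnel.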


# **Installations**

In [None]:

!pip install -q transformers peft datasets bitsandbytes accelerate sentencepiece pypdf2 streamlit pyngrok anthropic

In [None]:
pip install openai==0.28

In [None]:

import os, torch, textwrap, json, openai
from PyPDF2 import PdfReader
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, PeftModel

In [None]:
# Anthropic API key (used for Q&A generation)
from getpass import getpass
from anthropic import Anthropic

# Never hardcode the key in the notebook: reuse it from the environment when it
# is already set, otherwise prompt for it without echoing it into the output.
if not os.environ.get("ANTHROPIC_API_KEY"):
    os.environ["ANTHROPIC_API_KEY"] = getpass("Anthropic API key: ")

client = Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])

# **Upload files for training**

In [None]:

from google.colab import files

# Opens the Colab upload widget; the keys of `uploaded` are the uploaded file names.
uploaded = files.upload()
if not uploaded:
    # Without this guard a cancelled upload surfaces as an opaque IndexError.
    raise ValueError("No file was uploaded - re-run this cell and choose a PDF or TXT file.")

# Only the first uploaded file is used by the rest of the notebook.
file_name = next(iter(uploaded))

In [None]:

def extract_text_from_file(path):
    """Return the plain text of a PDF or TXT file.

    Args:
        path: Path to the file. The file type is chosen from its extension,
            compared case-insensitively.

    Returns:
        For ``.pdf``: the text of every page that yields any text, joined with
        newlines. For ``.txt``: the file contents decoded as UTF-8.

    Raises:
        ValueError: If the extension is neither ``.pdf`` nor ``.txt``.
    """
    lowered = path.lower()
    if lowered.endswith(".pdf"):
        reader = PdfReader(path)
        # Extract each page exactly once, then drop pages that produced no text.
        page_texts = (page.extract_text() for page in reader.pages)
        return "\n".join(text for text in page_texts if text)
    if lowered.endswith(".txt"):
        # The context manager closes the handle; the explicit encoding avoids
        # depending on the platform default.
        with open(path, encoding="utf-8") as handle:
            return handle.read()
    raise ValueError("Only PDF or TXT supported")

# Read the uploaded document into a single string.
doc_text = extract_text_from_file(file_name)
if not doc_text.strip():
    # Fail here with a clear message instead of later, when the training set
    # built from this text would be empty.
    raise ValueError(f"No text could be extracted from {file_name!r}.")
print("✅ Loaded document:", file_name, f"({len(doc_text)} characters)")

# **Generating QnA Pairs**
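The document is split into chunks of up to 1,500 characters. For each of the first three chunks, Claude is asked to generate 8 question–answer pairs, which become the training data.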

In [None]:

def generate_qna_pairs(text, num_pairs=8, max_chunks=3):
    """Ask Claude to write question-answer pairs about ``text``.

    The text is wrapped into chunks of at most 1500 characters and one request
    per chunk is sent through the module-level ``client``.

    Args:
        text: Source document text.
        num_pairs: Number of pairs requested for each chunk.
        max_chunks: How many leading chunks to process. The default of 3 keeps
            the number of API calls small; pass ``None`` to process every chunk.

    Returns:
        A list of dicts that each contain a ``"question"`` and an ``"answer"``
        key. Replies that cannot be parsed, and items without both keys, are
        reported with a printed warning and skipped.
    """
    import re, json

    qa_pairs = []
    for chunk in textwrap.wrap(text, 1500)[:max_chunks]:
        # Spell out the expected keys: the code that consumes these pairs looks
        # up "question" and "answer", so the schema must not be left to chance.
        prompt = (
            f"Generate {num_pairs} factual question–answer pairs from the text below.\n"
            "Return ONLY a valid JSON array (no markdown, no explanations) in which\n"
            'every element is an object with exactly two string keys: "question" and "answer".\n'
            "\n"
            "Text:\n"
            f"{chunk}\n"
        )
        response = client.messages.create(
            model="claude-3-opus-20240229",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3,
            max_tokens=1000,
        )

        # Concatenate the text of all returned content blocks; an empty reply
        # then becomes "" instead of raising IndexError.
        content = "".join(getattr(block, "text", "") for block in response.content).strip()

        # Take everything from the first "[" to the last "]". This also skips
        # any ```json fences around the array, so no separate stripping is needed.
        match = re.search(r"\[.*\]", content, re.S)
        if not match:
            print("⚠️ No valid JSON block detected.")
            continue

        try:
            pairs = json.loads(match.group(0))
        except json.JSONDecodeError as e:
            print("⚠️ JSON decode error:", e)
            continue

        # Keep only well-formed items so later steps can rely on both keys.
        valid = [
            pair for pair in pairs
            if isinstance(pair, dict) and "question" in pair and "answer" in pair
        ]
        if len(valid) < len(pairs):
            print(f"⚠️ Skipped {len(pairs) - len(valid)} malformed pair(s).")
        qa_pairs.extend(valid)

    return qa_pairs

In [None]:
# Generate the training pairs from the uploaded document using the function's
# defaults. This calls the Anthropic API through `client`, so a valid key is needed.
lis_qna = generate_qna_pairs(doc_text)

In [None]:
# Show how many pairs came back and pretty-print only the first few, rather
# than dumping the entire list on a single line.
print(f"{len(lis_qna)} Q&A pairs generated")
print(json.dumps(lis_qna[:5], indent=2, ensure_ascii=False))

# **Tokenizing the generated QnA Pairs**

In [None]:

base_model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Build fresh records instead of renaming keys on `lis_qna` in place, so this
# cell produces the same result every time it is re-run. Items that already use
# the instruction/response names are accepted as well.
qa_records = []
for item in lis_qna:
    if not isinstance(item, dict):
        continue
    question = item.get("question", item.get("instruction"))
    answer = item.get("answer", item.get("response"))
    if question is None or answer is None:
        continue
    qa_records.append({"instruction": str(question), "response": str(answer)})

if not qa_records:
    raise ValueError("No usable question/answer pairs - re-run the Q&A generation step first.")

dataset = Dataset.from_list(qa_records)
tokenizer = AutoTokenizer.from_pretrained(base_model)
if tokenizer.pad_token is None:
    # padding="max_length" needs a pad token; fall back to EOS when none is set.
    tokenizer.pad_token = tokenizer.eos_token

def format_and_tokenize(example):
    """Turn one instruction/response record into fixed-length training features.

    Args:
        example: Mapping with ``"instruction"`` and ``"response"`` strings.

    Returns:
        The tokenizer output (truncated/padded to 512 tokens) with an added
        ``"labels"`` list: a copy of ``input_ids`` in which every padded
        position is replaced by -100.
    """
    # End the sample with EOS so the model also learns where an answer stops.
    prompt = (
        f"<|user|> {example['instruction']}\n"
        f"<|assistant|> {example['response']}{tokenizer.eos_token}"
    )
    toks = tokenizer(prompt, truncation=True, max_length=512, padding="max_length")
    # -100 is the label value the loss ignores; without it the loss would be
    # dominated by padding. Masking via attention_mask keeps the real EOS even
    # when the pad token and EOS share an id.
    toks["labels"] = [
        token_id if attended else -100
        for token_id, attended in zip(toks["input_ids"], toks["attention_mask"])
    ]
    return toks

# Drop the raw text columns; only the tokenized features are needed for training.
tokenized_ds = dataset.map(format_and_tokenize, remove_columns=dataset.column_names)
print("✅ Tokenized", len(tokenized_ds), "examples")

# **Fine-Tuning with LoRA**

In [None]:

from transformers import BitsAndBytesConfig

# 8-bit loading and fp16 training both need a CUDA GPU; fail early with a clear message.
if not torch.cuda.is_available():
    raise RuntimeError("A CUDA GPU is required for 8-bit LoRA fine-tuning (Runtime > Change runtime type).")

adapter_dir = "./lora-finetune"

# Request 8-bit weights through quantization_config: passing load_in_8bit=...
# directly to from_pretrained is deprecated.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",
)
model = prepare_model_for_kbit_training(model)

# LoRA adapters on the attention query/value projections only.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)

training_args = TrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=2,
    num_train_epochs=1,
    learning_rate=1e-4,
    fp16=True,
    logging_steps=10,
    output_dir=adapter_dir,
    save_strategy="no",  # no intermediate checkpoints; the adapter is saved once below
    report_to="none",
)

trainer = Trainer(model=model, train_dataset=tokenized_ds, args=training_args)
print("🚀 Fine-tuning started...")
trainer.train()

# Save the adapter and tokenizer side by side so the app can load them from one folder.
model.save_pretrained(adapter_dir)
tokenizer.save_pretrained(adapter_dir)
print("✅ LoRA adapter saved in ./lora-finetune")

In [None]:

%%writefile streamlit_lora_app.py
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# Page chrome for the chatbot app.
st.set_page_config(page_title="LoRA QA Chatbot", layout="wide")
st.title("💬 LoRA Fine-Tuned Chatbot")

@st.cache_resource
def load_model():
    """Load the 8-bit TinyLlama base model with the LoRA adapter applied.

    ``st.cache_resource`` lets Streamlit reuse the returned objects instead of
    reloading the model each time the script reruns.

    Returns:
        A ``(model, tokenizer)`` tuple: the base model wrapped with the adapter
        stored in ``lora-finetune``, and the base model's tokenizer.
    """
    from transformers import BitsAndBytesConfig

    base_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
    # Request 8-bit weights through quantization_config: passing load_in_8bit=...
    # directly to from_pretrained is deprecated.
    base = AutoModelForCausalLM.from_pretrained(
        base_id,
        quantization_config=BitsAndBytesConfig(load_in_8bit=True),
        device_map="auto",
    )
    model = PeftModel.from_pretrained(base, "lora-finetune")
    model.eval()  # inference only: make sure dropout is disabled
    tokenizer = AutoTokenizer.from_pretrained(base_id)
    return model, tokenizer

# Module-level handles read by generate_answer(); load_model() is wrapped in
# st.cache_resource.
model, tokenizer = load_model()

def generate_answer(prompt):
    """Generate the assistant's reply for an already-formatted chat prompt.

    Uses the module-level ``model`` and ``tokenizer``.

    Args:
        prompt: Prompt text that ends with the ``<|assistant|>`` marker.

    Returns:
        Only the newly generated text (the prompt is not echoed back), cut at
        the start of any follow-up ``<|user|>`` turn, with role markers removed
        and surrounding whitespace stripped.
    """
    # Follow the model's own device instead of assuming "cuda".
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_len = inputs["input_ids"].shape[1]
    with torch.inference_mode():
        output = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=True,  # temperature has no effect unless sampling is enabled
            temperature=0.7,
            # Stop at end-of-sequence instead of always producing 512 tokens.
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )
    # generate() returns prompt + continuation; decode the continuation only.
    reply = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
    # Keep the first assistant turn if the model starts writing another user turn.
    reply = reply.split("<|user|>", 1)[0]
    return reply.replace("<|assistant|>", "").strip()

USER_ICON, BOT_ICON = "🧑", "🤖"

# Streamlit reruns this script on every interaction, so the transcript is kept
# in session_state as a flat list of (icon, message) tuples.
if "chat" not in st.session_state:
    st.session_state.chat = []

user_input = st.text_input("Ask something related to the uploaded document:")
if st.button("Send") and user_input:
    prompt = f"<|user|> {user_input}\n<|assistant|>"
    answer = generate_answer(prompt)
    st.session_state.chat.append((USER_ICON, user_input))
    st.session_state.chat.append((BOT_ICON, answer))

# Entries are appended as (question, answer) pairs. Walk the pairs newest-first
# but keep each question above its answer; reversing the flat list would show
# every answer before the question it belongs to.
history = st.session_state.chat
for start in range(len(history) - 2, -1, -2):
    for speaker, message in history[start:start + 2]:
        if speaker == USER_ICON:
            st.markdown(f"**{speaker} You:** {message}")
        else:
            st.info(f"**{speaker} Bot:** {message}")

In [None]:

from getpass import getpass
from pyngrok import ngrok

APP_PORT = 8501

# Never hardcode the tunnel token: take it from the environment when set,
# otherwise prompt for it without echoing it into the cell output.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok auth token: ")
ngrok.set_auth_token(ngrok_token)

print("🚀 Launching chatbot...")
# The trailing "&" starts Streamlit in the background so this cell can continue.
get_ipython().system_raw(f"streamlit run streamlit_lora_app.py --server.port {APP_PORT} &")
public_url = ngrok.connect(APP_PORT).public_url
print("✅ Your chatbot is live at:", public_url)