In [1]:
%%capture
%pip uninstall pysqlite3 -y
%pip install pysqlite3-binary

%pip install -U transformers bitsandbytes huggingface_hub langchain_community langchain_text_splitters chromadb
%pip install -U langchain langchain-chroma
%pip install faiss-cpu
%pip install sentence-transformers
%pip install -U langchain-huggingface
%pip install -U peft torch transformers huggingface_hub


In [3]:
import torch
from transformers import RobertaTokenizerFast, RobertaForTokenClassification, AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM, BitsAndBytesConfig, pipeline
import json
from huggingface_hub import login
import os
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.prompts import ChatPromptTemplate
from peft import PeftModel, PeftConfig

# Define paths
DATA_PATH = "/kaggle/input/10pages"
FAISS_INDEX_PATH = "/kaggle/working/faiss_index"

# Setup device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Login to Hugging Face using your API token
hf_token = ""
login(hf_token)

# Load the RoBERTa NER model
ner_model = RobertaForTokenClassification.from_pretrained('adamfendri/robertaL_ner', use_auth_token=hf_token).to(device)
ner_tokenizer = RobertaTokenizerFast.from_pretrained('adamfendri/robertaL_ner', use_auth_token=hf_token)

# 4-bit Quantization Configuration
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16 if torch.cuda.get_device_capability()[0] >= 8 else torch.float16,
    bnb_4bit_use_double_quant=True,
)

# Load your fine-tuned Gemma-2 model with 4-bit quantization
config = PeftConfig.from_pretrained("adamfendri/Gemma-2-2b-it-medical")
base_model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-2-2b",
    quantization_config=bnb_config,
    device_map={"": device},  # Ensure the model is loaded on the correct device
)
model = PeftModel.from_pretrained(base_model, "adamfendri/Gemma-2-2b-it-medical").to(device)

# Load the tokenizer for Gemma-2
tokenizer = AutoTokenizer.from_pretrained("adamfendri/Gemma-2-2b-it-medical", use_auth_token=hf_token)

# Load the T5-Large model for summarization
summarizer_tokenizer = AutoTokenizer.from_pretrained("t5-large")
summarizer_model = AutoModelForSeq2SeqLM.from_pretrained("t5-large").to(device)

# Initialize the summarization pipeline
summarizer = pipeline("summarization", model=summarizer_model, tokenizer=summarizer_tokenizer, device=device.index if device.type == 'cuda' else -1)

# Define the prompt template
PROMPT_TEMPLATE = """
You are an AI Medical Assistant trained on a vast dataset of health information. Please be thorough and provide an informative answer. Answer the following question based on the provided context. If the context is irrelevant to the question, do not use it in your response. If you are unsure about a medical inquiry, advise seeking professional help.

{context}

---

Question: {question}
Answer:
"""

# Function to get embedding function
def get_embedding_function():
    return HuggingFaceEmbeddings()

# Function to load documents from PDF
def load_documents():
    document_loader = PyPDFDirectoryLoader(DATA_PATH)
    return document_loader.load()

# Function to split documents
def split_documents(documents):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=350,
        chunk_overlap=50,
    )
    return text_splitter.split_documents(documents)

# Function to add documents to FAISS index
def add_to_faiss(chunks):
    if os.path.exists(FAISS_INDEX_PATH):
        db = FAISS.load_local(FAISS_INDEX_PATH, embeddings=get_embedding_function(), allow_dangerous_deserialization=True)
    else:
        db = FAISS.from_documents(chunks, embedding=get_embedding_function())
    
    db.save_local(FAISS_INDEX_PATH)

# Function to process user input with NER model
def process_with_ner_model(user_input):
    inputs = ner_tokenizer(user_input, return_tensors="pt", padding=True, truncation=True, is_split_into_words=False).to(device)
    with torch.no_grad():
        outputs = ner_model(**inputs)
    predictions = torch.argmax(outputs.logits, dim=-1)

    # Define the label map
    id_to_label = {0: 'O', 1: 'B-WEIGHT', 2: 'I-WEIGHT', 3: 'B-HEIGHT', 4: 'I-HEIGHT', 5: 'B-AGE', 6: 'I-AGE'}

    # Extract predicted labels and the corresponding tokens
    tokens = ner_tokenizer.tokenize(user_input)
    pred_labels = [id_to_label.get(pred.item(), 'O') for pred in predictions[0]]

    # Improved Entity Extraction Logic
    def extract_entities(predicted_labels):
        entities = {"age": "", "height": "", "weight": ""}
        current_entity = None
        current_value = []

        for token, label in zip(tokens, pred_labels):
            if token.startswith("Ġ"):  # Handle space
                token = token[1:]  # Remove the special character for space

            if label.startswith("B-"):
                if current_entity:
                    # Join the tokens for the previous entity
                    entities[current_entity.lower()] = " ".join(current_value).strip()
                current_entity = label[2:]  # Start new entity
                current_value = [token]
            elif label.startswith("I-") and current_entity:
                current_value.append(token)  # Continue the entity
            else:
                if current_entity:
                    # Finalize the current entity
                    entities[current_entity.lower()] = " ".join(current_value).strip()
                current_entity = None
                current_value = []

        # Ensure the last entity is added
        if current_entity:
            entities[current_entity.lower()] = " ".join(current_value).strip()

        return entities

    return extract_entities(pred_labels)

# Function to query the RAG system and interact with the chatbot
def query_rag(query_text: str):
    # Prepare the FAISS index
    embedding_function = get_embedding_function()
    db = FAISS.load_local(FAISS_INDEX_PATH, embeddings=embedding_function, allow_dangerous_deserialization=True)

    # Search the index
    results = db.similarity_search_with_score(query_text, k=2)

    context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    prompt = prompt_template.format(context=context_text, question=query_text)
    print("Generated Prompt:")
    print(prompt)  # Check the generated prompt for correctness

    # Use Gemma-2 model for RAG
    try:
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(device)
        with torch.no_grad():
            response = model.generate(inputs.input_ids.to(device), max_length=512)
        response_text = tokenizer.decode(response[0], skip_special_tokens=True)
        sources = [doc.metadata.get("id", None) for doc, _score in results]
        formatted_response = f"Question: {query_text}\nAnswer: {response_text}"
        print(formatted_response)
        return response_text

    except Exception as e:
        print(f"Error invoking Gemma-2 model: {e}")
        return None

# Function to summarize the conversation
def summarize_conversation(question_text, response_text):
    # Only pass the user question and AI response text to the summarizer
    conversation_text = f"Question: {question_text}\nAnswer: {response_text}"
    summary = summarizer(conversation_text, max_length=200, min_length=50, do_sample=False)
    return summary[0]['summary_text']

# Function to populate the FAISS index
def populate_db():
    documents = load_documents()
    chunks = split_documents(documents)
    add_to_faiss(chunks)

# Main function to run RAG and NER with summarization
def main(user_input):
    # Step 1: NER Extraction
    ner_entities = process_with_ner_model(user_input)

    # Step 2: RAG Query with Gemma-2
    chatbot_response = query_rag(user_input)

    # Step 3: Summarization
    summary = summarize_conversation(user_input, chatbot_response)

    # Print Outputs
    print("NER Entities Detected:", json.dumps(ner_entities, indent=4))
    print("Chatbot Response:", chatbot_response)
    print("Conversation Summary:", summary)

# Example Interaction
if __name__ == "__main__":
    # Populate the database first
    populate_db()

    # Example interaction
    user_input = "I weigh 70 kg, my height is 175 cm. I am 25 years old. Is it considered healthy for my age?"
    main(user_input)


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Generated Prompt:
Human: 
You are an AI Medical Assistant trained on a vast dataset of health information. Please be thorough and provide an informative answer. Answer the following question based on the provided context. If the context is irrelevant to the question, do not use it in your response. If you are unsure about a medical inquiry, advise seeking professional help.

1
Cardiovascular Diseases
Aortic Dissection
IEssentials of Diagnosis
•Most patients between age 50 and age 70; risks include hyper-
tension, Marfan ’s syndrome, bicuspid aortic valve, coarctation of
the aorta, and pregnancy
•Type A involves the ascending aorta or arch; type B does not

---

Ann Thorac Surg 2000;69:1496. [PMID: 10881829]Chapter 1 Cardiovascular Diseases 5
1

---

Question: I weigh 70 kg, my height is 175 cm. I am 25 years old. Is it considered healthy for my age?
Answer:

Question: I weigh 70 kg, my height is 175 cm. I am 25 years old. Is it considered healthy for my age?
Answer: Human: 
You are an 