# Try different LLM for RAG

In [36]:
import pandas as pd
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma
from langchain.schema import Document
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import time
import torch


In [13]:
!nvidia-smi

Sun Jul  7 21:46:38 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla P4                       On  |   00000000:00:04.0 Off |                    0 |
| N/A   51C    P0             24W /   75W |    1142MiB /   7680MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [14]:
# !sudo kill -9 38817

In [15]:
# Load chromadb and csv
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

vectordb = Chroma(persist_directory="./chroma_db_alt", embedding_function=embedding_function)

# Function to perform similarity search
def similarity_search(query, k=5):
    results = vectordb.similarity_search(query, k=k)
    return results




In [18]:
# Load LLaMA 3 model
model_id = "meta-llama/Meta-Llama-Guard-2-8B"
device = "cuda" if torch.cuda.is_available() else "cpu"


# Initialize model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32).to(device)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

# Basic llama3

In [30]:
# Function to generate answers using the basic LLM
def generate_basic_llm_answer(question):
    prompt = (
        f"You are an expert on VA disability benefits. You have deep knowledge and understanding of all aspects related to VA disability claims and benefits. "
        f"Please provide a detailed and accurate answer to the following question based on your expertise.\n\n"
        f"Question: {question}\n"
        f"Answer:"
    )
    
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
    output = model.generate(input_ids=input_ids, max_new_tokens=500, pad_token_id=tokenizer.pad_token_id, temperature=0)
    
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    
    answer = generated_text.split("Answer:")[1].strip() if "Answer:" in generated_text else generated_text.strip()
    
    return answer

def log_process_time(start_time):
    end_time = time.time()
    return end_time - start_time

def process_csv(df):
    
    answers = []
    times = []
    
    for question in df['question']:
        start_time = time.time()
        answer = generate_basic_llm_answer(question)
        process_time = log_process_time(start_time)
        
        answers.append(answer)
        times.append(process_time)
    
    df['basic_llm'] = answers
    df['basic_time'] = times
    
    return df

# LLama3 RAG


In [31]:
# LLama3 RAG


# generate answers using RAG
def generate_rag_answer(question, k=3):
    results = similarity_search(question, k)
    
    context = "\n\n".join([f"Source {i+1}: {result.page_content}" for i, result in enumerate(results)])
    
    prompt = (
        f"You are an expert on VA disability benefits. You will be provided with context information from several sources.\n"
        f"Use only the provided information to answer the question accurately and concisely. Do not use any external knowledge. "
        f"Base your answer strictly on the context provided.\n\n"
        f"Please follow these instructions carefully to provide an accurate answer:\n"
        f"1. Carefully read each paragraph of the provided content.\n"
        f"2. Identify if the paragraph contains relevant information to answer the question. If a paragraph does not provide relevant information, move to the next paragraph.\n"
        f"3. When you find relevant information, use it to construct your answer. Include as much evidence as possible from the context to support your answer, even if an answer has already been started.\n"
        f"4. Ensure your answer is accurate, concise, and based solely on the provided context.\n\n"
        f"Context:\n"
        f"{context}\n\n"
        f"Question: {question}\n"
        f"Answer:"
    )
    
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
    output = model.generate(input_ids=input_ids, max_new_tokens=500, pad_token_id=tokenizer.pad_token_id, temperature=0)
    
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    
    answer = generated_text.split("Answer:")[1].strip() if "Answer:" in generated_text else generated_text.strip()
    
    return answer, results

def log_process_time(start_time):
    end_time = time.time()
    return end_time - start_time

def process_rag_csv(df):
    
    rag_answers = []
    process_times = []
    similarity_metadata = []

    for question in df['question']:
        start_time = time.time()
        answer, results = generate_rag_answer(question)
        process_time = log_process_time(start_time)
        
        similarity_info = "\n\n".join([f"Title: {result.metadata['source_url']}\nContent: {result.page_content}" for result in results])
        
        rag_answers.append(answer)
        process_times.append(process_time)
        similarity_metadata.append(similarity_info)
    
    df['rag_llm'] = rag_answers
    df['rag_time'] = process_times
    df['rag_similarity'] = similarity_metadata
    
    return df




In [21]:
df_va_content = pd.read_csv('./dataset/processed_va_questions.csv')
df_va_content =  df_va_content.dropna(subset=['answer'])
df_va_content.head()

Unnamed: 0.1,Unnamed: 0,source_url,title,content,index,question,answer
0,0,https://www.va.gov/disability/after-you-file-c...,The VA claim process after you file your claim...,Learn about what happens after you file your c...,0,What is the average number of days it takes to...,The average number of days it takes to process...
1,1,https://www.va.gov/disability/after-you-file-c...,How long does it take VA to make a decision?. ...,147.2 days Average number of days to complete ...,1,What is the average number of days it takes fo...,The average number of days it takes for the VA...
2,2,https://www.va.gov/disability/after-you-file-c...,The time it takes to review your claim depends...,The time it takes to review your claim depends...,2,What factors affect the time it takes to revie...,The time it takes to review a claim for disabi...
3,3,https://www.va.gov/disability/after-you-file-c...,What should I do while I wait?. disability aft...,You don’t need to do anything unless we send y...,3,What should you do while waiting for a disabil...,While waiting for a disability claim after fil...
4,4,https://www.va.gov/disability/after-you-file-c...,What happens after I file a VA disability clai...,Claim received We’ll let you know when we rece...,4,What are the steps involved in the process aft...,"After filing a VA disability claim, the first ..."


In [28]:
df_process = process_csv(df_va_content)

df_rag = process_rag_csv(df_process)

df_rag.to_csv('./dataset/qa_va_content_rag.csv')

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end gene

In [29]:
df_rag.head()

Unnamed: 0.1,Unnamed: 0,source_url,title,content,index,question,answer,basic_llm,basic_time,rag_llm,rag_time,rag_similarity
0,0,https://www.va.gov/disability/after-you-file-c...,The VA claim process after you file your claim...,Learn about what happens after you file your c...,0,What is the average number of days it takes to...,The average number of days it takes to process...,The average number of days it takes to process...,12.445703,147.2 days,17.371511,Title: https://www.va.gov/disability/after-you...
1,1,https://www.va.gov/disability/after-you-file-c...,How long does it take VA to make a decision?. ...,147.2 days Average number of days to complete ...,1,What is the average number of days it takes fo...,The average number of days it takes for the VA...,The average number of days it takes for VA to ...,12.905303,147,13.435543,Title: https://www.va.gov/disability/after-you...
2,2,https://www.va.gov/disability/after-you-file-c...,The time it takes to review your claim depends...,The time it takes to review your claim depends...,2,What factors affect the time it takes to revie...,The time it takes to review a claim for disabi...,The time it takes to review a claim for disabi...,35.22736,"The type of claim you filed, how many injuries...",32.19604,Title: https://www.va.gov/disability/after-you...
3,3,https://www.va.gov/disability/after-you-file-c...,What should I do while I wait?. disability aft...,You don’t need to do anything unless we send y...,3,What should you do while waiting for a disabil...,While waiting for a disability claim after fil...,The best thing to do while waiting for a disab...,39.914326,The answer to this question is based on the pr...,67.983859,Title: https://www.va.gov/disability/after-you...
4,4,https://www.va.gov/disability/after-you-file-c...,What happens after I file a VA disability clai...,Claim received We’ll let you know when we rece...,4,What are the steps involved in the process aft...,"After filing a VA disability claim, the first ...",The steps involved in the process after filing...,85.216858,The steps involved in the process after filing...,24.199235,Title: https://www.va.gov/disability/after-you...


In [26]:
results[0]

Document(metadata={'source_url': 'https://www.va.gov/disability/after-you-file-claim/'}, page_content='What should I do while I wait?. disability after you file claim/nYou don’t need to do anything unless we send you a letter asking for more information. If we schedule any exams for you, be sure not to miss them. You can check the status of your claim online. The timeline listed there may vary based on how complex your claim is. Check your VA claim status')

# Rag With Reranker

In [None]:
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer, util
import numpy as np
from llama_index.core import VectorStoreIndex,SimpleDirectoryReader,ServiceContext,PromptTemplate
from sentence_transformers import SentenceTransformer, util
from langchain.vectorstores import Chroma
from llama_index.core.postprocessor import SentenceTransformerRerank

In [None]:
model_id = "meta-llama/Meta-Llama-Guard-2-8B"
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32).to(device)

embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

vectordb = Chroma(persist_directory="./chroma_db_alt", embedding_function=embedding_model.encode)

reranker = SentenceTransformerRerank()

In [None]:
def similarity_search(query, k=5):
    results = vectordb.similarity_search(query, k=k)
    return results

In [None]:
def generate_rag_answer(question, k=5):
    results = similarity_search(question, k)
    
    reranked_results = reranker.rerank(results, query=question)
    
    context = "\n\n".join([f"Source {i+1}: {result['page_content']}" for i, result in enumerate(reranked_results)])
    
    prompt = (
        f"You are an expert on VA disability benefits. You will be provided with context information from several sources.\n"
        f"Use only the provided information to answer the question accurately and concisely. Do not use any external knowledge. "
        f"Base your answer strictly on the context provided.\n\n"
        f"Please follow these instructions carefully to provide an accurate answer:\n"
        f"1. Carefully read each paragraph of the provided content.\n"
        f"2. Identify if the paragraph contains relevant information to answer the question. If a paragraph does not provide relevant information, move to the next paragraph.\n"
        f"3. When you find relevant information, use it to construct your answer. Include as much evidence as possible from the context to support your answer, even if an answer has already been started.\n"
        f"4. Ensure your answer is accurate, concise, and based solely on the provided context.\n\n"
        f"Context:\n"
        f"{context}\n\n"
        f"Question: {question}\n"
        f"Answer:"
    )
    
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
    output = model.generate(input_ids=input_ids, max_new_tokens=500, pad_token_id=tokenizer.pad_token_id)
    
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    
    answer = generated_text.split("Answer:")[1].strip() if "Answer:" in generated_text else generated_text.strip()
    
    return answer, reranked_results

In [32]:

def process_rag_rerank_csv(df):
    
    rag_answers = []
    process_times = []
    similarity_metadata = []

    for question in df['question']:
        start_time = time.time()
        answer, results = generate_rag_answer(question)
        process_time = log_process_time(start_time)
        
        similarity_info = "\n\n".join([f"Title: {result.metadata['source_url']}\nContent: {result.page_content}" for result in results])
        
        rag_answers.append(answer)
        process_times.append(process_time)
        similarity_metadata.append(similarity_info)
    
    df['rag_rerank_llm'] = rag_answers
    df['rag_rerank_time'] = process_times
    df['rag_rerank_similarity'] = similarity_metadata
    
    return df

In [53]:
df_rerank = process_rag_rerank_csv(df_rag)


In [56]:
df_rerank.to_csv('./dataset/qa_va_content_rag.csv')