## Installations

In [1]:
pip install llama-index ollama llama-index-embeddings-huggingface llmsherpa llama-index-vector-stores-pinecone llama-index-llms-ollama llama-index-postprocessor-flag-embedding-reranker FlagEmbedding llama-index-retrievers-bm25 ragas



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Document
from llmsherpa.readers import LayoutPDFReader
from llama_index.core import VectorStoreIndex

from dotenv import load_dotenv
load_dotenv('/Users/divyahegde/Documents/LLM/Final Project/.env') # LlamaCloud API Key
from llama_parse import LlamaParse


In [3]:
from pinecone import Pinecone
from pinecone import ServerlessSpec
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.core import StorageContext

Settings.chunk_size = 1024
Settings.chunk_overlap = 50

pc_api_key = os.environ['PINECONE_API_KEY']
pc = Pinecone(api_key = pc_api_key)

#pinecone_index = pc.Index("test")
pinecone_index = pc.Index("hybrid-search-visa-wise2")

vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [4]:
documents = SimpleDirectoryReader(input_dir = "/Users/divyahegde/Documents/LLM/Final Project/uscis_new 2").load_data(num_workers=4, show_progress = True)
documents

  from cryptography.hazmat.primitives.ciphers.algorithms import AES, ARC4
  from cryptography.hazmat.primitives.ciphers.algorithms import AES, ARC4
  from cryptography.hazmat.primitives.ciphers.algorithms import AES, ARC4
  from cryptography.hazmat.primitives.ciphers.algorithms import AES, ARC4


[Document(id_='709d62cc-c1db-4e4b-8f5f-4058f87c82fb', embedding=None, metadata={'page_label': '1', 'file_name': 'All Forms _ USCIS.pdf', 'file_path': '/Users/divyahegde/Documents/LLM/Final Project/uscis_new 2/All Forms _ USCIS.pdf', 'file_type': 'application/pdf', 'file_size': 484729, 'creation_date': '2024-08-03', 'last_modified_date': '2024-08-01'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text="Skip to main content\nU.S. ﬂag\nAn ofﬁcial website of the United States government\xa0 \xa0\xa0 Here's how you know\nEspañol\nMultilingual Resources\nOfﬁcial Government Website\nOfﬁcial websites use .gov\nA .gov website belongs to an ofﬁcial government or ganization in the United States.\nSecure Website\nSecur e .gov websites use HTTPS\nA lock (  ) or https://

In [5]:
from llama_index.core.node_parser import SentenceSplitter

# initialize node parser
splitter = SentenceSplitter(chunk_size=1024)

nodes = splitter.get_nodes_from_documents(documents)

In [6]:
import torch

#Load an Embedding Model from HuggingFace
Settings.embed_model = HuggingFaceEmbedding(
    model_name = "BAAI/bge-base-en-v1.5" #768 Embedding Dimension
)

In [7]:
#Getting the Vectors from Pinecone DB - Do this if the Pinecone Index already has vectors

index = VectorStoreIndex.from_vector_store(vector_store=vector_store, embed_model = Settings.embed_model)

In [8]:
from llama_index.llms.ollama import Ollama

Settings.llm = Ollama(model="llama3:instruct", request_timeout=360.0)
#Settings.llm = Ollama(model="mistral", request_timeout=360.0) #Mistral 7B has a larger context size

In [9]:
#structured_answer_filtering=True ensures that the Response Synthesizer is able to filter out any input nodes that are not relevant to the question being asked.
#response_mode = 'compact'
#qa_prompt creates a custom prompt template and answers the 

from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core import get_response_synthesizer
from llama_index.core import PromptTemplate
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.core.retrievers import QueryFusionRetriever
import Stemmer

qa_prompt = PromptTemplate(
    # "Context information is below.\n"
    # "---------------------\n"
    "You are a USCIS policy helper\n." 
    "You will be provided with a query about USCIS policies and guidelines and you must answer it clearly and provide detailed steps using only the context information and not any prior knowledge\n." 
    "If the steps need to follow a certain order then ensure that the order is stated clearly. If any mathematical calculations need to be done make sure to show them clearly. If any forms need to be filed, make sure to specify what those forms are. Also cite any actual URLs if required to provide more clarity and make sure that these URLs are not broken.\n"
    "Context information is below.\n"
    "---------------------\n"
    "{context_str}"
    "---------------------\n"
    "Given the context information and not prior knowledge, answer the query"
    "Query: {query_str}\n"
    "Answer: "
)

vector_retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k = 8,
    embed_model = Settings.embed_model
)

##added fpor fusion
bm25_retriever = BM25Retriever.from_defaults(
    nodes=nodes,
    similarity_top_k=8,
    # Optional: We can pass in the stemmer and set the language for stopwords
    # This is important for removing stopwords and stemming the query + text
    # The default is english for both
    stemmer=Stemmer.Stemmer("english"),
    language="english"
)

retriever = QueryFusionRetriever(
   [vector_retriever, bm25_retriever],
    similarity_top_k=8,
    num_queries=1,  # set this to 1 to disable query generation
    mode="reciprocal_rerank",
    use_async=True,
    verbose=True,
    query_gen_prompt=qa_prompt,  # we could override the query generation prompt here
)

response_synthesizer = get_response_synthesizer(response_mode = "compact", llm = Settings.llm, text_qa_template = qa_prompt)

In [10]:
from IPython.display import display, Markdown
from llama_index.core.query_engine import RetrieverQueryEngine

query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer
    #node_postprocessors = [rerank]
)

In [11]:
import nest_asyncio

nest_asyncio.apply()

In [12]:
response = query_engine.query('''I'm a F1 student and I'm going through an economic crisis. Can I take an off-campus employment to support myself and is it illegal to do so?''')
display(Markdown(response.response))

#Describe the requirements for an F-1 student returning to the United States after a temporary absence of 5 months or less.
#I'm on a dependant visa. Will enrolling myself into a university require a F1 visa?

Here is the rewritten answer:

As an F-1 student facing economic crisis, you may be eligible for off-campus employment authorization due to severe economic hardship. To qualify, you must demonstrate that accepting this employment will not interfere with carrying a full course of study and that it's necessary to avoid severe economic hardship.

To apply, follow these steps: request a recommendation from your Designated School Official (DSO) for off-campus employment, have the DSO complete such certification in SEVIS, and submit Form I-765 with the required fee and supporting materials. USCIS will adjudicate your application based on Forms I-20 and I-765 or successor forms, and any additional supporting materials.

Please note that unauthorized off-campus employment is illegal for F-1 students. Engaging in unauthorized work can result in termination of your status and potential consequences, including deportation.

To avoid issues, it's recommended to follow the guidelines above and seek guidance from your DSO or a qualified immigration attorney if you have questions or concerns about the application process.

Remember to maintain lawful status as an F-1 student and comply with regulations governing your nonimmigrant status.

When embeddings of larger dimensions are used (i.e. 1024) it takes a long time to create and upsert the embeddings and during retreival and reranking, the vectors are more sparse and hence it becomes difficult to find similar vectors and rerank them

## Evaluating RAG with hybrid, RAG with just vector search, and LLM only responses

In [13]:
from tqdm import tqdm 
from llama_index.llms.ollama import Ollama
from IPython.display import display, Markdown
from llama_index.core.query_engine import RetrieverQueryEngine
import pandas as pd 

df = pd.read_csv("/Users/divyahegde/Documents/LLM/Final Project/VisaWise Questions - Context based.csv")

query_engine_vector = RetrieverQueryEngine(
    retriever=vector_retriever,
    response_synthesizer=response_synthesizer,
    #node_postprocessors = [rerank]
)

llm_only = Ollama(model="llama3:instruct", request_timeout=360.0, temperature = 0.3, num_beams = 3)

for i in tqdm(range(len(df))):
    message = df.loc[i, 'Question']
    df.loc[i, 'llm_only_response'] = llm_only.complete(message).text 
    df.loc[i, 'rag_response_hybrid'] = query_engine.query(message).response 
    df.loc[i, 'rag_response_vec'] = query_engine_vector.query(message).response

#df['embed_model'] = Settings.embed_model.model_name
#df['embedding_dimension'] = 768
#df['chunk_size'] = Settings.chunk_size 
#df['chunk_overlap'] = Settings.chunk_overlap 
#df['similarity_top_k'] = retriever.similarity_top_k 
#df['rerank_top_n'] = rerank.top_n

df

100%|██████████| 10/10 [19:04<00:00, 114.42s/it]


Unnamed: 0,Question,Hybrid RAG ans,Unnamed: 2,llm_only_response,regular_rag_query_engine_response,regular_rag_chat_engine_response,rag_response_hybrid,rag_response_vec
0,I'm on F1 visa but due to an illness I had to ...,"To address your query, let's refer to the rele...",,I understand your concern. Losing momentum in ...,I understand your concern! As you're currently...,Hello! I'm happy to help you with your questio...,Refiling within the designated window is cruci...,"Based on the provided context information, her..."
1,I'm on a dependant visa. Will enrolling myself...,"As a dependent of an F-1 student, you are elig...",,"As a dependent visa holder, your situation is ...","Based on the provided context information, as ...",Hello again!\n \n As a dependent of an F-1 stu...,Enrolling yourself into a university does not ...,Enrolling oneself into a university does not r...
2,Can I buy a house with F1 visa?,It is generally not possible to buy a house wi...,Lol,"In the United States, it's possible to purchas...","I'm happy to help! However, based on the provi...","Hello again!\n \n As an F-1 student, you're no...","Based on the provided policy manual, it is gen...",Given the new context to replace an expiring P...
3,I'm in the process of filing my H-1B. Can I vi...,"Based on the provided context information, I'd...",Need to check for accuracy,A family emergency can be stressful and overwh...,I'd be happy to help you with that!\n \n Since...,Hello again!\n \n As you're going through the ...,"Based on USCIS policy, individuals in your sit...",You're seeking to visit your home due to a fam...
4,I'm an international student on F1 visa. What ...,To ensure you get a green card in the next 10 ...,,Congratulations on taking proactive steps towa...,I'd be happy to help you with that!\n \n As an...,"As an international student on an F-1 visa, ge...",To increase your chances of obtaining a green ...,To obtain a Green Card as an F-1 international...
5,WHat is O1 visa? How is it different from any ...,The O-1 visa is a nonimmigrant visa that allow...,,"The O-1 visa, also known as the ""Extraordinary...","Based on the provided context information, an ...",The O-1 visa!\n \n The O-1 visa is a non-immig...,**Rewrite**\n\nAn O-1 visa is a nonimmigrant v...,The O-1 visa is a non-immigrant visa that allo...
6,I am not a US citizen. Can I get a Green Card ...,,,"As a general rule, a foreign-born individual c...","Hello! Based on the USCIS policy manual, I can...",Hello! I'm happy to help you with that questio...,I understand your query. As an expert Q&A syst...,"To request removal of conditions on residence,..."
7,I graduated from my master's course. I'm still...,"As an international student on OPT, you may re...",,Congratulations on your master's degree!\n\nRe...,,,I will rewrite the original answer using the n...,When you find a job and receive your Employmen...
8,There is a severe economic crisis happening in...,SHIT RESPONSE,,I'm here to help you with your concern.\n\nFir...,,,Starting from the moment that the Form I-140 f...,Practical training may be authorized to an F–1...
9,I'm a F1 student and I'm going through an econ...,An F-1 student experiencing severe economic ha...,,"I understand your concern, and I'm here to hel...",,,As a F-1 student experiencing severe economic ...,As an F-1 student experiencing severe economic...


In [14]:
df.columns

Index(['Question', 'Hybrid RAG ans', 'Unnamed: 2', 'llm_only_response',
       'regular_rag_query_engine_response', 'regular_rag_chat_engine_response',
       'rag_response_hybrid', 'rag_response_vec'],
      dtype='object')

In [17]:
df

Unnamed: 0,Question,Hybrid RAG ans,Unnamed: 2,llm_only_response,regular_rag_query_engine_response,regular_rag_chat_engine_response,rag_response_hybrid,rag_response_vec
0,I'm on F1 visa but due to an illness I had to ...,"To address your query, let's refer to the rele...",,I understand your concern. Losing momentum in ...,I understand your concern! As you're currently...,Hello! I'm happy to help you with your questio...,Refiling within the designated window is cruci...,"Based on the provided context information, her..."
1,I'm on a dependant visa. Will enrolling myself...,"As a dependent of an F-1 student, you are elig...",,"As a dependent visa holder, your situation is ...","Based on the provided context information, as ...",Hello again!\n \n As a dependent of an F-1 stu...,Enrolling yourself into a university does not ...,Enrolling oneself into a university does not r...
2,Can I buy a house with F1 visa?,It is generally not possible to buy a house wi...,Lol,"In the United States, it's possible to purchas...","I'm happy to help! However, based on the provi...","Hello again!\n \n As an F-1 student, you're no...","Based on the provided policy manual, it is gen...",Given the new context to replace an expiring P...
3,I'm in the process of filing my H-1B. Can I vi...,"Based on the provided context information, I'd...",Need to check for accuracy,A family emergency can be stressful and overwh...,I'd be happy to help you with that!\n \n Since...,Hello again!\n \n As you're going through the ...,"Based on USCIS policy, individuals in your sit...",You're seeking to visit your home due to a fam...
4,I'm an international student on F1 visa. What ...,To ensure you get a green card in the next 10 ...,,Congratulations on taking proactive steps towa...,I'd be happy to help you with that!\n \n As an...,"As an international student on an F-1 visa, ge...",To increase your chances of obtaining a green ...,To obtain a Green Card as an F-1 international...
5,WHat is O1 visa? How is it different from any ...,The O-1 visa is a nonimmigrant visa that allow...,,"The O-1 visa, also known as the ""Extraordinary...","Based on the provided context information, an ...",The O-1 visa!\n \n The O-1 visa is a non-immig...,**Rewrite**\n\nAn O-1 visa is a nonimmigrant v...,The O-1 visa is a non-immigrant visa that allo...
6,I am not a US citizen. Can I get a Green Card ...,,,"As a general rule, a foreign-born individual c...","Hello! Based on the USCIS policy manual, I can...",Hello! I'm happy to help you with that questio...,I understand your query. As an expert Q&A syst...,"To request removal of conditions on residence,..."
7,I graduated from my master's course. I'm still...,"As an international student on OPT, you may re...",,Congratulations on your master's degree!\n\nRe...,,,I will rewrite the original answer using the n...,When you find a job and receive your Employmen...
8,There is a severe economic crisis happening in...,SHIT RESPONSE,,I'm here to help you with your concern.\n\nFir...,,,Starting from the moment that the Form I-140 f...,Practical training may be authorized to an F–1...
9,I'm a F1 student and I'm going through an econ...,An F-1 student experiencing severe economic ha...,,"I understand your concern, and I'm here to hel...",,,As a F-1 student experiencing severe economic ...,As an F-1 student experiencing severe economic...


In [18]:
df.to_csv('responses_evaluation.csv')

## RAGAS Framework for evaluation

In [19]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
import os

# generator with openai models
generator_llm = Settings.llm

critic_llm = Ollama(model="mistral-nemo")
embeddings = Settings.embed_model

generator = TestsetGenerator.from_llama_index(
    generator_llm=generator_llm,
    critic_llm=critic_llm,
    embeddings=embeddings,
)


In [20]:
documents_test = SimpleDirectoryReader(input_dir = "/Users/divyahegde/Documents/LLM/Final Project/uscis_new 2/test_set_folder").load_data(num_workers=4, show_progress = True)
documents_test

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

[Document(id_='ea7e0b3d-3b3b-418f-95eb-4092fc3d546b', embedding=None, metadata={'page_label': '1', 'file_name': 'Employment Authorization for Certain H-4 Dependent Spouses _ USCIS.pdf', 'file_path': '/Users/divyahegde/Documents/LLM/Final Project/uscis_new 2/test_set_folder/Employment Authorization for Certain H-4 Dependent Spouses _ USCIS.pdf', 'file_type': 'application/pdf', 'file_size': 200615, 'creation_date': '2024-08-03', 'last_modified_date': '2024-08-01'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='Home> Working in the United States> Temporary Workers> H-1B Specialty Occupations>\nEmployment Authorization for Certain H-4 Dependent Spouses\nEmployme nt Authorization for Certain H-4\nDependent Spouses\nCertain H-4 dependent spouses of H-1B noni

In [21]:
len(documents_test)

66

In [22]:

testset = generator.generate_with_llamaindex_docs(
    documents_test,
    test_size=5,
    distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25},
)

embedding nodes:   0%|          | 0/132 [00:00<?, ?it/s]

Filename and doc_id are the same for all nodes.


Generating:   0%|          | 0/5 [00:00<?, ?it/s]

In [23]:
test_df = testset.to_pandas()
test_df

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,Here is a question that can be fully answered ...,[“bundling” forms I-129 and I-539 for certain ...,USCIS announcing process enhancements for defe...,simple,"[{'page_label': '18', 'file_name': 'FAQs for I...",True
1,Here is a question that can be fully answered ...,"[point, to maintain nonimmigrant status your c...",USCIS has taken several actions to help those ...,simple,"[{'page_label': '17', 'file_name': 'FAQs for I...",True
2,Here's a rewritten version of the question:\n\...,"[point, to maintain nonimmigrant status your c...",USCIS has taken several actions to help those ...,reasoning,"[{'page_label': '17', 'file_name': 'FAQs for I...",True
3,How will the new selection approach affect reg...,"[On Jan. 30, 2024, USCIS announced a final rul...",The new selection approach will reduce registr...,multi_context,"[{'page_label': '2', 'file_name': 'H-1B Electr...",True
4,Here is the question that can be fully answere...,[Home> Working in the United States> Temporary...,Eligibility criteria for eligible students und...,simple,"[{'page_label': '1', 'file_name': 'Extension o...",True


In [24]:
test_df.to_csv('Test set from RAGAS.csv')

In [26]:
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_precision,
    context_recall,
)
from ragas.metrics.critique import harmfulness

metrics = [
    faithfulness,
    answer_relevancy,
    context_precision,
    context_recall,
    harmfulness,
]

In [27]:
ds = testset.to_dataset()

ds_dict = ds.to_dict()
ds_dict["question"]
ds_dict["ground_truth"]

['USCIS announcing process enhancements for deferred action requests by workers, including H-1B workers, to support labor and employment agency investigations.',
 'USCIS has taken several actions to help those who will be waiting a long time for an “immediately available” immigrant visa number, including issuing an unprecedented number of employment-based green cards in fiscal years 2022 and 2023, increasing the maximum validity period of Employment Authorization Documents (EADs) to 5 years for adjustment of status applicants and bringing back “combo cards” that provide evidence of both employment authorization and advance parole. Additionally, USCIS has expanded premium processing to all filers of Form I-140, Immigrant Petition for Noncitizen Workers, as well as certain filers of Form I-765, Application for Employment Authorization, and Form I-539, Application to Extend/Change Nonimmigrant Status.',
 'USCIS has taken several actions to help those who will be waiting a long time for an

In [30]:
evaluator_llm = Ollama(model="llama3.1", request_timeout=360.0)

### Hybrid search RAG score

In [33]:
from ragas.integrations.llama_index import evaluate

result = evaluate(
    query_engine=query_engine,
    metrics=metrics,
    dataset=ds_dict,
    #llm=evaluator_llm,
    embeddings=Settings.embed_model,
    raise_exceptions=False,
    #run_config=RunConfig(max_retries=3, max_wait=20)
)

Running Query Engine:   0%|          | 0/5 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/25 [00:00<?, ?it/s]

Task exception was never retrieved
future: <Task finished name='Task-593' coro=<as_completed.<locals>.sema_coro() done, defined at /Users/divyahegde/anaconda3/lib/python3.10/site-packages/ragas/executor.py:32> exception=TimeoutError()>
Traceback (most recent call last):
  File "/Users/divyahegde/anaconda3/lib/python3.10/asyncio/tasks.py", line 232, in __step
    result = coro.send(None)
  File "/Users/divyahegde/anaconda3/lib/python3.10/site-packages/ragas/metrics/_context_precision.py", line 160, in _ascore
    results = await self.llm.generate(
  File "/Users/divyahegde/anaconda3/lib/python3.10/site-packages/ragas/llms/base.py", line 95, in generate
    return await agenerate_text_with_retry(
  File "/Users/divyahegde/anaconda3/lib/python3.10/site-packages/tenacity/_asyncio.py", line 88, in async_wrapped
    return await fn(*args, **kwargs)
  File "/Users/divyahegde/anaconda3/lib/python3.10/site-packages/tenacity/_asyncio.py", line 47, in __call__
    do = self.iter(retry_state=retry

In [34]:
print(result)

{'faithfulness': 0.8256, 'answer_relevancy': 0.7588, 'context_precision': 0.9697, 'context_recall': 1.0000, 'harmfulness': 0.2000}


### Vector search only model scores

In [35]:
result_vector_only = evaluate(
    query_engine=query_engine_vector,
    metrics=metrics,
    dataset=ds_dict,
    #llm=evaluator_llm,
    embeddings=Settings.embed_model,
    raise_exceptions=False,
    #run_config=RunConfig(max_retries=3, max_wait=20)
)

Running Query Engine:   0%|          | 0/5 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/25 [00:00<?, ?it/s]

In [36]:
print(result_vector_only)

{'faithfulness': 0.4400, 'answer_relevancy': 0.6311, 'context_precision': 0.9509, 'context_recall': 1.0000, 'harmfulness': 0.0000}
