In [1]:
## Retrieval augmented generation
import os
from dotenv import load_dotenv
load_dotenv()
import json
from llama_index.core.response.pprint_utils import pprint_response

In [19]:
# Fallback only: never overwrite a key that load_dotenv() or the shell already
# provided. Assigning the placeholder unconditionally clobbered the real key on
# "Restart & Run All", making every OpenAI request fail with an auth error.
# Keep the placeholder as-is in version control; do not commit a real key here.
os.environ.setdefault("OPENAI_API_KEY", "your-api-key")

In [3]:
# load_dotenv() has already copied .env entries into os.environ, so there is
# nothing to re-assign here. The previous self-assignment was a no-op when the
# key existed and raised an opaque TypeError (environ values must be str) when
# it did not. Fail fast with an actionable message instead.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to a .env file or export it "
        "before starting the notebook."
    )

In [4]:
from llama_index.core import VectorStoreIndex,SimpleDirectoryReader
# Load the contents of the local "data" directory as llama-index documents.
documents = SimpleDirectoryReader(input_dir="data").load_data()

In [5]:
# documents

In [6]:
# Build the vector index from the loaded documents, showing progress bars
# while nodes are parsed and embeddings are generated.
index = VectorStoreIndex.from_documents(
    documents,
    show_progress=True,
)

Parsing nodes:   0%|          | 0/269 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/270 [00:00<?, ?it/s]

In [7]:
# Query engine with the index's default retrieval settings.
query_engine = index.as_query_engine()

In [8]:
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.indices.postprocessor import SimilarityPostprocessor

# Retriever over the vector index, configured with similarity_top_k=4.
retriever = VectorIndexRetriever(index=index, similarity_top_k=4)

# Post-processing step configured with a similarity cutoff of 0.80.
postprocessor = SimilarityPostprocessor(similarity_cutoff=0.80)

# Query engine that combines the retriever with the post-processing step.
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    node_postprocessors=[postprocessor],
)


In [9]:
# pprint_response(response,show_source=True)
# print(response)

In [10]:
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
)

# Reuse the index persisted on disk when there is one; otherwise build it from
# the documents in "data" and persist it for the next run.
PERSIST_DIR = "./storage"
if os.path.exists(PERSIST_DIR):
    # A persisted index is available: rebuild it from its storage context.
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
else:
    # First run: read the documents, index them, then save the result.
    documents = SimpleDirectoryReader("data").load_data()
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=PERSIST_DIR)

def get_response(user_input):
    """Ask the index for three question/answer pairs about ``user_input``.

    A query engine is created from the module-level ``index``, the topic is
    inserted into a fixed prompt template, and the prompt is sent to the
    engine. Both the prompt and the response are printed.

    Args:
        user_input: Topic text substituted into the prompt template.

    Returns:
        The object returned by the query engine's ``query`` call.
    """
    engine = index.as_query_engine()
    template = "generate 3 pairs of question answers on {prompt} in this format:\n QUESTION: Question \n ANSWER: Answer"
    full_query = template.format(prompt=user_input)
    print(full_query)
    answer = engine.query(full_query)
    print(answer)
    return answer


In [16]:
def _parse_qa_pairs(text):
    """Extract ``{question: answer}`` pairs from ``QUESTION:``/``ANSWER:`` text.

    Lines are matched by their label rather than by position, so blank lines,
    leading/trailing whitespace and colons inside the question or answer text
    do not break parsing. Unlabelled lines that follow an answer are treated
    as a continuation of that answer; any other unlabelled text is ignored.
    """
    pairs = {}
    question = None
    in_answer = False
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        # Split on the FIRST colon only, so "ANSWER: a: b" keeps "a: b".
        label, sep, rest = line.partition(":")
        label = label.strip().upper()
        rest = rest.strip()
        if sep and label == "QUESTION":
            question = rest
            in_answer = False
        elif sep and label == "ANSWER" and question is not None:
            pairs[question] = rest
            in_answer = True
        elif in_answer:
            # Wrapped answer text: append it to the answer being collected.
            pairs[question] = f"{pairs[question]} {line}".strip()
    return pairs


def write_to_file(file_name, query):
    """Generate Q&A pairs for ``query`` and save them as a JSON object.

    Args:
        file_name: Path of the JSON file to write (overwritten if present).
        query: Topic passed to ``get_response``.

    Raises:
        ValueError: If the response contains no ``QUESTION:``/``ANSWER:`` pair,
            so no file is written.
    """
    response = get_response(query)
    data_dict = _parse_qa_pairs(str(response))
    if not data_dict:
        raise ValueError(
            "No QUESTION/ANSWER pairs found in the response; nothing was written."
        )

    with open(file_name, "w", encoding="utf-8") as json_file:
        json.dump(data_dict, json_file, indent=4)
    print(f"JSON file with Q&A pairs saved at {file_name}")

In [17]:
# Generate Q&A pairs for this topic and save them to the JSON file named below.
query = "number of working hours allowed for F1 student for various cases"
file_name = "f1_working_hours.json"
write_to_file(file_name=file_name, query=query)

generate 3 pairs of question answers on number of working hours allowed for F1 student for various cases in this format:
 QUESTION: Question 
 ANSWER: Answer
QUESTION: How many working hours are F1 students allowed during the academic year?
ANSWER: F1 students are allowed to work up to 20 hours per week during the academic year.

QUESTION: What is the maximum number of working hours F1 students can work during official school breaks?
ANSWER: F1 students can work full-time (more than 20 hours per week) during official school breaks.

QUESTION: Are F1 students allowed to work off-campus?
ANSWER: F1 students are generally not allowed to work off-campus, unless they have received specific authorization from the designated school official or the U.S. Citizenship and Immigration Services (USCIS).
JSON file with Q&A pairs saved at f1_working_hours.json


In [18]:
# Generate Q&A pairs for this topic and save them to the JSON file named below.
query = "employment authorization on B1 visa"
file_name = "b1_visa_employment.json"
write_to_file(file_name=file_name, query=query)

generate 3 pairs of question answers on employment authorization on B1 visa in this format:
 QUESTION: Question 
 ANSWER: Answer
QUESTION: What are the general requirements for obtaining employment authorization on a B1 visa?
ANSWER: The general requirements for obtaining employment authorization on a B1 visa include demonstrating that the work is temporary, unrelated to the individual's profession, and that the individual will not receive payment from a U.S. source.

QUESTION: Can individuals on a B1 visa engage in any form of employment in the United States?
ANSWER: Individuals on a B1 visa are not allowed to engage in traditional employment in the United States. They are only permitted to participate in certain business activities, such as attending meetings, conferences, and negotiating contracts.

QUESTION: How long can an individual on a B1 visa typically stay in the United States for business purposes?
ANSWER: Individuals on a B1 visa can typically stay in the United States for 