In [None]:
import dotenv
dotenv.load_dotenv()

import sys
import json
from pathlib import Path

# Make the project root (the parent of the directory the notebook runs in)
# importable, presumably so the `rag` package imported below resolves.
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
notebook_dir = Path('.').resolve()
project_root = notebook_dir.parent
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# Add these imports from query.py
import pandas as pd
from transformers import AutoTokenizer
from llama_index.core.postprocessor import LLMRerank
from llama_index.llms.openllm import OpenLLM
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.llms.openllm import OpenLLM
from transformers import AutoTokenizer
from textwrap import dedent

from rag.query import (
    load_data,
    create_retriever,
    get_nodes,
    get_llama3_1_instruct_str,
    print_references
)
from rag._defaults import DEFAULT_SYSTEM_PROMPT

In [None]:
# Workbook holding the evaluation prompts (judging by its file name, together
# with ground-truth answers).
# NOTE(review): absolute, machine-specific path — consider deriving it from
# `project_root` or an environment variable.
filepath = "/home/liam/Projects/determined-repos/rag-pdf/private/RFQ_Commercial/Petrobras/Prompts_GroundTruth_Answers.xlsx"

# Read the sheet and discard the "Unnamed: 0" column in one chained step.
df = pd.read_excel(filepath).drop(columns=["Unnamed: 0"])

# Preview the first rows.
print(df.head())

# Dump every prompt in full; each one is followed by blank lines, a 100-dash
# rule and more blank lines.
for prompt in df["Prompt"]:
    print(prompt, "\n", "-" * 100, "\n", sep="\n")

In [None]:
def generate_answer(
    query: str,
    retriever: VectorIndexRetriever,
    reranker: LLMRerank,
    llm: OpenLLM,
    tokenizer: AutoTokenizer,
):
    """Produce the LLM's answer text for a single query.

    The query is handed to ``get_nodes`` together with the retriever and
    reranker, the returned nodes are printed via ``print_references``, a
    prompt is built with ``get_llama3_1_instruct_str`` and sent to
    ``llm.complete``.

    Returns:
        The ``text`` attribute of the completion response.
    """
    retrieved = get_nodes(query, retriever, reranker)
    print_references(retrieved)
    prompt = get_llama3_1_instruct_str(query, retrieved, tokenizer)
    return llm.complete(prompt).text

In [None]:
# Settings taken from the `test-query-hosted` target in the Makefile.
db_path = f"{project_root}/private/test/embedded"
model_name = "meta-llama/Meta-Llama-3.1-70B-Instruct"
chat_model_endpoint = "http://llama-31-70b-jordan.models.mlds-kserve.us.rdlabs.hpecorp.net/v1"
embedding_model_path = "http://embedding-tyler.models.mlds-kserve.us.rdlabs.hpecorp.net/v1"
top_k_retriever = 5
temp, top_p = 0.2, 0.9
max_new_tokens = 1024
folder = "Petrobras"

# Load the index and chunks from the embedded database.
index, chunks = load_data(embedding_model_path, db_path)

# No filters and no reranker are used in this notebook.
filters = reranker = None
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Sampling options forwarded to the hosted model.
generate_kwargs = dict(do_sample=True, temperature=temp, top_p=top_p)

print(f"\nUsing hosted LLM at: {chat_model_endpoint}\n")
llm = OpenLLM(
    api_base=chat_model_endpoint,
    api_key="fake",
    model=model_name,
    max_tokens=max_new_tokens,
    generate_kwargs=generate_kwargs,
)

In [None]:
from ast import literal_eval

# LLM client for the agent helpers in this cell: sampling disabled
# (do_sample=False) and max_tokens=200.
# NOTE(review): these assignments rebind `model_name`, `chat_model_endpoint`,
# `tokenizer`, `generate_kwargs` and `llm`, which the earlier setup cell also
# assigns — any cell executed after this one sees the values set here.
model_name = "meta-llama/Meta-Llama-3.1-70B-Instruct"
chat_model_endpoint = "http://llama-31-70b-jordan.models.mlds-kserve.us.rdlabs.hpecorp.net/v1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
generate_kwargs = dict(do_sample=False)
llm = OpenLLM(
    api_base=chat_model_endpoint,
    api_key="fake",
    model=model_name,
    max_tokens=200,
    generate_kwargs=generate_kwargs,
)
def generate_completion(llm, tokenizer, text, system_prompt=DEFAULT_SYSTEM_PROMPT):
    """Send one system + user exchange to ``llm`` and return its completion.

    The two messages are rendered with the tokenizer's chat template (with the
    generation prompt appended), the template output is decoded back into a
    string, and that string is passed to ``llm.complete``.

    Args:
        llm: Object exposing ``complete(prompt)``.
        tokenizer: Object exposing ``apply_chat_template`` and ``decode``.
        text: Content of the user message.
        system_prompt: Content of the system message.

    Returns:
        Whatever ``llm.complete`` returns for the rendered prompt.
    """
    conversation = [
        dict(role="system", content=system_prompt),
        dict(role="user", content=text),
    ]
    token_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True)
    return llm.complete(tokenizer.decode(token_ids))

# Prompt template for the question-splitting step. `{query}` is filled in with
# str.format, and the model is told to reply with nothing but a Python list
# literal that ast.literal_eval can parse.
agent_prompt = """
For the question below, decide if it consists of multiple individual questions or if it is a single question. 
{query}

If it is composed of multiple individual questions, provide enough context to each question so that the question remains unambiguous. 
Then, return a python list of the standalone questions.
If it is a single question, return a python list with just that question.

The list should be parsable when passed to ast.literal_eval.

Only return the list and nothing else.
"""

def _parse_question_list(raw_reply, fallback_question):
    """Convert the LLM's reply into a non-empty list of question strings.

    Only the outermost ``[...]`` span of the reply is evaluated, so code
    fences or commentary around the list do not break parsing. If the reply
    cannot be parsed, or yields no usable question, the original query is
    returned as the only question instead of raising.
    """
    candidate = raw_reply.strip()
    start, end = candidate.find("["), candidate.rfind("]")
    if start != -1 and end > start:
        candidate = candidate[start:end + 1]

    try:
        parsed = literal_eval(candidate)
    except (ValueError, SyntaxError, TypeError):
        parsed = None

    # A bare string literal counts as a single question.
    if isinstance(parsed, str):
        parsed = [parsed]
    if isinstance(parsed, (list, tuple)):
        questions = [str(item) for item in parsed if str(item).strip()]
        if questions:
            return questions

    print("Could not parse a question list from the LLM reply; using the original query.")
    return [fallback_question]


def run_agent(retriever, reranker, llm, tokenizer, query):
    """Answer ``query`` by splitting it into standalone questions first.

    The LLM is asked (via ``agent_prompt``) to break the query into a Python
    list of questions. Each question is answered with ``generate_answer``.
    A single answer is returned as is; several answers are merged by a final
    ``generate_completion`` call.

    Args:
        retriever: Retriever forwarded to ``generate_answer``.
        reranker: Reranker forwarded to ``generate_answer`` (may be None).
        llm: LLM client used for splitting, answering and synthesis.
        tokenizer: Tokenizer used to build the prompts.
        query: The original, possibly multi-part, user query.

    Returns:
        The answer text (str).
    """
    # Get the list of questions
    list_qn_str = generate_completion(llm, tokenizer, agent_prompt.format(query=query)).text
    print("********** Qn list *********")
    print(list_qn_str)
    list_qn = _parse_question_list(list_qn_str, query)

    # Answer each standalone question. The sub-question itself is passed on,
    # not the original query, otherwise every iteration would retrieve and
    # answer the same thing.
    answers = [
        generate_answer(
            retriever=retriever,
            reranker=reranker,
            query=dedent(question),
            llm=llm,
            tokenizer=tokenizer,
        )
        for question in list_qn
    ]

    # If there's only one question, return the answer directly
    if len(answers) == 1:
        return answers[0]

    # If there are multiple questions, synthesize the answers
    synthesis_prompt = """
    The following questions were asked and answered:
    """

    for question, answer in zip(list_qn, answers):
        synthesis_prompt += f"""
    Question: {question}
    Answer: {answer}
    """

    synthesis_prompt += f"""
    Please synthesize these answers into a cohesive response that addresses all parts of the original query:
    {query}
    """

    return generate_completion(llm, tokenizer, synthesis_prompt).text

In [None]:
# For each retriever alpha, generate an answer for every prompt and store the
# run in its own `genanswer_alpha<alpha>` column of `df`.
# NOTE(review): `llm` and `tokenizer` are read from the notebook namespace, so
# they are whatever the most recently executed cell bound them to.
use_agent = False
for alpha in [0.2, 0.5, 0.8]:
    retriever = create_retriever(index=index, alpha=alpha, top_k_retriever=top_k_retriever, filters=filters)
    results = []
    for query in df["Prompt"]:
        print("********* QUERY *********")
        print(query)

        if use_agent:
            result = run_agent(
                llm=llm,
                retriever=retriever,
                reranker=None,
                tokenizer=tokenizer,
                query=dedent(query),
            )
        else:
            # Keyword arguments keep `llm` and `tokenizer` in the right slots;
            # the earlier positional call omitted `llm` and raised TypeError.
            result = generate_answer(
                query=dedent(query),
                retriever=retriever,
                reranker=reranker,
                llm=llm,
                tokenizer=tokenizer,
            )
        results.append(result)
    df[f"genanswer_alpha{alpha}"] = results

In [None]:

# `results` is re-created for every alpha in the evaluation loop, so this
# prints only the answers from the last alpha value that was run.
for generated_answer in results:
    print(generated_answer)