In [None]:
import sys
import time

sys.path.append('../..')

#load from local .env file
from dotenv import load_dotenv, find_dotenv
env = load_dotenv('.env', override=True)

#standard python
from typing import List, Dict, Tuple
import os

# external libraries
from tqdm import tqdm
from rich import print  # nice library that provides improved printing output (overrides default print function)
from openai import OpenAI

# external files
from src.reranker import ReRanker
from src.database.weaviate_interface_v4 import WeaviateWCS
from src.app_dev.app_functions import validate_token_threshold
from src.app_dev.query import (
    parse_context_results,
    CompletedQuery,
    CompletedQueryQueue
)
from tiktoken import Encoding, get_encoding
from src.llm.prompt_templates import (
    question_answering_prompt_series,
    generate_prompt_series,
    huberman_system_message
)
from src.llm.llm_utils import load_azure_openai
from src.llm.llm_interface import LLM

In [None]:
# Read the Weaviate credentials from the process environment. They are expected
# to have been loaded from the local .env file by load_dotenv() in the imports cell.
# Indexing os.environ raises KeyError immediately if a variable is missing.
api_key = os.environ['WEAVIATE_API_KEY']
url = os.environ['WEAVIATE_ENDPOINT']

# Instantiate the Weaviate client wrapper; it is used for the collection listing
# and hybrid search in the following cells.
client = WeaviateWCS(url, api_key)

In [None]:
# Display the collection names available on the cluster (handy for choosing
# the collection_name set in the next cell).
client.show_all_collections()

In [None]:
# Name of the collection that the search query in this notebook runs against.
collection_name = 'Huberman_minilm_256'

In [None]:
# Initial user query: it is searched, reranked, answered by the LLM, and then
# stored as the first entry of the query history.
query = 'Does he reference exactly what literature?'

In [None]:
# Hybrid search for the initial query, limited to 5 results.
# NOTE(review): alpha presumably balances keyword vs. vector scoring, as in
# Weaviate's hybrid search -- confirm the wrapper passes it through unchanged.
alpha = 0.45
hyb_response = client.hybrid_search(
    query,
    collection_name,
    alpha=alpha,
    limit=5,
)

In [None]:
# Rerank the hybrid-search hits against the query, keeping the top 5 and
# leaving the scores un-squashed (apply_sigmoid=False).
reranker = ReRanker()
reranked_results = reranker.rerank(hyb_response, query, apply_sigmoid=False, top_k=5)

In [None]:
# Tokenizer handed to the token-threshold check below.
encoding = get_encoding("cl100k_base")

# Run the reranked hits through validate_token_threshold with a 4000-token threshold.
valid_results = validate_token_threshold(
    ranked_results=reranked_results,
    system_message=question_answering_prompt_series,
    query=query,
    tokenizer=encoding,
    token_threshold=4000,
)

In [None]:
# Build the user prompt here so it is available for the initial LLM call that
# the app currently uses.
# NOTE(review): the trailing positional 1 is not explained in this notebook --
# consider passing it by keyword once its parameter name is confirmed.
context_series = generate_prompt_series(query, valid_results, 1)

In [None]:
# Alternative client via load_azure_openai, currently disabled:
#llm = load_azure_openai('gpt-35-turbo')

# LLM wrapper used for every chat completion in the cells below.
llm = LLM('gpt-3.5-turbo-0125')

In [None]:
# Answer the initial query: the prompt built from the retrieved context is sent
# as the user message alongside the Huberman system message.
llm_response = llm.chat_completion(
    huberman_system_message,
    user_message=context_series,
    temperature=0.5,
    max_tokens=1000,
)

print(llm_response)

In [None]:
# For now, only title, guest, and content are wanted from each search result.
context_results = parse_context_results(valid_results)
# Bundle the query, its context results, and the LLM answer into a single
# CompletedQuery record.
completed_query = CompletedQuery(query, context_results, llm_response)

# Add the CompletedQuery object to a CompletedQueryQueue.
# This is how the last 5 queries are stored.
completed_query_queue = CompletedQueryQueue()
completed_query_queue.add_query(completed_query)

In [None]:
# Deliberately vague follow-up query: it only makes sense given the previous
# query and its answer.
new_user_query = "Who else discusses that topic?"

# Convert the data structure holding the last 5 queries to a string so it can
# be sent to the LLM.
completed_query_string_list = completed_query_queue.to_string()

# Combine the new vague query with the previous queries, their contexts, and
# their answers to create a new LLM prompt.
# NOTE(review): create_llm_prompt is neither imported nor defined in any cell
# visible in this notebook, so this line raises NameError on a fresh kernel
# (Restart & Run All). Import it from its module in the imports cell -- TODO
# confirm where it lives.
query_rewrite_prompt = create_llm_prompt(new_user_query, completed_query_string_list)
print(query_rewrite_prompt)

In [None]:
# Baseline behaviour: send the vague follow-up query on its own, with no
# history or rewrite prompt. This is what the current app would produce.
llm_response = llm.chat_completion(
    huberman_system_message,
    user_message=new_user_query,
    temperature=0.5,
    max_tokens=1000,
)

print(llm_response)

In [None]:
# Author's note: I was using this system message but haven't tried the old one yet.

# Time the call with time.perf_counter(): it is monotonic, so the measured
# interval cannot be skewed by system clock adjustments the way time.time() can.
start_time = time.perf_counter()

# This is what the new prompt gets for the vague question: the rewrite prompt
# (vague query + formatted history) is sent as the user message.
llm_response = llm.chat_completion(huberman_system_message,
    user_message=query_rewrite_prompt,
    temperature=0.5,
    max_tokens=1000)

end_time = time.perf_counter()
execution_time = end_time - start_time
print(f"Execution time: {execution_time} seconds")

print(llm_response)

In [None]:
from collections import deque
from dataclasses import dataclass
from typing import List, Optional

# NOTE: the CompletedQuery / CompletedQueryQueue classes defined in this cell
# shadow the ones imported from src.app_dev.query in the imports cell. Once this
# cell has run, re-running earlier cells uses these prototypes instead, so the
# queue below provides the same add_query() / to_string() calls those cells make.


@dataclass
class ContextResult:
    """One retrieved context entry, reduced to the fields shown to the LLM."""
    title: str    # rendered on the "Title:" line
    guest: str    # rendered on the "Guest:" line
    content: str  # rendered on the "Content:" line


@dataclass
class CompletedQuery:
    """A finished query: the user's question, its context results, and the answer."""
    user_query: str
    context_results_list: List[ContextResult]
    llm_answer: str
    # Optional: stays None unless a revised query is supplied; the formatter
    # renders a missing/empty value as the literal text 'None'.
    llm_revised_query: Optional[str] = None


class CompletedQueryQueue:
    """Bounded history of completed queries; the oldest entry is dropped when full."""

    def __init__(self, max_queries: int = 5):
        """Create an empty history.

        Args:
            max_queries: maximum number of queries retained (deque ``maxlen``).
                Defaults to 5, the size previously hard-coded here.
        """
        self.completed_query_list: deque[CompletedQuery] = deque(maxlen=max_queries)

    def add_query(self, completed_query: CompletedQuery) -> None:
        """Append a completed query; a full deque silently discards its oldest item."""
        self.completed_query_list.append(completed_query)

    def to_string(self) -> str:
        """Return the history formatted by ``format_completed_query_queue``.

        NOTE(review): the imported class's to_string() is not visible in this
        notebook -- confirm its output format matches this formatter.
        """
        return format_completed_query_queue(self)


def format_completed_query_queue(queue: CompletedQueryQueue) -> str:
    """Render every query in ``queue`` as plain text for inclusion in an LLM prompt.

    Each query becomes a "Query N:" section listing the user query, the LLM
    answer, the revised query (or 'None'), and then one "Context Result M:"
    entry per context result. Sections are joined with a blank-line separator.

    Args:
        queue: object exposing ``completed_query_list``, an iterable of
            CompletedQuery records in display order.

    Returns:
        The formatted history, or an empty string when the queue is empty.
    """
    formatted_queries = []
    for i, completed in enumerate(queue.completed_query_list, 1):
        # All context results for this query, concatenated in order.
        formatted_results = "".join(
            f"Context Result {j}:\n"
            f"Title: {result.title}\n"
            f"Guest: {result.guest}\n"
            f"Content: {result.content}\n"
            for j, result in enumerate(completed.context_results_list, 1)
        )
        formatted_queries.append(
            f"Query {i}:\n"
            f"User Query: {completed.user_query}\n"
            f"LLM Answer: {completed.llm_answer}\n"
            # Falsy values (None or '') are shown as the text 'None'.
            f"LLM Revised Query: {completed.llm_revised_query or 'None'}\n"
            f"{formatted_results}"
        )
    return "\n\n".join(formatted_queries)

In [None]:
# Example data: two hand-written completed queries used to exercise the
# formatter. The second one is a follow-up that relies on the first for context.
cq1 = CompletedQuery(
    user_query="Who was the first president of the United States?",
    context_results_list=[
        ContextResult(
            title="Biography of George Washington",
            guest="Historian A",
            content="George Washington was the first president...",
        ),
        ContextResult(
            title="Presidency Overview",
            guest="Expert B",
            content="He served from 1789 to 1797...",
        ),
    ],
    llm_answer="George Washington was the first president of the United States.",
)

cq2 = CompletedQuery(
    user_query="What were his accomplishments?",
    context_results_list=[
        ContextResult(
            title="George Washington's Achievements",
            guest="Historian A",
            content="He led the Continental Army...",
        ),
        ContextResult(
            title="Presidential Achievements",
            guest="Expert B",
            content="He established many protocols...",
        ),
    ],
    llm_answer="George Washington had many accomplishments including leading the Continental Army to victory...",
)

In [None]:
# Load both example queries into a fresh queue (writing to the underlying deque
# directly), then format the whole history and print it.
queue = CompletedQueryQueue()
queue.completed_query_list.extend([cq1, cq2])

formatted_string = format_completed_query_queue(queue)
print(formatted_string)