In [1]:
import sys
import time

# Extend the import path with the directory two levels up from the notebook's
# working directory — presumably the repository root, so that the `src.*`
# imports further down resolve (confirm against the repo layout).
sys.path.append('../..')

# Load variables from the local .env file into the process environment;
# override=True is passed so that values in the file replace variables that
# are already set.
# NOTE(review): per the python-dotenv docs, load_dotenv returns a bool, so `env`
# is a success flag rather than the loaded values — confirm before relying on it.
# `find_dotenv` is imported but not used in the cells visible here.
from dotenv import load_dotenv, find_dotenv
env = load_dotenv('.env', override=True)

#standard python
from typing import List, Dict, Tuple
import os

# external libraries
from tqdm import tqdm
from rich import print  # nice library that provides improved printing output (overrides default print function)

# external files
from src.reranker import ReRanker
from src.database.weaviate_interface_v4 import WeaviateWCS
from src.app_dev.app_functions import validate_token_threshold
from src.app_dev.query import (
    parse_context_results,
    CompletedQuery,
    CompletedQueryQueue,
    create_llm_prompt
)
from tiktoken import Encoding, get_encoding
from src.llm.prompt_templates import (
    question_answering_prompt_series,
    generate_prompt_series,
    huberman_system_message
)
from src.llm.llm_utils import load_azure_openai

/usr/local/lib/python3.10/site-packages/pydantic/_internal/_config.py:284: PydanticDeprecatedSince20: Support for class-based `config` is deprecated, use ConfigDict instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/
/usr/local/lib/python3.10/site-packages/litellm/proxy/_types.py:167: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.7/migration/
  @root_validator(pre=True)
/usr/local/lib/python3.10/site-packages/litellm/proxy/_types.py:254: PydanticDeprecatedSince20: `pydantic.config.Extra` is deprecated, use literal values instead (e.g. `extra='allow'`). Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide

In [2]:
# Read the Weaviate credentials from the process environment. Indexing
# os.environ raises KeyError if either variable is missing.
api_key = os.environ['WEAVIATE_API_KEY']
url = os.environ['WEAVIATE_ENDPOINT']

# Instantiate the client used by the collection-listing and search cells below.
client = WeaviateWCS(url, api_key)

In [3]:
# Display the collection names available on the connected cluster; the name
# assigned to `collection_name` in the next cell is one of those listed here.
client.show_all_collections()

['Huberman_subset_minilm_test',
 'Huberman_minilm_128',
 'Huberman_minilm_256',
 'Huberman_minilm_512']

In [4]:
# Collection that the hybrid search below runs against; one of the names
# listed by client.show_all_collections().
collection_name = 'Huberman_minilm_256'

In [5]:
# Initial user question. It is searched, answered, and then stored as history
# so that the vague follow-up question later in the notebook has something to
# refer back to.
query = 'What episode talks the most about hypertrophy training?'

In [6]:
# Weighting passed to the hybrid search.
# NOTE(review): presumably Weaviate's hybrid `alpha` (0 = keyword-only,
# 1 = vector-only) — confirm against WeaviateWCS.hybrid_search.
alpha = 0.45

# Retrieve the five best hybrid-search hits for the initial query.
hyb_response = client.hybrid_search(
    query,
    collection_name,
    alpha=alpha,
    limit=5,
)

In [7]:
# Re-score the hybrid-search hits against the query and keep the top five.
# apply_sigmoid=False is passed through unchanged.
# NOTE(review): presumably this leaves the reranker scores unnormalised —
# confirm in src.reranker.ReRanker.rerank.
reranker = ReRanker()
reranked_results = reranker.rerank(
    hyb_response,
    query,
    apply_sigmoid=False,
    top_k=5,
)

  return self.fget.__get__(instance, owner)()


In [8]:
# tiktoken encoding handed to the token-threshold check as its tokenizer.
encoding = get_encoding("cl100k_base")

# Check the reranked results against a 4000-token budget for the
# question-answering prompt, reading each result's text from its "content" field.
# NOTE(review): presumably results that would push the prompt over the budget
# are dropped — confirm in validate_token_threshold.
valid_results = validate_token_threshold(
    reranked_results,
    question_answering_prompt_series,
    query=query,
    tokenizer=encoding,
    token_threshold=4000,
    content_field="content",
)

In [9]:
# Build the user message for the initial LLM call (the one the app currently
# makes) from the query and the token-validated search results.
# NOTE(review): the meaning of the positional `1` is not visible here — confirm
# against generate_prompt_series in src.llm.prompt_templates.
context_series = generate_prompt_series(query, valid_results, 1)

In [10]:
# LLM client used for every chat completion in the cells below.
# NOTE(review): 'gpt-35-turbo' is presumably the Azure OpenAI model/deployment
# name — confirm against load_azure_openai.
llm = load_azure_openai('gpt-35-turbo')

In [11]:
# Answer the initial query: the system message sets the assistant's role and
# the user message carries the query together with the retrieved context.
llm_response = llm.chat_completion(
    huberman_system_message,
    user_message=context_series,
    temperature=0.5,
    max_tokens=1000,
)

print(llm_response)

In [12]:
# Reduce the validated results to the fields kept as history context.
# Per the original note these are title, guest, and content — confirm in
# parse_context_results.
context_results = parse_context_results(valid_results)
# Bundle the question, its context, and the LLM's answer into one history record.
# `llm_response` must still hold the answer to `query` when this cell runs;
# later cells rebind that name, so re-running this cell out of order would
# store a mismatched answer.
completed_query = CompletedQuery(query, context_results, llm_response)

# Start a fresh history queue and add the record to it. Re-running this cell
# creates a new queue object, so any previously stored history is dropped.
# NOTE(review): the queue is meant to keep the last 5 queries; that cap is not
# visible here — confirm in CompletedQueryQueue.
completed_query_queue = CompletedQueryQueue()
completed_query_queue.add_query(completed_query)

In [20]:
# Deliberately vague follow-up question: "that topic" can only be resolved by
# looking at the previous exchange.
new_user_query = "Who else discusses that topic?"

# Serialise the stored query history so it can be embedded in an LLM prompt.
# NOTE(review): despite the `_list` suffix, this holds whatever to_string()
# returns — presumably a single string; confirm in CompletedQueryQueue.
completed_query_string_list = completed_query_queue.to_string()

# Combine the vague question with the serialised history (previous queries,
# their contexts, and their answers) into the prompt for the rewrite call,
# then print it for inspection.
query_rewrite_prompt = create_llm_prompt(new_user_query, completed_query_string_list)
print(query_rewrite_prompt)

In [21]:
# Baseline: send the vague follow-up question to the LLM on its own, with no
# query history, to show what the current pipeline would answer.
#
# The result is bound to its own name instead of `llm_response`. That keeps the
# answer to the initial query intact for the cell that builds the
# CompletedQuery record (so re-running it still pairs `query` with the right
# answer), and keeps this baseline answer available for comparison after the
# history-aware call rebinds `llm_response`.
baseline_response = llm.chat_completion(
    huberman_system_message,
    user_message=new_user_query,
    temperature=0.5,
    max_tokens=1000,
)

print(baseline_response)

In [22]:
# History-aware run: same system message and sampling settings as the preceding
# no-history call, but the user message is the prompt that combines the vague
# question with the stored query history. The call is timed to see how long the
# larger prompt takes.
# NOTE(review): the original note says a different ("old") system message has
# not been tried with this prompt yet; which message is meant is not visible here.

# perf_counter() is a monotonic, high-resolution clock, so the measured interval
# cannot be distorted by system clock adjustments the way a difference of
# time.time() readings can.
start_time = time.perf_counter()

llm_response = llm.chat_completion(
    huberman_system_message,
    user_message=query_rewrite_prompt,
    temperature=0.5,
    max_tokens=1000,
)

end_time = time.perf_counter()
execution_time = end_time - start_time
print(f"Execution time: {execution_time} seconds")

print(llm_response)