In [1]:
%load_ext autoreload
%autoreload 2

In [106]:
import sys
sys.path.append('../')

from dotenv import load_dotenv
envs = load_dotenv('../.env', override=True)

from src.database.utils import get_weaviate_client
from src.llm.llm_interface import LLM
from src.llm.prompt_templates import huberman_system_prompt
from app_features import generate_prompt_series, demo_gen_prompt_series
import os
import tiktoken

### Set Constants
---

### Weaviate

In [111]:
#get weaviate client

weave_client = get_weaviate_client()
collection_name = weave_client.show_all_collections()[0]
print(collection_name)

Huberman_minilm_256


### LLM

In [116]:
model_name = "gpt-3.5-turbo-0125"
claude_model = "claude-3-haiku-20240307"

In [117]:
claude = LLM(model_name=claude_model, api_key=os.environ['ANTHROPIC_API_KEY'])
turbo = LLM()

In [118]:
encoder = tiktoken.get_encoding('cl100k_base')

### Retrieval

In [112]:
query = 'What is Dr. Cal Newport best known for'

results = weave_client.hybrid_search(request=query,
                                     collection_name=collection_name,
                                     return_properties=['content', 'title', 'videoId']
                                    )

### Prompt Engineering

In [113]:
show_summary = '''
In this episode, my guest is Dr. Cal Newport, Ph.D., a professor of computer science at Georgetown University and bestselling author of numerous books on focus and productivity and how to access the deepest possible layers of your cognitive abilities in order to do quality work and lead a more balanced life. We discuss how to avoid digital distraction, specific systems to best arrange and update your schedule, and how to curate your work and home environment. We discuss how to engage with smartphones and technology, the significant productivity cost of task-switching, and how to avoid and overcome burnout. This episode provides specific protocols for enhancing focus and productivity, time management, task prioritization, and improving work-life balance that ought to be useful for anyone, young or old, regardless of profession.
'''.strip()
guest = "Dr. Cal Newport"

In [114]:
assistant_message = demo_gen_prompt_series(query, results[:3], guest, show_summary)

### LLM Call

In [123]:
completion = turbo.get_chat_completion(system_message=huberman_system_prompt,
                                       assistant_message=assistant_message,
                                       temperature=1.0,
                                       raw_response=False
                                       )

In [125]:
completion

'Dr. Cal Newport is best known for his work on focus, productivity, and deep work. He has authored books such as "Deep Work: Rules for Focus Success in a Distracted World" which provide insights and strategies on how to access deep cognitive abilities, avoid distractions, and enhance productivity. Additionally, he is known for promoting techniques to limit digital distractions and optimize work environments for better cognitive performance.'

In [132]:
anthro_completion = claude.get_chat_completion(system_message=huberman_system_prompt,
                                               assistant_message=assistant_message,
                                               temperature=1.0,
                                               raw_response=False)

In [134]:
from rich import print
print(anthro_completion.strip())

In [46]:
context1 = '''
Another benefit of gRPC is its enhanced type safety, achieved through the explicit definition of data structures and types in protobufs. This approach significantly reduces common challenges encountered when working with GraphQL and JSON, such as ambiguity in data types and difficulties in ensuring readability.
In the context of JSON and GraphQL, determining the correct data type for numbers (e.g., distinguishing between integers and floating-point numbers) or interpreting empty properties can be problematic. In Weaviate, this can lead to AutoSchema inferring inappropriate data types, potentially leading to data integrity issues.
Moreover, parsing complex and deeply nested JSON responses, or crafting intricate GraphQL queries, can degrade the developer experience and elevate the risk of errors. This is also partly due to having to conform to the structure of the GraphQL schema. This has been a challenge for certain tasks, such as implementing GroupBy queries and responses in Weaviate.
gRPC addresses these issues head-on by facilitating the customization of data structures for both requests and responses. Its reliance on strictly defined data types streamlines parsing processes and boosts reliability.
'''

context2 = '''
Vectors are a great way to represent meaning. Vectors are arrays of elements that can capture meaning from different data types, such as texts, images, videos, and other content. The elements are called dimensions. High dimension vectors capture more information, but they are harder to work with.
Vector databases make it easier to work with high dimensional vectors. Consider search; Vector databases efficiently measure semantic similarity between data objects. When you run a similarity search, a vector database like Weaviate uses a vectorized version of the query to find objects in the database that have vectors similar to the query vector.
Vectors are like coordinates in a multi-dimensional space. A very simple vector might represent objects, words in this case, in a 2-dimensional space.
In the graph below, the words Apple and Banana are shown close to each other. Newspaper and Magazine are also close to each other, but they are far away from Apple and Banana in the same vector space.
Within each pair, the distance between words is small because the objects have similar vector representations. The distance between the pairs is larger because the difference between the vectors is larger. Intuitively, fruits are similar to each other, but fruits are not similar to reading material.
'''

In [48]:
prompts = [context1, context2]
messages = [{"role": "system", "content": "You are a highly experienced data annotator.  Your job is to create two questions that can be answered from the provided context."},
            {"role": "assistant", "content": context}]

In [49]:
# cage = []
# for chunk in completion(model='claude-3-haiku-20240307', messages=messages, temperature=1.0, stream=True):
#     cage.append(chunk['choices'][0]['delta']['content'].strip())
#     if any(cage):
#         print(' '.join(cage))
#         sleep(0.05)

In [56]:
async def async_task(message: list[dict]):
    response = await acompletion(model="gpt-3.5-turbo-1106", messages=message, temperature=1.0)
    return response

In [64]:
%%time
async def gather(prompts: list[str]):
    tasks = []
    for p in prompts:
        messages = [{"role": "system", "content": "You are a highly experienced data annotator.  Your job is to create two questions that can be answered from the provided context."},
                    {"role": "assistant", "content": prompt.format(context=p)}]
        tasks.append(async_task(messages))
    asyncio.gather(*tasks)

CPU times: user 5 µs, sys: 1 µs, total: 6 µs
Wall time: 6.91 µs


In [65]:
%%time
responses = asyncio.run(gather(prompts))

CPU times: user 328 µs, sys: 109 µs, total: 437 µs
Wall time: 416 µs


In [63]:
responses