In [None]:
from dotenv import load_dotenv
import os

import weaviate
from openai import OpenAI

In [None]:
def context_retrieval(client, collection_name, embedding, limit=5, certainty=0.7,
                      text_property="text_content"):
    """Return the stored text of the objects nearest to ``embedding``.

    Runs a nearest-vector *object* query (not an Aggregate query, which only
    yields statistics) against ``collection_name`` and extracts one text
    property from every hit.

    Args:
        client: A connected Weaviate client. A client exposing
            ``.collections`` is queried through the collections API (the
            style returned by ``weaviate.connect_to_wcs``); any other client
            is queried through the ``client.query.get(...)`` builder.
            NOTE(review): confirm which client generation the deployment
            actually uses.
        collection_name: Name of the collection/class to search.
        embedding: Query vector (sequence of floats).
        limit: Maximum number of objects to return.
        certainty: Minimum certainty a hit must reach to be returned.
        text_property: Property holding the text to return.
            NOTE(review): ``"text_content"`` is carried over from the previous
            implementation -- verify it against the collection schema.

    Returns:
        list[str]: The ``text_property`` value of each hit, in the order the
        server returned them. Hits without that property are skipped.

    Raises:
        RuntimeError: If the builder-style query reports GraphQL errors.
    """
    if hasattr(client, "collections"):
        # Collections-style client: results come back as objects carrying a
        # ``properties`` mapping.
        collection = client.collections.get(collection_name)
        result = collection.query.near_vector(
            near_vector=embedding,
            certainty=certainty,
            limit=limit,
            return_properties=[text_property],
        )
        texts = [obj.properties.get(text_property) for obj in result.objects]
    else:
        # Builder-style client: ``Get`` returns one dict per object holding
        # the requested properties.
        response = (
            client.query.get(collection_name, [text_property])
            .with_near_vector({"vector": embedding, "certainty": certainty})
            .with_limit(limit)
            .do()
        )
        if response.get("errors"):
            # Fail loudly instead of with an opaque KeyError further down.
            raise RuntimeError(f"Weaviate query failed: {response['errors']}")
        hits = response["data"]["Get"][collection_name] or []
        texts = [hit.get(text_property) for hit in hits]

    # Skip objects that lack the text property so callers can safely join.
    return [text for text in texts if text is not None]

In [None]:
def rag(question, openai_client, weaviate_client, collection_name,
        embedding_model='text-embedding-3-small', chat_model="gpt-3.5-turbo"):
    """Answer ``question`` with the chat model, grounded in retrieved context.

    When ``weaviate_client`` is provided, the question is embedded, the
    nearest stored passages are fetched via ``context_retrieval`` and they are
    placed in front of the question in the user message. When it is ``None``,
    retrieval (and the embedding request it needs) is skipped and the bare
    question is sent.

    Args:
        question: The user's question.
        openai_client: OpenAI client used for embeddings and chat completions.
        weaviate_client: Connected Weaviate client, or ``None`` to answer
            without retrieval.
        collection_name: Weaviate collection searched for context.
        embedding_model: Embedding model name used for the question.
        chat_model: Chat model name used to generate the answer.

    Returns:
        The content of the first choice of the chat completion.
    """
    combined_context = ""
    if weaviate_client is not None:
        # Embed the question; newlines are flattened to spaces first.
        embedding_response = openai_client.embeddings.create(
            input=[question.replace("\n", " ")],
            model=embedding_model,
        )
        question_embedding = embedding_response.data[0].embedding

        retrieved_texts = context_retrieval(weaviate_client, collection_name, question_embedding)
        combined_context = "\n\n".join(retrieved_texts)

    if combined_context:
        # Hand the retrieved passages to the model alongside the question.
        user_content = (
            "Use the following context to answer the question.\n\n"
            f"Context:\n{combined_context}\n\n"
            f"Question: {question}"
        )
    else:
        user_content = question

    response = openai_client.chat.completions.create(
        model=chat_model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant to answer any question related to Brown University's Computer Science department."},
            {"role": "user", "content": user_content},
        ],
    )

    return response.choices[0].message.content

In [None]:
# Pull credentials from a local .env file into the process environment.
load_dotenv()
openai_api_key = os.environ.get("OPENAI_API_KEY")
weaviate_api_key = os.environ.get("WEAVIATE_API_KEY")
weaviate_endpoint = os.environ.get("WEAVIATE_WCS_URL")

# Only the OpenAI client is created here. The Weaviate connection is switched
# off and ``None`` is handed to ``rag`` in its place. To connect to a WCS
# instance instead, use:
#     weaviate_client = weaviate.connect_to_wcs(
#         cluster_url = weaviate_endpoint,
#         auth_credentials = weaviate.auth.AuthApiKey(weaviate_api_key),
#     )
openai_client = OpenAI(api_key = openai_api_key)
weaviate_client = None

# Ask one sample question and show the model's reply.
answer = rag(
    "What are the general research areas in Brown CS?",
    openai_client,
    weaviate_client,
    "CSWebsiteContent",
)

print(answer)