##### Install Dependencies



In [None]:
%pip install -U weaviate-client
%pip install python-dotenv
%pip install openai

##### Environment Variables Setup

**FIRST:** create your `.env` file in this folder, using the `.env.example` file as a guide.

In [None]:
# Import Environment Variables

# load_dotenv() runs before any os.getenv lookup so that values from the
# .env file described above are visible in the process environment.
import os
from dotenv import load_dotenv
load_dotenv()

# Keys and URLs for the embedding and generative models.
# os.getenv returns None for any variable that is not set.
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
# OPENAI_BASE_URL = os.getenv('OPENAI_BASE_URL')
OPENAI_EMBEDDING_URL = os.getenv('OPENAI_EMBEDDING_URL')
OPENAI_GENERATION_URL = os.getenv('OPENAI_GENERATION_URL')

# Keys and URLs for the vector database
WEAVIATE_URL = os.getenv('WEAVIATE_URL_VERBA')
WEAVIATE_API_KEY = os.getenv('WEAVIATE_API_KEY_VERBA')


# Sanity check that prints True/False without revealing the secret values.
# `or ""` turns an unset variable (None) into an empty string, so a missing
# setting prints False instead of raising TypeError from len(None).
print(len(WEAVIATE_URL or "") > 20)
print(len(WEAVIATE_API_KEY or "") > 20)
print(len(OPENAI_API_KEY or "") > 20)


##### Vector Database Connection

In [None]:
# Name of the Weaviate collection to query. The name appears to encode the
# embedding model used for its vectors —
# NOTE(review): confirm it matches EMBEDDING_MODEL.
COLLECTION_NAME = "VERBA_Embedding_text_embedding_3_small"

In [None]:
# VECTOR DATABASE CONNECTION

from weaviate.classes.init import Auth, AdditionalConfig, Timeout
import weaviate

# API-key credentials and timeouts (all timeout values are in seconds)
db_credentials = Auth.api_key(WEAVIATE_API_KEY)
db_timeouts = Timeout(init=30, query=60, insert=120)

client_db = weaviate.connect_to_weaviate_cloud(
    cluster_url=WEAVIATE_URL,
    auth_credentials=db_credentials,
    additional_config=AdditionalConfig(timeout=db_timeouts),
)

# Report whether the cluster answers as ready
print(client_db.is_ready())

In [None]:
# Print every entry obtained by iterating the result of list_all()
available_collections = client_db.collections.list_all()
for collection in available_collections:
    print(collection)

In [None]:
# NOTE(review): the "Simple RAG Implementation" cells further down still query
# through client_db, so run this cell last (or reconnect before querying).
client_db.close()

##### Embedding Service Connection

In [None]:
# https://platform.openai.com/docs/api-reference/embeddings/create

def openai_extract_vector(response) -> list[float]:
    """Return the embedding stored on the first entry of ``response.data``.

    Args:
        response: An embeddings response object exposing a ``data`` sequence
            whose items carry an ``embedding`` attribute.

    Returns:
        The ``embedding`` attribute of ``response.data[0]``.

    Raises:
        IndexError: If ``response.data`` is empty.
    """
    first_entry = response.data[0]
    return first_entry.embedding


In [None]:
# Embedding model used to vectorize query text
EMBEDDING_MODEL = "text-embedding-3-small"
# Intended value for the `encoding_format` argument of the embeddings request
EMBEDDING_FORMAT = "float"

In [None]:
# EMBEDDING CONNECTION
import openai
from openai import OpenAI

# Module-level default key, kept for any code that calls the `openai` module
# directly. Client instances created with OpenAI() do not read this value.
openai.api_key = OPENAI_API_KEY

# Pass the key to the client explicitly. When OPENAI_API_KEY is None the
# client still falls back to the OPENAI_API_KEY environment variable, so
# behaviour is unchanged for setups that relied on the environment.
client_embedding = OpenAI(api_key=OPENAI_API_KEY)

print(type(client_embedding))

In [None]:
# Print each model entry the OpenAI client reports
available_models = client_embedding.models.list()
for item in available_models:
    print(item)

##### Generation Service Connection

In [None]:
# GENERATION CONNECTION

import openai
from openai import OpenAI

# Module-level default key, kept for any code that calls the `openai` module
# directly. Client instances created with OpenAI() do not read this value.
openai.api_key = OPENAI_API_KEY

# Pass the key to the client explicitly. When OPENAI_API_KEY is None the
# client still falls back to the OPENAI_API_KEY environment variable, so
# behaviour is unchanged for setups that relied on the environment.
generation_client = OpenAI(api_key=OPENAI_API_KEY)


##### Simple RAG Implementation

In [None]:
from weaviate.classes.query import MetadataQuery

query_text = "I'd like to know about issues with plumbing in or around 2024"

# Vectorize the query with the configured embedding model and encoding format
response_embedding = client_embedding.embeddings.create(
    model=EMBEDDING_MODEL,
    input=query_text,
    encoding_format=EMBEDDING_FORMAT,
)

# Extract the vector embedding (list[float]) from the embedding response
query_vector = openai_extract_vector(response_embedding)

# An earlier cell calls client_db.close(); reopen the connection if needed so
# this cell also works when the notebook is run from top to bottom.
if not client_db.is_connected():
    client_db.connect()

# Look up the Weaviate collection - its name is based on the embedding model used
collection = client_db.collections.get(COLLECTION_NAME)

# Vector search: up to 10 objects nearest to the query vector, with the
# distance of each match included in the returned metadata
response_db = collection.query.near_vector(
    near_vector=query_vector,
    limit=10,
    return_metadata=MetadataQuery(distance=True),
)

# # Print results
# for item in response_db.objects:
#     print(item.properties)
#     print(item.metadata.distance)


In [None]:
# Assemble the retrieved chunks into one context string. Each chunk is
# preceded by a "<ContextSegmentN>" marker line built from its chunk_id.
context_parts = []
for item in response_db.objects:
    chunk_number = int(item.properties.get('chunk_id'))
    context_parts.append('\n<ContextSegment' + str(chunk_number) + '>\n')
    context_parts.append(item.properties.get('content'))

response_content = "".join(context_parts)

# print(response_content)

In [None]:

# Put the retrieved context in the system prompt and send the user's
# question as the user message
system_prompt = f"You are a helpful assistant. You use the following context to generate a response: {response_content}"
chat_messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": query_text},
]

generation_response = generation_client.chat.completions.create(
    model="gpt-4o",
    messages=chat_messages,
)

# Show the text of the first returned choice
print(generation_response.choices[0].message.content)