##### Install Dependencies



In [4]:
!python -m pip install --upgrade pip



In [None]:
!python -m pip install -U weaviate-client==4.7.1
!python -m pip install python-dotenv==1.0.0
!python -m pip install openai==1.54.3
!python -m pip install assemblyai==0.35.1

Collecting python-dotenv==1.0.1
  Using cached python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Using cached python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
  Attempting uninstall: python-dotenv
    Found existing installation: python-dotenv 1.0.0
    Uninstalling python-dotenv-1.0.0:
      Successfully uninstalled python-dotenv-1.0.0
Successfully installed python-dotenv-1.0.1


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
goldenverba 2.0.0 requires python-dotenv==1.0.0, but you have python-dotenv 1.0.1 which is incompatible.
goldenverba 2.0.0 requires weaviate-client==4.7.1, but you have weaviate-client 4.9.3 which is incompatible.




##### Environment Variables & Configuration

**FIRST:** Create your `.env` file in this folder, using the `.env.example` file as a guide.

In [None]:
# Import Environmental Variables

#Environmental variables
import os
from dotenv import load_dotenv
load_dotenv()

# Keys and URLs for the embedding and generative models.
# os.getenv returns None for any variable that is not set.
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
# OPENAI_BASE_URL = os.getenv('OPENAI_BASE_URL')
OPENAI_EMBEDDING_URL = os.getenv('OPENAI_EMBEDDING_URL')
OPENAI_GENERATION_URL = os.getenv('OPENAI_GENERATION_URL')

# Key and URL for the vector database
WEAVIATE_URL = os.getenv('WEAVIATE_URL')
WEAVIATE_API_KEY = os.getenv('WEAVIATE_API_KEY')

# Sanity check: print only True/False so the secrets themselves never end up
# in the notebook output. `or ""` makes a missing variable print False
# instead of raising TypeError on len(None).
print(len(WEAVIATE_URL or "") > 20)
print(len(WEAVIATE_API_KEY or "") > 20)
print(len(OPENAI_API_KEY or "") > 20)

In [None]:
import re

# Embedding
EMBEDDING_MODEL = "text-embedding-3-small"
ENCODING_FORMAT = "float"

# Database
# Derive the collection name from the embedding model so the two cannot drift
# apart: every non-alphanumeric character in the model name becomes "_".
# For the model above this yields "VERBA_Embedding_text_embedding_3_small".
# NOTE(review): this appears to match how Verba names its embedding
# collections -- confirm against the Verba version that created the data.
COLLECTION_NAME = "VERBA_Embedding_" + re.sub(r"[^a-zA-Z0-9]", "_", EMBEDDING_MODEL)

##### Vector Database Connection

In [None]:
# VECTOR DATABASE CONNECTION

from weaviate.classes.init import Auth, AdditionalConfig, Timeout
import weaviate

# Build the connection settings first so the connect call stays short.
db_credentials = Auth.api_key(WEAVIATE_API_KEY)
db_timeouts = Timeout(init=30, query=60, insert=120)  # Values in seconds

# Open the connection to the Weaviate Cloud cluster
client_db = weaviate.connect_to_weaviate_cloud(
    cluster_url=WEAVIATE_URL,
    auth_credentials=db_credentials,
    additional_config=AdditionalConfig(timeout=db_timeouts),
)

# Print the result of the client's readiness check
print(client_db.is_ready())

In [None]:
# for collection in client_db.collections.list_all():
#     print(collection)

In [None]:
# client_db.close()

##### Embedding Service Connection

In [None]:
# Extracts the actual vector embedding from the OpenAI response
# https://platform.openai.com/docs/api-reference/embeddings/create
def openai_extract_vector(response) -> list[float]:
    """Return the embedding stored on the first item of ``response.data``.

    Args:
        response: An object exposing a ``data`` sequence whose items have an
            ``embedding`` attribute (the shape of an OpenAI embeddings
            response).

    Returns:
        The ``embedding`` attribute of ``response.data[0]``.
    """
    first_item = response.data[0]
    return first_item.embedding

In [None]:
# EMBEDDING CONNECTION - OPENAI
# https://platform.openai.com/docs/api-reference/authentication

import openai
from openai import OpenAI

# Module-level key: only used by the module-level `openai.*` helpers.
# Kept so any code relying on those helpers continues to work.
openai.api_key = OPENAI_API_KEY

# A client created with OpenAI() does not read `openai.api_key`; without an
# explicit api_key it falls back to the OPENAI_API_KEY environment variable.
# Pass the key explicitly so the client does not silently depend on the
# environment having been populated.
client_embedding = OpenAI(api_key=OPENAI_API_KEY)

In [None]:
# print(type(client_embedding))

# for item in client_embedding.models.list():
#     print(item)

##### Generation Service Connection

In [None]:
# GENERATION CONNECTION - OPENAI
# https://platform.openai.com/docs/api-reference/authentication

import openai
from openai import OpenAI

# Module-level key: only used by the module-level `openai.*` helpers.
# Kept so any code relying on those helpers continues to work.
openai.api_key = OPENAI_API_KEY

# A client created with OpenAI() does not read `openai.api_key`; without an
# explicit api_key it falls back to the OPENAI_API_KEY environment variable.
# Pass the key explicitly so the client does not silently depend on the
# environment having been populated.
generation_client = OpenAI(api_key=OPENAI_API_KEY)


##### Simple RAG Implementation

In [None]:
from weaviate.classes.query import MetadataQuery

query_text = "I'd like to know about issues with plumbing in or around 2024"

# Vectorize the query
response_embedding = client_embedding.embeddings.create(
    model = EMBEDDING_MODEL,
    input = query_text,
    encoding_format = ENCODING_FORMAT
)

# Extract the vector embedding (list[float]) from the embedding response
query_vector = openai_extract_vector(response_embedding)

# Look up the Weaviate collection through the configured name instead of a
# second hard-coded copy of the same string
collection = client_db.collections.get(COLLECTION_NAME)

# Send the vector query to the database: 10 nearest objects, with distances
db_response = collection.query.near_vector(
    near_vector=query_vector,
    limit=10,
    return_metadata=MetadataQuery(distance=True)
)

# Extract items from the database response and aggregate them into a single
# string, each one preceded by a "<ContextSegmentN>" marker line.
context_segments = []
for item in db_response.objects:
    chunk_id = item.properties.get('chunk_id')
    content = item.properties.get('content')
    if chunk_id is None or content is None:
        # Skip objects missing either property instead of crashing on
        # int(None) or on concatenating None to a string.
        continue
    context_segments.append(f"\n<ContextSegment{int(chunk_id)}>\n{content}")

# Join once at the end rather than growing a string inside the loop
db_response_content = "".join(context_segments)

# # Print results
# for item in db_response.objects:
#     print(item.properties)
#     print(item.metadata.distance)

# print(db_response_content)

In [None]:
# Response Generation with OpenAI
# # https://platform.openai.com/docs/api-reference/chat/create


# The system turn carries the retrieved context; the user turn carries the
# original question unchanged.
chat_messages = [
    {
        "role": "system",
        "content": f"You are a helpful assistant who uses this context if appropriate: {db_response_content}",
    },
    {
        "role": "user",
        "content": query_text,
    },
]

generation_response = generation_client.chat.completions.create(
    model="gpt-4o",
    messages=chat_messages,
)

# Show the text of the first returned choice
first_choice = generation_response.choices[0]
print(first_choice.message.content)