##### Install Dependencies



In [1]:
!python -m pip install --upgrade pip



In [2]:
!python -m pip install -U weaviate-client==4.7.1
!python -m pip install python-dotenv==1.0.0
!python -m pip install openai==1.54.3
!python -m pip install assemblyai==0.35.1

Collecting weaviate-client==4.7.1
  Using cached weaviate_client-4.7.1-py3-none-any.whl.metadata (3.3 kB)
Collecting validators==0.33.0 (from weaviate-client==4.7.1)
  Using cached validators-0.33.0-py3-none-any.whl.metadata (3.8 kB)
Using cached weaviate_client-4.7.1-py3-none-any.whl (368 kB)
Using cached validators-0.33.0-py3-none-any.whl (43 kB)
Installing collected packages: validators, weaviate-client
  Attempting uninstall: validators
    Found existing installation: validators 0.34.0
    Uninstalling validators-0.34.0:
      Successfully uninstalled validators-0.34.0
  Attempting uninstall: weaviate-client
    Found existing installation: weaviate-client 4.9.3
    Uninstalling weaviate-client-4.9.3:
      Successfully uninstalled weaviate-client-4.9.3
Successfully installed validators-0.33.0 weaviate-client-4.7.1


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
goldenverba 2.0.0 requires python-dotenv==1.0.0, but you have python-dotenv 1.0.1 which is incompatible.


Collecting python-dotenv==1.0.0
  Using cached python_dotenv-1.0.0-py3-none-any.whl.metadata (21 kB)
Using cached python_dotenv-1.0.0-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
  Attempting uninstall: python-dotenv
    Found existing installation: python-dotenv 1.0.1
    Uninstalling python-dotenv-1.0.1:
      Successfully uninstalled python-dotenv-1.0.1
Successfully installed python-dotenv-1.0.0


##### Environmental Variables & Configurations

**FIRST** Create your .env file in this folder.  Use the .env.example file as a guide.

In [None]:
# Import Environmental Variables

#Environmental variables
import os
from dotenv import load_dotenv
load_dotenv()

#Keys and URLs for Embedding and Generative Models
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
# OPENAI_BASE_URL = os.getenv('OPENAI_BASE_URL')
OPENAI_EMBEDDING_URL = os.getenv('OPENAI_EMBEDDING_URL')
OPENAI_GENERATION_URL = os.getenv('OPENAI_GENERATION_URL')

#Keys and URLS for Vector Databases
WEAVIATE_URL = os.getenv('WEAVIATE_URL')
WEAVIATE_API_KEY = os.getenv('WEAVIATE_API_KEY')

print(len(WEAVIATE_URL)>20)
print(len(WEAVIATE_API_KEY)>20)
print(len(OPENAI_API_KEY)>20)

TypeError: object of type 'NoneType' has no len()

In [None]:
# Embedding
EMBEDDING_MODEL = "text-embedding-3-small"
ENCODING_FORMAT = "float"

# Database
COLLECTION_NAME = "VERBA_Embedding_text_embedding_3_small"

RESPONDING_GENERATIVE_MODEL = "gpt-4o"

TRUSTSAFETY_GENERATIVE_MODEL = "gpt-4o"

##### Vector Database Connection

In [None]:
# VECTOR DATABASE CONNECTION

from weaviate.classes.init import Auth, AdditionalConfig, Timeout
import weaviate

weaviate_client = weaviate.connect_to_weaviate_cloud(
    cluster_url = WEAVIATE_URL,
    auth_credentials = Auth.api_key(WEAVIATE_API_KEY),
    additional_config=AdditionalConfig(timeout=Timeout(init=30, query=60, insert=120))  # Values in seconds
)

print(weaviate_client.is_ready())

In [None]:
# for collection in weaviate_client.collections.list_all():
#     print(collection)

In [None]:
# weaviate_client.close()

##### Embedding Service Connection

In [None]:
# Extracts the actual vector embedding from the OpenAI response
# https://platform.openai.com/docs/api-reference/embeddings/create
def openai_extract_vector(
        response
    ) -> list[float]:

    return response.data[0].embedding

In [None]:
# EMBEDDING CONNECTION - OPENAI
# https://platform.openai.com/docs/api-reference/authentication

import openai
from openai import OpenAI

# Set API Key.  Not necessary if you have an 
# OPENAI_API_KEY variable in your environment
openai.api_key = OPENAI_API_KEY 
embedding_client = OpenAI()

In [None]:
# print(type(embedding_client))

# for item in embedding_client.models.list():
#     print(item)

##### Generation Service Connection

In [None]:
# GENERATION CONNECTION - OPENAI
# https://platform.openai.com/docs/api-reference/authentication

import openai
from openai import OpenAI

# Set API Key.  Not necessary if you have an 
# OPENAI_API_KEY variable in your environment
openai.api_key = OPENAI_API_KEY 

generation_client = OpenAI()


##### Simple RAG Implementation

In [None]:
from weaviate.classes.query import MetadataQuery

user_prompt = "I'd like to know about issues with plumbing in or around 2024"

# Vectorize the query
embedding_response = embedding_client.embeddings.create(
    model = EMBEDDING_MODEL,
    input = user_prompt,
    encoding_format = ENCODING_FORMAT
)

# Extract the vector embeddings list[float] from the embedding response
vectorized_query = openai_extract_vector(embedding_response) 

# Look up the appropriate Weviate database collection - name based on embedding model used
db_collection = weaviate_client.collections.get('VERBA_Embedding_text_embedding_3_small')

# Send vector query to database and get response
db_response = db_collection.query.near_vector(
    near_vector=vectorized_query,
    limit=10,
    return_metadata=MetadataQuery(distance=True)
)

# Extract items from database response 
# and aggregate into a single string 
db_response_content = ""
for item in db_response.objects:
    segment = '\n<ContextSegment' + str(int(item.properties.get('chunk_id'))) + '>\n'
    db_response_content += segment
    db_response_content += item.properties.get('content')

# # Print results
# for item in db_response.objects:
#     print(item.properties)
#     print(item.metadata.distance)

# print(db_response_content)

In [None]:
# Response Generation with OpenAI
# # https://platform.openai.com/docs/api-reference/chat/create


generation_response = generation_client.chat.completions.create(
  model=RESPONDING_GENERATIVE_MODEL,
  messages=[
    {
      "role": "system", 
      "content": f"You are a helpful assistant who uses this context if appropriate: {db_response_content}"
    },
    {
      "role": "user", 
      "content": user_prompt 
    }
  ]
)

print(generation_response.choices[0].message.content)