In [104]:
%env USE_GOOGLE_AUTH=True
%env GEMINI_KEY=""
%env GOOGLE_APIKEY=""

import requests
import json

# Download the data
resp = requests.get('https://raw.githubusercontent.com/weaviate-tutorials/quickstart/main/data/jeopardy_tiny.json')
data = json.loads(resp.text)  # Load data
    
def json_print(data):
    print(json.dumps(data, indent=2))
    
json_print(data)

env: USE_GOOGLE_AUTH=True
env: GEMINI_KEY=""
env: GOOGLE_APIKEY=""
[
  {
    "Category": "SCIENCE",
    "Question": "This organ removes excess glucose from the blood & stores it as glycogen",
    "Answer": "Liver"
  },
  {
    "Category": "ANIMALS",
    "Question": "It's the only living mammal in the order Proboseidea",
    "Answer": "Elephant"
  },
  {
    "Category": "ANIMALS",
    "Question": "The gavial looks very much like a crocodile except for this bodily feature",
    "Answer": "the nose or snout"
  },
  {
    "Category": "ANIMALS",
    "Question": "Weighing around a ton, the eland is the largest species of this animal in Africa",
    "Answer": "Antelope"
  },
  {
    "Category": "ANIMALS",
    "Question": "Heaviest of all poisonous snakes is this North American rattlesnake",
    "Answer": "the diamondback rattler"
  },
  {
    "Category": "SCIENCE",
    "Question": "2000 news: the Gunnison sage grouse isn't just another northern sage grouse, but a new one of this classificatio

In [91]:
import weaviate
from weaviate import EmbeddedOptions
import os
from weaviate.connect import ConnectionParams

GEMINI_KEY = os.getenv("GEMINI_KEY")  # Ensure this is set

# Connect to Weaviate without authentication
client = weaviate.connect_to_local(
    host="localhost",
    port=8080,
    grpc_port=50051,
    headers={"X-Goog-Studio-Api-Key": GEMINI_KEY,"GOOGLE_APIKEY":GEMINI_KEY}  # Gemini key for AI Studio
)

# Check connection status
if client.is_live() and client.is_ready():
    print("Weaviate is live and ready!")
else:
    print("Connection failed")

# Check that Weaviate is up and live
if client.is_live():
    print("Weaviate is live!")
else:
    print("Weaviate is not reachable.")



Weaviate is live and ready!
Weaviate is live!


In [98]:
if client.collections.exists("Question"):
    client.collections.delete("Question")

In [99]:
from weaviate.classes.config import Configure, Property, DataType, VectorDistances

# Define the collection name and properties
collection_name = "Question"

# Define properties with correct field names
properties = [
    Property(name="question", data_type=DataType.TEXT),
    Property(name="answer", data_type=DataType.TEXT),
    Property(name="category", data_type=DataType.TEXT)
]

# Create the collection with properly configured vectorizer and vector index
client.collections.create(
    name=collection_name,
    properties=properties,
    generative_config=Configure.Generative.google(model_id="gemini-pro",project_id="633437549115"),
    vectorizer_config=Configure.Vectorizer.text2vec_transformers(),
    vector_index_config=Configure.VectorIndex.hnsw(
        distance_metric=VectorDistances.COSINE
    )
)


<weaviate.collections.collection.sync.Collection at 0x1a0f53f0230>

In [100]:
#Insert the data into Weaviate
collection = client.collections.get("Question")
print(client.is_live())
with client.batch.dynamic() as batch:
    for o in data:
        obj_body = {
            'question': o["Question"],
            'answer': o["Answer"],
            'round': o["Category"]
        }
        collection.data.insert(properties=obj_body)
        

True


In [101]:
response = collection.aggregate.over_all(total_count=True)
print(response.total_count)
print()

10



### Lets run a vector search to see whats comes back

In [102]:
from weaviate.classes.query import MetadataQuery, Filter

response = collection.generate.near_text(query="animals", limit=2)

for o in response.objects:
    print(o.properties)
    print(o.metadata.distance)
    
# print(json.dumps(response, indent=2))

{'round': 'ANIMALS', 'answer': 'Elephant', 'question': "It's the only living mammal in the order Proboseidea", 'category': None}
None
{'round': 'ANIMALS', 'answer': 'Antelope', 'question': 'Weighing around a ton, the eland is the largest species of this animal in Africa', 'category': None}
None


### No we want to pass each of these objects to a LLM individually to use when answering a prompt!

In [103]:
#Write a prompt that will be passed in the returend object above.

prompt= "Tell me a story about this animal {answer} flying! in 30 words"

from weaviate.classes.query import MetadataQuery, Filter

response = collection.generate.near_text(query="animals", limit=2,single_prompt=prompt)

for o in response.objects:
    print(o.properties)
    print(o.metadata.distance)
    
# print(json.dumps(response, indent=2))

# ADD CODE HERE

WeaviateQueryError: Query call with protocol GRPC search failed with message <AioRpcError of RPC that terminated with:
	status = StatusCode.UNKNOWN
	details = "Google API Key: no api key found neither in request header: X-Palm-Api-Key or X-Goog-Api-Key or X-Goog-Vertex-Api-Key or X-Goog-Studio-Api-Key nor in environment variable under PALM_APIKEY or GOOGLE_APIKEY"
	debug_error_string = "UNKNOWN:Error received from peer  {grpc_message:"Google API Key: no api key found neither in request header: X-Palm-Api-Key or X-Goog-Api-Key or X-Goog-Vertex-Api-Key or X-Goog-Studio-Api-Key nor in environment variable under PALM_APIKEY or GOOGLE_APIKEY", grpc_status:2, created_time:"2025-02-09T19:07:23.6387505+00:00"}"
>.

In [None]:
#Write a query to perform RAG

# ADD CODE HERE

json_print(response)

### Lets extract all the categories

In [None]:
response = (client.query
            .get("Question", 'category')
            .with_near_text({'concepts':'animals'})
            .with_limit(10)
            .do()
           )

json_print(response)

### Now we'll pass all of these in at the same time for a LLM to generate a grouped answer.

In [None]:
#Write a prompt that requires information from all returned objects

# ADD CODE HERE

In [None]:
#write a query that generates a grouped response

# ADD CODE HERE

json_print(response)