In [1]:
!pip install pip gradio weaviate_client -Uq

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
kfp 2.9.0 requires protobuf<5,>=4.21.1, but you have protobuf 5.29.3 which is incompatible.
kfp-kubernetes 1.3.0 requires protobuf<5,>=4.21.1, but you have protobuf 5.29.3 which is incompatible.
kfp-pipeline-spec 0.4.0 requires protobuf<5,>=4.21.1, but you have protobuf 5.29.3 which is incompatible.[0m[31m
[0m

In [2]:
import gradio as gr
from huggingface_hub import InferenceClient
import weaviate.classes as wvc
import weaviate
from weaviate.auth import AuthApiKey
import logging
import os
import requests
import json
import weaviate

# Runtime configuration: the Ollama endpoint (overridable through the
# OLLAMA_HOST environment variable) and the model names used for
# vectorization and for text generation.
ollama_api_endpoint = os.environ.get("OLLAMA_HOST", "http://ollama.ollama")
ollama_vectorizer_model = "all-minilm"
model = ollama_vectorizer_model  # alias kept: the original cell also bound `model`
ollama_generative_model = "granite3-dense:8b"

# Send INFO-level records to the notebook output and echo the endpoint in use.
logging.basicConfig(level=logging.INFO)
logging.info(f'OLLAMA_API_ENDPOINT = {ollama_api_endpoint}')

INFO:root:OLLAMA_API_ENDPOINT = http://ollama.ollama


In [3]:
def connect_weaviate_embedded():
    """Connect to an embedded Weaviate 1.25.6 instance and return the client.

    The embedded instance is given ``ENABLE_MODULES`` set to
    ``text2vec-ollama,generative-ollama``.

    Returns:
        The client object returned by ``weaviate.connect_to_embedded``.
    """
    logging.basicConfig(level=logging.INFO)
    logging.info('Connecting to Weaviate embedded instance')
    embedded_env = {"ENABLE_MODULES": "text2vec-ollama,generative-ollama"}
    return weaviate.connect_to_embedded(
        version="1.25.6",
        environment_variables=embedded_env,
    )

In [4]:
# Connect to the embedded instance and, once it reports ready, log the
# cluster nodes and the server metadata.
client = connect_weaviate_embedded()

if client.is_ready():
    # Query the node list once so the reported count and the per-node lines
    # come from the same snapshot (and only one cluster request is issued).
    nodes = client.cluster.nodes()
    logging.info('')
    logging.info(f'Found {len(nodes)} Weaviate nodes.')
    logging.info('')
    for node in nodes:
        logging.info(node)
        logging.info('')
    logging.info(f'client.get_meta(): {client.get_meta()}')
else:
    logging.error("Client is not ready")

INFO:root:Connecting to Weaviate embedded instance
INFO:weaviate-client:Binary /opt/app-root/src/.cache/weaviate-embedded did not exist. Downloading binary from https://github.com/weaviate/weaviate/releases/download/v1.25.6/weaviate-v1.25.6-Linux-amd64.tar.gz
INFO:weaviate-client:Started /opt/app-root/src/.cache/weaviate-embedded: process ID 563
{"action":"startup","default_vectorizer_module":"none","level":"info","msg":"the default vectorizer modules is set to \"none\", as a result all new schema classes without an explicit vectorizer setting, will use this vectorizer","time":"2025-01-21T17:52:59Z"}
{"action":"startup","auto_schema_enabled":true,"level":"info","msg":"auto schema enabled setting is set to \"true\"","time":"2025-01-21T17:52:59Z"}
{"level":"info","msg":"No resource limits set, weaviate will use all available memory and CPU. To limit resources, set LIMIT_RESOURCES=true","time":"2025-01-21T17:52:59Z"}
{"level":"info","msg":"open cluster service","servers":{"Embedded_at_807

In [5]:
# Start from a clean slate: remove the collections already present on the
# connected instance before "Question" is created in the next cell.
# Destructive -- do not run this against an instance holding data you need.
client.collections.delete_all()

INFO:httpx:HTTP Request: GET http://localhost:8079/v1/schema "HTTP/1.1 200 OK"


In [6]:
# Create the "Question" collection, pointing both the vectorizer and the
# generative module at our Ollama base URL.
questions = client.collections.create(
    "Question",
    vectorizer_config=wvc.config.Configure.Vectorizer.text2vec_ollama(
        api_endpoint=ollama_api_endpoint,
        model=ollama_vectorizer_model
    ),
    generative_config=wvc.config.Configure.Generative.ollama(
        api_endpoint=ollama_api_endpoint,
        model=ollama_generative_model
    )
)

# Download the sample dataset. The timeout keeps the cell from hanging
# indefinitely when the host is unreachable; requests raises instead.
resp = requests.get(
    'https://people.redhat.com/bkozdemb/downloads/jeopardy_1k.json',
    timeout=30
)
logging.info(f'Request status_code = {resp.status_code}')

if resp.status_code == 200:
    data = json.loads(resp.text)

    # Map the dataset's capitalized keys onto the lower-case property names
    # stored in the collection.
    question_objs = [
        {
            "answer": d["Answer"],
            "question": d["Question"],
            "category": d["Category"],
            "air_date": d["Air Date"],
            "round": d["Round"],
            "value": d["Value"]
        }
        for d in data
    ]

    # Report the real number of objects rather than a hard-coded count.
    logging.info(f'Importing {len(question_objs)} Questions...')
    insert_result = questions.data.insert_many(question_objs)
    # insert_many can report per-object failures on its return value instead
    # of raising, so surface them rather than silently claiming success.
    if insert_result.has_errors:
        logging.error(
            f'{len(insert_result.errors)} of {len(question_objs)} objects failed to import'
        )
    logging.info('Finished Importing Questions')

    logging.info(f'Collection: {questions}')
else:
    logging.error('Error getting Jeopardy questions!')
    logging.error(f'status_code = {resp.status_code}, Reason: {resp.reason}')
    

{"action":"hnsw_prefill_cache_async","level":"info","msg":"not waiting for vector cache prefill, running in background","time":"2025-01-21T17:53:03Z","wait_for_cache_prefill":false}
{"level":"info","msg":"Created shard question_xZQprp1wmyU4 in 1.138338ms","time":"2025-01-21T17:53:03Z"}
{"action":"hnsw_vector_cache_prefill","count":1000,"index_id":"main","level":"info","limit":1000000000000,"msg":"prefilled vector cache","time":"2025-01-21T17:53:03Z","took":53501}
INFO:httpx:HTTP Request: POST http://localhost:8079/v1/schema "HTTP/1.1 200 OK"
INFO:root:Request status_code = 200
INFO:root:Importing 1000 Questions...
INFO:root:Finished Importing Questions
INFO:httpx:HTTP Request: GET http://localhost:8079/v1/schema/Question "HTTP/1.1 200 OK"
INFO:root:Collection: <weaviate.Collection config={
  "name": "Question",
  "description": null,
  "generative_config": {
    "generative": "generative-ollama",
    "model": {
      "apiEndpoint": "http://ollama.ollama",
      "model": "granite3-dense

In [7]:
def respond(query='computers', task='Summarize', limit=1) -> str:
    """Run a generative near-text search on the ``questions`` collection.

    Args:
        query: Concept to search for.
        task: Prompt passed as ``grouped_task`` to the generative search.
        limit: Maximum number of objects to retrieve.

    Returns:
        The generated text; an empty string when the response carries no
        generated text; or a short hint when ``query`` is empty or blank.
    """
    if not query or not query.strip():
        # Nothing to search for -- skip the request instead of sending an
        # empty concept to the vectorizer.
        return 'Please enter a search concept.'

    # Use logging, like the rest of the notebook, instead of bare prints.
    logging.info(f'Performing generative search, query = {query}, limit = {limit}.')
    logging.info(f'Prompt: {task}')
    response = questions.generate.near_text(
        query=query,
        limit=limit,
        grouped_task=task
    )
    # `generated` may be None; honor the declared `str` return type.
    return response.generated or ''

In [8]:
# Build the Gradio UI: a heading, a search box pre-filled with the first
# example, a list of clickable example concepts, and a button that sends the
# search box content to `respond` and shows the result in a text box.
with gr.Blocks(title="Search the Jeopardy Vector Database. (powered by Weaviate and Ollama)") as demo:
    gr.Markdown("""# Search and summarize the Jeopardy Vector Database. (Powered by Weaviate and Ollama)""")

    # Each example is a one-element row holding the search concept.
    semantic_examples = [
        [topic]
        for topic in (
            "Nature",
            "Music",
            "Wine",
            "Consumer Products",
            "Sports",
            "Fishing",
            "Food",
            "Weather",
        )
    ]
    semantic_input_text = gr.Textbox(
        label="Enter a search concept or choose an example below:",
        value=semantic_examples[0][0],
    )
    gr.Examples(semantic_examples, inputs=semantic_input_text, label="Example search concepts:")

    vdb_button = gr.Button(value="Search and Summarize the Jeopardy Vector Database.")
    results_box = gr.Textbox(label="Search Results")
    vdb_button.click(fn=respond, inputs=[semantic_input_text], outputs=results_box)

# Serve on all interfaces at port 8082; share=True also requests a public
# Gradio share link for the app.
if __name__ == "__main__":
    demo.launch(server_name='0.0.0.0', server_port=8082, share=True)

  from websockets.server import WebSocketServerProtocol


* Running on local URL:  http://0.0.0.0:8082


INFO:httpx:HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET http://localhost:8082/gradio_api/startup-events "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: HEAD http://localhost:8082/ "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"


* Running on public URL: https://56fd56cd257760ee2f.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


INFO:httpx:HTTP Request: HEAD https://56fd56cd257760ee2f.gradio.live "HTTP/1.1 200 OK"



Performing generative search, query = Consumer Products, limit = 1.
Prompt: Summarize
limit = 1


{"action":"restapi_management","level":"info","msg":"Shutting down... ","time":"2025-01-21T17:54:50Z"}
{"action":"restapi_management","level":"info","msg":"Stopped serving weaviate at http://127.0.0.1:8079","time":"2025-01-21T17:54:50Z"}
{"action":"telemetry_push","level":"info","msg":"telemetry terminated","payload":"\u0026{MachineID:4760a033-8bc5-496c-801d-d7d41eee3dbf Type:TERMINATE Version:1.25.6 NumObjects:1000 OS:linux Arch:amd64 UsedModules:[generative-ollama text2vec-ollama]}","time":"2025-01-21T17:54:50Z"}
{"level":"info","msg":"closing raft FSM store ...","time":"2025-01-21T17:54:50Z"}
{"level":"info","msg":"shutting down raft sub-system ...","time":"2025-01-21T17:54:50Z"}
{"level":"info","msg":"transferring leadership to another server","time":"2025-01-21T17:54:50Z"}
{"error":"cannot find peer","level":"error","msg":"transferring leadership","time":"2025-01-21T17:54:50Z"}
{"level":"info","msg":"closing raft-net ...","time":"2025-01-21T17:54:50Z"}
{"level":"info","msg":"closi