In [4]:
import gradio as gr
from huggingface_hub import InferenceClient
import weaviate.classes as wvc
import weaviate
from weaviate.auth import AuthApiKey
import logging
import os
import requests
import json
import weaviate

# Ollama connection settings.
# The OLLAMA_HOST environment variable overrides the default base URL.
ollama_api_endpoint = os.environ.get("OLLAMA_HOST", "http://ollama.ollama")

# Model used to vectorize text; `model` is a second name bound to the same value.
ollama_vectorizer_model = "all-minilm"
model = ollama_vectorizer_model

# Model used for generative queries.
ollama_generative_model = "llama3.2"

# Configure root logging at INFO and record which endpoint is in effect.
logging.basicConfig(level=logging.INFO)
logging.info(f'OLLAMA_API_ENDPOINT = {ollama_api_endpoint}')

  from .autonotebook import tqdm as notebook_tqdm
INFO:root:OLLAMA_API_ENDPOINT = http://ollama.ollama


In [5]:
def connect_weaviate_embedded(
    version="1.25.6",
    modules=("text2vec-ollama", "generative-ollama"),
):
    """Start an embedded Weaviate instance and return a client for it.

    Args:
        version: Weaviate server version passed to ``connect_to_embedded``.
            Defaults to "1.25.6", the value that used to be hard-coded.
        modules: Module names to enable. An iterable of names is joined with
            commas; a plain string is passed through unchanged. The result
            is supplied as the ``ENABLE_MODULES`` environment variable.

    Returns:
        Whatever ``weaviate.connect_to_embedded`` returns (the client).
    """
    # basicConfig does nothing if the root logger already has handlers, so
    # calling it here is safe even when logging was configured earlier.
    logging.basicConfig(level=logging.INFO)
    logging.info('Connecting to Weaviate embedded instance')

    # Accept a ready-made comma-separated string as well as an iterable,
    # so a str argument is not split into single characters by join().
    enabled_modules = modules if isinstance(modules, str) else ",".join(modules)

    return weaviate.connect_to_embedded(
        environment_variables={"ENABLE_MODULES": enabled_modules},
        version=version,
    )

In [6]:
# Connect to the embedded instance and log its cluster state once it is ready.
client = connect_weaviate_embedded()

if client.is_ready():
    # Fetch the node list once so the reported count and the listed nodes
    # come from the same response (previously this was queried twice).
    nodes = client.cluster.nodes()

    logging.info('')
    logging.info(f'Found {len(nodes)} Weaviate nodes.')
    logging.info('')
    for node in nodes:
        logging.info(node)
        logging.info('')
    logging.info(f'client.get_meta(): {client.get_meta()}')
else:
    logging.error("Client is not ready")

INFO:root:Connecting to Weaviate embedded instance
INFO:weaviate-client:Started /opt/app-root/src/.cache/weaviate-embedded: process ID 203
{"action":"startup","default_vectorizer_module":"none","level":"info","msg":"the default vectorizer modules is set to \"none\", as a result all new schema classes without an explicit vectorizer setting, will use this vectorizer","time":"2024-11-12T20:43:07Z"}
{"action":"startup","auto_schema_enabled":true,"level":"info","msg":"auto schema enabled setting is set to \"true\"","time":"2024-11-12T20:43:07Z"}
{"level":"info","msg":"No resource limits set, weaviate will use all available memory and CPU. To limit resources, set LIMIT_RESOURCES=true","time":"2024-11-12T20:43:07Z"}
{"level":"info","msg":"open cluster service","servers":{"Embedded_at_8079":53779},"time":"2024-11-12T20:43:07Z"}
{"address":"10.128.2.33:53780","level":"info","msg":"starting cloud rpc server ...","time":"2024-11-12T20:43:07Z"}
{"level":"info","msg":"starting raft sub-system ...",

In [7]:
# Destructive: deletes every collection on the connected instance before the
# "Question" collection is created further down.
client.collections.delete_all()

INFO:httpx:HTTP Request: GET http://localhost:8079/v1/schema "HTTP/1.1 200 OK"
{"action":"load_all_shards","level":"error","msg":"failed to load all shards: context canceled","time":"2024-11-12T20:43:09Z"}
INFO:httpx:HTTP Request: DELETE http://localhost:8079/v1/schema/Question "HTTP/1.1 200 OK"
{"action":"telemetry_push","level":"info","msg":"telemetry started","payload":"\u0026{MachineID:4c382da9-b68c-4722-80a2-6506cf9b8b32 Type:INIT Version:1.25.6 NumObjects:0 OS:linux Arch:amd64 UsedModules:[generative-ollama text2vec-ollama]}","time":"2024-11-12T20:43:09Z"}


In [8]:
# Create the collection, pointing both the vectorizer and the generative
# module at our Ollama base URL.
questions = client.collections.create(
    "Question",
    vectorizer_config=wvc.config.Configure.Vectorizer.text2vec_ollama(
        api_endpoint=ollama_api_endpoint,
        model=ollama_vectorizer_model
    ),
    generative_config=wvc.config.Configure.Generative.ollama(
        api_endpoint=ollama_api_endpoint,
        model=ollama_generative_model
    )
)

# Download the sample data. The timeout makes an unreachable host fail the
# cell with an exception instead of blocking it indefinitely.
resp = requests.get(
    'https://raw.githubusercontent.com/databyjp/wv_demo_uploader/main/weaviate_datasets/data/jeopardy_1k.json',
    timeout=30,
)
logging.info(f'Request status_code = {resp.status_code}')

if resp.status_code == 200:
    data = resp.json()

    # Map the dataset's field names onto the property names stored per object.
    question_objs = [
        {
            "answer": d["Answer"],
            "question": d["Question"],
            "category": d["Category"],
            "air_date": d["Air Date"],
            "round": d["Round"],
            "value": d["Value"],
        }
        for d in data
    ]

    # Report the real number of objects rather than a hard-coded count.
    logging.info(f'Importing {len(question_objs)} Questions...')

    # `questions` already refers to the collection returned by create(), so
    # it does not need to be looked up again before inserting.
    insert_result = questions.data.insert_many(question_objs)

    # insert_many reports per-object failures on its return value instead of
    # raising, so surface them explicitly.
    if insert_result.has_errors:
        logging.error(
            f'{len(insert_result.errors)} of {len(question_objs)} questions failed to import: '
            f'{insert_result.errors}'
        )
    logging.info('Finished Importing Questions')

    logging.info(f'Collection: {questions}')
else:
    logging.error('Error getting Jeopardy questions!')
    logging.error(f'status_code = {resp.status_code}, Reason: {resp.reason}')
    

{"action":"hnsw_prefill_cache_async","level":"info","msg":"not waiting for vector cache prefill, running in background","time":"2024-11-12T20:43:14Z","wait_for_cache_prefill":false}
{"level":"info","msg":"Created shard question_EQmVZC1mEZiy in 1.126545ms","time":"2024-11-12T20:43:14Z"}
{"action":"hnsw_vector_cache_prefill","count":1000,"index_id":"main","level":"info","limit":1000000000000,"msg":"prefilled vector cache","time":"2024-11-12T20:43:14Z","took":55671}
INFO:httpx:HTTP Request: POST http://localhost:8079/v1/schema "HTTP/1.1 200 OK"
INFO:root:Request status_code = 403
ERROR:root:Error getting Jeopardy questions!
ERROR:root:status_code = 403, Reason: Forbidden


In [None]:
def respond(query='computers', task='Summarize', limit=1) -> str:
    """Run a generative near-text search on the ``questions`` collection.

    Args:
        query: Search concept passed to ``near_text`` as ``query``.
        task: Prompt passed to ``near_text`` as ``grouped_task``.
        limit: Passed to ``near_text`` as ``limit``.

    Returns:
        The ``generated`` text of the response, or an empty string when the
        response carries none. A blank or missing query returns a short hint
        and does not query the collection.
    """
    # Guard against an empty textbox: there is nothing meaningful to search for.
    if query is None or not str(query).strip():
        return 'Please enter a search concept.'

    print(f'\nPerforming generative search, query = {query}, limit = {limit}.')
    print(f'Prompt: {task}')

    response = questions.generate.near_text(
        query=query,
        limit=limit,
        grouped_task=task
    )

    # Honour the declared `-> str` return type even if nothing was generated.
    return response.generated or ''

In [None]:
# Example search concepts; each entry is wrapped in its own one-element list.
semantic_examples = [
    [topic]
    for topic in (
        "Nature",
        "Music",
        "Wine",
        "Consumer Products",
        "Sports",
        "Fishing",
        "Food",
        "Weather",
    )
]

# Assemble the UI: heading, query box, example picker, and a button that
# sends the query text to respond() and shows the result in a text box.
with gr.Blocks(title="Search the Jeopardy Vector Database. (powered by Weaviate and Ollama)") as demo:
    gr.Markdown("""# Search and summarize the Jeopardy Vector Database. (Powered by Weaviate and Ollama)""")

    # The query box starts out pre-filled with the first example.
    semantic_input_text = gr.Textbox(
        label="Enter a search concept or choose an example below:",
        value=semantic_examples[0][0],
    )
    gr.Examples(semantic_examples, inputs=semantic_input_text, label="Example search concepts:")

    vdb_button = gr.Button(value="Search and Summarize the Jeopardy Vector Database.")
    # Only the query text is wired in, so respond() uses its default
    # `task` and `limit` arguments.
    vdb_button.click(
        fn=respond,
        inputs=[semantic_input_text],
        outputs=gr.Textbox(label="Search Results"),
    )
            

# Launch the Gradio app on port 8082, but only when this code runs as the
# main module.
# NOTE(review): server_name='0.0.0.0' together with share=True presumably
# makes the app reachable on all interfaces and through a public Gradio share
# link — confirm that this exposure is intended.
if __name__ == "__main__":
    demo.launch(server_name='0.0.0.0', server_port=8082, share=True)