# Sparse, Dense, and Hybrid Search

In [None]:
import requests
import json

# Download the small Jeopardy sample file that the rest of the notebook imports.
# A timeout keeps the cell from hanging forever if the host is unreachable.
resp = requests.get(
    'https://raw.githubusercontent.com/weaviate-tutorials/quickstart/main/data/jeopardy_tiny.json',
    timeout=30,
)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
resp.raise_for_status()
data = json.loads(resp.text)

# Quick sanity check: show the container type and how many records were loaded.
print(type(data), len(data))

def json_print(data):
    """Print ``data`` serialized as JSON with a two-space indent."""
    formatted = json.dumps(data, indent=2)
    print(formatted)

In [None]:
import weaviate, os
from weaviate.embedded import EmbeddedOptions
from dotenv import load_dotenv
import openai

# Pull variables from a local .env file into the process environment first,
# so the lookup below can see them.
load_dotenv()

openai.api_key = os.environ.get('OPENAI_API_KEY')

# The OpenAI key is forwarded to Weaviate as a request header.
request_headers = {"X-OpenAI-Api-Key": openai.api_key}

client = weaviate.Client(
    embedded_options=EmbeddedOptions(),
    additional_headers=request_headers,
)

print(f"Client created? {client.is_ready()}")

In [None]:
# Definition of the "Question" class, configured with the text2vec-openai vectorizer.
class_obj = {"class": "Question", "vectorizer": "text2vec-openai"}

# Drop any existing "Question" class before creating it again.
if client.schema.exists("Question"):
    client.schema.delete_class("Question")

client.schema.create_class(class_obj)

In [None]:
# Add every downloaded record to the "Question" class through the client's
# batch interface, configured with a batch size of 5.
with client.batch.configure(batch_size=5) as batch:
    for position, record in enumerate(data, start=1):
        print(f"importing question: {position}")

        # Map the capitalized source keys onto lower-case property names.
        batch.add_data_object(
            data_object={
                "answer": record["Answer"],
                "question": record["Question"],
                "category": record["Category"],
            },
            class_name="Question",
        )

## Dense Search

In [None]:
# Dense search: a near-text query for the concept "animal", limited to 3 results.
dense_query = client.query.get("Question", ["question", "answer"])
dense_query = dense_query.with_near_text({"concepts": ["animal"]})
response = dense_query.with_limit(3).do()

json_print(response)

## Sparse Search - BM25

In [None]:
# Sparse search: a BM25 keyword query for "animal", limited to 3 results.
bm25_query = client.query.get("Question", ["question", "answer"])
bm25_query = bm25_query.with_bm25(query="animal")
response = bm25_query.with_limit(3).do()

json_print(response)

## Hybrid search

In [None]:
# Hybrid search for "animal" with alpha=0.5, limited to 3 results.
# Per the Weaviate hybrid-search docs, alpha balances keyword (0) against
# vector (1) scoring, so 0.5 weights the two equally.
hybrid_query = client.query.get("Question", ["question", "answer"])
hybrid_query = hybrid_query.with_hybrid(query="animal", alpha=0.5)
response = hybrid_query.with_limit(3).do()

json_print(response)

In [None]:
# Hybrid search for "animal" with alpha=0, limited to 3 results.
# Per the Weaviate hybrid-search docs, alpha=0 is a pure keyword search.
hybrid_query = client.query.get("Question", ["question", "answer"])
hybrid_query = hybrid_query.with_hybrid(query="animal", alpha=0)
response = hybrid_query.with_limit(3).do()

json_print(response)

In [None]:
# Hybrid search for "animal" with alpha=1, limited to 3 results.
# Per the Weaviate hybrid-search docs, alpha=1 is a pure vector search.
hybrid_query = client.query.get("Question", ["question", "answer"])
hybrid_query = hybrid_query.with_hybrid(query="animal", alpha=1)
response = hybrid_query.with_limit(3).do()

json_print(response)