# Sparse, Dense and Hybrid Search

In [2]:
from dotenv import load_dotenv

load_dotenv()

True

### Download sample data

In [1]:
import requests
import json

# Download the data
resp = requests.get('https://raw.githubusercontent.com/weaviate-tutorials/quickstart/main/data/jeopardy_tiny.json')
data = json.loads(resp.text)


print(type(data), len(data))

def json_print(data):
    print(json.dumps(data, indent=2))

<class 'list'> 10


In [3]:
import os

import weaviate
from weaviate.embedded import EmbeddedOptions


client = weaviate.Client(
    embedded_options=EmbeddedOptions(),
    additional_headers={
        "X-OpenAI-Api-Key": os.environ['OPENAI_API_KEY']
    }
)

embedded weaviate is already listening on port 8079


In [4]:
if client.schema.exists("Question"):
   client.schema.delete_class("Question")
 
class_obj = {
    "class": "Question",
    "vectorizer": "text2vec-openai",
    "moduleConfig": {
        "text2vec-openai": {
            "model": "ada",
            "modelVersion": "002",
            "type": "text",
            "baseURL": os.environ["OPENAI_API_BASE"]
        }
    }
}

client.schema.create_class(class_obj)

In [5]:
with client.batch.configure(batch_size=5) as batch:
    for i, d in enumerate(data):
        
        print(f"importing question: {i+1}")
        
        properties = {
            "answer": d["Answer"],
            "question": d["Question"],
            "category": d["Category"],
        }
        
        batch.add_data_object(
            data_object=properties,
            class_name="Question"
        )

importing question: 1
importing question: 2
importing question: 3
importing question: 4
importing question: 5
{'error': [{'message': 'connection to: OpenAI API failed with status: 404 error: Unknown request URL: POST /v1/v1/embeddings. Please check the URL for typos, or see the docs at https://platform.openai.com/docs/api-reference/.'}]}
{'error': [{'message': 'connection to: OpenAI API failed with status: 404 error: Unknown request URL: POST /v1/v1/embeddings. Please check the URL for typos, or see the docs at https://platform.openai.com/docs/api-reference/.'}]}
{'error': [{'message': 'connection to: OpenAI API failed with status: 404 error: Unknown request URL: POST /v1/v1/embeddings. Please check the URL for typos, or see the docs at https://platform.openai.com/docs/api-reference/.'}]}
{'error': [{'message': 'connection to: OpenAI API failed with status: 404 error: Unknown request URL: POST /v1/v1/embeddings. Please check the URL for typos, or see the docs at https://platform.openai

## Queries

### Dense Search

In [6]:
response = (
    client.query
    .get("Question", ["question", "answer"])
    .with_near_text({"concepts":["animal"]})
    .with_limit(3)
    .do()
)

json_print(response)

{
  "data": {
    "Get": {
      "Question": null
    }
  },
  "errors": [
    {
      "locations": [
        {
          "column": 6,
          "line": 1
        }
      ],
      "message": "explorer: get class: concurrentTargetVectorSearch): explorer: get class: vectorize search vector: vectorize params: vectorize params: vectorize keywords: remote client vectorize: connection to: OpenAI API failed with status: 404 error: Unknown request URL: POST /v1/v1/embeddings. Please check the URL for typos, or see the docs at https://platform.openai.com/docs/api-reference/.",
      "path": [
        "Get",
        "Question"
      ]
    }
  ]
}


### Sparse Search - BM25

In [7]:
response = (
    client.query
    .get("Question",["question","answer"])
    .with_bm25(query="animal")
    .with_limit(3)
    .do()
)

json_print(response)

{
  "data": {
    "Get": {
      "Question": []
    }
  }
}


### Hybrid Search

In [8]:
response = (
    client.query
    .get("Question",["question","answer"])
    .with_hybrid(query="animal", alpha=0.5)
    .with_limit(3)
    .do()
)

json_print(response)

{
  "data": {
    "Get": {
      "Question": null
    }
  },
  "errors": [
    {
      "locations": [
        {
          "column": 6,
          "line": 1
        }
      ],
      "message": "remote client vectorize: connection to: OpenAI API failed with status: 404 error: Unknown request URL: POST /v1/v1/embeddings. Please check the URL for typos, or see the docs at https://platform.openai.com/docs/api-reference/.",
      "path": [
        "Get",
        "Question"
      ]
    }
  ]
}


In [9]:
response = (
    client.query
    .get("Question",["question","answer"])
    .with_hybrid(query="animal", alpha=0)
    .with_limit(3)
    .do()
)

json_print(response)

{
  "data": {
    "Get": {
      "Question": []
    }
  }
}


In [10]:
response = (
    client.query
    .get("Question",["question","answer"])
    .with_hybrid(query="animal", alpha=1)
    .with_limit(3)
    .do()
)

json_print(response)

{
  "data": {
    "Get": {
      "Question": null
    }
  },
  "errors": [
    {
      "locations": [
        {
          "column": 6,
          "line": 1
        }
      ],
      "message": "remote client vectorize: connection to: OpenAI API failed with status: 404 error: Unknown request URL: POST /v1/v1/embeddings. Please check the URL for typos, or see the docs at https://platform.openai.com/docs/api-reference/.",
      "path": [
        "Get",
        "Question"
      ]
    }
  ]
}
