# Sparse, Dense, and Hybrid Search

## Keyword/Sparse Search

In [1]:
from rich import print
from rich import pretty
pretty.install()
import os
from dotenv import load_dotenv
load_dotenv()

import weaviate


In [7]:
# Connect to the hosted demo instance. The dataset behind it holds 10 million
# records from 10 different languages (one million records per language);
# each row is a paragraph from Wikipedia.
# Both API keys are read from the environment (populated by load_dotenv()).
client = weaviate.Client(
    url="https://cohere-demo.weaviate.network/",
    auth_client_secret=weaviate.AuthApiKey(api_key=os.getenv("WEAVIATE_API_KEY")),
    additional_headers={"X-Cohere-Api-Key": os.getenv("COHERE_API_KEY")},
)


In [8]:
# Ask the client whether the instance reports ready; as the last expression
# in the cell, the returned value is shown as the cell output.
client.is_ready()


In [9]:
def keyword_search(query, properties=None, results_lang="en", num_results=3):
    """Run a BM25 keyword search against the "Articles" class.

    Uses the module-level ``client`` to issue the query.

    Args:
        query: Search text handed to the BM25 query.
        properties: Names of the article properties to return for each hit.
            Defaults to ``["title", "url", "text"]``.
        results_lang: Value that each article's ``lang`` property must equal
            (default ``"en"``).
        num_results: Maximum number of hits to request (default 3).

    Returns:
        Whatever the response holds under ``["data"]["Get"]["Articles"]``
        (presumably a list with one dict per hit -- confirm against the
        client's response format).
    """
    # Build the default list per call instead of in the signature, so a single
    # list object is never shared between calls or with the query builder.
    if properties is None:
        properties = ["title", "url", "text"]

    # Restrict hits to articles whose "lang" property equals results_lang.
    where_filter = {
        "path": ["lang"],
        "operator": "Equal",
        "valueString": results_lang,
    }

    response = (
        client.query
        .get("Articles", properties)
        .with_bm25(query=query)
        .with_where(where_filter)
        .with_limit(num_results)
        .do()
    )
    return response["data"]["Get"]["Articles"]


In [11]:
def print_result(result):
    """Print every item of ``result`` as numbered ``key:value`` lines.

    Each item is introduced by an ``item <index>`` header; every field is
    followed by a blank line, and each item ends with one more blank line.
    Output goes through whatever ``print`` is in scope (this file imports
    ``print`` from ``rich``).
    """
    for index, entry in enumerate(result):
        print(f'item {index}')
        for field, value in entry.items():
            print(f"{field}:{value}")
            print()
        print()


In [12]:
# Run the keyword search with its default arguments (results_lang="en",
# three results) and print the hits.
query = "What is the most viewed televised event?"
keyword_search_results = keyword_search(query)
print_result(keyword_search_results)


Languages to try via `results_lang`: en (the default), de, fr, es, it, ja, ar, zh, ko, hi

In [13]:
# Same English query text, but only articles whose "lang" is 'de' are returned.
query = "What is the most viewed televised event?"
keyword_search_results = keyword_search(query, results_lang='de')
print_result(keyword_search_results)
