# ReRank

## Setup

Load the required API keys and the relevant Python libraries.

In [None]:
# Install the Cohere SDK and the Weaviate Python client used by the cells below.
# NOTE(review): both installs are unpinned; the code below relies on
# `weaviate.Client` and `client.query.get(...)` — confirm the installed
# client version still provides them.
!pip install cohere 
!pip install weaviate-client

In [None]:
import os

def keyword_search(query,
                   client,
                   results_lang='en',
                   properties=None,
                   num_results=3):
    """Run a BM25 keyword search over the "Articles" collection.

    Args:
        query: Search string handed to the BM25 operator.
        client: Weaviate client exposing the ``client.query.get(...)`` builder.
        results_lang: Value the ``lang`` property of a hit must equal.
        properties: Properties to return for each article. ``None`` (the
            default) selects ``text``, ``title``, ``url``, ``views``, ``lang``
            and ``_additional {distance}``.
        num_results: Maximum number of articles to return.

    Returns:
        The value stored under ``response['data']['Get']['Articles']``.

    Raises:
        KeyError: If the response dict lacks the ``data -> Get -> Articles``
            path.
    """
    # Build the default list per call so it is never shared between calls
    # (a list literal in the signature would be one shared, mutable object).
    if properties is None:
        properties = ["text", "title", "url", "views", "lang", "_additional {distance}"]

    # Restrict hits to the requested language.
    where_filter = {
        "path": ["lang"],
        "operator": "Equal",
        "valueString": results_lang,
    }

    response = (
        client.query.get("Articles", properties)
        .with_bm25(query=query)
        .with_where(where_filter)
        .with_limit(num_results)
        .do()
    )
    return response['data']['Get']['Articles']


def dense_retrieval(query,
                    client,
                    results_lang='en',
                    properties=None,
                    num_results=5):
    """Run a ``nearText`` (vector) search over the "Articles" collection.

    Args:
        query: Text used as the single ``nearText`` concept.
        client: Weaviate client exposing the ``client.query.get(...)`` builder.
        results_lang: Value the ``lang`` property of a hit must equal.
        properties: Properties to return for each article. ``None`` (the
            default) selects ``text``, ``title``, ``url``, ``views``, ``lang``
            and ``_additional {distance}``.
        num_results: Maximum number of articles to return.

    Returns:
        The value stored under ``response['data']['Get']['Articles']``.

    Raises:
        KeyError: If the response dict lacks the ``data -> Get -> Articles``
            path.
    """
    # Build the default list per call so it is never shared between calls
    # (a list literal in the signature would be one shared, mutable object).
    if properties is None:
        properties = ["text", "title", "url", "views", "lang", "_additional {distance}"]

    near_text = {"concepts": [query]}

    # Restrict hits to the requested language.
    where_filter = {
        "path": ["lang"],
        "operator": "Equal",
        "valueString": results_lang,
    }

    response = (
        client.query
        .get("Articles", properties)
        .with_near_text(near_text)
        .with_where(where_filter)
        .with_limit(num_results)
        .do()
    )
    return response['data']['Get']['Articles']


def print_result(result):
    """Print each item of *result* as plain ``key:value`` lines.

    Every item starts with an ``item <index>`` header. Each key/value pair is
    followed by an empty line, and one more empty line closes the item.
    """
    for position, entry in enumerate(result):
        print(f'item {position}')
        for field, value in entry.items():
            print(f"{field}:{value}")
            print()
        print()

In [None]:
# Read both API keys from Kaggle's secret store rather than hard-coding them.
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("COHERE_API_KEY")  # Cohere key
secret_value_1 = user_secrets.get_secret("WEAVIATE_API_KEY")  # Weaviate key

In [None]:
import cohere
# Cohere client authenticated with the Cohere key; `rerank_responses`
# further down calls `co.rerank` on this object.
co = cohere.Client(secret_value_0)

In [None]:
import weaviate
# API-key credentials passed to the Weaviate client as `auth_client_secret`.
auth_config = weaviate.auth.AuthApiKey(
    api_key=secret_value_1)

In [None]:
# Connect to the Weaviate instance that serves the "Articles" collection.
# NOTE(review): "XXX" looks like a redacted placeholder — supply the real
# cluster URL before running.
client = weaviate.Client(
    url="XXX",
    auth_client_secret=auth_config,
    additional_headers={
        # Forwards the Cohere key with every request (presumably so the
        # server can call Cohere for query vectorization — confirm).
        "X-Cohere-Api-Key": secret_value_0,
    }
)

## Dense Retrieval

In [None]:
# Sample question for the dense-retrieval demo.
query = "What is the capital of Canada?"

In [None]:
# Vector search with the function's default language, properties and limit.
dense_retrieval_results = dense_retrieval(query, client)

In [None]:
# NOTE(review): `print_result` is already defined earlier in this notebook.
# This import rebinds the name to the `utils` version and raises ImportError
# when no `utils` module is importable — confirm which definition is intended.
from utils import print_result

In [None]:
# Print every returned property of each dense-retrieval hit.
print_result(dense_retrieval_results)

## Improving Keyword Search with ReRank

In [None]:
# NOTE(review): `keyword_search` is already defined earlier in this notebook.
# This import rebinds the name to the `utils` version and raises ImportError
# when no `utils` module is importable — confirm which definition is intended.
from utils import keyword_search

In [None]:
# Question for the keyword-search demo; the following search cells assign
# the same value again.
query_1 = "What is the capital of Canada?"

In [None]:
# Baseline: plain BM25 keyword search, keeping only the top 3 hits.
query_1 = "What is the capital of Canada?"
# NOTE(review): `_additional {distance}` is requested although this is a
# BM25 query, not a vector query — confirm it carries a meaningful value here.
results = keyword_search(query_1,
                         client,
                         properties=["text", "title", "url", "views", "lang", "_additional {distance}"],
                         num_results=3
                        )

# Show the rank, title and full passage text of every hit.
for i, result in enumerate(results):
    print(f"i:{i}")
    print(result.get('title'))
    print(result.get('text'))

In [None]:
# Same keyword search, but with a limit of 500 hits; `results` from this cell
# is what the rerank cell below consumes when the notebook runs top to bottom.
query_1 = "What is the capital of Canada?"
results = keyword_search(query_1,
                         client,
                         properties=["text", "title", "url", "views", "lang", "_additional {distance}"],
                         num_results=500
                        )

# Only rank and title are printed here; the passage text is left out.
for i, result in enumerate(results):
    print(f"i:{i}")
    print(result.get('title'))
    #print(result.get('text'))

In [None]:
def rerank_responses(query, responses, num_responses=10,
                     model='rerank-english-v2.0'):
    """Rerank candidate texts against *query* with the Cohere rerank endpoint.

    Args:
        query: The search question the candidates are scored against.
        responses: Candidate documents, passed to the API as ``documents``.
        num_responses: Number of top results to request (``top_n``).
        model: Name of the Cohere rerank model. Defaults to the English model
            this function previously hard-coded, so existing calls are
            unaffected.

    Returns:
        Whatever ``co.rerank`` returns for the request.
    """
    # `co` is the notebook-level Cohere client created in the setup cells.
    return co.rerank(
        model=model,
        query=query,
        documents=responses,
        top_n=num_responses,
    )

In [None]:
# Rerank only the passage text of the hits currently held in `results`.
# NOTE(review): `result.get('text')` yields None for a hit without a `text`
# property — confirm the reranker is never handed a None document.
texts = [result.get('text') for result in results]
reranked_text = rerank_responses(query_1, texts)

In [None]:
# Print each reranked entry, in the order the reranker returned it,
# followed by an empty line.
for i, rerank_result in enumerate(reranked_text):
    print(f"i:{i}")
    print(f"{rerank_result}")
    print()

## Improving Dense Retrieval with ReRank

In [None]:
# NOTE(review): `dense_retrieval` is already defined and called earlier in
# this notebook. This import rebinds the name to the `utils` version and
# raises ImportError when no `utils` module is importable — confirm which
# definition is intended.
from utils import dense_retrieval

In [None]:
# Question for the dense-retrieval + rerank demo.
query_2 = "Who is the tallest person in history?"

In [None]:
# Vector search for the second question; overwrites the keyword-search
# `results` from the previous section.
results = dense_retrieval(query_2,client)

In [None]:
# Show rank, title and passage text of each dense-retrieval hit, with an
# empty line between hits.
for i, result in enumerate(results):
    print(f"i:{i}")
    print(result.get('title'))
    print(result.get('text'))
    print()

In [None]:
# Rerank the passage text of the dense-retrieval hits against the question.
# NOTE(review): `result.get('text')` yields None for a hit without a `text`
# property — confirm the reranker is never handed a None document.
texts = [result.get('text') for result in results]
reranked_text = rerank_responses(query_2, texts)

In [None]:
# Print each reranked entry, in the order the reranker returned it,
# followed by an empty line.
for i, rerank_result in enumerate(reranked_text):
    print(f"i:{i}")
    print(f"{rerank_result}")
    print()