# Query benchmark
This notebook benchmarks query execution time while varying parameters such as the length of the query or the number of returned documents.

## Constants:
* DEFAULT_QUERY : Query used as default when the length of the query is constant. When the query length varies, shorter sub-queries (prefixes of this query) are extracted from it.
* DEFAULT_TOP_K : Number of returned documents used as default when it is not the parameter being benchmarked.
* INVERTED_FILE_PATH : The path to the inverted file

In [None]:
import time

from pyscripts.query import FaginQuery
from pyscripts.query import NaiveQuery
from pyscripts.tokenizer import Tokenizer


# Default benchmark parameters shared by every cell below.

# Full query; the query-length benchmarks run on prefixes of its terms.
DEFAULT_QUERY = (
    "the be to of and a in that have I it for not on with he as you do at this but his by from they we "
    "say her she"
)
# Number of documents requested when top_k is not the benchmarked parameter.
DEFAULT_TOP_K = 10
# Location of the inverted file handed to the query classes.
INVERTED_FILE_PATH = "inverted_file/inverted_file_80.if"

## Influence of the length of the query with Fagin's algorithm

In [None]:
# Benchmark FaginQuery on progressively shorter prefixes of DEFAULT_QUERY,
# from the full query down to a single term, with top_k fixed at
# DEFAULT_TOP_K.
max_splitted = DEFAULT_QUERY.split()

for number_of_terms in range(len(max_splitted), 0, -1):
    query = " ".join(max_splitted[:number_of_terms])
    print("Begin to execute queries with {} terms".format(number_of_terms))
    print(query)
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; time.time() follows the wall clock and can
    # jump if the system time is adjusted.
    start_time = time.perf_counter()
    fagin_query = FaginQuery(query, Tokenizer(), INVERTED_FILE_PATH)
    result = fagin_query.execute(DEFAULT_TOP_K)
    # Stop the clock before printing so console I/O is not counted as query
    # time. The window covers query construction plus execute().
    elapsed = time.perf_counter() - start_time
    print(result)
    print("number_of_terms : {} time : {}\n".format(number_of_terms, elapsed))

## Influence of the length of the query with a naive algorithm

In [None]:
# Benchmark NaiveQuery on progressively shorter prefixes of DEFAULT_QUERY,
# from the full query down to a single term, with top_k fixed at
# DEFAULT_TOP_K.
max_splitted = DEFAULT_QUERY.split()

for number_of_terms in range(len(max_splitted), 0, -1):
    query = " ".join(max_splitted[:number_of_terms])
    print("Begin to execute queries with {} terms".format(number_of_terms))
    print(query)
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; time.time() follows the wall clock and can
    # jump if the system time is adjusted.
    start_time = time.perf_counter()
    naive_query = NaiveQuery(query, Tokenizer(), INVERTED_FILE_PATH)
    result = naive_query.execute(DEFAULT_TOP_K)
    # Stop the clock before printing so console I/O is not counted as query
    # time. The window covers query construction plus execute().
    elapsed = time.perf_counter() - start_time
    print(result)
    print("number_of_terms : {} time : {}\n".format(number_of_terms, elapsed))

## Influence of the number of returned documents (top_k) with Fagin's algorithm

In [None]:
# Benchmark FaginQuery on the full DEFAULT_QUERY while the number of requested
# documents decreases from DEFAULT_TOP_K down to 1.
query = DEFAULT_QUERY

for top_k in range(DEFAULT_TOP_K, 0, -1):
    print("Begin to execute queries with top_k = {}".format(top_k))
    print(query)
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; time.time() follows the wall clock and can
    # jump if the system time is adjusted.
    start_time = time.perf_counter()
    fagin_query = FaginQuery(query, Tokenizer(), INVERTED_FILE_PATH)
    result = fagin_query.execute(top_k)
    # Stop the clock before printing so console I/O is not counted as query
    # time. The window covers query construction plus execute().
    elapsed = time.perf_counter() - start_time
    print(result)
    print("top_k : {} time : {}\n".format(top_k, elapsed))

## Influence of the number of returned documents (top_k) with a naive algorithm

In [None]:
# Benchmark NaiveQuery on the full DEFAULT_QUERY while the number of requested
# documents decreases from DEFAULT_TOP_K down to 1.
query = DEFAULT_QUERY

for top_k in range(DEFAULT_TOP_K, 0, -1):
    print("Begin to execute queries with top_k = {}".format(top_k))
    print(query)
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; time.time() follows the wall clock and can
    # jump if the system time is adjusted.
    start_time = time.perf_counter()
    naive_query = NaiveQuery(query, Tokenizer(), INVERTED_FILE_PATH)
    result = naive_query.execute(top_k)
    # Stop the clock before printing so console I/O is not counted as query
    # time. The window covers query construction plus execute().
    elapsed = time.perf_counter() - start_time
    print(result)
    print("top_k : {} time : {}\n".format(top_k, elapsed))