In [4]:
from tqdm import tqdm

from typing import Dict, List, Tuple

ArticleName = str
Text = str
Term = str
CollectionData = Dict[str, Dict[str,  float]]
RankingParams = {}

PREFIX = '/wiki/'

### 1. Датасет
В качестве документов для поиска, требуется использовать статьи википедии, скачанные в одном из предыдущих домашних заданий.
Реализуйте получение содержимого документа по названию статьи в соответствии с выбранной вами схемы хранения документов. Также, преобразуйте содержимое в текст (не html) любым разумным способом. Постарайтесь при этом вырезать заведомо бесполезные для поиска данные со страницы.

In [5]:
import pickle
import gzip
import html2text

url_path_map_value = {}

with open('url_path_map.p', 'rb') as f:
    url_path_map_value = pickle.load(f)

In [6]:
from bs4 import BeautifulSoup
import re 
import itertools  

def html2text(html):
    soup = BeautifulSoup(html)
    paras = []
    for paragraph in soup.find_all('p'):
        paras.append(str(paragraph.text))
    heads = []
    for head in soup.find_all('span', attrs={'mw-headline'}):
        heads.append(str(head.text))
    text = [val for pair in itertools.zip_longest(paras, heads, fillvalue =' ' ) for val in pair]
    text = ' '.join(text)
    text = re.sub(r"\[.*?\]+", '', text)

    text = text.replace('\n', '')[:-11]
    return text


def get_article_text(article_name: ArticleName) -> Text:
    text = ""
    key = PREFIX + article_name
    if key in url_path_map_value:
        path = url_path_map_value[key]
        if path != '':
            with gzip.open(path, 'rb') as f:
                html = gzip.decompress(f.read()).decode('utf-8')
                title = article_name.replace('_', ' ')
                text = f'{title} {html2text(html)}'
    return text


for article_name in ["Software_Development_Kit", "Gangrene", "COVID-19_pandemic_in_Belarus", "Guitar_Hero:_Aerosmith"]:
    print(f'Article: {article_name}\n{get_article_text(article_name)}\n\n')

Article: Software_Development_Kit
Software Development Kit A software development kit (SDK or "devkit") is usually a set of development tools that allows a software developer to create applications for a certain software package, software framework, hardware platform, computer system, video game console, operating system, or similar platform.    SDKs vary greatly between a simple application programming interface to hardware used to simulate 


Article: Gangrene
Gangrene Gangrene is a serious medical condition that causes the decay and death of body tissue, usually in the extremities such as the fingers, hands, toes, and feet. The two main types of gangrene are dry gangrene and wet gangrene. A third less common type is a form of wet gangrene known as gas gangrene.  A very rare type which affects the internal organs is known as internal gangrene.    Dry gangrene is usually caused by a loss of blood supply to the affected area, such as may happen following an injury which damages the blo

In [7]:
#with multi-processes: AttributeError: Can't get attribute 'get_article_text' on <module '__main__' (built-in)> !!!!

from multiprocessing import Pool

def load_docs(selected_docs_fn: ArticleName, threads: int = 4) -> Dict[ArticleName, Text]:    
    docs = {}
    for line in tqdm(open(selected_docs_fn)):
        article_name = line.strip()
        docs[article_name] = get_article_text(article_name)
    return docs
    
docs = load_docs("./selected_docs.tsv", 32)
print(f'{len(docs)} docs loaded')

15229it [08:55, 28.42it/s]

15229 docs loaded





Чтобы процесс занимал разумное время, поиск требуется производить только по некоторому заданному набору документов. Список целевых статей описан в файле selected_docs.tsv по одной на строку.

### 2. Поиск
Реализуйте разбиение текста на термы.

In [8]:
import string

def make_terms(text: Text) -> List[Term]:
    return text.lower().translate(str.maketrans('', '', string.punctuation)).split()

for text in ["Hello, world!", 
             "Guitar Hero: Aerosmith", 
             "So Far, So Good... So What!", 
             "such as mantı, döner, kebabs, Turkish delight, baklava, börek, köfte, and other foods",
            ]:
    print(f'{text} -> {make_terms(text)}')

Hello, world! -> ['hello', 'world']
Guitar Hero: Aerosmith -> ['guitar', 'hero', 'aerosmith']
So Far, So Good... So What! -> ['so', 'far', 'so', 'good', 'so', 'what']
such as mantı, döner, kebabs, Turkish delight, baklava, börek, köfte, and other foods -> ['such', 'as', 'mantı', 'döner', 'kebabs', 'turkish', 'delight', 'baklava', 'börek', 'köfte', 'and', 'other', 'foods']


Рассчитайте статистики термов и документов, которые понадобятся для реализации моделей tfidf и BM25: частота терма для документа, обратная документная частота терма и прочие.

In [11]:
import math
from collections import Counter

bags_of_words = {k:make_terms(v) for k, v in docs.items()}
bodies = [len(v) for _, v in bags_of_words.items()]
N = len(bodies)
average_len = sum(bodies) / N


def computeTF(word_dict, bag_of_words, term):
    return {term: word_dict[term] / len(bag_of_words)}


def computeIDF(documents):
    return math.log((N + 1) / len(documents))


def computeTFIDF(tfBagOfWords, idfs, term):
    return tfBagOfWords[term] * idfs[term]


def prepare_collection_data(docs: Dict[ArticleName, Text]) -> CollectionData:
    num_of_words = {k:dict(Counter(v)) for k, v in bags_of_words.items()}
    tf_inv_idx = {}

    for title, tf in num_of_words.items():
        for word, val in tf.items():
            d = tf_inv_idx.get(word, {})
            d[title] = val
            tf_inv_idx[word] = d

    return tf_inv_idx


collection_data = prepare_collection_data(docs)

Реализуйте поиск лучших 10 документов в модели tfidf и BM25 с параметрами b = 1, k1 = 1, k2=1.
Строить инвертированный индекс не требуется (но и не запрещается).

In [12]:

def score_BM25(n, f, qf, dl, b, k1, k2):
    K = compute_K(k1, b, dl, average_len)
    first = math.log((N - n + 0.5) / (n + 0.5))
    second = ((k1 + 1) * f) / (K + f)
    third = ((k2+1) * qf) / (k2 + qf)
    return first * second * third


def compute_K(k1, b, dl, avdl):
    return k1 * ((1 - b) + b * dl / avdl)


def search(query: Text, collection_data: CollectionData, ranking_params: RankingParams) -> List[Tuple[ArticleName, float]]:

    c = Counter({})
    terms = make_terms(query)
    type_ = ranking_params.get('type', 'bm25')
    b = ranking_params.get('b', 1)
    k1 = ranking_params.get('k1', 1)
    k2 = ranking_params.get('k2', 1)

    q = dict(Counter(terms))

    if type_ == 'bm25':
        for term in terms:
            doc_dict = collection_data.get(term, {})
            for title, freq in doc_dict.items():
                score = score_BM25(n=len(doc_dict), f=freq, qf=q[term],
                                  dl=len(bags_of_words[title]), b=b, k1=k1, k2=k2
                )
                if title in c:
                    c[title] += score
                else:
                    c[title] = score
    else:
        #tfidf
        for term in terms:
            term_docs = collection_data.get(term, {})
            titles = [title for title, _ in term_docs.items()]
            if len(titles) > 0:
                num_of_words = {title:dict(Counter(bags_of_words[title])) for title in titles}
                tfs = {title:computeTF(num_of_words[title], bags_of_words[title], term) for title in titles}
                idfs = {term: computeIDF(titles)}
                tfidfs = {title: computeTFIDF(tfs[title], idfs, term) for title in titles}
                for title, tfidf in tfidfs.items():
                    if title in c:
                        c[title] += tfidf
                    else:
                        c[title] = tfidf
    return sorted([(title, rank) for title, rank in c.items()], key=lambda x: -x[1])
        

ranking_params = {'b': 1, 'k1': 1, 'k2': 1, 'type': 'bm25'}
for query in ["coronovirus in belarus",
              "who won junior eurovision in 2005",
              "science about full-text search",
             ]:
    result = search(query, collection_data, ranking_params)[:5]
    print(f"[{query}]")
    for article_name, score in result:
        print(f"{score:7.2f}  {article_name}")
    print("\n")

[coronovirus in belarus]
   9.62  COVID-19_pandemic_in_Belarus
   9.17  Daugava_River
   8.92  Bug_River
   7.82  List_of_Nobel_Prize_winners_by_country
   6.69  Jagiellon_dynasty


[who won junior eurovision in 2005]
  26.75  Junior_Eurovision_Song_Contest_2019
  23.86  Junior_Eurovision_Song_Contest_2004
  19.25  Junior_Eurovision_Song_Contest_2014
  19.18  Junior_Eurovision_Song_Contest_2015
  14.86  Eurovision:_Europe_Shine_a_Light


[science about full-text search]
  20.70  Information_retrieval
  10.99  Citizen_science
  10.39  Ask.com
  10.25  Monty_Python_and_the_Holy_Grail
   9.45  The_Saga_of_the_Viking_Women_and_Their_Voyage_to_the_Waters_of_the_Great_Sea_Serpent




### 3. Оптимизация качества
Для измерения качества поиска вам предоставляется список из пар (запрос, название статьи), которая означает, что по данному запросу данная статья является релевантной (а остальные – нерелевантны). Пары описаны в файле queries.tsv по одной на строку.

In [13]:
def load_queries(queries_fn: ArticleName) -> List[Tuple[Text, ArticleName]]:
    queries = []
    for line in open(queries_fn):
        query, answer = line.rstrip().split('\t', 1)
        queries.append((query, answer))
    return queries

queries = load_queries("./queries.tsv")
for query, answer in queries:
    assert answer in docs
    
print(f'{len(queries)} queries loaded')
for query, article_name in queries[:5]:
    print(f'{query} -> {article_name}')

200 queries loaded
animals that have shells and live in water -> Shell_(zoology)
how many different types of scorpions are there -> Scorpion
describe the structure of a scientific name for a species -> Binomial_nomenclature
what are the 3 types of plastids in plant cells -> Plastid
who named the cell and how did he come up with that name -> Cell_theory


Оценим поиск по нескольким метрикам: accuracy – доля запросов, где на первой позиции был найден релевантный документ; accuracy@10 – доля запросов, где релевантный документ попал в первую десятку, mrr@10 – средняя обратная позиция релевантного документа в первой десятке.

In [14]:
def run(title, queries: List[Tuple[Text, ArticleName]], collection_data: CollectionData, ranking_params: RankingParams) -> None:
    accuracy = 0.0
    accuracy10 = 0.0
    rr = 0.0
    processed = 0
    with tqdm(queries) as progress:
        for query, answer in progress:
            result = search(query, collection_data, ranking_params)[:10]
            
            rank = None
            for position, (article_name, score) in enumerate(result):
                if article_name == answer:
                    rank = position + 1
                    break
                
            if rank is not None:
                accuracy += (rank == 1)
                accuracy10 += (rank <= 10)
                rr += 1.0 / rank
                
            processed += 1
            progress.set_description(f'Acc: {accuracy/processed:0.2f}, Acc10: {accuracy10/processed:0.2f}, RR: {rr/processed:0.2f}')
    print(f'{title}\n  Accuracy: {accuracy/processed:0.2f}\n  Accuracy10: {accuracy10/processed:0.2f}\n  RR: {rr/processed:0.2f}')

Сравните реализованные Вами алгоритмы tfidf и BM25 по этим метрикам.

In [15]:
ranking_params = {"type": ""}
run("TFIDF", queries, collection_data, ranking_params)
run("BM25", queries, collection_data, {})

Acc: 0.03, Acc10: 0.17, RR: 0.07: 100%|██████████| 200/200 [05:55<00:00,  1.78s/it]
  0%|          | 0/200 [00:00<?, ?it/s]

TFIDF
  Accuracy: 0.03
  Accuracy10: 0.17
  RR: 0.07


Acc: 0.18, Acc10: 0.47, RR: 0.27: 100%|██████████| 200/200 [00:16<00:00, 12.01it/s]

BM25
  Accuracy: 0.18
  Accuracy10: 0.47
  RR: 0.27





Подберите оптимальные параметры BM25 для этого набора запросов и документов.

In [15]:
import numpy as np

for b in np.linspace(0, 1, 5):
    for k1 in range(6):
        for k2 in range(6):
            ranking_params = {"b": b, "k1": k1, "k2": k2, "type": "bm25"}
            run(str(ranking_params), queries, collection_data, ranking_params)

Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:29<00:00,  6.67it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:23,  8.37it/s]

{'b': 0.0, 'k1': 0, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:28<00:00,  6.99it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:32,  6.07it/s]

{'b': 0.0, 'k1': 0, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:29<00:00,  6.73it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:35,  5.59it/s]

{'b': 0.0, 'k1': 0, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:27<00:00,  7.15it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.78it/s]

{'b': 0.0, 'k1': 0, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:26<00:00,  7.64it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:24,  8.07it/s]

{'b': 0.0, 'k1': 0, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:26<00:00,  7.62it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.93it/s]

{'b': 0.0, 'k1': 0, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.08, Acc10: 0.26, RR: 0.13: 100%|██████████| 200/200 [00:26<00:00,  7.68it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:23,  8.35it/s]

{'b': 0.0, 'k1': 1, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.08
  Accuracy10: 0.26
  RR: 0.13


Acc: 0.08, Acc10: 0.26, RR: 0.13: 100%|██████████| 200/200 [00:25<00:00,  7.72it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:24,  8.20it/s]

{'b': 0.0, 'k1': 1, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.08
  Accuracy10: 0.26
  RR: 0.13


Acc: 0.08, Acc10: 0.26, RR: 0.13: 100%|██████████| 200/200 [00:25<00:00,  7.72it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.92it/s]

{'b': 0.0, 'k1': 1, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.08
  Accuracy10: 0.26
  RR: 0.13


Acc: 0.08, Acc10: 0.26, RR: 0.13: 100%|██████████| 200/200 [00:26<00:00,  7.69it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:24,  8.17it/s]

{'b': 0.0, 'k1': 1, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.08
  Accuracy10: 0.26
  RR: 0.13


Acc: 0.08, Acc10: 0.26, RR: 0.13: 100%|██████████| 200/200 [00:25<00:00,  7.73it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:24,  7.98it/s]

{'b': 0.0, 'k1': 1, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.08
  Accuracy10: 0.26
  RR: 0.13


Acc: 0.08, Acc10: 0.26, RR: 0.13: 100%|██████████| 200/200 [00:26<00:00,  7.63it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:27,  7.29it/s]

{'b': 0.0, 'k1': 1, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.08
  Accuracy10: 0.26
  RR: 0.13


Acc: 0.09, Acc10: 0.28, RR: 0.14: 100%|██████████| 200/200 [00:25<00:00,  7.70it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:24,  8.22it/s]

{'b': 0.0, 'k1': 2, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.09
  Accuracy10: 0.28
  RR: 0.14


Acc: 0.09, Acc10: 0.28, RR: 0.14: 100%|██████████| 200/200 [00:25<00:00,  7.70it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.55it/s]

{'b': 0.0, 'k1': 2, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.09
  Accuracy10: 0.28
  RR: 0.14


Acc: 0.09, Acc10: 0.28, RR: 0.14: 100%|██████████| 200/200 [00:25<00:00,  7.73it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.85it/s]

{'b': 0.0, 'k1': 2, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.09
  Accuracy10: 0.28
  RR: 0.14


Acc: 0.09, Acc10: 0.28, RR: 0.14: 100%|██████████| 200/200 [00:26<00:00,  7.65it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.88it/s]

{'b': 0.0, 'k1': 2, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.09
  Accuracy10: 0.28
  RR: 0.14


Acc: 0.09, Acc10: 0.28, RR: 0.14: 100%|██████████| 200/200 [00:26<00:00,  7.64it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:24,  8.05it/s]

{'b': 0.0, 'k1': 2, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.09
  Accuracy10: 0.28
  RR: 0.14


Acc: 0.09, Acc10: 0.28, RR: 0.14: 100%|██████████| 200/200 [00:26<00:00,  7.68it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:24,  8.28it/s]

{'b': 0.0, 'k1': 2, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.09
  Accuracy10: 0.28
  RR: 0.14


Acc: 0.09, Acc10: 0.30, RR: 0.15: 100%|██████████| 200/200 [00:26<00:00,  7.64it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:24,  8.11it/s]

{'b': 0.0, 'k1': 3, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.09
  Accuracy10: 0.30
  RR: 0.15


Acc: 0.09, Acc10: 0.31, RR: 0.15: 100%|██████████| 200/200 [00:26<00:00,  7.69it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:24,  8.28it/s]

{'b': 0.0, 'k1': 3, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.09
  Accuracy10: 0.31
  RR: 0.15


Acc: 0.09, Acc10: 0.30, RR: 0.15: 100%|██████████| 200/200 [00:25<00:00,  7.69it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:24,  8.12it/s]

{'b': 0.0, 'k1': 3, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.09
  Accuracy10: 0.30
  RR: 0.15


Acc: 0.09, Acc10: 0.30, RR: 0.15: 100%|██████████| 200/200 [00:26<00:00,  7.64it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:23,  8.40it/s]

{'b': 0.0, 'k1': 3, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.09
  Accuracy10: 0.30
  RR: 0.15


Acc: 0.09, Acc10: 0.30, RR: 0.15: 100%|██████████| 200/200 [00:25<00:00,  7.72it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:24,  8.28it/s]

{'b': 0.0, 'k1': 3, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.09
  Accuracy10: 0.30
  RR: 0.15


Acc: 0.09, Acc10: 0.30, RR: 0.15: 100%|██████████| 200/200 [00:26<00:00,  7.67it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:24,  8.10it/s]

{'b': 0.0, 'k1': 3, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.09
  Accuracy10: 0.30
  RR: 0.15


Acc: 0.09, Acc10: 0.32, RR: 0.15: 100%|██████████| 200/200 [00:26<00:00,  7.68it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.96it/s]

{'b': 0.0, 'k1': 4, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.09
  Accuracy10: 0.32
  RR: 0.15


Acc: 0.09, Acc10: 0.31, RR: 0.15: 100%|██████████| 200/200 [00:26<00:00,  7.66it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:24,  8.22it/s]

{'b': 0.0, 'k1': 4, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.09
  Accuracy10: 0.31
  RR: 0.15


Acc: 0.10, Acc10: 0.32, RR: 0.16: 100%|██████████| 200/200 [00:25<00:00,  7.71it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:24,  8.26it/s]

{'b': 0.0, 'k1': 4, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.10
  Accuracy10: 0.32
  RR: 0.16


Acc: 0.10, Acc10: 0.30, RR: 0.16: 100%|██████████| 200/200 [00:26<00:00,  7.61it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:24,  8.04it/s]

{'b': 0.0, 'k1': 4, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.10
  Accuracy10: 0.30
  RR: 0.16


Acc: 0.10, Acc10: 0.30, RR: 0.15: 100%|██████████| 200/200 [01:35<00:00,  2.09it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.65it/s]

{'b': 0.0, 'k1': 4, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.10
  Accuracy10: 0.30
  RR: 0.15


Acc: 0.10, Acc10: 0.30, RR: 0.16: 100%|██████████| 200/200 [00:26<00:00,  7.59it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.80it/s]

{'b': 0.0, 'k1': 4, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.10
  Accuracy10: 0.30
  RR: 0.16


Acc: 0.10, Acc10: 0.31, RR: 0.16: 100%|██████████| 200/200 [00:26<00:00,  7.60it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.58it/s]

{'b': 0.0, 'k1': 5, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.10
  Accuracy10: 0.31
  RR: 0.16


Acc: 0.10, Acc10: 0.32, RR: 0.16: 100%|██████████| 200/200 [00:26<00:00,  7.60it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.72it/s]

{'b': 0.0, 'k1': 5, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.10
  Accuracy10: 0.32
  RR: 0.16


Acc: 0.10, Acc10: 0.32, RR: 0.16: 100%|██████████| 200/200 [00:26<00:00,  7.61it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.85it/s]

{'b': 0.0, 'k1': 5, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.10
  Accuracy10: 0.32
  RR: 0.16


Acc: 0.10, Acc10: 0.32, RR: 0.16: 100%|██████████| 200/200 [00:26<00:00,  7.62it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.64it/s]

{'b': 0.0, 'k1': 5, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.10
  Accuracy10: 0.32
  RR: 0.16


Acc: 0.10, Acc10: 0.32, RR: 0.16: 100%|██████████| 200/200 [00:26<00:00,  7.58it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.65it/s]

{'b': 0.0, 'k1': 5, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.10
  Accuracy10: 0.32
  RR: 0.16


Acc: 0.10, Acc10: 0.32, RR: 0.16: 100%|██████████| 200/200 [00:26<00:00,  7.61it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.74it/s]

{'b': 0.0, 'k1': 5, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.10
  Accuracy10: 0.32
  RR: 0.16


Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:25<00:00,  7.73it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.77it/s]

{'b': 0.25, 'k1': 0, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:25<00:00,  7.74it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.76it/s]

{'b': 0.25, 'k1': 0, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:25<00:00,  7.72it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.76it/s]

{'b': 0.25, 'k1': 0, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:25<00:00,  7.71it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.55it/s]

{'b': 0.25, 'k1': 0, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:25<00:00,  7.73it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.84it/s]

{'b': 0.25, 'k1': 0, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:26<00:00,  7.69it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:27,  7.14it/s]

{'b': 0.25, 'k1': 0, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.17, Acc10: 0.46, RR: 0.26: 100%|██████████| 200/200 [00:26<00:00,  7.60it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.56it/s]

{'b': 0.25, 'k1': 1, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.17
  Accuracy10: 0.46
  RR: 0.26


Acc: 0.17, Acc10: 0.44, RR: 0.26: 100%|██████████| 200/200 [00:26<00:00,  7.58it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.89it/s]

{'b': 0.25, 'k1': 1, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.17
  Accuracy10: 0.44
  RR: 0.26


Acc: 0.17, Acc10: 0.44, RR: 0.26: 100%|██████████| 200/200 [00:26<00:00,  7.60it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.52it/s]

{'b': 0.25, 'k1': 1, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.17
  Accuracy10: 0.44
  RR: 0.26


Acc: 0.17, Acc10: 0.44, RR: 0.26: 100%|██████████| 200/200 [00:26<00:00,  7.62it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.57it/s]

{'b': 0.25, 'k1': 1, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.17
  Accuracy10: 0.44
  RR: 0.26


Acc: 0.17, Acc10: 0.44, RR: 0.26: 100%|██████████| 200/200 [00:26<00:00,  7.56it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.50it/s]

{'b': 0.25, 'k1': 1, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.17
  Accuracy10: 0.44
  RR: 0.26


Acc: 0.17, Acc10: 0.44, RR: 0.26: 100%|██████████| 200/200 [00:26<00:00,  7.61it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.92it/s]

{'b': 0.25, 'k1': 1, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.17
  Accuracy10: 0.44
  RR: 0.26


Acc: 0.23, Acc10: 0.49, RR: 0.32: 100%|██████████| 200/200 [00:26<00:00,  7.58it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.86it/s]

{'b': 0.25, 'k1': 2, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.23
  Accuracy10: 0.49
  RR: 0.32


Acc: 0.23, Acc10: 0.48, RR: 0.31: 100%|██████████| 200/200 [00:26<00:00,  7.61it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.70it/s]

{'b': 0.25, 'k1': 2, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.23
  Accuracy10: 0.48
  RR: 0.31


Acc: 0.23, Acc10: 0.49, RR: 0.32: 100%|██████████| 200/200 [00:26<00:00,  7.58it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.84it/s]

{'b': 0.25, 'k1': 2, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.23
  Accuracy10: 0.49
  RR: 0.32


Acc: 0.23, Acc10: 0.49, RR: 0.31: 100%|██████████| 200/200 [00:26<00:00,  7.57it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.88it/s]

{'b': 0.25, 'k1': 2, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.23
  Accuracy10: 0.49
  RR: 0.31


Acc: 0.23, Acc10: 0.48, RR: 0.31: 100%|██████████| 200/200 [00:26<00:00,  7.48it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.65it/s]

{'b': 0.25, 'k1': 2, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.23
  Accuracy10: 0.48
  RR: 0.31


Acc: 0.23, Acc10: 0.48, RR: 0.31: 100%|██████████| 200/200 [09:38<00:00,  2.89s/it]   
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:27,  7.22it/s]

{'b': 0.25, 'k1': 2, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.23
  Accuracy10: 0.48
  RR: 0.31


Acc: 0.25, Acc10: 0.48, RR: 0.33: 100%|██████████| 200/200 [00:27<00:00,  7.26it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:29,  6.81it/s]

{'b': 0.25, 'k1': 3, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.25
  Accuracy10: 0.48
  RR: 0.33


Acc: 0.25, Acc10: 0.48, RR: 0.33: 100%|██████████| 200/200 [00:30<00:00,  6.53it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:28,  6.92it/s]

{'b': 0.25, 'k1': 3, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.25
  Accuracy10: 0.48
  RR: 0.33


Acc: 0.25, Acc10: 0.48, RR: 0.33: 100%|██████████| 200/200 [00:28<00:00,  7.07it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.53it/s]

{'b': 0.25, 'k1': 3, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.25
  Accuracy10: 0.48
  RR: 0.33


Acc: 0.25, Acc10: 0.47, RR: 0.32: 100%|██████████| 200/200 [00:27<00:00,  7.24it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:24,  8.11it/s]

{'b': 0.25, 'k1': 3, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.25
  Accuracy10: 0.47
  RR: 0.32


Acc: 0.25, Acc10: 0.47, RR: 0.32: 100%|██████████| 200/200 [00:29<00:00,  6.83it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:32,  6.19it/s]

{'b': 0.25, 'k1': 3, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.25
  Accuracy10: 0.47
  RR: 0.32


Acc: 0.25, Acc10: 0.47, RR: 0.32: 100%|██████████| 200/200 [00:30<00:00,  6.58it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:24,  8.09it/s]

{'b': 0.25, 'k1': 3, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.25
  Accuracy10: 0.47
  RR: 0.32


Acc: 0.27, Acc10: 0.49, RR: 0.33: 100%|██████████| 200/200 [00:28<00:00,  7.11it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.59it/s]

{'b': 0.25, 'k1': 4, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.27
  Accuracy10: 0.49
  RR: 0.33


Acc: 0.27, Acc10: 0.50, RR: 0.33: 100%|██████████| 200/200 [00:29<00:00,  6.78it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:31,  6.26it/s]

{'b': 0.25, 'k1': 4, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.27
  Accuracy10: 0.50
  RR: 0.33


Acc: 0.27, Acc10: 0.48, RR: 0.33: 100%|██████████| 200/200 [00:28<00:00,  6.94it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.96it/s]

{'b': 0.25, 'k1': 4, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.27
  Accuracy10: 0.48
  RR: 0.33


Acc: 0.27, Acc10: 0.48, RR: 0.33: 100%|██████████| 200/200 [00:27<00:00,  7.30it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:24,  8.00it/s]

{'b': 0.25, 'k1': 4, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.27
  Accuracy10: 0.48
  RR: 0.33


Acc: 0.27, Acc10: 0.48, RR: 0.33: 100%|██████████| 200/200 [00:27<00:00,  7.17it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.45it/s]

{'b': 0.25, 'k1': 4, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.27
  Accuracy10: 0.48
  RR: 0.33


Acc: 0.26, Acc10: 0.48, RR: 0.33: 100%|██████████| 200/200 [00:30<00:00,  6.61it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:29,  6.72it/s]

{'b': 0.25, 'k1': 4, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.26
  Accuracy10: 0.48
  RR: 0.33


Acc: 0.27, Acc10: 0.49, RR: 0.33: 100%|██████████| 200/200 [00:29<00:00,  6.86it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:27,  7.31it/s]

{'b': 0.25, 'k1': 5, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.27
  Accuracy10: 0.49
  RR: 0.33


Acc: 0.27, Acc10: 0.48, RR: 0.33: 100%|██████████| 200/200 [00:28<00:00,  7.07it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.61it/s]

{'b': 0.25, 'k1': 5, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.27
  Accuracy10: 0.48
  RR: 0.33


Acc: 0.27, Acc10: 0.47, RR: 0.33: 100%|██████████| 200/200 [00:27<00:00,  7.25it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.38it/s]

{'b': 0.25, 'k1': 5, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.27
  Accuracy10: 0.47
  RR: 0.33


Acc: 0.27, Acc10: 0.47, RR: 0.33: 100%|██████████| 200/200 [00:27<00:00,  7.24it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:27,  7.19it/s]

{'b': 0.25, 'k1': 5, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.27
  Accuracy10: 0.47
  RR: 0.33


Acc: 0.27, Acc10: 0.47, RR: 0.33: 100%|██████████| 200/200 [00:27<00:00,  7.29it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.59it/s]

{'b': 0.25, 'k1': 5, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.27
  Accuracy10: 0.47
  RR: 0.33


Acc: 0.27, Acc10: 0.47, RR: 0.33: 100%|██████████| 200/200 [00:27<00:00,  7.21it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:27,  7.22it/s]

{'b': 0.25, 'k1': 5, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.27
  Accuracy10: 0.47
  RR: 0.33


Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:27<00:00,  7.35it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.78it/s]

{'b': 0.5, 'k1': 0, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:27<00:00,  7.33it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.86it/s]

{'b': 0.5, 'k1': 0, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:27<00:00,  7.41it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.76it/s]

{'b': 0.5, 'k1': 0, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:29<00:00,  6.72it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.75it/s]

{'b': 0.5, 'k1': 0, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:27<00:00,  7.20it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.52it/s]

{'b': 0.5, 'k1': 0, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:27<00:00,  7.16it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.40it/s]

{'b': 0.5, 'k1': 0, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.20, Acc10: 0.49, RR: 0.30: 100%|██████████| 200/200 [00:27<00:00,  7.35it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.77it/s]

{'b': 0.5, 'k1': 1, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.20
  Accuracy10: 0.49
  RR: 0.30


Acc: 0.20, Acc10: 0.49, RR: 0.30: 100%|██████████| 200/200 [00:27<00:00,  7.36it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.84it/s]

{'b': 0.5, 'k1': 1, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.20
  Accuracy10: 0.49
  RR: 0.30


Acc: 0.20, Acc10: 0.48, RR: 0.30: 100%|██████████| 200/200 [00:27<00:00,  7.30it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.51it/s]

{'b': 0.5, 'k1': 1, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.20
  Accuracy10: 0.48
  RR: 0.30


Acc: 0.20, Acc10: 0.48, RR: 0.30: 100%|██████████| 200/200 [00:28<00:00,  6.96it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.74it/s]

{'b': 0.5, 'k1': 1, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.20
  Accuracy10: 0.48
  RR: 0.30


Acc: 0.20, Acc10: 0.48, RR: 0.29: 100%|██████████| 200/200 [00:28<00:00,  6.93it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:28,  7.04it/s]

{'b': 0.5, 'k1': 1, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.20
  Accuracy10: 0.48
  RR: 0.29


Acc: 0.20, Acc10: 0.48, RR: 0.29: 100%|██████████| 200/200 [00:27<00:00,  7.31it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.65it/s]

{'b': 0.5, 'k1': 1, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.20
  Accuracy10: 0.48
  RR: 0.29


Acc: 0.26, Acc10: 0.52, RR: 0.34: 100%|██████████| 200/200 [00:27<00:00,  7.30it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.71it/s]

{'b': 0.5, 'k1': 2, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.26
  Accuracy10: 0.52
  RR: 0.34


Acc: 0.26, Acc10: 0.51, RR: 0.34: 100%|██████████| 200/200 [00:27<00:00,  7.27it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:27,  7.25it/s]

{'b': 0.5, 'k1': 2, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.26
  Accuracy10: 0.51
  RR: 0.34


Acc: 0.26, Acc10: 0.51, RR: 0.34: 100%|██████████| 200/200 [00:27<00:00,  7.27it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:28,  6.96it/s]

{'b': 0.5, 'k1': 2, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.26
  Accuracy10: 0.51
  RR: 0.34


Acc: 0.26, Acc10: 0.52, RR: 0.34: 100%|██████████| 200/200 [00:27<00:00,  7.28it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.96it/s]

{'b': 0.5, 'k1': 2, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.26
  Accuracy10: 0.52
  RR: 0.34


Acc: 0.25, Acc10: 0.52, RR: 0.34: 100%|██████████| 200/200 [00:27<00:00,  7.26it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.85it/s]

{'b': 0.5, 'k1': 2, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.25
  Accuracy10: 0.52
  RR: 0.34


Acc: 0.25, Acc10: 0.52, RR: 0.33: 100%|██████████| 200/200 [00:27<00:00,  7.25it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.60it/s]

{'b': 0.5, 'k1': 2, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.25
  Accuracy10: 0.52
  RR: 0.33


Acc: 0.28, Acc10: 0.51, RR: 0.36: 100%|██████████| 200/200 [00:27<00:00,  7.28it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.84it/s]

{'b': 0.5, 'k1': 3, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.28
  Accuracy10: 0.51
  RR: 0.36


Acc: 0.28, Acc10: 0.51, RR: 0.35: 100%|██████████| 200/200 [00:27<00:00,  7.25it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.81it/s]

{'b': 0.5, 'k1': 3, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.28
  Accuracy10: 0.51
  RR: 0.35


Acc: 0.27, Acc10: 0.51, RR: 0.35: 100%|██████████| 200/200 [00:27<00:00,  7.27it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.40it/s]

{'b': 0.5, 'k1': 3, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.27
  Accuracy10: 0.51
  RR: 0.35


Acc: 0.27, Acc10: 0.51, RR: 0.35: 100%|██████████| 200/200 [00:27<00:00,  7.27it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:27,  7.34it/s]

{'b': 0.5, 'k1': 3, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.27
  Accuracy10: 0.51
  RR: 0.35


Acc: 0.27, Acc10: 0.51, RR: 0.34: 100%|██████████| 200/200 [00:27<00:00,  7.28it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:24,  8.00it/s]

{'b': 0.5, 'k1': 3, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.27
  Accuracy10: 0.51
  RR: 0.34


Acc: 0.27, Acc10: 0.50, RR: 0.34: 100%|██████████| 200/200 [00:27<00:00,  7.29it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.64it/s]

{'b': 0.5, 'k1': 3, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.27
  Accuracy10: 0.50
  RR: 0.34


Acc: 0.28, Acc10: 0.52, RR: 0.36: 100%|██████████| 200/200 [00:27<00:00,  7.31it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:27,  7.31it/s]

{'b': 0.5, 'k1': 4, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.28
  Accuracy10: 0.52
  RR: 0.36


Acc: 0.27, Acc10: 0.51, RR: 0.35: 100%|██████████| 200/200 [00:27<00:00,  7.28it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.43it/s]

{'b': 0.5, 'k1': 4, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.27
  Accuracy10: 0.51
  RR: 0.35


Acc: 0.27, Acc10: 0.49, RR: 0.35: 100%|██████████| 200/200 [00:27<00:00,  7.26it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.83it/s]

{'b': 0.5, 'k1': 4, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.27
  Accuracy10: 0.49
  RR: 0.35


Acc: 0.27, Acc10: 0.49, RR: 0.34: 100%|██████████| 200/200 [00:28<00:00,  7.07it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.45it/s]

{'b': 0.5, 'k1': 4, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.27
  Accuracy10: 0.49
  RR: 0.34


Acc: 0.27, Acc10: 0.49, RR: 0.34: 100%|██████████| 200/200 [00:27<00:00,  7.24it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.71it/s]

{'b': 0.5, 'k1': 4, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.27
  Accuracy10: 0.49
  RR: 0.34


Acc: 0.27, Acc10: 0.48, RR: 0.34: 100%|██████████| 200/200 [00:27<00:00,  7.22it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:28,  7.08it/s]

{'b': 0.5, 'k1': 4, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.27
  Accuracy10: 0.48
  RR: 0.34


Acc: 0.28, Acc10: 0.50, RR: 0.36: 100%|██████████| 200/200 [00:27<00:00,  7.20it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:28,  6.97it/s]

{'b': 0.5, 'k1': 5, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.28
  Accuracy10: 0.50
  RR: 0.36


Acc: 0.28, Acc10: 0.48, RR: 0.35: 100%|██████████| 200/200 [00:27<00:00,  7.20it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.64it/s]

{'b': 0.5, 'k1': 5, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.28
  Accuracy10: 0.48
  RR: 0.35


Acc: 0.28, Acc10: 0.48, RR: 0.35: 100%|██████████| 200/200 [00:27<00:00,  7.20it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:28,  7.06it/s]

{'b': 0.5, 'k1': 5, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.28
  Accuracy10: 0.48
  RR: 0.35


Acc: 0.27, Acc10: 0.48, RR: 0.34: 100%|██████████| 200/200 [00:27<00:00,  7.18it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:24,  8.13it/s]

{'b': 0.5, 'k1': 5, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.27
  Accuracy10: 0.48
  RR: 0.34


Acc: 0.27, Acc10: 0.47, RR: 0.34: 100%|██████████| 200/200 [00:27<00:00,  7.18it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.77it/s]

{'b': 0.5, 'k1': 5, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.27
  Accuracy10: 0.47
  RR: 0.34


Acc: 0.27, Acc10: 0.47, RR: 0.34: 100%|██████████| 200/200 [00:27<00:00,  7.20it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.38it/s]

{'b': 0.5, 'k1': 5, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.27
  Accuracy10: 0.47
  RR: 0.34


Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:27<00:00,  7.26it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.68it/s]

{'b': 0.75, 'k1': 0, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:27<00:00,  7.24it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.65it/s]

{'b': 0.75, 'k1': 0, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:28<00:00,  6.92it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:27,  7.24it/s]

{'b': 0.75, 'k1': 0, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:28<00:00,  7.07it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:27,  7.30it/s]

{'b': 0.75, 'k1': 0, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:28<00:00,  7.14it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.67it/s]

{'b': 0.75, 'k1': 0, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:27<00:00,  7.18it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.46it/s]

{'b': 0.75, 'k1': 0, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.21, Acc10: 0.49, RR: 0.30: 100%|██████████| 200/200 [00:28<00:00,  7.05it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.39it/s]

{'b': 0.75, 'k1': 1, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.21
  Accuracy10: 0.49
  RR: 0.30


Acc: 0.21, Acc10: 0.49, RR: 0.30: 100%|██████████| 200/200 [00:28<00:00,  7.04it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.76it/s]

{'b': 0.75, 'k1': 1, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.21
  Accuracy10: 0.49
  RR: 0.30


Acc: 0.21, Acc10: 0.49, RR: 0.30: 100%|██████████| 200/200 [00:28<00:00,  7.12it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:27,  7.17it/s]

{'b': 0.75, 'k1': 1, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.21
  Accuracy10: 0.49
  RR: 0.30


Acc: 0.21, Acc10: 0.49, RR: 0.30: 100%|██████████| 200/200 [00:28<00:00,  7.06it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.39it/s]

{'b': 0.75, 'k1': 1, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.21
  Accuracy10: 0.49
  RR: 0.30


Acc: 0.20, Acc10: 0.49, RR: 0.29: 100%|██████████| 200/200 [00:28<00:00,  7.01it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:28,  6.97it/s]

{'b': 0.75, 'k1': 1, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.20
  Accuracy10: 0.49
  RR: 0.29


Acc: 0.20, Acc10: 0.49, RR: 0.29: 100%|██████████| 200/200 [00:28<00:00,  7.06it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.76it/s]

{'b': 0.75, 'k1': 1, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.20
  Accuracy10: 0.49
  RR: 0.29


Acc: 0.24, Acc10: 0.51, RR: 0.33: 100%|██████████| 200/200 [00:28<00:00,  7.06it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.59it/s]

{'b': 0.75, 'k1': 2, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.24
  Accuracy10: 0.51
  RR: 0.33


Acc: 0.24, Acc10: 0.51, RR: 0.33: 100%|██████████| 200/200 [00:28<00:00,  7.09it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.40it/s]

{'b': 0.75, 'k1': 2, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.24
  Accuracy10: 0.51
  RR: 0.33


Acc: 0.24, Acc10: 0.51, RR: 0.33: 100%|██████████| 200/200 [00:28<00:00,  7.06it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:27,  7.13it/s]

{'b': 0.75, 'k1': 2, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.24
  Accuracy10: 0.51
  RR: 0.33


Acc: 0.23, Acc10: 0.50, RR: 0.32: 100%|██████████| 200/200 [00:28<00:00,  7.12it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.93it/s]

{'b': 0.75, 'k1': 2, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.23
  Accuracy10: 0.50
  RR: 0.32


Acc: 0.23, Acc10: 0.49, RR: 0.32: 100%|██████████| 200/200 [00:28<00:00,  7.11it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:27,  7.27it/s]

{'b': 0.75, 'k1': 2, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.23
  Accuracy10: 0.49
  RR: 0.32


Acc: 0.23, Acc10: 0.49, RR: 0.32: 100%|██████████| 200/200 [00:28<00:00,  7.12it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:27,  7.36it/s]

{'b': 0.75, 'k1': 2, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.23
  Accuracy10: 0.49
  RR: 0.32


Acc: 0.25, Acc10: 0.51, RR: 0.33: 100%|██████████| 200/200 [00:28<00:00,  7.03it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:27,  7.35it/s]

{'b': 0.75, 'k1': 3, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.25
  Accuracy10: 0.51
  RR: 0.33


Acc: 0.25, Acc10: 0.51, RR: 0.33: 100%|██████████| 200/200 [00:28<00:00,  7.04it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.66it/s]

{'b': 0.75, 'k1': 3, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.25
  Accuracy10: 0.51
  RR: 0.33


Acc: 0.23, Acc10: 0.50, RR: 0.32: 100%|██████████| 200/200 [00:28<00:00,  7.03it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.69it/s]

{'b': 0.75, 'k1': 3, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.23
  Accuracy10: 0.50
  RR: 0.32


Acc: 0.23, Acc10: 0.49, RR: 0.32: 100%|██████████| 200/200 [00:28<00:00,  7.01it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:28,  6.94it/s]

{'b': 0.75, 'k1': 3, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.23
  Accuracy10: 0.49
  RR: 0.32


Acc: 0.23, Acc10: 0.48, RR: 0.31: 100%|██████████| 200/200 [00:28<00:00,  7.05it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.60it/s]

{'b': 0.75, 'k1': 3, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.23
  Accuracy10: 0.48
  RR: 0.31


Acc: 0.23, Acc10: 0.48, RR: 0.31: 100%|██████████| 200/200 [00:28<00:00,  7.10it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.50it/s]

{'b': 0.75, 'k1': 3, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.23
  Accuracy10: 0.48
  RR: 0.31


Acc: 0.27, Acc10: 0.51, RR: 0.34: 100%|██████████| 200/200 [00:27<00:00,  7.34it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.86it/s]

{'b': 0.75, 'k1': 4, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.27
  Accuracy10: 0.51
  RR: 0.34


Acc: 0.26, Acc10: 0.51, RR: 0.33: 100%|██████████| 200/200 [00:26<00:00,  7.43it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:24,  8.12it/s]

{'b': 0.75, 'k1': 4, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.26
  Accuracy10: 0.51
  RR: 0.33


Acc: 0.25, Acc10: 0.49, RR: 0.32: 100%|██████████| 200/200 [00:26<00:00,  7.46it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.73it/s]

{'b': 0.75, 'k1': 4, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.25
  Accuracy10: 0.49
  RR: 0.32


Acc: 0.24, Acc10: 0.49, RR: 0.32: 100%|██████████| 200/200 [00:26<00:00,  7.41it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.57it/s]

{'b': 0.75, 'k1': 4, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.24
  Accuracy10: 0.49
  RR: 0.32


Acc: 0.24, Acc10: 0.49, RR: 0.31: 100%|██████████| 200/200 [00:26<00:00,  7.43it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.79it/s]

{'b': 0.75, 'k1': 4, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.24
  Accuracy10: 0.49
  RR: 0.31


Acc: 0.24, Acc10: 0.48, RR: 0.31: 100%|██████████| 200/200 [00:26<00:00,  7.48it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.80it/s]

{'b': 0.75, 'k1': 4, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.24
  Accuracy10: 0.48
  RR: 0.31


Acc: 0.26, Acc10: 0.51, RR: 0.33: 100%|██████████| 200/200 [00:26<00:00,  7.47it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.84it/s]

{'b': 0.75, 'k1': 5, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.26
  Accuracy10: 0.51
  RR: 0.33


Acc: 0.24, Acc10: 0.49, RR: 0.32: 100%|██████████| 200/200 [00:26<00:00,  7.44it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.75it/s]

{'b': 0.75, 'k1': 5, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.24
  Accuracy10: 0.49
  RR: 0.32


Acc: 0.24, Acc10: 0.47, RR: 0.32: 100%|██████████| 200/200 [00:27<00:00,  7.38it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.80it/s]

{'b': 0.75, 'k1': 5, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.24
  Accuracy10: 0.47
  RR: 0.32


Acc: 0.23, Acc10: 0.47, RR: 0.31: 100%|██████████| 200/200 [00:26<00:00,  7.47it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.82it/s]

{'b': 0.75, 'k1': 5, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.23
  Accuracy10: 0.47
  RR: 0.31


Acc: 0.23, Acc10: 0.47, RR: 0.31: 100%|██████████| 200/200 [00:26<00:00,  7.47it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.78it/s]

{'b': 0.75, 'k1': 5, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.23
  Accuracy10: 0.47
  RR: 0.31


Acc: 0.23, Acc10: 0.47, RR: 0.30: 100%|██████████| 200/200 [00:26<00:00,  7.47it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:24,  8.16it/s]

{'b': 0.75, 'k1': 5, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.23
  Accuracy10: 0.47
  RR: 0.30


Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:26<00:00,  7.56it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.88it/s]

{'b': 1.0, 'k1': 0, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:26<00:00,  7.58it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.89it/s]

{'b': 1.0, 'k1': 0, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:26<00:00,  7.55it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:24,  8.01it/s]

{'b': 1.0, 'k1': 0, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:26<00:00,  7.61it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.92it/s]

{'b': 1.0, 'k1': 0, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:26<00:00,  7.55it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:24,  8.27it/s]

{'b': 1.0, 'k1': 0, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.06, Acc10: 0.21, RR: 0.10: 100%|██████████| 200/200 [00:27<00:00,  7.39it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:28,  6.88it/s]

{'b': 1.0, 'k1': 0, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.06
  Accuracy10: 0.21
  RR: 0.10


Acc: 0.19, Acc10: 0.47, RR: 0.27: 100%|██████████| 200/200 [00:27<00:00,  7.36it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.54it/s]

{'b': 1.0, 'k1': 1, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.19
  Accuracy10: 0.47
  RR: 0.27


Acc: 0.18, Acc10: 0.47, RR: 0.27: 100%|██████████| 200/200 [00:27<00:00,  7.34it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.81it/s]

{'b': 1.0, 'k1': 1, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.18
  Accuracy10: 0.47
  RR: 0.27


Acc: 0.18, Acc10: 0.47, RR: 0.27: 100%|██████████| 200/200 [00:26<00:00,  7.41it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.54it/s]

{'b': 1.0, 'k1': 1, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.18
  Accuracy10: 0.47
  RR: 0.27


Acc: 0.17, Acc10: 0.47, RR: 0.26: 100%|██████████| 200/200 [00:28<00:00,  7.09it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:32,  6.17it/s]

{'b': 1.0, 'k1': 1, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.17
  Accuracy10: 0.47
  RR: 0.26


Acc: 0.17, Acc10: 0.46, RR: 0.26: 100%|██████████| 200/200 [00:29<00:00,  6.73it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:28,  6.92it/s]

{'b': 1.0, 'k1': 1, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.17
  Accuracy10: 0.46
  RR: 0.26


Acc: 0.17, Acc10: 0.46, RR: 0.26: 100%|██████████| 200/200 [00:29<00:00,  6.67it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:35,  5.63it/s]

{'b': 1.0, 'k1': 1, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.17
  Accuracy10: 0.46
  RR: 0.26


Acc: 0.20, Acc10: 0.44, RR: 0.28: 100%|██████████| 200/200 [00:31<00:00,  6.37it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:27,  7.29it/s]

{'b': 1.0, 'k1': 2, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.20
  Accuracy10: 0.44
  RR: 0.28


Acc: 0.20, Acc10: 0.44, RR: 0.27: 100%|██████████| 200/200 [00:31<00:00,  6.35it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:33,  5.88it/s]

{'b': 1.0, 'k1': 2, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.20
  Accuracy10: 0.44
  RR: 0.27


Acc: 0.20, Acc10: 0.43, RR: 0.27: 100%|██████████| 200/200 [00:32<00:00,  6.22it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:31,  6.34it/s]

{'b': 1.0, 'k1': 2, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.20
  Accuracy10: 0.43
  RR: 0.27


Acc: 0.19, Acc10: 0.43, RR: 0.26: 100%|██████████| 200/200 [00:29<00:00,  6.80it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:31,  6.42it/s]

{'b': 1.0, 'k1': 2, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.19
  Accuracy10: 0.43
  RR: 0.26


Acc: 0.18, Acc10: 0.43, RR: 0.26: 100%|██████████| 200/200 [00:32<00:00,  6.20it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.41it/s]

{'b': 1.0, 'k1': 2, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.18
  Accuracy10: 0.43
  RR: 0.26


Acc: 0.17, Acc10: 0.43, RR: 0.25: 100%|██████████| 200/200 [00:31<00:00,  6.29it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:35,  5.60it/s]

{'b': 1.0, 'k1': 2, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.17
  Accuracy10: 0.43
  RR: 0.25


Acc: 0.20, Acc10: 0.43, RR: 0.27: 100%|██████████| 200/200 [00:32<00:00,  6.16it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:29,  6.68it/s]

{'b': 1.0, 'k1': 3, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.20
  Accuracy10: 0.43
  RR: 0.27


Acc: 0.18, Acc10: 0.42, RR: 0.25: 100%|██████████| 200/200 [00:34<00:00,  5.86it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:37,  5.35it/s]

{'b': 1.0, 'k1': 3, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.18
  Accuracy10: 0.42
  RR: 0.25


Acc: 0.17, Acc10: 0.41, RR: 0.25: 100%|██████████| 200/200 [00:37<00:00,  5.37it/s]
  0%|          | 0/200 [00:00<?, ?it/s]

{'b': 1.0, 'k1': 3, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.17
  Accuracy10: 0.41
  RR: 0.25


Acc: 0.17, Acc10: 0.41, RR: 0.24: 100%|██████████| 200/200 [00:33<00:00,  6.03it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:30,  6.62it/s]

{'b': 1.0, 'k1': 3, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.17
  Accuracy10: 0.41
  RR: 0.24


Acc: 0.17, Acc10: 0.40, RR: 0.24: 100%|██████████| 200/200 [00:32<00:00,  6.12it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:30,  6.49it/s]

{'b': 1.0, 'k1': 3, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.17
  Accuracy10: 0.40
  RR: 0.24


Acc: 0.17, Acc10: 0.40, RR: 0.24: 100%|██████████| 200/200 [00:35<00:00,  5.69it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:39,  5.08it/s]

{'b': 1.0, 'k1': 3, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.17
  Accuracy10: 0.40
  RR: 0.24


Acc: 0.15, Acc10: 0.42, RR: 0.24: 100%|██████████| 200/200 [00:53<00:00,  3.73it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:35,  5.65it/s]

{'b': 1.0, 'k1': 4, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.15
  Accuracy10: 0.42
  RR: 0.24


Acc: 0.15, Acc10: 0.40, RR: 0.23: 100%|██████████| 200/200 [00:33<00:00,  5.89it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:25,  7.81it/s]

{'b': 1.0, 'k1': 4, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.15
  Accuracy10: 0.40
  RR: 0.23


Acc: 0.14, Acc10: 0.39, RR: 0.22: 100%|██████████| 200/200 [00:30<00:00,  6.54it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:27,  7.36it/s]

{'b': 1.0, 'k1': 4, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.14
  Accuracy10: 0.39
  RR: 0.22


Acc: 0.14, Acc10: 0.38, RR: 0.22: 100%|██████████| 200/200 [00:31<00:00,  6.26it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:30,  6.61it/s]

{'b': 1.0, 'k1': 4, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.14
  Accuracy10: 0.38
  RR: 0.22


Acc: 0.14, Acc10: 0.38, RR: 0.22: 100%|██████████| 200/200 [00:34<00:00,  5.79it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:29,  6.80it/s]

{'b': 1.0, 'k1': 4, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.14
  Accuracy10: 0.38
  RR: 0.22


Acc: 0.14, Acc10: 0.38, RR: 0.21: 100%|██████████| 200/200 [00:32<00:00,  6.24it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:26,  7.38it/s]

{'b': 1.0, 'k1': 4, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.14
  Accuracy10: 0.38
  RR: 0.21


Acc: 0.14, Acc10: 0.40, RR: 0.23: 100%|██████████| 200/200 [00:31<00:00,  6.39it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:28,  6.88it/s]

{'b': 1.0, 'k1': 5, 'k2': 0, 'type': 'bm25'}
  Accuracy: 0.14
  Accuracy10: 0.40
  RR: 0.23


Acc: 0.13, Acc10: 0.39, RR: 0.21: 100%|██████████| 200/200 [00:29<00:00,  6.75it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:27,  7.13it/s]

{'b': 1.0, 'k1': 5, 'k2': 1, 'type': 'bm25'}
  Accuracy: 0.13
  Accuracy10: 0.39
  RR: 0.21


Acc: 0.12, Acc10: 0.38, RR: 0.21: 100%|██████████| 200/200 [00:29<00:00,  6.89it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:30,  6.44it/s]

{'b': 1.0, 'k1': 5, 'k2': 2, 'type': 'bm25'}
  Accuracy: 0.12
  Accuracy10: 0.38
  RR: 0.21


Acc: 0.12, Acc10: 0.36, RR: 0.20: 100%|██████████| 200/200 [00:30<00:00,  6.66it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:32,  6.06it/s]

{'b': 1.0, 'k1': 5, 'k2': 3, 'type': 'bm25'}
  Accuracy: 0.12
  Accuracy10: 0.36
  RR: 0.20


Acc: 0.12, Acc10: 0.36, RR: 0.20: 100%|██████████| 200/200 [00:29<00:00,  6.75it/s]
Acc: 0.00, Acc10: 0.00, RR: 0.00:   0%|          | 1/200 [00:00<00:24,  7.99it/s]

{'b': 1.0, 'k1': 5, 'k2': 4, 'type': 'bm25'}
  Accuracy: 0.12
  Accuracy10: 0.36
  RR: 0.20


Acc: 0.12, Acc10: 0.35, RR: 0.20: 100%|██████████| 200/200 [00:29<00:00,  6.83it/s]

{'b': 1.0, 'k1': 5, 'k2': 5, 'type': 'bm25'}
  Accuracy: 0.12
  Accuracy10: 0.35
  RR: 0.20





{'b': 0.5, 'k1': 4, 'k2': 0, 'type': 'bm25'}

Accuracy: 0.28

Accuracy10: 0.52

RR: 0.36

## ~Дополнительно~
### 4.Поиск в векторной модели (+1 балл)

Используйте готовые эмбединги или энкодер нейронной модели, и преобразуйте с помощью него запросы и документы в вектора небольшой размерности.
Реализуйте поиск лучших документов по косинусной мере или скалярному произведению между векторами запроса и документа. Сравните результаты с моделями из предыдущих пунктов по метрикам и позапросно – выигрывает ли векторный вариант на тех примерах, где предыдущие модели не справляются? 

Для начала попробуем обучить модель на 15к статьях:

In [None]:
import gensim

data = [v for _, v in bags_of_words.items()]
model1 = gensim.models.Word2Vec(data, min_count = 1, size = 100, window = 5) 

In [57]:
from statistics import mean 
import warnings
warnings.filterwarnings('ignore')

def prepare_collection_vectors(model, docs, use_cut_data=False):
    doc_vectors = {}
    
    for title, words0 in docs.items():
        document_matrix = []
        words = words0[:30] if use_cut_data else words0
        for word in words:
            if word in model:
                document_matrix.append(model[word])
        document_vector = []
        for c in list(zip(*document_matrix)):
            document_vector.append(mean(coord))
        doc_vectors[title] = document_vector
            
    return doc_vectors

In [58]:
from scipy import spatial

def query2vec(model, query):
    emb = []
    q = []
    for word in make_terms(query):
        if word in model:
            emb.append(model[word])
    for v in list(zip(*emb)):
        q.append(mean(v))
    return q

    
def search_vector(model, query, doc_vectors):
    query_vector = query2vec(model, query)

    documents_range = []
    for title, vector in doc_vectors.items():
        if len(vector) != 0:
            documents_range.append((title, spatial.distance.cosine(vector, query_vector)))
        
    return sorted(documents_range, key=lambda x: x[1])
   
    

def demo_search(model, doc_vectors):
    for query in ["coronovirus in belarus",
                  "who won junior eurovision in 2005",
                  "science about full-text search",
                 ]:
        result = search_vector(model, query, doc_vectors)[:5]
        print(f"[{query}]")
        for article_name, score in result:
            print(f"{score:7.2f}  {article_name}")
        print("\n")

In [None]:
doc_vectors = prepare_collection_vectors(model1, bags_of_words)
demo_search(model1, doc_vectors)

In [60]:
def run_vector(title, model, queries, docs):
    accuracy = 0.0
    accuracy10 = 0.0
    rr = 0.0
    processed = 0
    q = []
    with tqdm(queries) as progress:
        for query, answer in progress:
            result = search_vector(model, query, docs)[:10]
            
            rank = None
            for position, (article_name, score) in enumerate(result):
                if article_name == answer:
                    rank = position + 1
                    if position == 0:
                        q.append((query, article_name))
                    break
                
            if rank is not None:
                accuracy += (rank == 1)
                accuracy10 += (rank <= 10)
                rr += 1.0 / rank
                
            processed += 1
            progress.set_description(f'Acc: {accuracy/processed:0.2f}, Acc10: {accuracy10/processed:0.2f}, RR: {rr/processed:0.2f}')
    print(f'{title}\n  Accuracy: {accuracy/processed:0.2f}\n  Accuracy10: {accuracy10/processed:0.2f}\n  RR: {rr/processed:0.2f}')
    return q

In [61]:
def print_better(q):
    for query, title in q:
        result = search(query, collection_data, {})
        if result[0][0] != title:
            print(f'word2vec is better!\t\nquery: {query}\t\nbm25: {result[0]}\t\nw2v: {title}\n')

In [77]:
q = run_vector("Word2Vec", model1, queries, doc_vectors)

Acc: 0.02, Acc10: 0.06, RR: 0.03: 100%|██████████| 200/200 [10:47<00:00,  3.24s/it]

Word2Vec
  Accuracy: 0.02
  Accuracy10: 0.06
  RR: 0.03





ой ну результаты что-то совсем так себе :(

In [79]:
print_better(q)

word2vec is better!	
query: what types of diseases can be passed from animals to humans	
bm25: ('Salmonella', 22.085737138680017)	
w2v: Zoonosis

word2vec is better!	
query: which diseases do microorganisms cause in plants and animals	
bm25: ('Microorganism', 21.19679855035709)	
w2v: Plant_pathology

word2vec is better!	
query: who simplified the naming of living things by giving each species a two-part scientific name	
bm25: ('Type_species', 23.59572256903776)	
w2v: Binomial_nomenclature



итого, word2vec справился лучше бм25 в двух запросах (на первой позиции нашел релевантный документ)

Теперь попробуем использовать только заголовок и примерно первое предложение статей: 

In [62]:
doc_vectors_cut = prepare_collection_vectors(model1, bags_of_words, use_cut_data=True)
demo_search(model1, doc_vectors_cut)

[coronovirus in belarus]
   0.85  Character
   0.85  Civil_parishes_in_Cornwall
   0.82  Frankfurt_Airport
   0.82  PCP
   0.81  Berlin_Zoological_Garden


[who won junior eurovision in 2005]
   0.88  Michael_Phelps
   0.86  Lindsey_Vonn
   0.85  Kurt_Browning
   0.85  Shawn_Marion
   0.85  Ivan_Boldirev


[science about full-text search]
   0.78  Science_fiction_movie
   0.77  Science
   0.76  Philosophy_of_science
   0.75  Credit_history
   0.74  Theoretical_computer_science




In [80]:
q_cut = run_vector("Word2Vec cut docs", model1, queries, doc_vectors_cut)

Acc: 0.01, Acc10: 0.05, RR: 0.02: 100%|██████████| 200/200 [14:04<00:00,  4.22s/it]

Word2Vec cut docs
  Accuracy: 0.01
  Accuracy10: 0.05
  RR: 0.02





Штош, результаты стали еще хуже :(

In [82]:
print_better(q_cut)

word2vec is better!	
query: what are the names of all the european countries	
bm25: ('Eastern_European_Time', 9.667030982155106)	
w2v: List_of_European_countries_by_area



Теперь попробуем предобученную модель:

In [18]:
from gensim.models import KeyedVectors
import codecs

w2v_file = codecs.open("/Users/danielmuraveyko/Downloads/glove.6B/glove.6B.100d.txt", encoding='utf-8')
model_pre = KeyedVectors.load_word2vec_format(w2v_file, binary=False)

doc_vectors_pre = prepare_collection_vectors(model_pre, bags_of_words, use_cut_data=True)
demo_search(model_pre, doc_vectors_pre)

In [56]:
q = run_vector("Word2Vec Pre-Trained", model_pre, queries, doc_vectors)

Acc: 0.10, Acc10: 0.20, RR: 0.12: 100%|██████████| 200/200 [02:56<00:00,  1.13it/s]

Word2Vec Pre-Trained
  Accuracy: 0.10
  Accuracy10: 0.20
  RR: 0.12





In [62]:
print_better(q)

word2vec is better!	
query: what are the 3 types of plastids in plant cells	
bm25: ('Plasmodesma', 16.432503870122957)	
w2v: Plastid

word2vec is better!	
query: what are the 3 largest countries in europe	
bm25: ('German_Americans', 11.767606775149444)	
w2v: List_of_European_countries_by_area

word2vec is better!	
query: which of the following contain members that have chloroplasts in their cells	
bm25: ('Red_algae', 19.904024203155757)	
w2v: Chloroplast

word2vec is better!	
query: who was the first scientist to suggest that different elements contain different types of atoms	
bm25: ('Chemical_compound', 18.338314880238215)	
w2v: Chemical_element

word2vec is better!	
query: who discovered the 8 different types of fingerprints	
bm25: ('Armed_Forces_DNA_Identification_Laboratory', 10.174559983622885)	
w2v: Fingerprint

word2vec is better!	
query: what are the different types of molecular movements found in living organisms	
bm25: ('Biosemiotics', 14.888146606308936)	
w2v: Cell

word2ve