In [None]:
# pandas builds the query frames; PyTerrier provides retrieval and evaluation.
import pandas as pd
import pyterrier as pt
# Initialise PyTerrier only when it has not been started yet, so re-running
# this cell does not call pt.init() a second time.
if not pt.started():
    pt.init()

from googletrans import Translator
# Shared translator instance; translate_topics() reads this module-level name.
translator = Translator()

In [3]:
def translate_topics(topics, dest):
    """Translate every topic into ``dest`` and return the cleaned-up queries.

    For each row of ``topics`` the ``title`` and ``description`` values are
    joined as ``"<title>. <description>"`` and passed to the module-level
    ``translator`` (one ``translate`` call per row). The translated text is
    then stripped of ``?`` and ``/`` and has every ``'`` replaced by a space.

    Parameters
    ----------
    topics : pandas.DataFrame
        Must expose ``qid``, ``title`` and ``description`` columns.
    dest : str
        Forwarded unchanged as the ``dest`` argument of ``translator.translate``.

    Returns
    -------
    pandas.DataFrame
        Columns ``query`` (cleaned translation) and ``qid`` (copied from
        ``topics``), carrying the same index as ``topics``.
    """
    # Applied in this order: drop '?', drop '/', turn apostrophes into spaces.
    substitutions = (('?', ''), ('/', ''), ("'", " "))

    def _translate_row(row):
        source_text = row.title + '. ' + row.description
        cleaned = translator.translate(source_text, dest=dest).text
        for unwanted, substitute in substitutions:
            cleaned = cleaned.replace(unwanted, substitute)
        return cleaned

    queries = topics.apply(_translate_row, axis=1)
    query_frame = pd.DataFrame(queries, columns=['query'])
    # Index-aligned assignment: both frames share the index of `topics`.
    query_frame['qid'] = topics['qid']
    return query_frame

In [6]:
def run_baselines(index_path, human_translated_topics, machine_translated_topics, qrels, bm25_parameters, dlm_parameters, eval_metrics):
    """Evaluate BM25 and DirichletLM on human- and machine-translated topics.

    Opens the index at ``index_path``, builds one ``pt.BatchRetrieve`` per
    weighting model and displays two ``pt.Experiment`` tables, each preceded
    by a printed heading:

    * human translation: the retrievers themselves are given to
      ``pt.Experiment`` together with ``human_translated_topics``;
    * machine translation: the retrievers are first applied to
      ``machine_translated_topics`` and those outputs are given to
      ``pt.Experiment`` in place of the retrievers.

    Parameters
    ----------
    index_path
        Location handed to ``pt.IndexRef.of``.
    human_translated_topics, machine_translated_topics
        Topic sets for the two experiments.
    qrels
        Relevance judgements forwarded to ``pt.Experiment``.
    bm25_parameters, dlm_parameters
        Passed as ``controls`` to the BM25 and DirichletLM retrievers.
    eval_metrics
        Metrics forwarded to ``pt.Experiment``.

    Returns
    -------
    None
        Results are shown via ``display``; nothing is returned.
    """
    index = pt.IndexFactory.of(pt.IndexRef.of(index_path))

    # One retriever per weighting model, in the order BM25, DirichletLM.
    model_settings = (("BM25", bm25_parameters), ("DirichletLM", dlm_parameters))
    retrievers = [
        pt.BatchRetrieve(index, wmodel=model_name, controls=model_controls)
        for model_name, model_controls in model_settings
    ]

    # Machine-translated runs are computed up front, before anything is shown.
    machine_runs = [retriever(machine_translated_topics) for retriever in retrievers]

    experiment_plan = (
        ('-- Human translation --', retrievers, human_translated_topics),
        ('-- Machine translation --', machine_runs, machine_translated_topics),
    )
    for heading, systems, experiment_topics in experiment_plan:
        print(heading)
        display(pt.Experiment(
            retr_systems=systems,
            names=['BM25', 'DLM'],
            topics=experiment_topics,
            qrels=qrels,
            eval_metrics=eval_metrics))