In [23]:
import pandas as pd
from rank_bm25 import BM25Okapi
import nltk
from nltk.stem.porter import PorterStemmer

nltk.download('punkt')
nltk.download('stopwords')

def stem_tokenize(text, remove_stopwords=True):
  stemmer = PorterStemmer()
  tokens = [word for sent in nltk.sent_tokenize(text) \
                                      for word in nltk.word_tokenize(sent)]
  tokens = [word for word in tokens if word not in \
          nltk.corpus.stopwords.words('english')]
  return [stemmer.stem(word) for word in tokens]

[nltk_data] Downloading package punkt to /Users/moli/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /Users/moli/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [24]:
# Files paths

request_file_path = '../data/dev.tsv'
question_bank_path = '../data/question_bank.tsv'
run_file_path = '../sample_runs/dev_bm25'

In [25]:
# Reads files and build bm25 corpus (index)

dev = pd.read_csv(request_file_path, sep='\t')
question_bank = pd.read_csv(question_bank_path, sep='\t').fillna('')

question_bank['tokenized_question_list'] = question_bank['question'].map(stem_tokenize)
question_bank['tokenized_question_str'] = question_bank['tokenized_question_list'].map(lambda x: ' '.join(x))

bm25_corpus = question_bank['tokenized_question_list'].tolist()
bm25 = BM25Okapi(bm25_corpus)

In [26]:
# Runs bm25 for every query and stores output in file.

with open(run_file_path, 'w') as fo:
  for tid in dev['topic_id'].unique():
    query = dev.loc[dev['topic_id']==tid, 'initial_request'].tolist()[0]
    bm25_ranked_list = bm25.get_top_n(stem_tokenize(query, True), 
                                    bm25_corpus, 
                                    n=30)
    bm25_q_list = [' '.join(sent) for sent in bm25_ranked_list]
    preds = question_bank.set_index('tokenized_question_str').loc[bm25_q_list, 'question_id'].tolist()
    for i, qid in enumerate(preds):    
      fo.write('{} 0 {} {} {} bm25\n'.format(tid, qid, i, len(preds)-i))

In [29]:
# Report question relevance performance
! python clariq_eval_tool.py    --eval_task question_relevance\
                                --data_dir ../data/ \
                                --experiment_type dev \
                                --run_file {run_file_path} \
                                --out_file {run_file_path}_question_relevance.eval

Recall5: 0.3245570421150917
Recall10: 0.5638042646208281
Recall20: 0.6674997108155003
Recall30: 0.6912818698329535


In [30]:
! python clariq_eval_tool.py    --eval_task document_relevance\
                                --data_dir ../data/ \
                                --experiment_type dev \
                                --run_file {run_file_path} \
                                --out_file {run_file_path}.eval

NDCG1: 0.1859375
NDCG3: 0.16076878817226317
NDCG5: 0.15299655291242895
NDCG10: 0.1362843190319216
NDCG20: 0.12845266881072878
P1: 0.23125
P3: 0.18958333333333333
P5: 0.17500000000000002
P10: 0.140625
P20: 0.11812500000000001
MRR100: 0.30959228090067364
