cdqa-suite · fmikaelian · Apr 29, 2019 · Apr 28, 2019 · Apr 28, 2019 · Apr 29, 2019
diff --git a/README.md b/README.md
@@ -1,10 +1,11 @@
 # cdQA <img src="https://cdn.discordapp.com/emojis/513893717816508416.gif" width="40" height="40"/>
 
 [![Build Status](https://travis-ci.com/fmikaelian/cdQA.svg?token=Vzy9RRKRZ41ynd9q2BRX&branch=develop)](https://travis-ci.com/fmikaelian/cdQA) [![codecov](https://codecov.io/gh/fmikaelian/cdQA/branch/develop/graph/badge.svg?token=F16X0IU6RT)](https://codecov.io/gh/fmikaelian/cdQA)
+[![PyPI Version](https://img.shields.io/pypi/v/tensorflow.svg)](https://pypi.org/project/tensorflow/)
+[![PyPI Downloads](https://img.shields.io/pypi/dm/tensorflow.svg)](https://pypi.org/project/tensorflow/)
 [![Binder](https://mybinder.org/badge.svg)]()
 [![Colab](https://colab.research.google.com/assets/colab-badge.svg)]()
- [![License](
-https://img.shields.io/badge/License-MIT-yellow.svg)](https://choosealicense.com/licenses/mit/)
+[![License](https://img.shields.io/badge/License-MIT-yellow.svg)](https://choosealicense.com/licenses/mit/)
 
 An end-to-end closed-domain question answering system with BERT and classic IR methods 📚
 
@@ -170,7 +171,6 @@ python cdqa/pipeline/download.py
 
 The data is saved in  `/data` and the models in `/models`. You can load the models with `joblib.load()`.
 
-
 ### Practical examples
 
 A complete workflow is described in our [`examples`](examples) notebook.

diff --git a/cdqa/reader/bertqa_sklearn.py b/cdqa/reader/bertqa_sklearn.py
@@ -1186,7 +1186,7 @@ def predict(self, X):
         self.model.eval()
         all_results = []
         logger.info("Start evaluating")
-        for input_ids, input_mask, segment_ids, example_indices in tqdm(eval_dataloader, desc="Evaluating", disable=self.local_rank not in [-1, 0]):
+        for input_ids, input_mask, segment_ids, example_indices in eval_dataloader:
             if len(all_results) % 1000 == 0:
                 logger.info("Processing example: %d" % (len(all_results)))
             input_ids = input_ids.to(self.device)

diff --git a/cdqa/retriever/tfidf_doc_ranker.py b/cdqa/retriever/tfidf_doc_ranker.py
@@ -55,12 +55,12 @@ class TfidfRetriever(BaseEstimator):
     """
 
     def __init__(self,
+                 metadata,
                  ngram_range=(1, 2),
                  max_df=0.85,
                  stop_words='english',
                  paragraphs=None,
                  top_n=3,
-                 metadata,
                  verbose=True):
 
         self.ngram_range = ngram_range
@@ -85,10 +85,10 @@ def predict(self, X):
         t0 = time.time()
         question_vector = self.vectorizer.transform([X])
         scores = pd.DataFrame(self.tfidf_matrix.dot(question_vector.T).toarray())
-        closest_docs_indices = scores.sort_values(by=0, ascending=False).index[:top_n].values
+        closest_docs_indices = scores.sort_values(by=0, ascending=False).index[:self.top_n].values
 
         # inspired from https://github.com/facebookresearch/DrQA/blob/50d0e49bb77fe0c6e881efb4b6fe2e61d3f92509/scripts/reader/interactive.py#L63
-        if verbose:
+        if self.verbose:
             rank = 1
             table = prettytable.PrettyTable(['rank', 'index', 'title'])
             for i in range(len(closest_docs_indices)):

diff --git a/cdqa/utils/converter.py b/cdqa/utils/converter.py
@@ -54,15 +54,15 @@ def df2squad(df, squad_version='v2.0', output_dir=None, filename=None):
 
    return json_data
 
-def generate_squad_examples(question, article_indices, metadata):
+def generate_squad_examples(question, closest_docs_indices, metadata):
     """
     Creates a SQuAD examples json object for a given for a given question using outputs of retriever and document database.
 
     Parameters
     ----------
     question : [type]
         [description]
-    article_indices : [type]
+    closest_docs_indices : [type]
         [description]
     metadata : [type]
         [description]
@@ -76,15 +76,15 @@ def generate_squad_examples(question, article_indices, metadata):
     --------
     >>> from cdqa.utils.converter import generate_squad_examples
     >>> squad_examples = generate_squad_examples(question='Since when does the the Excellence Program of BNP Paribas exist?',
-                                         article_indices=[788, 408, 2419],
+                                         closest_docs_indices=[788, 408, 2419],
                                          metadata=df)
 
     """
 
 
     squad_examples = []
 
-    metadata_sliced = metadata.loc[article_indices]
+    metadata_sliced = metadata.loc[closest_docs_indices]
 
     for index, row in tqdm(metadata_sliced.iterrows()):
         temp = {'title': row['title'],