From 3932d69ec80e0590b8657de9abbd179db150a3be Mon Sep 17 00:00:00 2001
From: fmikaelian <39884124+fmikaelian@users.noreply.github.com>
Date: Sun, 28 Apr 2019 11:51:24 +0200
Subject: [PATCH 1/4] Add github badges #87

---
 README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 9fd6464..e450864 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,11 @@
 # cdQA
 [![Build Status](https://travis-ci.com/fmikaelian/cdQA.svg?token=Vzy9RRKRZ41ynd9q2BRX&branch=develop)](https://travis-ci.com/fmikaelian/cdQA)
 [![codecov](https://codecov.io/gh/fmikaelian/cdQA/branch/develop/graph/badge.svg?token=F16X0IU6RT)](https://codecov.io/gh/fmikaelian/cdQA)
+[![PyPI Downloads](https://img.shields.io/pypi/dm/tensorflow.svg)](https://pypi.org/project/tensorflow/)
+[![PyPI Version](https://img.shields.io/pypi/v/tensorflow.svg)](https://pypi.org/project/tensorflow/)
 [![Binder](https://mybinder.org/badge.svg)]()
 [![Colab](https://colab.research.google.com/assets/colab-badge.svg)]()
-[![License](
-https://img.shields.io/badge/License-MIT-yellow.svg)](https://choosealicense.com/licenses/mit/)
+[![License](https://img.shields.io/badge/License-MIT-yellow.svg)](https://choosealicense.com/licenses/mit/)
 
 An end-to-end closed-domain question answering system with BERT and classic IR methods 📚
 
@@ -170,7 +171,6 @@ python cdqa/pipeline/download.py
 
 The data is saved in `/data` and the models in `/models`. You can load the models with `joblib.load()`.
 
-
 ### Practical examples
 
 A complete worfklow is described in our [`examples`](examples) notebook.
From 3311885988c7fbeca12dfc0b640ee22d7367fe26 Mon Sep 17 00:00:00 2001
From: fmikaelian <39884124+fmikaelian@users.noreply.github.com>
Date: Sun, 28 Apr 2019 11:58:04 +0200
Subject: [PATCH 2/4] Disable verbose during predictions #103

---
 cdqa/reader/bertqa_sklearn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cdqa/reader/bertqa_sklearn.py b/cdqa/reader/bertqa_sklearn.py
index 5c11fab..5cf0dde 100644
--- a/cdqa/reader/bertqa_sklearn.py
+++ b/cdqa/reader/bertqa_sklearn.py
@@ -1186,7 +1186,7 @@ def predict(self, X):
         self.model.eval()
         all_results = []
         logger.info("Start evaluating")
-        for input_ids, input_mask, segment_ids, example_indices in tqdm(eval_dataloader, desc="Evaluating", disable=self.local_rank not in [-1, 0]):
+        for input_ids, input_mask, segment_ids, example_indices in eval_dataloader:
             if len(all_results) % 1000 == 0:
                 logger.info("Processing example: %d" % (len(all_results)))
             input_ids = input_ids.to(self.device)
From 95931376009f9f3ef6c60fa124ee933967a2685b Mon Sep 17 00:00:00 2001
From: fmikaelian <39884124+fmikaelian@users.noreply.github.com>
Date: Mon, 29 Apr 2019 10:02:36 +0200
Subject: [PATCH 3/4] fix typos and tests #95

---
 cdqa/retriever/tfidf_doc_ranker.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cdqa/retriever/tfidf_doc_ranker.py b/cdqa/retriever/tfidf_doc_ranker.py
index bfade32..d39d76a 100644
--- a/cdqa/retriever/tfidf_doc_ranker.py
+++ b/cdqa/retriever/tfidf_doc_ranker.py
@@ -55,12 +55,12 @@ class TfidfRetriever(BaseEstimator):
     """
 
     def __init__(self,
+                 metadata,
                  ngram_range=(1, 2),
                  max_df=0.85,
                  stop_words='english',
                  paragraphs=None,
                  top_n=3,
-                 metadata,
                  verbose=True):
 
         self.ngram_range = ngram_range
@@ -85,10 +85,10 @@ def predict(self, X):
         t0 = time.time()
         question_vector = self.vectorizer.transform([X])
         scores = pd.DataFrame(self.tfidf_matrix.dot(question_vector.T).toarray())
-        closest_docs_indices = scores.sort_values(by=0, ascending=False).index[:top_n].values
+        closest_docs_indices = scores.sort_values(by=0, ascending=False).index[:self.top_n].values
 
         # inspired from https://github.com/facebookresearch/DrQA/blob/50d0e49bb77fe0c6e881efb4b6fe2e61d3f92509/scripts/reader/interactive.py#L63
-        if verbose:
+        if self.verbose:
             rank = 1
             table = prettytable.PrettyTable(['rank', 'index', 'title'])
             for i in range(len(closest_docs_indices)):
From db3fa6697870a098948adafed85eb27e4e6502cf Mon Sep 17 00:00:00 2001
From: fmikaelian <39884124+fmikaelian@users.noreply.github.com>
Date: Mon, 29 Apr 2019 10:06:41 +0200
Subject: [PATCH 4/4] Rename variables and scripts #108

---
 cdqa/utils/converter.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/cdqa/utils/converter.py b/cdqa/utils/converter.py
index d589029..9e78b82 100644
--- a/cdqa/utils/converter.py
+++ b/cdqa/utils/converter.py
@@ -54,7 +54,7 @@ def df2squad(df, squad_version='v2.0', output_dir=None, filename=None):
 
     return json_data
 
-def generate_squad_examples(question, article_indices, metadata):
+def generate_squad_examples(question, closest_docs_indices, metadata):
     """
     Creates a SQuAD examples json object for a given for a given question using outputs of retriever and document database.
 
@@ -62,7 +62,7 @@ def generate_squad_examples(question, article_indices, metadata):
     ----------
     question : [type]
         [description]
-    article_indices : [type]
+    closest_docs_indices : [type]
         [description]
     metadata : [type]
         [description]
@@ -76,7 +76,7 @@ def generate_squad_examples(question, article_indices, metadata):
     --------
     >>> from cdqa.utils.converter import generate_squad_examples
     >>> squad_examples = generate_squad_examples(question='Since when does the the Excellence Program of BNP Paribas exist?',
-                                                 article_indices=[788, 408, 2419],
+                                                 closest_docs_indices=[788, 408, 2419],
                                                  metadata=df)
 
     """
@@ -84,7 +84,7 @@ def generate_squad_examples(question, article_indices, metadata):
 
     squad_examples = []
 
-    metadata_sliced = metadata.loc[article_indices]
+    metadata_sliced = metadata.loc[closest_docs_indices]
 
     for index, row in tqdm(metadata_sliced.iterrows()):
         temp = {'title': row['title'],