Skip to content

Commit

Permalink
debug-minor-updates (#106)
Browse files Browse the repository at this point in the history
* Add github badges #87

* Disable verbose during predictions #103

* fix typos and tests #95

* Rename variables and scripts #108
  • Loading branch information
fmikaelian committed Apr 29, 2019
1 parent 62f4d8c commit bed1e3e
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 11 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
# cdQA <img src="https://cdn.discordapp.com/emojis/513893717816508416.gif" width="40" height="40"/>

[![Build Status](https://travis-ci.com/fmikaelian/cdQA.svg?token=Vzy9RRKRZ41ynd9q2BRX&branch=develop)](https://travis-ci.com/fmikaelian/cdQA) [![codecov](https://codecov.io/gh/fmikaelian/cdQA/branch/develop/graph/badge.svg?token=F16X0IU6RT)](https://codecov.io/gh/fmikaelian/cdQA)
[![PyPI Version](https://img.shields.io/pypi/v/tensorflow.svg)](https://pypi.org/project/tensorflow/)
[![PyPI Downloads](https://img.shields.io/pypi/dm/tensorflow.svg)](https://pypi.org/project/tensorflow/)
[![Binder](https://mybinder.org/badge.svg)]()
[![Colab](https://colab.research.google.com/assets/colab-badge.svg)]()
[![License](
https://img.shields.io/badge/License-MIT-yellow.svg)](https://choosealicense.com/licenses/mit/)
[![License](https://img.shields.io/badge/License-MIT-yellow.svg)](https://choosealicense.com/licenses/mit/)

An end-to-end closed-domain question answering system with BERT and classic IR methods 📚

Expand Down Expand Up @@ -170,7 +171,6 @@ python cdqa/pipeline/download.py

The data is saved in `/data` and the models in `/models`. You can load the models with `joblib.load()`.


### Practical examples

A complete workflow is described in our [`examples`](examples) notebook.
Expand Down
2 changes: 1 addition & 1 deletion cdqa/reader/bertqa_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -1186,7 +1186,7 @@ def predict(self, X):
self.model.eval()
all_results = []
logger.info("Start evaluating")
for input_ids, input_mask, segment_ids, example_indices in tqdm(eval_dataloader, desc="Evaluating", disable=self.local_rank not in [-1, 0]):
for input_ids, input_mask, segment_ids, example_indices in eval_dataloader:
if len(all_results) % 1000 == 0:
logger.info("Processing example: %d" % (len(all_results)))
input_ids = input_ids.to(self.device)
Expand Down
6 changes: 3 additions & 3 deletions cdqa/retriever/tfidf_doc_ranker.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,12 @@ class TfidfRetriever(BaseEstimator):
"""

def __init__(self,
metadata,
ngram_range=(1, 2),
max_df=0.85,
stop_words='english',
paragraphs=None,
top_n=3,
metadata,
verbose=True):

self.ngram_range = ngram_range
Expand All @@ -85,10 +85,10 @@ def predict(self, X):
t0 = time.time()
question_vector = self.vectorizer.transform([X])
scores = pd.DataFrame(self.tfidf_matrix.dot(question_vector.T).toarray())
closest_docs_indices = scores.sort_values(by=0, ascending=False).index[:top_n].values
closest_docs_indices = scores.sort_values(by=0, ascending=False).index[:self.top_n].values

# inspired from https://github.com/facebookresearch/DrQA/blob/50d0e49bb77fe0c6e881efb4b6fe2e61d3f92509/scripts/reader/interactive.py#L63
if verbose:
if self.verbose:
rank = 1
table = prettytable.PrettyTable(['rank', 'index', 'title'])
for i in range(len(closest_docs_indices)):
Expand Down
8 changes: 4 additions & 4 deletions cdqa/utils/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,15 @@ def df2squad(df, squad_version='v2.0', output_dir=None, filename=None):

return json_data

def generate_squad_examples(question, article_indices, metadata):
def generate_squad_examples(question, closest_docs_indices, metadata):
"""
Creates a SQuAD examples JSON object for a given question, using the outputs of the retriever and the document database.
Parameters
----------
question : [type]
[description]
article_indices : [type]
closest_docs_indices : [type]
[description]
metadata : [type]
[description]
Expand All @@ -76,15 +76,15 @@ def generate_squad_examples(question, article_indices, metadata):
--------
>>> from cdqa.utils.converter import generate_squad_examples
>>> squad_examples = generate_squad_examples(question='Since when does the Excellence Program of BNP Paribas exist?',
article_indices=[788, 408, 2419],
closest_docs_indices=[788, 408, 2419],
metadata=df)
"""


squad_examples = []

metadata_sliced = metadata.loc[article_indices]
metadata_sliced = metadata.loc[closest_docs_indices]

for index, row in tqdm(metadata_sliced.iterrows()):
temp = {'title': row['title'],
Expand Down

0 comments on commit bed1e3e

Please sign in to comment.