Merge branch 'develop'

amaiya · Nov 16, 2019 · f91f703 · f91f703
2 parents 7e19f45 + 14558ae
commit f91f703
Show file tree

Hide file tree

Showing 6 changed files with 26 additions and 10 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,19 @@ Most recent releases are shown at the top. Each release shows:
 - **Changed**: Additional parameters, changes to inputs or outputs, etc
 - **Fixed**: Bug fixes that don't change documented behaviour
 
+## 0.6.1 (2019-11-16)
+
+### New:
+- N/A
+
+### Changed:
+- N/A
+
+### Fixed:
+- `TextPredictor.explain` now correctly supports non-English languages.
+- Parameter `activation` is no longer ignored in `_build_bert` function
+
+
 ## 0.6.0 (2019-11-12)
 
 ### New:

diff --git a/README.md b/README.md
@@ -19,7 +19,7 @@
      - **Text Classification**: [BERT](https://arxiv.org/abs/1810.04805), [NBSVM](https://www.aclweb.org/anthology/P12-2018), [fastText](https://arxiv.org/abs/1607.01759), GRUs with [pretrained word vectors](https://fasttext.cc/docs/en/english-vectors.html), and other models <sub><sup>[[example notebook](https://nbviewer.jupyter.org/github/amaiya/ktrain/blob/master/examples/text/IMDb-BERT.ipynb)]</sup></sub>
      - **Sequence Labeling**:  [Bidirectional LSTM-CRF](https://arxiv.org/abs/1603.01360) with optional pretrained word embeddings <sub><sup>[[example notebook](https://nbviewer.jupyter.org/github/amaiya/ktrain/blob/master/tutorial-06-sequence-tagging.ipynb)]</sup></sub>
      - **Unsupervised Topic Modeling** with [LDA](http://www.jmlr.org/papers/volume3/blei03a/blei03a.pdf)  <sub><sup>[[example notebook](https://nbviewer.jupyter.org/github/amaiya/ktrain/blob/master/examples/text/20newsgroups-topic_modeling.ipynb)]</sup></sub>
-     - **Document Similarity with One-Class Learning**:  given some documents of interest, find and score new documents that are semantically similar to them using [One Class Text Classification](https://en.wikipedia.org/wiki/One-class_classification) <sub><sup>[[example notebook](https://nbviewer.jupyter.org/github/amaiya/ktrain/blob/master/examples/text/20newsgroups-document_similarity_scorer.ipynb)]</sup></sub>
+     - **Document Similarity with One-Class Learning**:  given some documents of interest, find and score new documents that are semantically similar to them using [One-Class Text Classification](https://en.wikipedia.org/wiki/One-class_classification) <sub><sup>[[example notebook](https://nbviewer.jupyter.org/github/amaiya/ktrain/blob/master/examples/text/20newsgroups-document_similarity_scorer.ipynb)]</sup></sub>
      - **Document Recommendation Engine**:  given text from a sample document, recommend documents that are semantically similar to it from a larger corpus  <sub><sup>[[example notebook](https://nbviewer.jupyter.org/github/amaiya/ktrain/blob/master/examples/text/20newsgroups-recommendation_engine.ipynb)]</sup></sub>
   - `vision` data:
     - **image classification** (e.g., [ResNet](https://arxiv.org/abs/1512.03385), [Wide ResNet](https://arxiv.org/abs/1605.07146), [Inception](https://www.cs.unc.edu/~wliu/papers/GoogLeNet.pdf)) <sub><sup>[[example notebook](https://nbviewer.jupyter.org/github/amaiya/ktrain/blob/master/examples/vision/dogs_vs_cats-ResNet50.ipynb)]</sup></sub>

diff --git a/ktrain/tests/test_chinese_text.py b/ktrain/tests/test_chinese_text.py
@@ -61,8 +61,8 @@ def test_fasttext_chinese(self):
         p = ktrain.load_predictor('/tmp/test_predictor')
         self.assertEqual(p.predict(TEST_DOC), 'pos')
         self.assertEqual(np.argmax(p.predict_proba([TEST_DOC])[0]), 0)
-        #self.assertEqual(type(p.explain(TEST_DOC)), IPython.core.display.HTML)
-        self.assertEqual(type(p.explain(TEST_DOC)), type(None))
+        self.assertEqual(type(p.explain(TEST_DOC)), IPython.core.display.HTML)
+        #self.assertEqual(type(p.explain(TEST_DOC)), type(None))
 
 
 if __name__ == "__main__":

diff --git a/ktrain/text/models.py b/ktrain/text/models.py
@@ -216,7 +216,7 @@ def _build_bert(x_train, y_train, num_classes,
                                     seq_len=maxlen)
     inputs = model.inputs[:2]
     dense = model.get_layer('NSP-Dense').output
-    outputs = Dense(units=num_classes, activation='softmax')(dense)
+    outputs = Dense(units=num_classes, activation=activation)(dense)
     model = Model(inputs, outputs)
     model.compile(loss=loss_func,
                   optimizer=U.DEFAULT_OPT,

diff --git a/ktrain/text/predictor.py b/ktrain/text/predictor.py
@@ -63,15 +63,18 @@ def predict_proba(self, texts):
         return self.predict(texts, return_proba=True)
 
 
-    def explain(self, doc):
+    def explain(self, doc, truncate_len=512):
         """
         Highlights text to explain prediction
+        Args:
+            doc (str): text of document
+            truncate_len (int): truncate document to this many words
         """
         if not isinstance(doc, str): raise Exception('text must of type str')
-        if self.preproc.lang != 'en': 
-            warnings.warn('explain currently only supports English')
-            return
-        doc = ' '.join(doc.split()[:512])
+        if self.preproc.is_nospace_lang():
+            doc = self.preproc.process_chinese([doc])
+            doc = doc[0]
+        doc = ' '.join(doc.split()[:truncate_len])
         te = TextExplainer(random_state=42)
         _ = te.fit(doc, self.predict_proba)
         return te.show_prediction(target_names=self.preproc.get_classes())

diff --git a/ktrain/version.py b/ktrain/version.py
@@ -1,2 +1,2 @@
 __all__ = ['__version__']
-__version__ = '0.6.0'
+__version__ = '0.6.1'