Merge branch 'develop'

amaiya · Dec 11, 2019 · 8a22e83 · 8a22e83
2 parents ec422fe + 9e62b0d
commit 8a22e83
Show file tree

Hide file tree

Showing 11 changed files with 156 additions and 48 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,20 @@ Most recent releases are shown at the top. Each release shows:
 - **Changed**: Additional parameters, changes to inputs or outputs, etc
 - **Fixed**: Bug fixes that don't change documented behaviour
 
+## 0.7.1 (2019-12-11)
+
+### New:
+- All data-loading functions (e.g., `texts_from_csv`) accept a `random_state` argument
+that will enable consistent reproduction of the train-test split.
+
+### Changed:
+- Perform local checks for `stellargraph` where needed.
+- Removed `stellargraph` as a dependency due to issues with it overwriting `tensorflow-gpu`.
+
+### Fixed:
+- N/A
+
+
 ## 0.7.0 (2019-12-10)
 
 ### New:

diff --git a/README.md b/README.md
@@ -1,4 +1,8 @@
 
+### [Overview](#overview) | [Tutorials](#tutorials) | [Examples](#examples) |  [Installation](#installation)
+
+# ktrain
+
 
 
 ### News and Announcements
@@ -14,8 +18,8 @@
 
 ----
 
+### Overview
 
-# ktrain
 *ktrain* is a lightweight wrapper for the deep learning library [Keras](https://keras.io/) (and other libraries) to help build, train, and deploy neural networks.  With only a few lines of code, ktrain allows you to easily and quickly:
 
 - estimate an optimal learning rate for your model given your data using a Learning Rate Finder
@@ -57,13 +61,18 @@ Some blog tutorials about *ktrain* are shown below:
 
 > [**BERT Text Classification in 3 Lines of Code**](https://towardsdatascience.com/bert-text-classification-in-3-lines-of-code-using-keras-264db7e7a358)  
 
-> [**Explainable AI in Practice**](https://medium.com/@asmaiya/explainable-ai-in-practice-2e5ae2d16dc7) 
+> [**Explainable AI in Practice**](https://medium.com/@asmaiya/explainable-ai-in-practice-2e5ae2d16dc7)
+
+
+
+
 
 
-Using *ktrain* on **Google Colab**?  See [this simple demo of Multiclass Text Classification with BERT](https://colab.research.google.com/drive/1AH3fkKiEqBpVpO5ua00scp7zcHs5IDLK).
 
 
 
+### Examples
+
 Tasks such as text classification and image classification can be accomplished easily with 
 only a few lines of code.
 
@@ -177,7 +186,9 @@ learner.autofit(0.01, checkpoint_folder='/tmp/saved_weights')
 ```
 
 
-Additional examples can be found [here](https://github.com/amaiya/ktrain/tree/master/examples).
+Using *ktrain* on **Google Colab**?  See [this simple demo of Multiclass Text Classification with BERT](https://colab.research.google.com/drive/1AH3fkKiEqBpVpO5ua00scp7zcHs5IDLK).
+
+**Additional examples can be found [here](https://github.com/amaiya/ktrain/tree/master/examples).**
 
 
 
@@ -194,9 +205,22 @@ Make sure pip is up-to-date with: `pip3 install -U pip`.
 
 2. Install *ktrain*: `pip3 install ktrain`
 
-The *ktrain* package can be used with TensorFlow versions 1.14 and 2.0.  If using TensorFlow 2.0, *ktrain*
+**Some things to note:**
+- The *ktrain* package can be used with either TensorFlow 2.0 or TensorFlow 1.14.  If using TensorFlow 2.0, *ktrain*
 presently runs in 1.x mode using [tf.compat.v1.disable_v2_behavior](https://www.tensorflow.org/api_docs/python/tf/compat/v1/disable_v2_behavior).  In the future, this will be removed and **only** TensorFlow 2 will be supported.
 
+- Since some *ktrain* dependencies have not yet been migrated to `tf.keras` in TensorFlow 2 (or may have other issues),
+  *ktrain* is temporarily using forked versions of some libraries. Specifically, *ktrain* uses forked versions
+  of `eli5` and `stellargraph`.  If not installed, *ktrain* will complain when a method or function needing
+  either of these libraries is invoked.
+  To install these forked versions, you can do the following:
+```
+pip3 install git+https://github.com/amaiya/eli5@tfkeras_0_10_1
+pip3 install git+https://github.com/amaiya/stellargraph@no_tf_dep_082
+```
+
+
+
 
 <!--
 ### Requirements

diff --git a/ktrain/graph/data.py b/ktrain/graph/data.py
@@ -1,6 +1,5 @@
 from ..imports import *
 from .. import utils as U
-from .node_generator import NodeSequenceWrapper
 from .preprocessor import NodePreprocessor
 
 
@@ -12,6 +11,7 @@ def graph_nodes_from_csv(nodes_filepath,
                          holdout_pct=None, 
                          holdout_for_inductive=False,
                          missing_label_value=None,
+                         random_state=None,
                          verbose=1):
     """
     Loads graph data from CSV files. 
@@ -43,6 +43,7 @@ def graph_nodes_from_csv(nodes_filepath,
                                       If False, holdout nodes will be included in graph
                                       and their features (but not labels) are accessible
                                       during training.
+        random_state (int):  random seed for train/test split
         verbose (boolean): verbosity
     Return:
         tuple of NodeSequenceWrapper objects for train and validation sets and NodePreprocessor
@@ -144,7 +145,7 @@ def graph_nodes_from_csv(nodes_filepath,
                                                         train_size=train_pct,
                                                         test_size=None,
                                                         stratify=df_annotated['target'], 
-                                                        random_state=None)
+                                                        random_state=random_state)
     #te_data, test_data = sklearn.model_selection.train_test_split(test_data,
                                                                 #train_size=0.2,
                                                                 #test_size=None,
@@ -175,6 +176,7 @@ def graph_nodes_from_csv(nodes_filepath,
     preproc = NodePreprocessor(G, df_G, sample_size=sample_size, missing_label_value=missing_label_value)
     trn = preproc.preprocess_train(list(tr_data.index))
     val = preproc.preprocess_valid(list(te_data.index))
+    from .node_generator import NodeSequenceWrapper
     if df_holdout is not None and G_holdout is not None: 
         return (NodeSequenceWrapper(trn), NodeSequenceWrapper(val), preproc, df_holdout, G_holdout)
     else:

diff --git a/ktrain/graph/models.py b/ktrain/graph/models.py
@@ -1,9 +1,9 @@
 from ..imports import *
 from .. import utils as U
-from .node_generator import NodeSequenceWrapper
-import stellargraph as sg
-from stellargraph.mapper import GraphSAGENodeGenerator, GraphSAGELinkGenerator
-from stellargraph.layer import GraphSAGE
+
+
+
+
 
 
 
@@ -31,6 +31,7 @@ def graph_node_classifier(name, train_data, layer_sizes=[32,32], verbose=1):
     Return:
         model (Model): A Keras Model instance
     """
+    from .node_generator import NodeSequenceWrapper
 
     # check argument
     if not isinstance(train_data, NodeSequenceWrapper):
@@ -53,6 +54,19 @@ def graph_node_classifier(name, train_data, layer_sizes=[32,32], verbose=1):
     loss_func = 'categorical_crossentropy'
     activation = 'softmax'
 
+    # import stellargraph
+    try:
+        import stellargraph as sg
+        from stellargraph.layer import GraphSAGE
+    except:
+        raise Exception(SG_ERRMSG)
+    if version.parse(sg.__version__) < version.parse('0.8'):
+        raise Exception(SG_ERRMSG)
+
+
+
+
+
     # build a GraphSAGE node classification model
     graphsage_model = GraphSAGE(
         layer_sizes=layer_sizes,

diff --git a/ktrain/graph/node_generator.py b/ktrain/graph/node_generator.py
@@ -1,4 +1,15 @@
-from stellargraph.mapper import node_mappers
+from ..imports import *
+
+
+# import stellargraph
+try:
+    import stellargraph as sg
+    from stellargraph.mapper import node_mappers
+except:
+    raise Exception(SG_ERRMSG)
+if version.parse(sg.__version__) < version.parse('0.8'):
+    raise Exception(SG_ERRMSG)
+
 
 class NodeSequenceWrapper(node_mappers.NodeSequence):
     def __init__(self, node_seq):

diff --git a/ktrain/graph/preprocessor.py b/ktrain/graph/preprocessor.py
@@ -1,12 +1,6 @@
 from ..imports import *
 from .. import utils as U
 from ..preprocessor import Preprocessor
-from .node_generator import NodeSequenceWrapper
-
-import stellargraph as sg
-from stellargraph.mapper import GraphSAGENodeGenerator, GraphSAGELinkGenerator
-from stellargraph.layer import GraphSAGE
-
 
 
 class NodePreprocessor(Preprocessor):
@@ -79,11 +73,24 @@ def preprocess_train(self, node_ids):
         train_targets = self.y_encoding.fit_transform(df_tr[["target"]].to_dict('records'))
 
 
+
+        # import stellargraph
+        try:
+            import stellargraph as sg
+            from stellargraph.mapper import GraphSAGENodeGenerator
+        except:
+            raise Exception(SG_ERRMSG)
+        if version.parse(sg.__version__) < version.parse('0.8'):
+            raise Exception(SG_ERRMSG)
+
+
+
         # return generator
         G_sg = sg.StellarGraph(self.G, node_features=self.df[self.feature_names])
         self.G_sg = G_sg
         generator = GraphSAGENodeGenerator(G_sg, U.DEFAULT_BS, [self.sampsize, self.sampsize])
         train_gen = generator.flow(df_tr.index, train_targets, shuffle=True)
+        from .node_generator import NodeSequenceWrapper
         return NodeSequenceWrapper(train_gen)
 
 
@@ -103,11 +110,23 @@ def preprocess_valid(self, node_ids):
         # one-hot-encode target
         val_targets = self.y_encoding.transform(df_val[["target"]].to_dict('records'))
 
+
+        # import stellargraph
+        try:
+            import stellargraph as sg
+            from stellargraph.mapper import GraphSAGENodeGenerator
+        except:
+            raise Exception(SG_ERRMSG)
+        if version.parse(sg.__version__) < version.parse('0.8'):
+            raise Exception(SG_ERRMSG)
+
+
         # return generator
         if self.G_sg is None:
             self.G_sg = sg.StellarGraph(self.G, node_features=self.df[self.feature_names])
         generator = GraphSAGENodeGenerator(self.G_sg, U.DEFAULT_BS, [self.sampsize,self.sampsize])
         val_gen = generator.flow(df_val.index, val_targets, shuffle=False)
+        from .node_generator import NodeSequenceWrapper
         return NodeSequenceWrapper(val_gen)
 
 
@@ -140,10 +159,22 @@ def preprocess_test(self, df_te, G_te):
         else:
             test_targets = [-1] * len(df_te.shape[0])
 
+
+        # import stellargraph
+        try:
+            import stellargraph as sg
+            from stellargraph.mapper import GraphSAGENodeGenerator
+        except:
+            raise Exception(SG_ERRMSG)
+        if version.parse(sg.__version__) < version.parse('0.8'):
+            raise Exception(SG_ERRMSG)
+
+
         # return generator
         G_sg = sg.StellarGraph(G_agg, node_features=df_agg[self.feature_names])
         generator = GraphSAGENodeGenerator(G_sg, U.DEFAULT_BS, [self.sampsize,self.sampsize])
         test_gen = generator.flow(df_te.index, test_targets, shuffle=False)
+        from .node_generator import NodeSequenceWrapper
         return NodeSequenceWrapper(test_gen)
 
 

diff --git a/ktrain/imports.py b/ktrain/imports.py
@@ -12,7 +12,10 @@
 
 # TF2-transition
 import tensorflow.compat.v1 as tf
-#logging.getLogger('tensorflow').setLevel(logging.CRITICAL)
+if tf.__version__.startswith('1.14'):
+    try:
+        logging.getLogger('tensorflow').setLevel(logging.CRITICAL)
+    except: pass
 tf.logging.set_verbosity(tf.logging.ERROR)
 tf.disable_v2_behavior()
 
@@ -189,6 +192,9 @@
 from seqeval.metrics import f1_score as ner_f1_score
 from seqeval.metrics.sequence_labeling import get_entities
 
+# packaging
+from packaging import version
+
 
 
 try:
@@ -197,4 +203,9 @@
 except:
     PIL_INSTALLED = False
 
+SG_ERRMSG = 'ktrain currently uses a forked version of stellargraph v0.8.2. '+\
+            'Please install with: '+\
+            'pip3 install git+https://github.com/amaiya/stellargraph@no_tf_dep_082'
+
+