Merge branch 'develop'

amaiya · Nov 5, 2021 · 1adc59b · 1adc59b
2 parents e5cec4c + 127cc86
commit 1adc59b
Show file tree

Hide file tree

Showing 12 changed files with 580 additions and 22 deletions.
diff --git a/docs/graph/data.html b/docs/graph/data.html
@@ -29,7 +29,6 @@ <h1 class="title">Module <code>ktrain.graph.data</code></h1>
 <pre><code class="python">from ..imports import *
 from .. import utils as U
 from .preprocessor import NodePreprocessor, LinkPreprocessor
-import networkx as nx
 
 
 def graph_nodes_from_csv(nodes_filepath, 
@@ -86,6 +85,10 @@ <h1 class="title">Module <code>ktrain.graph.data</code></h1>
     #----------------------------------------------------------------
     # read graph structure
     #----------------------------------------------------------------
+    try:
+        import networkx as nx
+    except ImportError:
+        raise ImportError(&#39;Please install networkx:  pip install networkx&#39;)
     nx_sep = None if sep in [&#39; &#39;, &#39;\t&#39;] else sep
     g_nx = nx.read_edgelist(path=links_filepath, delimiter=nx_sep)
 
@@ -242,6 +245,11 @@ <h1 class="title">Module <code>ktrain.graph.data</code></h1>
         tuple of EdgeSequenceWrapper objects for train and validation sets and LinkPreprocessor
     ```
     &#34;&#34;&#34;
+    try:
+        import networkx as nx
+    except ImportError:
+        raise ImportError(&#39;Please install networkx:  pip install networkx&#39;)
+
 
     # import stellargraph
     try:
@@ -357,6 +365,11 @@ <h2 class="section-title" id="header-functions">Functions</h2>
         tuple of EdgeSequenceWrapper objects for train and validation sets and LinkPreprocessor
     ```
     &#34;&#34;&#34;
+    try:
+        import networkx as nx
+    except ImportError:
+        raise ImportError(&#39;Please install networkx:  pip install networkx&#39;)
+
 
     # import stellargraph
     try:
@@ -512,6 +525,10 @@ <h2 class="section-title" id="header-functions">Functions</h2>
     #----------------------------------------------------------------
     # read graph structure
     #----------------------------------------------------------------
+    try:
+        import networkx as nx
+    except ImportError:
+        raise ImportError(&#39;Please install networkx:  pip install networkx&#39;)
     nx_sep = None if sep in [&#39; &#39;, &#39;\t&#39;] else sep
     g_nx = nx.read_edgelist(path=links_filepath, delimiter=nx_sep)
 

diff --git a/docs/graph/index.html b/docs/graph/index.html
@@ -201,6 +201,11 @@ <h2 class="section-title" id="header-functions">Functions</h2>
         tuple of EdgeSequenceWrapper objects for train and validation sets and LinkPreprocessor
     ```
     &#34;&#34;&#34;
+    try:
+        import networkx as nx
+    except ImportError:
+        raise ImportError(&#39;Please install networkx:  pip install networkx&#39;)
+
 
     # import stellargraph
     try:
@@ -448,6 +453,10 @@ <h2 class="section-title" id="header-functions">Functions</h2>
     #----------------------------------------------------------------
     # read graph structure
     #----------------------------------------------------------------
+    try:
+        import networkx as nx
+    except ImportError:
+        raise ImportError(&#39;Please install networkx:  pip install networkx&#39;)
     nx_sep = None if sep in [&#39; &#39;, &#39;\t&#39;] else sep
     g_nx = nx.read_edgelist(path=links_filepath, delimiter=nx_sep)
 

diff --git a/docs/graph/models.html b/docs/graph/models.html
@@ -37,10 +37,10 @@ <h1 class="title">Module <code>ktrain.graph.models</code></h1>
 
 GRAPHSAGE = &#39;graphsage&#39;
 NODE_CLASSIFIERS = {
-        GRAPHSAGE: &#39;GraphSAGE:  http://arxiv.org/pdf/1607.01759.pdf&#39;}
+        GRAPHSAGE: &#39;GraphSAGE:  https://arxiv.org/pdf/1706.02216.pdf&#39;}
 
 LINK_PREDICTORS = {
-        GRAPHSAGE: &#39;GraphSAGE:  http://arxiv.org/pdf/1607.01759.pdf&#39;}
+        GRAPHSAGE: &#39;GraphSAGE:  https://arxiv.org/pdf/1706.02216.pdf&#39;}
 
 
 def print_node_classifiers():

diff --git a/docs/graph/preprocessor.html b/docs/graph/preprocessor.html
@@ -29,7 +29,6 @@ <h1 class="title">Module <code>ktrain.graph.preprocessor</code></h1>
 <pre><code class="python">from ..imports import *
 from .. import utils as U
 from ..preprocessor import Preprocessor
-import networkx as nx
 
 
 class NodePreprocessor(Preprocessor):
@@ -176,6 +175,12 @@ <h1 class="title">Module <code>ktrain.graph.preprocessor</code></h1>
         G_te (Graph):  a networkx Graph containing new nodes
         ```
         &#34;&#34;&#34;
+        try:
+            import networkx as nx
+        except ImportError:
+            raise ImportError(&#39;Please install networkx:  pip install networkx&#39;)
+
+
         if self.y_encoding is None:
             raise Exception(&#39;Unset parameters. Are you sure you called preprocess_train first?&#39;)
 
@@ -651,6 +656,12 @@ <h3>Methods</h3>
         G_te (Graph):  a networkx Graph containing new nodes
         ```
         &#34;&#34;&#34;
+        try:
+            import networkx as nx
+        except ImportError:
+            raise ImportError(&#39;Please install networkx:  pip install networkx&#39;)
+
+
         if self.y_encoding is None:
             raise Exception(&#39;Unset parameters. Are you sure you called preprocess_train first?&#39;)
 
@@ -792,6 +803,12 @@ <h3>Methods</h3>
     G_te (Graph):  a networkx Graph containing new nodes
     ```
     &#34;&#34;&#34;
+    try:
+        import networkx as nx
+    except ImportError:
+        raise ImportError(&#39;Please install networkx:  pip install networkx&#39;)
+
+
     if self.y_encoding is None:
         raise Exception(&#39;Unset parameters. Are you sure you called preprocess_train first?&#39;)
 

diff --git a/docs/text/index.html b/docs/text/index.html
@@ -132,6 +132,10 @@ <h2 class="section-title" id="header-submodules">Sub-modules</h2>
 <dd>
 <div class="desc"></div>
 </dd>
+<dt><code class="name"><a title="ktrain.text.speech" href="speech/index.html">ktrain.text.speech</a></code></dt>
+<dd>
+<div class="desc"></div>
+</dd>
 <dt><code class="name"><a title="ktrain.text.summarization" href="summarization/index.html">ktrain.text.summarization</a></code></dt>
 <dd>
 <div class="desc"></div>
@@ -2508,9 +2512,10 @@ <h3>Inherited members</h3>
     def extract(self, filename=None, text=None,return_format=&#39;document&#39;, lang=None):
         &#34;&#34;&#34;
         ```
-        Extracts text from supplied filename
+        Extracts text from a document, given the file path to the document.
         filename(str): path to file,  Mutually-exclusive with text.
         text(str): string to tokenize.  Mutually-exclusive with filename.
+                   The extract method can also simply accept a string and return lists of sentences or paragraphs.
         return_format(str): One of {&#39;document&#39;, &#39;paragraphs&#39;, &#39;sentences&#39;}
                           &#39;document&#39;: returns text of document
                           &#39;paragraphs&#39;: returns a list of paragraphs from document
@@ -2553,9 +2558,10 @@ <h3>Methods</h3>
 <span>def <span class="ident">extract</span></span>(<span>self, filename=None, text=None, return_format='document', lang=None)</span>
 </code></dt>
 <dd>
-<div class="desc"><pre><code>Extracts text from supplied filename
+<div class="desc"><pre><code>Extracts text from a document, given the file path to the document.
 filename(str): path to file,  Mutually-exclusive with text.
 text(str): string to tokenize.  Mutually-exclusive with filename.
+           The extract method can also simply accept a string and return lists of sentences or paragraphs.
 return_format(str): One of {'document', 'paragraphs', 'sentences'}
                   'document': returns text of document
                   'paragraphs': returns a list of paragraphs from document
@@ -2569,9 +2575,10 @@ <h3>Methods</h3>
 <pre><code class="python">def extract(self, filename=None, text=None,return_format=&#39;document&#39;, lang=None):
     &#34;&#34;&#34;
     ```
-    Extracts text from supplied filename
+    Extracts text from a document, given the file path to the document.
     filename(str): path to file,  Mutually-exclusive with text.
     text(str): string to tokenize.  Mutually-exclusive with filename.
+               The extract method can also simply accept a string and return lists of sentences or paragraphs.
     return_format(str): One of {&#39;document&#39;, &#39;paragraphs&#39;, &#39;sentences&#39;}
                       &#39;document&#39;: returns text of document
                       &#39;paragraphs&#39;: returns a list of paragraphs from document
@@ -5622,6 +5629,7 @@ <h1>Index</h1>
 <li><code><a title="ktrain.text.preprocessor" href="preprocessor.html">ktrain.text.preprocessor</a></code></li>
 <li><code><a title="ktrain.text.qa" href="qa/index.html">ktrain.text.qa</a></code></li>
 <li><code><a title="ktrain.text.shallownlp" href="shallownlp/index.html">ktrain.text.shallownlp</a></code></li>
+<li><code><a title="ktrain.text.speech" href="speech/index.html">ktrain.text.speech</a></code></li>
 <li><code><a title="ktrain.text.summarization" href="summarization/index.html">ktrain.text.summarization</a></code></li>
 <li><code><a title="ktrain.text.textextractor" href="textextractor.html">ktrain.text.textextractor</a></code></li>
 <li><code><a title="ktrain.text.textutils" href="textutils.html">ktrain.text.textutils</a></code></li>

diff --git a/docs/text/qa/index.html b/docs/text/qa/index.html
@@ -26,7 +26,7 @@ <h1 class="title">Module <code>ktrain.text.qa</code></h1>
 <summary>
 <span>Expand source code</span>
 </summary>
-<pre><code class="python">from .core import SimpleQA, AnswerExtractor</code></pre>
+<pre><code class="python">from .core import SimpleQA, AnswerExtractor, QA</code></pre>
 </details>
 </section>
 <section>

diff --git a/docs/text/qa/qa_finetuner.html b/docs/text/qa/qa_finetuner.html
@@ -29,11 +29,15 @@ <h1 class="title">Module <code>ktrain.text.qa.qa_finetuner</code></h1>
 <pre><code class="python">import tensorflow as tf
 
 
-from datasets import Dataset, load_dataset
 import pandas as pd
 import warnings
 
 def convert_to_dataset(list_of_dicts):
+    try:
+        from datasets import Dataset, load_dataset
+    except ImportError:
+        raise ImportError(&#39;The datasets package is required for fine-tuning QA models: pip install datasets&#39;)
+
     new_list = []
     for d in list_of_dicts:
         if &#39;question&#39; not in d or &#39;context&#39; not in d or &#39;answers&#39; not in d:
@@ -74,6 +78,11 @@ <h1 class="title">Module <code>ktrain.text.qa.qa_finetuner</code></h1>
     to the maximum sequence length, or whether we only pad to the maximum length within that batch. The former
     is most useful when training on TPU, as a new graph compilation is required for each sequence length.
     &#34;&#34;&#34;
+    try:
+        from datasets import Dataset, load_dataset
+    except ImportError:
+        raise ImportError(&#39;The datasets package is required for fine-tuning QA models: pip install datasets&#39;)
+
 
     def densify_ragged_batch(features, label=None):
         features = {
@@ -282,6 +291,11 @@ <h2 class="section-title" id="header-functions">Functions</h2>
     to the maximum sequence length, or whether we only pad to the maximum length within that batch. The former
     is most useful when training on TPU, as a new graph compilation is required for each sequence length.
     &#34;&#34;&#34;
+    try:
+        from datasets import Dataset, load_dataset
+    except ImportError:
+        raise ImportError(&#39;The datasets package is required for fine-tuning QA models: pip install datasets&#39;)
+
 
     def densify_ragged_batch(features, label=None):
         features = {
@@ -330,6 +344,11 @@ <h2 class="section-title" id="header-functions">Functions</h2>
 <span>Expand source code</span>
 </summary>
 <pre><code class="python">def convert_to_dataset(list_of_dicts):
+    try:
+        from datasets import Dataset, load_dataset
+    except ImportError:
+        raise ImportError(&#39;The datasets package is required for fine-tuning QA models: pip install datasets&#39;)
+
     new_list = []
     for d in list_of_dicts:
         if &#39;question&#39; not in d or &#39;context&#39; not in d or &#39;answers&#39; not in d: