jbesomi · mk2510 · Aug 18, 2020 · Aug 19, 2020 · Aug 19, 2020 · Aug 21, 2020
diff --git a/.travis.yml b/.travis.yml
@@ -20,7 +20,7 @@ jobs:
       env: PATH=/c/Python38:/c/Python38/Scripts:$PATH
 install: 
     - pip3 install --upgrade pip  # all three OSes agree about 'pip3'
-    - pip3 install black
+    - pip3 install black==19.10b0
     - pip3 install ".[dev]" .
 # 'python' points to Python 2.7 on macOS but points to Python 3.8 on Linux and Windows
 # 'python3' is a 'command not found' error on Windows but 'py' works on Windows only

diff --git a/setup.cfg b/setup.cfg
@@ -38,10 +38,11 @@ install_requires =
     unidecode>=1.1.1
     gensim>=3.6.0
     matplotlib>=3.1.0
+    pyLDAvis>=2.1.2
 # TODO pick the correct version.
 [options.extras_require]
 dev =
-    black>=19.10b0
+    black==19.10b0
     pytest>=4.0.0
     Sphinx>=3.0.3
     sphinx-markdown-builder>=0.5.4

diff --git a/tests/test_indexes.py b/tests/test_indexes.py
@@ -12,6 +12,13 @@
 s_tokenized_lists = pd.Series([["Test", "Test2"], ["Test3"]], index=[5, 6])
 s_numeric = pd.Series([5.0], index=[5])
 s_numeric_lists = pd.Series([[5.0, 5.0], [6.0, 6.0]], index=[5, 6])
+df_document_term = pd.DataFrame(
+    [[0.125, 0.0, 0.0, 0.125, 0.250], [0.0, 0.25, 0.125, 0.0, 0.125]],
+    index=[5, 6],
+    columns=pd.MultiIndex.from_product([["test"], ["!", ".", "?", "TEST", "Test"]]),
+    dtype="Sparse",
+)
+
 
 # Define all test cases. Every test case is a list
 # of [name of test case, function to test, tuple of valid input for the function].
@@ -56,27 +63,27 @@
 ]
 
 test_cases_representation = [
-    [
-        "count",
-        lambda x: representation.flatten(representation.count(x)),
-        (s_tokenized_lists,),
-    ],
-    [
-        "term_frequency",
-        lambda x: representation.flatten(representation.term_frequency(x)),
-        (s_tokenized_lists,),
-    ],
-    [
-        "tfidf",
-        lambda x: representation.flatten(representation.tfidf(x)),
-        (s_tokenized_lists,),
-    ],
+    ["count", representation.count, (s_tokenized_lists,),],
+    ["term_frequency", representation.term_frequency, (s_tokenized_lists,),],
+    ["tfidf", representation.tfidf, (s_tokenized_lists,),],
     ["pca", representation.pca, (s_numeric_lists, 0)],
     ["nmf", representation.nmf, (s_numeric_lists,)],
     ["tsne", representation.tsne, (s_numeric_lists,)],
+    ["truncatedSVD", representation.truncatedSVD, (s_numeric_lists, 1)],
+    ["lda", representation.lda, (s_numeric_lists, 1)],
     ["kmeans", representation.kmeans, (s_numeric_lists, 1)],
     ["dbscan", representation.dbscan, (s_numeric_lists,)],
     ["meanshift", representation.meanshift, (s_numeric_lists,)],
+    [
+        "topics_from_topic_model",
+        representation.topics_from_topic_model,
+        (s_numeric_lists,),
+    ],
+    [
+        "relevant_words_per_document",
+        representation.relevant_words_per_document,
+        (df_document_term,),
+    ],
 ]
 
 test_cases_visualization = []
@@ -106,12 +113,15 @@ class AbstractIndexTest(PandasTestCase):
     def test_correct_index(self, name, test_function, valid_input):
         s = valid_input[0]
         result_s = test_function(*valid_input)
-        t_same_index = pd.Series(s.values, s.index)
-        self.assertTrue(result_s.index.equals(t_same_index.index))
+        # Series and DataFrame inputs both expose `.index`; compare against it
+        # directly rather than rebuilding a copy of the input.
+        self.assertTrue(result_s.index.equals(s.index))
 
     @parameterized.expand(test_cases)
     def test_incorrect_index(self, name, test_function, valid_input):
         s = valid_input[0]
         result_s = test_function(*valid_input)
-        t_different_index = pd.Series(s.values, index=None)
-        self.assertFalse(result_s.index.equals(t_different_index.index))
+        # The default RangeIndex is what constructing with `index=None` yields,
+        # for Series and DataFrame alike.
+        t_different_index = pd.RangeIndex(len(s))
+        self.assertFalse(result_s.index.equals(t_different_index))