Fix document index attribute name in add_documents (documents_index -> document_index)

ddangelov · Jun 9, 2021 · a7b0d53
1 parent e133bb1
commit a7b0d53
Showing 1 changed file with 6 additions and 5 deletions.
diff --git a/top2vec/Top2Vec.py b/top2vec/Top2Vec.py
@@ -292,6 +292,7 @@ def __init__(self,
                 temp.write(lines)
                 doc2vec_args["corpus_file"] = temp.name
 
+
             else:
                 train_corpus = [TaggedDocument(tokenizer(doc), [i]) for i, doc in enumerate(documents)]
                 doc2vec_args["documents"] = train_corpus
@@ -363,8 +364,8 @@ def return_doc(doc):
 
         if hdbscan_args is None:
             hdbscan_args = {'min_cluster_size': 15,
-                             'metric': 'euclidean',
-                             'cluster_selection_method': 'eom'}
+                            'metric': 'euclidean',
+                            'cluster_selection_method': 'eom'}
 
         cluster = hdbscan.HDBSCAN(**hdbscan_args).fit(umap_model.embedding_)
 
@@ -1211,16 +1212,16 @@ def add_documents(self, documents, doc_ids=None, tokenizer=None, use_embedding_m
         # update index
         if self.documents_indexed:
             # update capacity of index
-            current_max = self.documents_index.get_max_elements()
+            current_max = self.document_index.get_max_elements()
             updated_max = current_max + len(documents)
-            self.documents_index.resize_index(updated_max)
+            self.document_index.resize_index(updated_max)
 
             # update index_id and doc_ids
             start_index_id = max(self.index_id2doc_id.keys()) + 1
             new_index_ids = list(range(start_index_id, start_index_id + len(doc_ids)))
             self.index_id2doc_id.update(dict(zip(new_index_ids, doc_ids)))
             self.doc_id2index_id.update(dict(zip(doc_ids, new_index_ids)))
-            self.documents_index.add_items(document_vectors, new_index_ids)
+            self.document_index.add_items(document_vectors, new_index_ids)
 
         # update topics
         self._assign_documents_to_topic(document_vectors, hierarchy=False)