Skip to content

Commit

Permalink
Updated delete_documents method to also delete the stored documents
Browse files (browse the repository at this point in the history)
  • Loading branch information
ddangelov committed Sep 25, 2020
1 parent 3ee4723 commit 5c326a2
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 3 deletions.
4 changes: 4 additions & 0 deletions top2vec/Top2Vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -634,6 +634,10 @@ def delete_documents(self, doc_ids):
# get document indexes from ids
doc_indexes = self._get_document_indexes(doc_ids)

# delete documents
if self.documents is not None:
self.documents = np.delete(self.documents, doc_indexes, 0)

# delete document ids
if self.document_ids is not None:
for doc_id in doc_ids:
Expand Down
16 changes: 13 additions & 3 deletions top2vec/tests/test_top2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@

@pytest.mark.parametrize('top2vec_model', [top2vec, top2vec_docids, top2vec_no_docs, top2vec_corpus_file])
def test_add_documents_original(top2vec_model):

num_docs = top2vec_model.model.docvecs.vectors_docs.shape[0]

docs_to_add = newsgroups_train.data[0:100]
Expand All @@ -40,7 +39,11 @@ def test_add_documents_original(top2vec_model):
topic_count_sum_new = sum(top2vec_model.get_topic_sizes()[0])
num_docs_new = top2vec_model.model.docvecs.vectors_docs.shape[0]

assert topic_count_sum + len(docs_to_add) == topic_count_sum_new == num_docs + len(docs_to_add) == num_docs_new
assert topic_count_sum + len(docs_to_add) == topic_count_sum_new == num_docs + len(docs_to_add) \
== num_docs_new

if top2vec_model.documents is not None:
assert num_docs_new == len(top2vec_model.documents)


@pytest.mark.parametrize('top2vec_model', [top2vec, top2vec_docids, top2vec_no_docs, top2vec_corpus_file])
Expand Down Expand Up @@ -79,6 +82,9 @@ def test_add_documents_post_reduce(top2vec_model):
assert topic_count_sum + len(docs_to_add) == topic_count_sum_new == topic_count_reduced_sum + len(docs_to_add) \
== topic_count_reduced_sum_new == num_docs + len(docs_to_add) == num_docs_new

if top2vec_model.documents is not None:
assert num_docs_new == len(top2vec_model.documents)


@pytest.mark.parametrize('top2vec_model', [top2vec, top2vec_docids, top2vec_no_docs, top2vec_corpus_file])
def test_delete_documents(top2vec_model):
Expand All @@ -99,7 +105,11 @@ def test_delete_documents(top2vec_model):
num_docs_new = top2vec_model.model.docvecs.vectors_docs.shape[0]

assert topic_count_sum - len(doc_ids_to_delete) == topic_count_sum_new == topic_count_reduced_sum - \
len(doc_ids_to_delete) == topic_count_reduced_sum_new == num_docs - len(doc_ids_to_delete) == num_docs_new
len(doc_ids_to_delete) == topic_count_reduced_sum_new == num_docs - len(doc_ids_to_delete) \
== num_docs_new

if top2vec_model.documents is not None:
assert num_docs_new == len(top2vec_model.documents)


@pytest.mark.parametrize('top2vec_model', [top2vec, top2vec_docids, top2vec_no_docs, top2vec_corpus_file])
Expand Down

0 comments on commit 5c326a2

Please sign in to comment.