Skip to content

Commit

Permalink
bug fix for corpus_file
Browse files Browse the repository at this point in the history
  • Loading branch information
ddangelov committed Sep 28, 2020
1 parent 5c326a2 commit b82de5a
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions top2vec/Top2Vec.py
Original file line number Diff line number Diff line change
@@ -156,10 +156,10 @@ def __init__(self, documents, min_count=50, speed="learn", use_corpus_file=False

         if use_corpus_file:
             logger.info('Pre-processing documents for training')
-            processed = [self._tokenizer(doc) for doc in documents]
-            lines = [' '.join(line) + "\n" for line in processed]
+            processed = [' '.join(self._tokenizer(doc)) for doc in documents]
+            lines = "\n".join(processed)
             temp = tempfile.NamedTemporaryFile(mode='w+t')
-            temp.writelines(lines)
+            temp.write(lines)

logger.info('Creating joint document/word embedding')
if workers is None:
Expand Down

0 comments on commit b82de5a

Please sign in to comment.