# In [2]:
import warnings
import pyLDAvis
from pyLDAvis import gensim_models

from docs_stream import DocumentsManager
from corpus_tokenizer import CorpusTokenizer
from topic_processing import TopicManager


# Silence all warnings for the remainder of the session.
warnings.filterwarnings(action='ignore')

# Obtain the TopicManager: build it from scratch when no saved copy
# exists, otherwise reuse the saved one.
if not TopicManager.is_topic_manager_saved():
    # Load the documents about Covid-19 taken from Wikipedia (stored in
    # the docs/ folder).
    doc_files = DocumentsManager()

    # Reuse previously saved tokens when they are available; otherwise
    # tokenize the texts of the loaded documents (using spaCy).
    tokenizer = (
        CorpusTokenizer.saved_tokenizer()
        if CorpusTokenizer.are_tokens_saved()
        else CorpusTokenizer(doc_files.documents_texts())
    )

    # Create the dictionary and the bag-of-words corpus from the tokens.
    topic_manager = TopicManager(tokenizer)
else:
    topic_manager = TopicManager.saved_topic_manager()
    

# Take the dictionary and the bag-of-words corpus from the TopicManager.
dictionary, corpus_bow = topic_manager.dictionary, topic_manager.corpus_bow

# Training parameters for the LDA model.
# NOTE(review): the names match gensim's LdaModel arguments; presumably
# topic_manager.lda_model forwards them unchanged -- confirm in
# topic_processing.
num_topics, chunksize = 4, 20
passes, iterations = 10, 400
eval_every = None

# Create and train the LDA model with the parameters above.
lda_model = topic_manager.lda_model(
    num_topics, chunksize, passes, iterations, eval_every
)

# Visualize the topics with pyLDAvis: prepare the visualization data from
# the trained model, enable notebook mode, then display the result as the
# final expression of the cell.
visual_data = gensim_models.prepare(
    topic_model=lda_model,
    corpus=corpus_bow,
    dictionary=dictionary,
)
pyLDAvis.enable_notebook()
pyLDAvis.display(visual_data)