# Topic Modeling - LDA

In [4]:
import glob
from datetime import datetime
import logging
import logging as log
import gensim
import matplotlib.pyplot as plt
import pyLDAvis
import pyLDAvis.gensim
from gensim.models import CoherenceModel
from sklearn.externals import joblib
import gzip
from multiprocessing import Pool
# from topic_coherence import ModelSimilarity, WithinTopicMeasure
import math
from gensim.corpora import Dictionary, MmCorpus
from gensim.models.word2vec import LineSentence

%matplotlib notebook
# logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

In [5]:
class MyDocuments(object):
    def __init__(self, dirname):
        self.dirname = dirname
 
    def __iter__(self):
        with gzip.open(self.dirname, 'rb') as f:
            for line in f:
                yield line.decode().split('\t')[1].split()

In [3]:
%%time


dictionary_filepath = 'data/eos/dic_bow/bigram_dict_%s.dict' 
bow_filepath = 'data/eos/dic_bow/bigram_bow_corpus_%s.mm' 

def prepare_corpus():
    window_list = glob.glob('dynamic_nmf/data/windowbin/slices/tokenized_window_20*.gz')
    window_list.sort()
    print (window_list)
    
    for window_docs in window_list:
        print(window_docs)
        print(bow_filepath % window_docs[-10: -3])
        # Generate dictionary       
        dictionary = build_dict (window_docs)      

        # generate bag-of-words representations for
        # all reviews and save them as a matrix
        MmCorpus.serialize(bow_filepath % window_docs[-10:-3], doc2bow_generator(window_docs, dictionary))
        corpus = MmCorpus(bow_filepath % window_docs[-10:-3])

        print(dictionary)
        print(corpus)
    
    
def build_dict (final_doc_filepath):
    
    print ("start dictionary " + datetime.now().strftime("%H:%M:%S"))
    docs = MyDocuments(final_doc_filepath)

    # learn the dictionary by iterating over all of the reviews
    dictionary = Dictionary(docs)
    
    # filter tokens that are very rare or too common from
    # the dictionary (filter_extremes) and reassign integer ids (compactify)
    dictionary.filter_extremes(no_below=3)
    dictionary.compactify()

    dictionary.save(dictionary_filepath % final_doc_filepath[-10: -3] )
    print ("finished dictionary " + datetime.now().strftime("%H:%M:%S"))
    return dictionary
    
    
def doc2bow_generator(filepath, dictionary):
    """
    generator function to read reviews from a file
    and yield a bag-of-words representation
    """
    
    for doc in LineSentence(filepath):
        yield dictionary.doc2bow(doc)

prepare_corpus()

['dynamic_nmf/data/windowbin/slices/tokenized_window_2012_01.gz', 'dynamic_nmf/data/windowbin/slices/tokenized_window_2012_02.gz', 'dynamic_nmf/data/windowbin/slices/tokenized_window_2012_03.gz', 'dynamic_nmf/data/windowbin/slices/tokenized_window_2012_04.gz', 'dynamic_nmf/data/windowbin/slices/tokenized_window_2012_05.gz', 'dynamic_nmf/data/windowbin/slices/tokenized_window_2012_06.gz', 'dynamic_nmf/data/windowbin/slices/tokenized_window_2012_07.gz', 'dynamic_nmf/data/windowbin/slices/tokenized_window_2012_08.gz', 'dynamic_nmf/data/windowbin/slices/tokenized_window_2012_09.gz', 'dynamic_nmf/data/windowbin/slices/tokenized_window_2012_10.gz', 'dynamic_nmf/data/windowbin/slices/tokenized_window_2012_11.gz', 'dynamic_nmf/data/windowbin/slices/tokenized_window_2012_12.gz', 'dynamic_nmf/data/windowbin/slices/tokenized_window_2013_01.gz', 'dynamic_nmf/data/windowbin/slices/tokenized_window_2013_02.gz', 'dynamic_nmf/data/windowbin/slices/tokenized_window_2013_03.gz', 'dynamic_nmf/data/window

Dictionary(35730 unique tokens: ['ratline', '450', 'norah_jones', 'mark_heinrich', 'prosthetic']...)
MmCorpus(6506 documents, 35730 features, 1594731 non-zero entries)
dynamic_nmf/data/windowbin/slices/tokenized_window_2013_01.gz
data/eos/dic_bow/bigram_bow_corpus_2013_01.mm
start dictionary 22:48:00
finished dictionary 22:48:04
Dictionary(45843 unique tokens: ['bipartisan_support', '14,500', 'musulman', 'mark_heinrich', 'circumspect']...)
MmCorpus(12295 documents, 45843 features, 2747048 non-zero entries)
dynamic_nmf/data/windowbin/slices/tokenized_window_2013_02.gz
data/eos/dic_bow/bigram_bow_corpus_2013_02.mm
start dictionary 22:48:11
finished dictionary 22:48:15
Dictionary(43127 unique tokens: ['bipartisan_support', 'june18', 'norah_jones', 'ruemmler', '450']...)
MmCorpus(12745 documents, 43127 features, 2631768 non-zero entries)
dynamic_nmf/data/windowbin/slices/tokenized_window_2013_03.gz
data/eos/dic_bow/bigram_bow_corpus_2013_03.mm
start dictionary 22:48:22
finished dictionary 

Dictionary(970 unique tokens: ['warrant', 'nothing', 'establish', 'hard', 'right']...)
MmCorpus(38 documents, 970 features, 5207 non-zero entries)
dynamic_nmf/data/windowbin/slices/tokenized_window_2015_09.gz
data/eos/dic_bow/bigram_bow_corpus_2015_09.mm
start dictionary 22:53:15
finished dictionary 22:53:15
Dictionary(1251 unique tokens: ['2007', 'nothing', 'parliament', 'establish', 'hard']...)
MmCorpus(55 documents, 1251 features, 7078 non-zero entries)
dynamic_nmf/data/windowbin/slices/tokenized_window_2015_10.gz
data/eos/dic_bow/bigram_bow_corpus_2015_10.mm
start dictionary 22:53:15
finished dictionary 22:53:18
Dictionary(29224 unique tokens: ['bipartisan_support', 'moment', '450', 'mark_heinrich', 'circumspect']...)
MmCorpus(8470 documents, 29224 features, 2106570 non-zero entries)
dynamic_nmf/data/windowbin/slices/tokenized_window_2015_11.gz
data/eos/dic_bow/bigram_bow_corpus_2015_11.mm
start dictionary 22:53:23
finished dictionary 22:53:26
Dictionary(30816 unique tokens: ['fiss

In [4]:
def generate_lda(dictionary, corpus, limit, window_docs=''):
    """
    Function to generate num_topics - LDA  
    
    Parameters:
    ----------
    dictionary : Gensim dictionary
    corpus : Gensim corpus
    limit : topic limit

    Range is incremented by 2
    """
    for num_topics in range(10, limit + 2, 2):
        print("Starting num topic {} ".format(num_topics) + datetime.now().strftime("%H:%M:%S"))
        mode_path = 'data/eos/lda/LDAmodel_%s_K_%02d.pkl' % (window_docs, num_topics)
        print(mode_path)
        lm = gensim.models.ldamulticore.LdaMulticore(corpus=corpus, num_topics=num_topics, 
                                                     id2word=dictionary, chunksize=2000, workers=7)
        joblib.dump(lm, mode_path)
        print("Finished num topic {} ".format(num_topics) + datetime.now().strftime("%H:%M:%S"))

In [5]:
%%time


def generate_lda_windows():
    window_list = glob.glob('dynamic_nmf/data/windowbin/slices/tokenized_window_20*.gz')
    window_list.sort()
    
    for window_docs in window_list:
        window_docs = window_docs[-10: -3]
        print("Starting topic modeing window {} ".format(window_docs) + datetime.now().strftime("%H:%M:%S"))
        print(dictionary_filepath % window_docs)
        print(bow_filepath % window_docs)
        # Load to memory
        corpus = gensim.corpora.MmCorpus(bow_filepath % window_docs)
        dictionary = gensim.corpora.Dictionary.load(dictionary_filepath % window_docs)

        generate_lda(dictionary=dictionary, corpus=corpus, limit=30, window_docs=window_docs)
        print("Finished topic modeing window {} ".format(window_docs) + datetime.now().strftime("%H:%M:%S"))
        
generate_lda_windows()

Starting topic modeing window 2012_01 22:56:25
data/eos/dic_bow/bigram_dict_2012_01.dict
data/eos/dic_bow/bigram_bow_corpus_2012_01.mm
Starting num topic 10 22:56:25
data/eos/lda/LDAmodel_2012_01_K_10.pkl
Finished num topic 10 22:56:59
Starting num topic 12 22:56:59
data/eos/lda/LDAmodel_2012_01_K_12.pkl
Finished num topic 12 22:57:37
Starting num topic 14 22:57:37
data/eos/lda/LDAmodel_2012_01_K_14.pkl
Finished num topic 14 22:58:22
Starting num topic 16 22:58:22
data/eos/lda/LDAmodel_2012_01_K_16.pkl
Finished num topic 16 22:59:11
Starting num topic 18 22:59:11
data/eos/lda/LDAmodel_2012_01_K_18.pkl
Finished num topic 18 23:00:07
Starting num topic 20 23:00:07
data/eos/lda/LDAmodel_2012_01_K_20.pkl
Finished num topic 20 23:01:12
Starting num topic 22 23:01:12
data/eos/lda/LDAmodel_2012_01_K_22.pkl
Finished num topic 22 23:02:22
Starting num topic 24 23:02:22
data/eos/lda/LDAmodel_2012_01_K_24.pkl
Finished num topic 24 23:03:42
Starting num topic 26 23:03:42
data/eos/lda/LDAmodel_2012

Finished num topic 16 00:44:02
Starting num topic 18 00:44:02
data/eos/lda/LDAmodel_2012_07_K_18.pkl
Finished num topic 18 00:44:58
Starting num topic 20 00:44:58
data/eos/lda/LDAmodel_2012_07_K_20.pkl
Finished num topic 20 00:46:01
Starting num topic 22 00:46:01
data/eos/lda/LDAmodel_2012_07_K_22.pkl
Finished num topic 22 00:47:11
Starting num topic 24 00:47:11
data/eos/lda/LDAmodel_2012_07_K_24.pkl
Finished num topic 24 00:48:28
Starting num topic 26 00:48:28
data/eos/lda/LDAmodel_2012_07_K_26.pkl
Finished num topic 26 00:49:51
Starting num topic 28 00:49:51
data/eos/lda/LDAmodel_2012_07_K_28.pkl
Finished num topic 28 00:51:21
Starting num topic 30 00:51:21
data/eos/lda/LDAmodel_2012_07_K_30.pkl
Finished num topic 30 00:53:00
Finished topic modeing window 2012_07 00:53:00
Starting topic modeing window 2012_08 00:53:00
data/eos/dic_bow/bigram_dict_2012_08.dict
data/eos/dic_bow/bigram_bow_corpus_2012_08.mm
Starting num topic 10 00:53:00
data/eos/lda/LDAmodel_2012_08_K_10.pkl
Finished n

Finished num topic 26 01:28:30
Starting num topic 28 01:28:30
data/eos/lda/LDAmodel_2013_01_K_28.pkl
Finished num topic 28 01:29:34
Starting num topic 30 01:29:34
data/eos/lda/LDAmodel_2013_01_K_30.pkl
Finished num topic 30 01:30:41
Finished topic modeing window 2013_01 01:30:41
Starting topic modeing window 2013_02 01:30:41
data/eos/dic_bow/bigram_dict_2013_02.dict
data/eos/dic_bow/bigram_bow_corpus_2013_02.mm
Starting num topic 10 01:30:41
data/eos/lda/LDAmodel_2013_02_K_10.pkl
Finished num topic 10 01:31:09
Starting num topic 12 01:31:09
data/eos/lda/LDAmodel_2013_02_K_12.pkl
Finished num topic 12 01:31:38
Starting num topic 14 01:31:38
data/eos/lda/LDAmodel_2013_02_K_14.pkl
Finished num topic 14 01:32:11
Starting num topic 16 01:32:11
data/eos/lda/LDAmodel_2013_02_K_16.pkl
Finished num topic 16 01:32:48
Starting num topic 18 01:32:48
data/eos/lda/LDAmodel_2013_02_K_18.pkl
Finished num topic 18 01:33:29
Starting num topic 20 01:33:29
data/eos/lda/LDAmodel_2013_02_K_20.pkl
Finished n

Finished num topic 10 02:49:23
Starting num topic 12 02:49:23
data/eos/lda/LDAmodel_2013_08_K_12.pkl
Finished num topic 12 02:50:03
Starting num topic 14 02:50:03
data/eos/lda/LDAmodel_2013_08_K_14.pkl
Finished num topic 14 02:50:52
Starting num topic 16 02:50:52
data/eos/lda/LDAmodel_2013_08_K_16.pkl
Finished num topic 16 02:51:47
Starting num topic 18 02:51:47
data/eos/lda/LDAmodel_2013_08_K_18.pkl
Finished num topic 18 02:52:49
Starting num topic 20 02:52:49
data/eos/lda/LDAmodel_2013_08_K_20.pkl
Finished num topic 20 02:53:59
Starting num topic 22 02:53:59
data/eos/lda/LDAmodel_2013_08_K_22.pkl
Finished num topic 22 02:55:16
Starting num topic 24 02:55:16
data/eos/lda/LDAmodel_2013_08_K_24.pkl
Finished num topic 24 02:56:44
Starting num topic 26 02:56:44
data/eos/lda/LDAmodel_2013_08_K_26.pkl
Finished num topic 26 02:58:21
Starting num topic 28 02:58:21
data/eos/lda/LDAmodel_2013_08_K_28.pkl
Finished num topic 28 03:00:04
Starting num topic 30 03:00:04
data/eos/lda/LDAmodel_2013_08

Finished num topic 20 03:45:28
Starting num topic 22 03:45:28
data/eos/lda/LDAmodel_2014_02_K_22.pkl
Finished num topic 22 03:46:17
Starting num topic 24 03:46:17
data/eos/lda/LDAmodel_2014_02_K_24.pkl
Finished num topic 24 03:47:09
Starting num topic 26 03:47:09
data/eos/lda/LDAmodel_2014_02_K_26.pkl
Finished num topic 26 03:48:07
Starting num topic 28 03:48:07
data/eos/lda/LDAmodel_2014_02_K_28.pkl
Finished num topic 28 03:49:06
Starting num topic 30 03:49:06
data/eos/lda/LDAmodel_2014_02_K_30.pkl
Finished num topic 30 03:50:10
Finished topic modeing window 2014_02 03:50:10
Starting topic modeing window 2014_03 03:50:10
data/eos/dic_bow/bigram_dict_2014_03.dict
data/eos/dic_bow/bigram_bow_corpus_2014_03.mm
Starting num topic 10 03:50:10
data/eos/lda/LDAmodel_2014_03_K_10.pkl
Finished num topic 10 03:50:40
Starting num topic 12 03:50:40
data/eos/lda/LDAmodel_2014_03_K_12.pkl
Finished num topic 12 03:51:13
Starting num topic 14 03:51:13
data/eos/lda/LDAmodel_2014_03_K_14.pkl
Finished n

Finished num topic 30 04:52:20
Finished topic modeing window 2014_08 04:52:20
Starting topic modeing window 2014_09 04:52:20
data/eos/dic_bow/bigram_dict_2014_09.dict
data/eos/dic_bow/bigram_bow_corpus_2014_09.mm
Starting num topic 10 04:52:20
data/eos/lda/LDAmodel_2014_09_K_10.pkl
Finished num topic 10 04:53:21
Starting num topic 12 04:53:21
data/eos/lda/LDAmodel_2014_09_K_12.pkl
Finished num topic 12 04:54:24
Starting num topic 14 04:54:24
data/eos/lda/LDAmodel_2014_09_K_14.pkl
Finished num topic 14 04:55:36
Starting num topic 16 04:55:36
data/eos/lda/LDAmodel_2014_09_K_16.pkl
Finished num topic 16 04:56:52
Starting num topic 18 04:56:52
data/eos/lda/LDAmodel_2014_09_K_18.pkl
Finished num topic 18 04:58:13
Starting num topic 20 04:58:13
data/eos/lda/LDAmodel_2014_09_K_20.pkl
Finished num topic 20 04:59:38
Starting num topic 22 04:59:38
data/eos/lda/LDAmodel_2014_09_K_22.pkl
Finished num topic 22 05:01:08
Starting num topic 24 05:01:08
data/eos/lda/LDAmodel_2014_09_K_24.pkl
Finished n

Finished num topic 14 05:48:17
Starting num topic 16 05:48:17
data/eos/lda/LDAmodel_2015_03_K_16.pkl
Finished num topic 16 05:48:17
Starting num topic 18 05:48:17
data/eos/lda/LDAmodel_2015_03_K_18.pkl
Finished num topic 18 05:48:18
Starting num topic 20 05:48:18
data/eos/lda/LDAmodel_2015_03_K_20.pkl
Finished num topic 20 05:48:19
Starting num topic 22 05:48:19
data/eos/lda/LDAmodel_2015_03_K_22.pkl
Finished num topic 22 05:48:20
Starting num topic 24 05:48:20
data/eos/lda/LDAmodel_2015_03_K_24.pkl
Finished num topic 24 05:48:21
Starting num topic 26 05:48:21
data/eos/lda/LDAmodel_2015_03_K_26.pkl
Finished num topic 26 05:48:22
Starting num topic 28 05:48:22
data/eos/lda/LDAmodel_2015_03_K_28.pkl
Finished num topic 28 05:48:23
Starting num topic 30 05:48:23
data/eos/lda/LDAmodel_2015_03_K_30.pkl
Finished num topic 30 05:48:24
Finished topic modeing window 2015_03 05:48:24
Starting topic modeing window 2015_04 05:48:24
data/eos/dic_bow/bigram_dict_2015_04.dict
data/eos/dic_bow/bigram_b

Finished num topic 24 05:49:11
Starting num topic 26 05:49:11
data/eos/lda/LDAmodel_2015_09_K_26.pkl
Finished num topic 26 05:49:13
Starting num topic 28 05:49:13
data/eos/lda/LDAmodel_2015_09_K_28.pkl
Finished num topic 28 05:49:15
Starting num topic 30 05:49:15
data/eos/lda/LDAmodel_2015_09_K_30.pkl
Finished num topic 30 05:49:16
Finished topic modeing window 2015_09 05:49:16
Starting topic modeing window 2015_10 05:49:16
data/eos/dic_bow/bigram_dict_2015_10.dict
data/eos/dic_bow/bigram_bow_corpus_2015_10.mm
Starting num topic 10 05:49:16
data/eos/lda/LDAmodel_2015_10_K_10.pkl
Finished num topic 10 05:49:35
Starting num topic 12 05:49:35
data/eos/lda/LDAmodel_2015_10_K_12.pkl
Finished num topic 12 05:49:59
Starting num topic 14 05:49:59
data/eos/lda/LDAmodel_2015_10_K_14.pkl
Finished num topic 14 05:50:34
Starting num topic 16 05:50:34
data/eos/lda/LDAmodel_2015_10_K_16.pkl
Finished num topic 16 05:51:12
Starting num topic 18 05:51:12
data/eos/lda/LDAmodel_2015_10_K_18.pkl
Finished n

Finished num topic 10 06:27:23
Starting num topic 12 06:27:23
data/eos/lda/LDAmodel_2016_04_K_12.pkl
Finished num topic 12 06:27:50
Starting num topic 14 06:27:50
data/eos/lda/LDAmodel_2016_04_K_14.pkl
Finished num topic 14 06:28:23
Starting num topic 16 06:28:23
data/eos/lda/LDAmodel_2016_04_K_16.pkl
Finished num topic 16 06:29:03
Starting num topic 18 06:29:03
data/eos/lda/LDAmodel_2016_04_K_18.pkl
Finished num topic 18 06:29:48
Starting num topic 20 06:29:48
data/eos/lda/LDAmodel_2016_04_K_20.pkl
Finished num topic 20 06:30:32
Starting num topic 22 06:30:32
data/eos/lda/LDAmodel_2016_04_K_22.pkl
Finished num topic 22 06:31:20
Starting num topic 24 06:31:20
data/eos/lda/LDAmodel_2016_04_K_24.pkl
Finished num topic 24 06:32:09
Starting num topic 26 06:32:09
data/eos/lda/LDAmodel_2016_04_K_26.pkl
Finished num topic 26 06:33:01
Starting num topic 28 06:33:01
data/eos/lda/LDAmodel_2016_04_K_28.pkl
Finished num topic 28 06:33:54
Starting num topic 30 06:33:54
data/eos/lda/LDAmodel_2016_04

Finished num topic 20 07:37:06
Starting num topic 22 07:37:06
data/eos/lda/LDAmodel_2016_10_K_22.pkl
Finished num topic 22 07:37:58
Starting num topic 24 07:37:58
data/eos/lda/LDAmodel_2016_10_K_24.pkl
Finished num topic 24 07:38:55
Starting num topic 26 07:38:55
data/eos/lda/LDAmodel_2016_10_K_26.pkl
Finished num topic 26 07:39:57
Starting num topic 28 07:39:57
data/eos/lda/LDAmodel_2016_10_K_28.pkl
Finished num topic 28 07:41:03
Starting num topic 30 07:41:03
data/eos/lda/LDAmodel_2016_10_K_30.pkl
Finished num topic 30 07:42:12
Finished topic modeing window 2016_10 07:42:12
Starting topic modeing window 2016_11 07:42:12
data/eos/dic_bow/bigram_dict_2016_11.dict
data/eos/dic_bow/bigram_bow_corpus_2016_11.mm
Starting num topic 10 07:42:12
data/eos/lda/LDAmodel_2016_11_K_10.pkl
Finished num topic 10 07:42:32
Starting num topic 12 07:42:32
data/eos/lda/LDAmodel_2016_11_K_12.pkl
Finished num topic 12 07:42:53
Starting num topic 14 07:42:53
data/eos/lda/LDAmodel_2016_11_K_14.pkl
Finished n

Finished num topic 30 08:14:16
Finished topic modeing window 2017_04 08:14:16
Starting topic modeing window 2017_05 08:14:16
data/eos/dic_bow/bigram_dict_2017_05.dict
data/eos/dic_bow/bigram_bow_corpus_2017_05.mm
Starting num topic 10 08:14:16
data/eos/lda/LDAmodel_2017_05_K_10.pkl
Finished num topic 10 08:14:41
Starting num topic 12 08:14:41
data/eos/lda/LDAmodel_2017_05_K_12.pkl
Finished num topic 12 08:15:08
Starting num topic 14 08:15:08
data/eos/lda/LDAmodel_2017_05_K_14.pkl
Finished num topic 14 08:15:40
Starting num topic 16 08:15:40
data/eos/lda/LDAmodel_2017_05_K_16.pkl
Finished num topic 16 08:16:14
Starting num topic 18 08:16:14
data/eos/lda/LDAmodel_2017_05_K_18.pkl
Finished num topic 18 08:16:52
Starting num topic 20 08:16:52
data/eos/lda/LDAmodel_2017_05_K_20.pkl
Finished num topic 20 08:17:31
Starting num topic 22 08:17:31
data/eos/lda/LDAmodel_2017_05_K_22.pkl
Finished num topic 22 08:18:14
Starting num topic 24 08:18:14
data/eos/lda/LDAmodel_2017_05_K_24.pkl
Finished n

# LDA evaluation

In [3]:
# To get the topic words from the model
def get_topics(ldamodel, num_topics, num_words):
    topics = []
    for topic_id, topic in ldamodel.show_topics(num_topics=num_topics, num_words=num_words, formatted=False):
        topic = [word for word, _ in topic]
        topics.append(topic)
    return topics

# ldamodel = joblib.load('data/eos/lda/28_LDAmodel_EOS.pkl') 
# print(get_topics(ldamodel))

In [None]:
%%time
               

def eval_lda(fname, corpus_text, topic_num):
    
    ldamodel = joblib.load(fname) 
    print("Topic modeling loaded, starting coherence calculation " + fname + datetime.now().strftime("%H:%M:%S"))
    cm = CoherenceModel(dictionary=dictionary, corpus=corpus, 
                        texts=corpus_text, topics=get_topics(ldamodel, topic_num, 10), coherence='c_v')
#     print(cm)
    coherence = cm.get_coherence()
    print("finished model " + fname + " coherence {} ".format(coherence) + datetime.now().strftime("%H:%M:%S"))
    return [topic_num, coherence]


def execute_eval(topic_model_path):
    model_list = glob.glob(topic_model_path)
    model_list.sort()
    corpus_text = MyDocuments('data/eos/ngram/bigram_transformed_docs_%s.gz' % 'all')
    coherence_list = []
    for fname in model_list:
        topic_num = int(fname[13:15])
        coherence = eval_lda(fname, corpus_text, topic_num)
        coherence_list.append(coherence)
    
    coherence_list.sort(key=lambda x: x[0])
    print(coherence_list)
        
    # Show graph
    indices = [x[0] for x in coherence_list]
    y = [abs(x[1]) for x in coherence_list]
    n = len(coherence_list)
    x = range(n)
    plt.bar(x, y, width=0.2, tick_label=indices, align='center')
    plt.xlabel('Models')
    plt.ylabel('Coherence Value')
    plt.show()
    plt.savefig('data/eos/graphs/%s_coherence.png' % topic_model_path[15:-4], dpi=1200)


In [None]:
%%time

topic_model_path = 'data/eos/lda/*_LDAmodel_EOS.pkl'
# topic_model_path = 'data/eos/lda/*_LSImodel_EOS.pkl'
# topic_model_path = 'data/eos/lda/*_Malletmodel_EOS.pkl'
execute_eval(topic_model_path)

# Evaluate topic LSA Mallet models

In [6]:
def mallet_lda_build(num_topics, dictionary, corpus, mallet_model_path):
    mallet_bin = '/home/sonic/sonic/mallet-2.0.8/bin/mallet'
    print ("start modeling LDA Mallet " + datetime.now().strftime("%H:%M:%S"))
    lda_mallet_model = gensim.models.wrappers.LdaMallet(mallet_bin, corpus=corpus, workers=7,
                                               num_topics=num_topics, id2word=dictionary)

    joblib.dump(lda_mallet_model, mallet_model_path)
    print ("finished modeling LDA Mallet, number of topics {}, ".format(num_topics) + datetime.now().strftime("%H:%M:%S"))
    return lda_mallet_model


In [7]:
def lsi_model_build(num_topics, dictionary, corpus, lsi_model_path):
    print ("start modeling lsi " + datetime.now().strftime("%H:%M:%S"))
    lsimodel = gensim.models.lsimodel.LsiModel(corpus=corpus, num_topics=num_topics, id2word=dictionary)
    joblib.dump(lsimodel, lsi_model_path)
    #     lsimodel.show_topics(num_topics=10)  # Showing only the top 5 topics
    print ("finished modeling lsi, number of topics {}, ".format(num_topics) + datetime.now().strftime("%H:%M:%S"))
    return lsimodel


In [14]:
%%time 


def generate_mallet(limit):
    """
    Function to generate num_topics - LDA  
    
    Parameters:
    ----------
    limit : topic limit

    Range is incremented by 2
    """
    window_list = glob.glob('dynamic_nmf/data/windowbin/slices/tokenized_window_20*.gz')
    window_list.sort()
    
    for window_docs in window_list:
        window_docs = window_docs[-10: -3]
        # Load to memory
        corpus = gensim.corpora.MmCorpus(bow_filepath % window_docs)
        dictionary = gensim.corpora.Dictionary.load(dictionary_filepath % window_docs)
        for num_topics in range(10, limit + 2, 2):
            print("Starting num topic {} ".format(num_topics) + datetime.now().strftime("%H:%M:%S"))
            mallet_model_path = 'data/eos/mallet/Malletmodel_%s_K_%02d.pkl' % (window_docs, num_topics)
            mallet_lda_build(num_topics, dictionary, corpus, mallet_model_path)
            print("Finished topic modeing window {} ".format(window_docs) + datetime.now().strftime("%H:%M:%S"))

        
        
def generate_lsi(limit):
    """
    Perform TF-IDF ttransformation
    """
    window_list = glob.glob('dynamic_nmf/data/windowbin/slices/tokenized_window_20*.gz')
    window_list.sort()
    
    for window_docs in window_list:
        window_docs = window_docs[-10: -3]
        # Load to memory
        corpus = gensim.corpora.MmCorpus(bow_filepath % window_docs)
        tfidf = gensim.models.TfidfModel(corpus) 
        dictionary = gensim.corpora.Dictionary.load(dictionary_filepath % window_docs)
        tfidf_corpus = tfidf[corpus]
        for num_topics in range(10, limit + 2, 2):
            print("Starting num topic {} ".format(num_topics) + datetime.now().strftime("%H:%M:%S"))
#             print(dictionary_filepath % window_docs)
#             print(bow_filepath % window_docs)
            lsi_model_path = 'data/eos/lsi/LSImodel_%s_K_%02d.pkl' % (window_docs,num_topics)
            lsi_model_build(num_topics, dictionary, tfidf_corpus, lsi_model_path)
            print("Finished topic modeing window {} ".format(window_docs) + datetime.now().strftime("%H:%M:%S"))
        
    

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 6.44 µs


In [13]:
%%time

dictionary_filepath = 'data/eos/dic_bow/bigram_dict_%s.dict' 
bow_filepath = 'data/eos/dic_bow/bigram_bow_corpus_%s.mm' 

generate_lsi(limit=30)

Starting num topic 10 23:50:42
data/eos/dic_bow/bigram_dict_2012_01.dict
data/eos/dic_bow/bigram_bow_corpus_2012_01.mm
start modeling lsi 23:50:42
finished modeling lsi, number of topics 10, 23:51:04
Finished topic modeing window 2012_01 23:51:04
Starting num topic 12 23:51:04
data/eos/dic_bow/bigram_dict_2012_01.dict
data/eos/dic_bow/bigram_bow_corpus_2012_01.mm
start modeling lsi 23:51:04
finished modeling lsi, number of topics 12, 23:51:27
Finished topic modeing window 2012_01 23:51:27
Starting num topic 14 23:51:27
data/eos/dic_bow/bigram_dict_2012_01.dict
data/eos/dic_bow/bigram_bow_corpus_2012_01.mm
start modeling lsi 23:51:27
finished modeling lsi, number of topics 14, 23:51:49
Finished topic modeing window 2012_01 23:51:49
Starting num topic 16 23:51:49
data/eos/dic_bow/bigram_dict_2012_01.dict
data/eos/dic_bow/bigram_bow_corpus_2012_01.mm
start modeling lsi 23:51:49
finished modeling lsi, number of topics 16, 23:52:12
Finished topic modeing window 2012_01 23:52:12
Starting num

data/eos/dic_bow/bigram_dict_2012_04.dict
data/eos/dic_bow/bigram_bow_corpus_2012_04.mm
start modeling lsi 00:04:07
finished modeling lsi, number of topics 10, 00:04:48
Finished topic modeing window 2012_04 00:04:48
Starting num topic 12 00:04:48
data/eos/dic_bow/bigram_dict_2012_04.dict
data/eos/dic_bow/bigram_bow_corpus_2012_04.mm
start modeling lsi 00:04:48
finished modeling lsi, number of topics 12, 00:05:30
Finished topic modeing window 2012_04 00:05:30
Starting num topic 14 00:05:30
data/eos/dic_bow/bigram_dict_2012_04.dict
data/eos/dic_bow/bigram_bow_corpus_2012_04.mm
start modeling lsi 00:05:30
finished modeling lsi, number of topics 14, 00:06:13
Finished topic modeing window 2012_04 00:06:13
Starting num topic 16 00:06:13
data/eos/dic_bow/bigram_dict_2012_04.dict
data/eos/dic_bow/bigram_bow_corpus_2012_04.mm
start modeling lsi 00:06:13
finished modeling lsi, number of topics 16, 00:06:56
Finished topic modeing window 2012_04 00:06:56
Starting num topic 18 00:06:56
data/eos/dic

finished modeling lsi, number of topics 10, 00:24:40
Finished topic modeing window 2012_07 00:24:40
Starting num topic 12 00:24:40
data/eos/dic_bow/bigram_dict_2012_07.dict
data/eos/dic_bow/bigram_bow_corpus_2012_07.mm
start modeling lsi 00:24:40
finished modeling lsi, number of topics 12, 00:24:59
Finished topic modeing window 2012_07 00:24:59
Starting num topic 14 00:24:59
data/eos/dic_bow/bigram_dict_2012_07.dict
data/eos/dic_bow/bigram_bow_corpus_2012_07.mm
start modeling lsi 00:24:59
finished modeling lsi, number of topics 14, 00:25:18
Finished topic modeing window 2012_07 00:25:18
Starting num topic 16 00:25:18
data/eos/dic_bow/bigram_dict_2012_07.dict
data/eos/dic_bow/bigram_bow_corpus_2012_07.mm
start modeling lsi 00:25:18
finished modeling lsi, number of topics 16, 00:25:38
Finished topic modeing window 2012_07 00:25:38
Starting num topic 18 00:25:38
data/eos/dic_bow/bigram_dict_2012_07.dict
data/eos/dic_bow/bigram_bow_corpus_2012_07.mm
start modeling lsi 00:25:38
finished mod

data/eos/dic_bow/bigram_dict_2012_10.dict
data/eos/dic_bow/bigram_bow_corpus_2012_10.mm
start modeling lsi 00:30:33
finished modeling lsi, number of topics 12, 00:30:35
Finished topic modeing window 2012_10 00:30:35
Starting num topic 14 00:30:35
data/eos/dic_bow/bigram_dict_2012_10.dict
data/eos/dic_bow/bigram_bow_corpus_2012_10.mm
start modeling lsi 00:30:35
finished modeling lsi, number of topics 14, 00:30:37
Finished topic modeing window 2012_10 00:30:37
Starting num topic 16 00:30:37
data/eos/dic_bow/bigram_dict_2012_10.dict
data/eos/dic_bow/bigram_bow_corpus_2012_10.mm
start modeling lsi 00:30:37
finished modeling lsi, number of topics 16, 00:30:39
Finished topic modeing window 2012_10 00:30:39
Starting num topic 18 00:30:39
data/eos/dic_bow/bigram_dict_2012_10.dict
data/eos/dic_bow/bigram_bow_corpus_2012_10.mm
start modeling lsi 00:30:39
finished modeling lsi, number of topics 18, 00:30:41
Finished topic modeing window 2012_10 00:30:41
Starting num topic 20 00:30:41
data/eos/dic

start modeling lsi 00:32:59
finished modeling lsi, number of topics 12, 00:33:12
Finished topic modeing window 2013_01 00:33:12
Starting num topic 14 00:33:12
data/eos/dic_bow/bigram_dict_2013_01.dict
data/eos/dic_bow/bigram_bow_corpus_2013_01.mm
start modeling lsi 00:33:12
finished modeling lsi, number of topics 14, 00:33:25
Finished topic modeing window 2013_01 00:33:25
Starting num topic 16 00:33:25
data/eos/dic_bow/bigram_dict_2013_01.dict
data/eos/dic_bow/bigram_bow_corpus_2013_01.mm
start modeling lsi 00:33:25
finished modeling lsi, number of topics 16, 00:33:37
Finished topic modeing window 2013_01 00:33:37
Starting num topic 18 00:33:37
data/eos/dic_bow/bigram_dict_2013_01.dict
data/eos/dic_bow/bigram_bow_corpus_2013_01.mm
start modeling lsi 00:33:37
finished modeling lsi, number of topics 18, 00:33:51
Finished topic modeing window 2013_01 00:33:51
Starting num topic 20 00:33:51
data/eos/dic_bow/bigram_dict_2013_01.dict
data/eos/dic_bow/bigram_bow_corpus_2013_01.mm
start modeli

data/eos/dic_bow/bigram_dict_2013_04.dict
data/eos/dic_bow/bigram_bow_corpus_2013_04.mm
start modeling lsi 00:41:19
finished modeling lsi, number of topics 14, 00:41:33
Finished topic modeing window 2013_04 00:41:33
Starting num topic 16 00:41:33
data/eos/dic_bow/bigram_dict_2013_04.dict
data/eos/dic_bow/bigram_bow_corpus_2013_04.mm
start modeling lsi 00:41:33
finished modeling lsi, number of topics 16, 00:41:48
Finished topic modeing window 2013_04 00:41:48
Starting num topic 18 00:41:48
data/eos/dic_bow/bigram_dict_2013_04.dict
data/eos/dic_bow/bigram_bow_corpus_2013_04.mm
start modeling lsi 00:41:48
finished modeling lsi, number of topics 18, 00:42:02
Finished topic modeing window 2013_04 00:42:02
Starting num topic 20 00:42:02
data/eos/dic_bow/bigram_dict_2013_04.dict
data/eos/dic_bow/bigram_bow_corpus_2013_04.mm
start modeling lsi 00:42:02
finished modeling lsi, number of topics 20, 00:42:17
Finished topic modeing window 2013_04 00:42:17
Starting num topic 22 00:42:17
data/eos/dic

data/eos/dic_bow/bigram_dict_2013_07.dict
data/eos/dic_bow/bigram_bow_corpus_2013_07.mm
start modeling lsi 01:00:18
finished modeling lsi, number of topics 16, 01:00:20
Finished topic modeing window 2013_07 01:00:20
Starting num topic 18 01:00:20
data/eos/dic_bow/bigram_dict_2013_07.dict
data/eos/dic_bow/bigram_bow_corpus_2013_07.mm
start modeling lsi 01:00:20
finished modeling lsi, number of topics 18, 01:00:22
Finished topic modeing window 2013_07 01:00:22
Starting num topic 20 01:00:22
data/eos/dic_bow/bigram_dict_2013_07.dict
data/eos/dic_bow/bigram_bow_corpus_2013_07.mm
start modeling lsi 01:00:22
finished modeling lsi, number of topics 20, 01:00:24
Finished topic modeing window 2013_07 01:00:24
Starting num topic 22 01:00:24
data/eos/dic_bow/bigram_dict_2013_07.dict
data/eos/dic_bow/bigram_bow_corpus_2013_07.mm
start modeling lsi 01:00:24
finished modeling lsi, number of topics 22, 01:00:26
Finished topic modeing window 2013_07 01:00:26
Starting num topic 24 01:00:26
data/eos/dic

finished modeling lsi, number of topics 18, 01:04:34
Finished topic modeing window 2013_10 01:04:34
Starting num topic 20 01:04:34
data/eos/dic_bow/bigram_dict_2013_10.dict
data/eos/dic_bow/bigram_bow_corpus_2013_10.mm
start modeling lsi 01:04:34
finished modeling lsi, number of topics 20, 01:04:37
Finished topic modeing window 2013_10 01:04:37
Starting num topic 22 01:04:37
data/eos/dic_bow/bigram_dict_2013_10.dict
data/eos/dic_bow/bigram_bow_corpus_2013_10.mm
start modeling lsi 01:04:37
finished modeling lsi, number of topics 22, 01:04:39
Finished topic modeing window 2013_10 01:04:39
Starting num topic 24 01:04:39
data/eos/dic_bow/bigram_dict_2013_10.dict
data/eos/dic_bow/bigram_bow_corpus_2013_10.mm
start modeling lsi 01:04:39
finished modeling lsi, number of topics 24, 01:04:41
Finished topic modeing window 2013_10 01:04:41
Starting num topic 26 01:04:41
data/eos/dic_bow/bigram_dict_2013_10.dict
data/eos/dic_bow/bigram_bow_corpus_2013_10.mm
start modeling lsi 01:04:41
finished mod

Starting num topic 20 01:10:51
data/eos/dic_bow/bigram_dict_2014_01.dict
data/eos/dic_bow/bigram_bow_corpus_2014_01.mm
start modeling lsi 01:10:51
finished modeling lsi, number of topics 20, 01:11:10
Finished topic modeing window 2014_01 01:11:10
Starting num topic 22 01:11:10
data/eos/dic_bow/bigram_dict_2014_01.dict
data/eos/dic_bow/bigram_bow_corpus_2014_01.mm
start modeling lsi 01:11:10
finished modeling lsi, number of topics 22, 01:11:32
Finished topic modeing window 2014_01 01:11:32
Starting num topic 24 01:11:32
data/eos/dic_bow/bigram_dict_2014_01.dict
data/eos/dic_bow/bigram_bow_corpus_2014_01.mm
start modeling lsi 01:11:32
finished modeling lsi, number of topics 24, 01:11:54
Finished topic modeing window 2014_01 01:11:54
Starting num topic 26 01:11:54
data/eos/dic_bow/bigram_dict_2014_01.dict
data/eos/dic_bow/bigram_bow_corpus_2014_01.mm
start modeling lsi 01:11:54
finished modeling lsi, number of topics 26, 01:12:13
Finished topic modeing window 2014_01 01:12:13
Starting num

finished modeling lsi, number of topics 20, 01:18:14
Finished topic modeing window 2014_04 01:18:14
Starting num topic 22 01:18:14
data/eos/dic_bow/bigram_dict_2014_04.dict
data/eos/dic_bow/bigram_bow_corpus_2014_04.mm
start modeling lsi 01:18:14
finished modeling lsi, number of topics 22, 01:18:31
Finished topic modeing window 2014_04 01:18:31
Starting num topic 24 01:18:31
data/eos/dic_bow/bigram_dict_2014_04.dict
data/eos/dic_bow/bigram_bow_corpus_2014_04.mm
start modeling lsi 01:18:31
finished modeling lsi, number of topics 24, 01:18:49
Finished topic modeing window 2014_04 01:18:49
Starting num topic 26 01:18:49
data/eos/dic_bow/bigram_dict_2014_04.dict
data/eos/dic_bow/bigram_bow_corpus_2014_04.mm
start modeling lsi 01:18:49
finished modeling lsi, number of topics 26, 01:19:06
Finished topic modeing window 2014_04 01:19:06
Starting num topic 28 01:19:06
data/eos/dic_bow/bigram_dict_2014_04.dict
data/eos/dic_bow/bigram_bow_corpus_2014_04.mm
start modeling lsi 01:19:06
finished mod

data/eos/dic_bow/bigram_dict_2014_07.dict
data/eos/dic_bow/bigram_bow_corpus_2014_07.mm
start modeling lsi 01:24:48
finished modeling lsi, number of topics 22, 01:24:51
Finished topic modeing window 2014_07 01:24:51
Starting num topic 24 01:24:51
data/eos/dic_bow/bigram_dict_2014_07.dict
data/eos/dic_bow/bigram_bow_corpus_2014_07.mm
start modeling lsi 01:24:51
finished modeling lsi, number of topics 24, 01:24:54
Finished topic modeing window 2014_07 01:24:54
Starting num topic 26 01:24:54
data/eos/dic_bow/bigram_dict_2014_07.dict
data/eos/dic_bow/bigram_bow_corpus_2014_07.mm
start modeling lsi 01:24:54
finished modeling lsi, number of topics 26, 01:24:57
Finished topic modeing window 2014_07 01:24:57
Starting num topic 28 01:24:57
data/eos/dic_bow/bigram_dict_2014_07.dict
data/eos/dic_bow/bigram_bow_corpus_2014_07.mm
start modeling lsi 01:24:57
finished modeling lsi, number of topics 28, 01:25:00
Finished topic modeing window 2014_07 01:25:00
Starting num topic 30 01:25:00
data/eos/dic

data/eos/dic_bow/bigram_bow_corpus_2014_10.mm
start modeling lsi 01:34:02
finished modeling lsi, number of topics 24, 01:34:28
Finished topic modeing window 2014_10 01:34:28
Starting num topic 26 01:34:28
data/eos/dic_bow/bigram_dict_2014_10.dict
data/eos/dic_bow/bigram_bow_corpus_2014_10.mm
start modeling lsi 01:34:28
finished modeling lsi, number of topics 26, 01:34:54
Finished topic modeing window 2014_10 01:34:54
Starting num topic 28 01:34:54
data/eos/dic_bow/bigram_dict_2014_10.dict
data/eos/dic_bow/bigram_bow_corpus_2014_10.mm
start modeling lsi 01:34:54
finished modeling lsi, number of topics 28, 01:35:21
Finished topic modeing window 2014_10 01:35:21
Starting num topic 30 01:35:21
data/eos/dic_bow/bigram_dict_2014_10.dict
data/eos/dic_bow/bigram_bow_corpus_2014_10.mm
start modeling lsi 01:35:21
finished modeling lsi, number of topics 30, 01:35:45
Finished topic modeing window 2014_10 01:35:45
Starting num topic 10 01:35:51
data/eos/dic_bow/bigram_dict_2014_11.dict
data/eos/dic

finished modeling lsi, number of topics 30, 01:39:40
Finished topic modeing window 2015_02 01:39:40
Starting num topic 10 01:39:40
data/eos/dic_bow/bigram_dict_2015_03.dict
data/eos/dic_bow/bigram_bow_corpus_2015_03.mm
start modeling lsi 01:39:40
finished modeling lsi, number of topics 10, 01:39:40
Finished topic modeing window 2015_03 01:39:40
Starting num topic 12 01:39:40
data/eos/dic_bow/bigram_dict_2015_03.dict
data/eos/dic_bow/bigram_bow_corpus_2015_03.mm
start modeling lsi 01:39:40
finished modeling lsi, number of topics 12, 01:39:40
Finished topic modeing window 2015_03 01:39:40
Starting num topic 14 01:39:40
data/eos/dic_bow/bigram_dict_2015_03.dict
data/eos/dic_bow/bigram_bow_corpus_2015_03.mm
start modeling lsi 01:39:40
finished modeling lsi, number of topics 14, 01:39:40
Finished topic modeing window 2015_03 01:39:40
Starting num topic 16 01:39:40
data/eos/dic_bow/bigram_dict_2015_03.dict
data/eos/dic_bow/bigram_bow_corpus_2015_03.mm
start modeling lsi 01:39:40
finished mod

finished modeling lsi, number of topics 16, 01:39:43
Finished topic modeing window 2015_06 01:39:43
Starting num topic 18 01:39:43
data/eos/dic_bow/bigram_dict_2015_06.dict
data/eos/dic_bow/bigram_bow_corpus_2015_06.mm
start modeling lsi 01:39:43
finished modeling lsi, number of topics 18, 01:39:43
Finished topic modeing window 2015_06 01:39:43
Starting num topic 20 01:39:43
data/eos/dic_bow/bigram_dict_2015_06.dict
data/eos/dic_bow/bigram_bow_corpus_2015_06.mm
start modeling lsi 01:39:43
finished modeling lsi, number of topics 20, 01:39:43
Finished topic modeing window 2015_06 01:39:43
Starting num topic 22 01:39:43
data/eos/dic_bow/bigram_dict_2015_06.dict
data/eos/dic_bow/bigram_bow_corpus_2015_06.mm
start modeling lsi 01:39:43
finished modeling lsi, number of topics 22, 01:39:43
Finished topic modeing window 2015_06 01:39:43
Starting num topic 24 01:39:43
data/eos/dic_bow/bigram_dict_2015_06.dict
data/eos/dic_bow/bigram_bow_corpus_2015_06.mm
start modeling lsi 01:39:43
finished mod

finished modeling lsi, number of topics 18, 01:39:45
Finished topic modeing window 2015_09 01:39:45
Starting num topic 20 01:39:45
data/eos/dic_bow/bigram_dict_2015_09.dict
data/eos/dic_bow/bigram_bow_corpus_2015_09.mm
start modeling lsi 01:39:45
finished modeling lsi, number of topics 20, 01:39:45
Finished topic modeing window 2015_09 01:39:45
Starting num topic 22 01:39:45
data/eos/dic_bow/bigram_dict_2015_09.dict
data/eos/dic_bow/bigram_bow_corpus_2015_09.mm
start modeling lsi 01:39:45
finished modeling lsi, number of topics 22, 01:39:45
Finished topic modeing window 2015_09 01:39:45
Starting num topic 24 01:39:45
data/eos/dic_bow/bigram_dict_2015_09.dict
data/eos/dic_bow/bigram_bow_corpus_2015_09.mm
start modeling lsi 01:39:45
finished modeling lsi, number of topics 24, 01:39:45
Finished topic modeing window 2015_09 01:39:45
Starting num topic 26 01:39:45
data/eos/dic_bow/bigram_dict_2015_09.dict
data/eos/dic_bow/bigram_bow_corpus_2015_09.mm
start modeling lsi 01:39:45
finished mod

finished modeling lsi, number of topics 20, 01:43:14
Finished topic modeing window 2015_12 01:43:14
Starting num topic 22 01:43:14
data/eos/dic_bow/bigram_dict_2015_12.dict
data/eos/dic_bow/bigram_bow_corpus_2015_12.mm
start modeling lsi 01:43:14
finished modeling lsi, number of topics 22, 01:43:19
Finished topic modeing window 2015_12 01:43:19
Starting num topic 24 01:43:19
data/eos/dic_bow/bigram_dict_2015_12.dict
data/eos/dic_bow/bigram_bow_corpus_2015_12.mm
start modeling lsi 01:43:19
finished modeling lsi, number of topics 24, 01:43:24
Finished topic modeing window 2015_12 01:43:24
Starting num topic 26 01:43:24
data/eos/dic_bow/bigram_dict_2015_12.dict
data/eos/dic_bow/bigram_bow_corpus_2015_12.mm
start modeling lsi 01:43:24
finished modeling lsi, number of topics 26, 01:43:29
Finished topic modeing window 2015_12 01:43:29
Starting num topic 28 01:43:29
data/eos/dic_bow/bigram_dict_2015_12.dict
data/eos/dic_bow/bigram_bow_corpus_2015_12.mm
start modeling lsi 01:43:29
finished mod

data/eos/dic_bow/bigram_bow_corpus_2016_03.mm
start modeling lsi 01:46:47
finished modeling lsi, number of topics 22, 01:46:57
Finished topic modeing window 2016_03 01:46:57
Starting num topic 24 01:46:57
data/eos/dic_bow/bigram_dict_2016_03.dict
data/eos/dic_bow/bigram_bow_corpus_2016_03.mm
start modeling lsi 01:46:57
finished modeling lsi, number of topics 24, 01:47:07
Finished topic modeing window 2016_03 01:47:07
Starting num topic 26 01:47:07
data/eos/dic_bow/bigram_dict_2016_03.dict
data/eos/dic_bow/bigram_bow_corpus_2016_03.mm
start modeling lsi 01:47:07
finished modeling lsi, number of topics 26, 01:47:17
Finished topic modeing window 2016_03 01:47:17
Starting num topic 28 01:47:17
data/eos/dic_bow/bigram_dict_2016_03.dict
data/eos/dic_bow/bigram_bow_corpus_2016_03.mm
start modeling lsi 01:47:17
finished modeling lsi, number of topics 28, 01:47:28
Finished topic modeing window 2016_03 01:47:28
Starting num topic 30 01:47:28
data/eos/dic_bow/bigram_dict_2016_03.dict
data/eos/dic

finished modeling lsi, number of topics 24, 01:52:52
Finished topic modeing window 2016_06 01:52:52
Starting num topic 26 01:52:52
data/eos/dic_bow/bigram_dict_2016_06.dict
data/eos/dic_bow/bigram_bow_corpus_2016_06.mm
start modeling lsi 01:52:52
finished modeling lsi, number of topics 26, 01:53:05
Finished topic modeing window 2016_06 01:53:05
Starting num topic 28 01:53:05
data/eos/dic_bow/bigram_dict_2016_06.dict
data/eos/dic_bow/bigram_bow_corpus_2016_06.mm
start modeling lsi 01:53:05
finished modeling lsi, number of topics 28, 01:53:18
Finished topic modeing window 2016_06 01:53:18
Starting num topic 30 01:53:18
data/eos/dic_bow/bigram_dict_2016_06.dict
data/eos/dic_bow/bigram_bow_corpus_2016_06.mm
start modeling lsi 01:53:18
finished modeling lsi, number of topics 30, 01:53:30
Finished topic modeing window 2016_06 01:53:30
Starting num topic 10 01:53:34
data/eos/dic_bow/bigram_dict_2016_07.dict
data/eos/dic_bow/bigram_bow_corpus_2016_07.mm
start modeling lsi 01:53:34
finished mod

Finished topic modeing window 2016_09 01:59:53
Starting num topic 26 01:59:53
data/eos/dic_bow/bigram_dict_2016_09.dict
data/eos/dic_bow/bigram_bow_corpus_2016_09.mm
start modeling lsi 01:59:53
finished modeling lsi, number of topics 26, 02:00:07
Finished topic modeing window 2016_09 02:00:07
Starting num topic 28 02:00:07
data/eos/dic_bow/bigram_dict_2016_09.dict
data/eos/dic_bow/bigram_bow_corpus_2016_09.mm
start modeling lsi 02:00:07
finished modeling lsi, number of topics 28, 02:00:21
Finished topic modeing window 2016_09 02:00:21
Starting num topic 30 02:00:21
data/eos/dic_bow/bigram_dict_2016_09.dict
data/eos/dic_bow/bigram_bow_corpus_2016_09.mm
start modeling lsi 02:00:21
finished modeling lsi, number of topics 30, 02:00:34
Finished topic modeing window 2016_09 02:00:34
Starting num topic 10 02:00:39
data/eos/dic_bow/bigram_dict_2016_10.dict
data/eos/dic_bow/bigram_bow_corpus_2016_10.mm
start modeling lsi 02:00:39
finished modeling lsi, number of topics 10, 02:00:48
Finished top

start modeling lsi 02:04:06
finished modeling lsi, number of topics 26, 02:04:13
Finished topic modeing window 2016_12 02:04:13
Starting num topic 28 02:04:13
data/eos/dic_bow/bigram_dict_2016_12.dict
data/eos/dic_bow/bigram_bow_corpus_2016_12.mm
start modeling lsi 02:04:13
finished modeling lsi, number of topics 28, 02:04:21
Finished topic modeing window 2016_12 02:04:21
Starting num topic 30 02:04:21
data/eos/dic_bow/bigram_dict_2016_12.dict
data/eos/dic_bow/bigram_bow_corpus_2016_12.mm
start modeling lsi 02:04:21
finished modeling lsi, number of topics 30, 02:04:28
Finished topic modeing window 2016_12 02:04:28
Starting num topic 10 02:04:30
data/eos/dic_bow/bigram_dict_2017_01.dict
data/eos/dic_bow/bigram_bow_corpus_2017_01.mm
start modeling lsi 02:04:30
finished modeling lsi, number of topics 10, 02:04:36
Finished topic modeing window 2017_01 02:04:36
Starting num topic 12 02:04:36
data/eos/dic_bow/bigram_dict_2017_01.dict
data/eos/dic_bow/bigram_bow_corpus_2017_01.mm
start modeli

data/eos/dic_bow/bigram_dict_2017_03.dict
data/eos/dic_bow/bigram_bow_corpus_2017_03.mm
start modeling lsi 02:06:41
finished modeling lsi, number of topics 28, 02:06:45
Finished topic modeing window 2017_03 02:06:45
Starting num topic 30 02:06:45
data/eos/dic_bow/bigram_dict_2017_03.dict
data/eos/dic_bow/bigram_bow_corpus_2017_03.mm
start modeling lsi 02:06:45
finished modeling lsi, number of topics 30, 02:06:48
Finished topic modeing window 2017_03 02:06:48
Starting num topic 10 02:06:51
data/eos/dic_bow/bigram_dict_2017_04.dict
data/eos/dic_bow/bigram_bow_corpus_2017_04.mm
start modeling lsi 02:06:51
finished modeling lsi, number of topics 10, 02:06:57
Finished topic modeing window 2017_04 02:06:57
Starting num topic 12 02:06:57
data/eos/dic_bow/bigram_dict_2017_04.dict
data/eos/dic_bow/bigram_bow_corpus_2017_04.mm
start modeling lsi 02:06:57
finished modeling lsi, number of topics 12, 02:07:03
Finished topic modeing window 2017_04 02:07:03
Starting num topic 14 02:07:03
data/eos/dic

finished modeling lsi, number of topics 28, 02:09:38
Finished topic modeing window 2017_06 02:09:38
Starting num topic 30 02:09:38
data/eos/dic_bow/bigram_dict_2017_06.dict
data/eos/dic_bow/bigram_bow_corpus_2017_06.mm
start modeling lsi 02:09:38
finished modeling lsi, number of topics 30, 02:09:43
Finished topic modeing window 2017_06 02:09:43
Starting num topic 10 02:09:43
data/eos/dic_bow/bigram_dict_2017_07.dict
data/eos/dic_bow/bigram_bow_corpus_2017_07.mm
start modeling lsi 02:09:43
finished modeling lsi, number of topics 10, 02:09:43
Finished topic modeing window 2017_07 02:09:43
Starting num topic 12 02:09:43
data/eos/dic_bow/bigram_dict_2017_07.dict
data/eos/dic_bow/bigram_bow_corpus_2017_07.mm
start modeling lsi 02:09:43
finished modeling lsi, number of topics 12, 02:09:43
Finished topic modeing window 2017_07 02:09:43
Starting num topic 14 02:09:43
data/eos/dic_bow/bigram_dict_2017_07.dict
data/eos/dic_bow/bigram_bow_corpus_2017_07.mm
start modeling lsi 02:09:43
finished mod

In [15]:
%%time

limit = 30
generate_mallet(limit=limit)

Starting num topic 10 20:47:15
start modeling LDA Mallet 20:47:15
finished modeling LDA Mallet, number of topics 10, 20:51:34
Finished topic modeing window 2012_01 20:51:34
Starting num topic 12 20:51:34
start modeling LDA Mallet 20:51:34
finished modeling LDA Mallet, number of topics 12, 20:55:50
Finished topic modeing window 2012_01 20:55:50
Starting num topic 14 20:55:50
start modeling LDA Mallet 20:55:50
finished modeling LDA Mallet, number of topics 14, 21:00:22
Finished topic modeing window 2012_01 21:00:22
Starting num topic 16 21:00:22
start modeling LDA Mallet 21:00:22
finished modeling LDA Mallet, number of topics 16, 21:05:15
Finished topic modeing window 2012_01 21:05:15
Starting num topic 18 21:05:15
start modeling LDA Mallet 21:05:15
finished modeling LDA Mallet, number of topics 18, 21:10:02
Finished topic modeing window 2012_01 21:10:02
Starting num topic 20 21:10:02
start modeling LDA Mallet 21:10:02
finished modeling LDA Mallet, number of topics 20, 21:15:02
Finished 

finished modeling LDA Mallet, number of topics 16, 01:44:49
Finished topic modeing window 2012_05 01:44:49
Starting num topic 18 01:44:49
start modeling LDA Mallet 01:44:49
finished modeling LDA Mallet, number of topics 18, 01:51:01
Finished topic modeing window 2012_05 01:51:01
Starting num topic 20 01:51:01
start modeling LDA Mallet 01:51:01
finished modeling LDA Mallet, number of topics 20, 01:57:07
Finished topic modeing window 2012_05 01:57:07
Starting num topic 22 01:57:07
start modeling LDA Mallet 01:57:07
finished modeling LDA Mallet, number of topics 22, 02:03:23
Finished topic modeing window 2012_05 02:03:23
Starting num topic 24 02:03:23
start modeling LDA Mallet 02:03:23
finished modeling LDA Mallet, number of topics 24, 02:10:25
Finished topic modeing window 2012_05 02:10:25
Starting num topic 26 02:10:25
start modeling LDA Mallet 02:10:25
finished modeling LDA Mallet, number of topics 26, 02:17:07
Finished topic modeing window 2012_05 02:17:07
Starting num topic 28 02:17:

finished modeling LDA Mallet, number of topics 24, 04:56:23
Finished topic modeing window 2012_09 04:56:23
Starting num topic 26 04:56:23
start modeling LDA Mallet 04:56:23
finished modeling LDA Mallet, number of topics 26, 04:57:05
Finished topic modeing window 2012_09 04:57:05
Starting num topic 28 04:57:05
start modeling LDA Mallet 04:57:05
finished modeling LDA Mallet, number of topics 28, 04:57:47
Finished topic modeing window 2012_09 04:57:47
Starting num topic 30 04:57:47
start modeling LDA Mallet 04:57:47
finished modeling LDA Mallet, number of topics 30, 04:58:30
Finished topic modeing window 2012_09 04:58:30
Starting num topic 10 04:58:30
start modeling LDA Mallet 04:58:30
finished modeling LDA Mallet, number of topics 10, 04:59:09
Finished topic modeing window 2012_10 04:59:09
Starting num topic 12 04:59:09
start modeling LDA Mallet 04:59:09
finished modeling LDA Mallet, number of topics 12, 04:59:49
Finished topic modeing window 2012_10 04:59:49
Starting num topic 14 04:59:

finished modeling LDA Mallet, number of topics 10, 06:08:42
Finished topic modeing window 2013_02 06:08:42
Starting num topic 12 06:08:42
start modeling LDA Mallet 06:08:42
finished modeling LDA Mallet, number of topics 12, 06:11:20
Finished topic modeing window 2013_02 06:11:20
Starting num topic 14 06:11:20
start modeling LDA Mallet 06:11:20
finished modeling LDA Mallet, number of topics 14, 06:14:07
Finished topic modeing window 2013_02 06:14:07
Starting num topic 16 06:14:07
start modeling LDA Mallet 06:14:07
finished modeling LDA Mallet, number of topics 16, 06:16:51
Finished topic modeing window 2013_02 06:16:51
Starting num topic 18 06:16:51
start modeling LDA Mallet 06:16:51
finished modeling LDA Mallet, number of topics 18, 06:19:36
Finished topic modeing window 2013_02 06:19:36
Starting num topic 20 06:19:36
start modeling LDA Mallet 06:19:36
finished modeling LDA Mallet, number of topics 20, 06:22:22
Finished topic modeing window 2013_02 06:22:22
Starting num topic 22 06:22:

finished modeling LDA Mallet, number of topics 18, 09:59:59
Finished topic modeing window 2013_06 09:59:59
Starting num topic 20 09:59:59
start modeling LDA Mallet 09:59:59
finished modeling LDA Mallet, number of topics 20, 10:11:21
Finished topic modeing window 2013_06 10:11:21
Starting num topic 22 10:11:21
start modeling LDA Mallet 10:11:21
finished modeling LDA Mallet, number of topics 22, 10:23:06
Finished topic modeing window 2013_06 10:23:06
Starting num topic 24 10:23:06
start modeling LDA Mallet 10:23:06
finished modeling LDA Mallet, number of topics 24, 10:34:12
Finished topic modeing window 2013_06 10:34:12
Starting num topic 26 10:34:12
start modeling LDA Mallet 10:34:12
finished modeling LDA Mallet, number of topics 26, 10:45:28
Finished topic modeing window 2013_06 10:45:28
Starting num topic 28 10:45:28
start modeling LDA Mallet 10:45:28
finished modeling LDA Mallet, number of topics 28, 10:57:25
Finished topic modeing window 2013_06 10:57:25
Starting num topic 30 10:57:

finished modeling LDA Mallet, number of topics 26, 12:24:47
Finished topic modeing window 2013_10 12:24:47
Starting num topic 28 12:24:47
start modeling LDA Mallet 12:24:47
finished modeling LDA Mallet, number of topics 28, 12:25:28
Finished topic modeing window 2013_10 12:25:28
Starting num topic 30 12:25:28
start modeling LDA Mallet 12:25:28
finished modeling LDA Mallet, number of topics 30, 12:26:09
Finished topic modeing window 2013_10 12:26:09
Starting num topic 10 12:26:09
start modeling LDA Mallet 12:26:09
finished modeling LDA Mallet, number of topics 10, 12:28:24
Finished topic modeing window 2013_11 12:28:24
Starting num topic 12 12:28:24
start modeling LDA Mallet 12:28:24
finished modeling LDA Mallet, number of topics 12, 12:30:40
Finished topic modeing window 2013_11 12:30:40
Starting num topic 14 12:30:40
start modeling LDA Mallet 12:30:40
finished modeling LDA Mallet, number of topics 14, 12:33:00
Finished topic modeing window 2013_11 12:33:00
Starting num topic 16 12:33:

finished modeling LDA Mallet, number of topics 12, 15:03:55
Finished topic modeing window 2014_03 15:03:55
Starting num topic 14 15:03:55
start modeling LDA Mallet 15:03:55
finished modeling LDA Mallet, number of topics 14, 15:06:32
Finished topic modeing window 2014_03 15:06:32
Starting num topic 16 15:06:32
start modeling LDA Mallet 15:06:32
finished modeling LDA Mallet, number of topics 16, 15:09:12
Finished topic modeing window 2014_03 15:09:12
Starting num topic 18 15:09:12
start modeling LDA Mallet 15:09:12
finished modeling LDA Mallet, number of topics 18, 15:11:47
Finished topic modeing window 2014_03 15:11:47
Starting num topic 20 15:11:47
start modeling LDA Mallet 15:11:47
finished modeling LDA Mallet, number of topics 20, 15:14:35
Finished topic modeing window 2014_03 15:14:35
Starting num topic 22 15:14:35
start modeling LDA Mallet 15:14:35
finished modeling LDA Mallet, number of topics 22, 15:17:12
Finished topic modeing window 2014_03 15:17:12
Starting num topic 24 15:17:

finished modeling LDA Mallet, number of topics 20, 17:42:27
Finished topic modeing window 2014_07 17:42:27
Starting num topic 22 17:42:27
start modeling LDA Mallet 17:42:27
finished modeling LDA Mallet, number of topics 22, 17:43:21
Finished topic modeing window 2014_07 17:43:21
Starting num topic 24 17:43:21
start modeling LDA Mallet 17:43:21
finished modeling LDA Mallet, number of topics 24, 17:44:17
Finished topic modeing window 2014_07 17:44:17
Starting num topic 26 17:44:17
start modeling LDA Mallet 17:44:17
finished modeling LDA Mallet, number of topics 26, 17:45:12
Finished topic modeing window 2014_07 17:45:12
Starting num topic 28 17:45:12
start modeling LDA Mallet 17:45:12
finished modeling LDA Mallet, number of topics 28, 17:46:08
Finished topic modeing window 2014_07 17:46:08
Starting num topic 30 17:46:08
start modeling LDA Mallet 17:46:08
finished modeling LDA Mallet, number of topics 30, 17:47:04
Finished topic modeing window 2014_07 17:47:04
Starting num topic 10 17:47:

finished modeling LDA Mallet, number of topics 28, 20:52:35
Finished topic modeing window 2014_11 20:52:35
Starting num topic 30 20:52:35
start modeling LDA Mallet 20:52:35
finished modeling LDA Mallet, number of topics 30, 20:56:22
Finished topic modeing window 2014_11 20:56:22
Starting num topic 10 20:56:22
start modeling LDA Mallet 20:56:22
finished modeling LDA Mallet, number of topics 10, 20:57:56
Finished topic modeing window 2014_12 20:57:56
Starting num topic 12 20:57:56
start modeling LDA Mallet 20:57:56
finished modeling LDA Mallet, number of topics 12, 20:59:30
Finished topic modeing window 2014_12 20:59:30
Starting num topic 14 20:59:30
start modeling LDA Mallet 20:59:30
finished modeling LDA Mallet, number of topics 14, 21:01:04
Finished topic modeing window 2014_12 21:01:04
Starting num topic 16 21:01:04
start modeling LDA Mallet 21:01:04
finished modeling LDA Mallet, number of topics 16, 21:02:39
Finished topic modeing window 2014_12 21:02:39
Starting num topic 18 21:02:

finished modeling LDA Mallet, number of topics 14, 21:33:22
Finished topic modeing window 2015_04 21:33:22
Starting num topic 16 21:33:22
start modeling LDA Mallet 21:33:22
finished modeling LDA Mallet, number of topics 16, 21:33:53
Finished topic modeing window 2015_04 21:33:53
Starting num topic 18 21:33:53
start modeling LDA Mallet 21:33:53
finished modeling LDA Mallet, number of topics 18, 21:34:23
Finished topic modeing window 2015_04 21:34:23
Starting num topic 20 21:34:23
start modeling LDA Mallet 21:34:23
finished modeling LDA Mallet, number of topics 20, 21:34:54
Finished topic modeing window 2015_04 21:34:54
Starting num topic 22 21:34:54
start modeling LDA Mallet 21:34:54
finished modeling LDA Mallet, number of topics 22, 21:35:25
Finished topic modeing window 2015_04 21:35:25
Starting num topic 24 21:35:25
start modeling LDA Mallet 21:35:25
finished modeling LDA Mallet, number of topics 24, 21:35:56
Finished topic modeing window 2015_04 21:35:56
Starting num topic 26 21:35:

finished modeling LDA Mallet, number of topics 22, 21:58:06
Finished topic modeing window 2015_08 21:58:06
Starting num topic 24 21:58:06
start modeling LDA Mallet 21:58:06
finished modeling LDA Mallet, number of topics 24, 21:58:37
Finished topic modeing window 2015_08 21:58:37
Starting num topic 26 21:58:37
start modeling LDA Mallet 21:58:37
finished modeling LDA Mallet, number of topics 26, 21:59:08
Finished topic modeing window 2015_08 21:59:08
Starting num topic 28 21:59:08
start modeling LDA Mallet 21:59:08
finished modeling LDA Mallet, number of topics 28, 21:59:39
Finished topic modeing window 2015_08 21:59:39
Starting num topic 30 21:59:39
start modeling LDA Mallet 21:59:39
finished modeling LDA Mallet, number of topics 30, 22:00:10
Finished topic modeing window 2015_08 22:00:10
Starting num topic 10 22:00:10
start modeling LDA Mallet 22:00:10
finished modeling LDA Mallet, number of topics 10, 22:00:41
Finished topic modeing window 2015_09 22:00:41
Starting num topic 12 22:00:

finished modeling LDA Mallet, number of topics 30, 23:05:46
Finished topic modeing window 2015_12 23:05:46
Starting num topic 10 23:05:46
start modeling LDA Mallet 23:05:46
finished modeling LDA Mallet, number of topics 10, 23:07:04
Finished topic modeing window 2016_01 23:07:04
Starting num topic 12 23:07:04
start modeling LDA Mallet 23:07:04
finished modeling LDA Mallet, number of topics 12, 23:08:23
Finished topic modeing window 2016_01 23:08:23
Starting num topic 14 23:08:23
start modeling LDA Mallet 23:08:23
finished modeling LDA Mallet, number of topics 14, 23:09:42
Finished topic modeing window 2016_01 23:09:42
Starting num topic 16 23:09:42
start modeling LDA Mallet 23:09:42
finished modeling LDA Mallet, number of topics 16, 23:11:06
Finished topic modeing window 2016_01 23:11:06
Starting num topic 18 23:11:06
start modeling LDA Mallet 23:11:06
finished modeling LDA Mallet, number of topics 18, 23:12:25
Finished topic modeing window 2016_01 23:12:25
Starting num topic 20 23:12:

finished modeling LDA Mallet, number of topics 16, 00:48:25
Finished topic modeing window 2016_05 00:48:25
Starting num topic 18 00:48:25
start modeling LDA Mallet 00:48:25
finished modeling LDA Mallet, number of topics 18, 00:52:20
Finished topic modeing window 2016_05 00:52:20
Starting num topic 20 00:52:20
start modeling LDA Mallet 00:52:20
finished modeling LDA Mallet, number of topics 20, 00:56:10
Finished topic modeing window 2016_05 00:56:10
Starting num topic 22 00:56:10
start modeling LDA Mallet 00:56:10
finished modeling LDA Mallet, number of topics 22, 00:59:57
Finished topic modeing window 2016_05 00:59:57
Starting num topic 24 00:59:57
start modeling LDA Mallet 00:59:57
finished modeling LDA Mallet, number of topics 24, 01:03:45
Finished topic modeing window 2016_05 01:03:45
Starting num topic 26 01:03:45
start modeling LDA Mallet 01:03:45
finished modeling LDA Mallet, number of topics 26, 01:07:29
Finished topic modeing window 2016_05 01:07:29
Starting num topic 28 01:07:

finished modeling LDA Mallet, number of topics 24, 03:43:21
Finished topic modeing window 2016_09 03:43:21
Starting num topic 26 03:43:21
start modeling LDA Mallet 03:43:21
finished modeling LDA Mallet, number of topics 26, 03:47:13
Finished topic modeing window 2016_09 03:47:13
Starting num topic 28 03:47:13
start modeling LDA Mallet 03:47:13
finished modeling LDA Mallet, number of topics 28, 03:51:11
Finished topic modeing window 2016_09 03:51:11
Starting num topic 30 03:51:11
start modeling LDA Mallet 03:51:11
finished modeling LDA Mallet, number of topics 30, 03:55:09
Finished topic modeing window 2016_09 03:55:09
Starting num topic 10 03:55:09
start modeling LDA Mallet 03:55:09
finished modeling LDA Mallet, number of topics 10, 03:57:42
Finished topic modeing window 2016_10 03:57:42
Starting num topic 12 03:57:42
start modeling LDA Mallet 03:57:42
finished modeling LDA Mallet, number of topics 12, 04:00:15
Finished topic modeing window 2016_10 04:00:15
Starting num topic 14 04:00:

finished modeling LDA Mallet, number of topics 10, 05:18:52
Finished topic modeing window 2017_02 05:18:52
Starting num topic 12 05:18:52
start modeling LDA Mallet 05:18:52
finished modeling LDA Mallet, number of topics 12, 05:19:38
Finished topic modeing window 2017_02 05:19:38
Starting num topic 14 05:19:38
start modeling LDA Mallet 05:19:38
finished modeling LDA Mallet, number of topics 14, 05:20:23
Finished topic modeing window 2017_02 05:20:23
Starting num topic 16 05:20:23
start modeling LDA Mallet 05:20:23
finished modeling LDA Mallet, number of topics 16, 05:21:07
Finished topic modeing window 2017_02 05:21:07
Starting num topic 18 05:21:07
start modeling LDA Mallet 05:21:07
finished modeling LDA Mallet, number of topics 18, 05:21:52
Finished topic modeing window 2017_02 05:21:52
Starting num topic 20 05:21:52
start modeling LDA Mallet 05:21:52
finished modeling LDA Mallet, number of topics 20, 05:22:37
Finished topic modeing window 2017_02 05:22:37
Starting num topic 22 05:22:

finished modeling LDA Mallet, number of topics 18, 06:19:29
Finished topic modeing window 2017_06 06:19:29
Starting num topic 20 06:19:29
start modeling LDA Mallet 06:19:29
finished modeling LDA Mallet, number of topics 20, 06:20:58
Finished topic modeing window 2017_06 06:20:58
Starting num topic 22 06:20:58
start modeling LDA Mallet 06:20:58
finished modeling LDA Mallet, number of topics 22, 06:22:27
Finished topic modeing window 2017_06 06:22:27
Starting num topic 24 06:22:27
start modeling LDA Mallet 06:22:27
finished modeling LDA Mallet, number of topics 24, 06:23:55
Finished topic modeing window 2017_06 06:23:55
Starting num topic 26 06:23:55
start modeling LDA Mallet 06:23:55
finished modeling LDA Mallet, number of topics 26, 06:25:26
Finished topic modeing window 2017_06 06:25:26
Starting num topic 28 06:25:26
start modeling LDA Mallet 06:25:26
finished modeling LDA Mallet, number of topics 28, 06:26:55
Finished topic modeing window 2017_06 06:26:55
Starting num topic 30 06:26:

In [None]:
#(ldamodel.print_topics(num_topics=20, num_words=10))

In [None]:
# Visualize the LDA topics
pyLDAvis.enable_notebook()

lda_vis = pyLDAvis.gensim.prepare(ldamodel, corpus, dictionary)
pyLDAvis.display(lda_vis)

In [None]:
def explore_topic(ldamodel, topic_number, topn=30):
    """
    accept a user-supplied topic number and
    print out a formatted list of the top terms
    """
        
    print (u'{:20} {}'.format(u'term', u'frequency') + u'\n')

    for term, frequency in ldamodel.show_topic(topic_number, topn=30):
        print (u'{:20} {:.3f}'.format(term, round(frequency, 3)))

In [None]:
ldamodel = joblib.load('data/eos/lda/28_LDAmodel_EOS.pkl') 
explore_topic(ldamodel=ldamodel,topic_number=23)
