In [1]:
import sys
sys.path.append('../..')
import src.data.data_loader as dl
from src.features.vectorizer import Vectorizer
from src.models.topic_models import TopicModel
import pandas as pd
import sklearn.utils as skutil
pd.set_option('display.max_rows', None)



In [2]:
# --- Corpus selection -------------------------------------------------
typex = 'forum'
language = 'english'

# --- Vectorization: document-frequency bounds handed to the Vectorizer --
min_df, max_df = 0.005, 0.9

# --- Topic modeling ---------------------------------------------------
algorithm = 'lda'
num_topics = 110

# Share of the (shuffled) document rows that goes into the training split.
train_percentage = 0.9

# Hyper-parameters forwarded to TopicModel.
alpha, eta = 'auto', 0.01
passes, iterations = 60, 200
chunksize = 5000
# Passed on as the model's `decay` and `offset` arguments respectively.
kappa = tau_0 = 0

In [3]:
# Load the forum threads selected by the config values (language / typex),
# requesting the "tagged" kind from the project's data loader.
data = dl.get_forum_threads_by_language(language, typex,kind = "tagged")
# Keep only the 'thread_texts' entry. Each item is later split with str.split(),
# so these are presumably plain strings — TODO confirm against the loader.
texts = data['thread_texts']

def min_length(texts, min_characters):
    """Strip short tokens out of every text.

    Each text is split on whitespace, tokens with fewer than
    ``min_characters`` characters are discarded, and the remaining tokens
    are joined back together with single spaces.

    Args:
        texts: iterable of strings to filter.
        min_characters: minimum length (inclusive) a token needs to be kept.

    Returns:
        A list with one filtered string per input text, in input order.
        A text in which no token survives becomes the empty string.
    """
    return [
        " ".join(word for word in document.split() if len(word) >= min_characters)
        for document in texts
    ]

# Drop every token shorter than 3 characters from each thread text
# (the result replaces `texts` as a plain list of strings).
texts = min_length(texts,3)

In [4]:
# Build the project's Vectorizer over the filtered texts with the configured
# min_df / max_df bounds ('tf' presumably selects term-frequency weighting — TODO confirm).
vec = Vectorizer('tf', texts, min_df=min_df, max_df=max_df)
# Persist the vectorizer; the file name is assembled from `algorithm` and `language`
# plus the fixed "thread_texts" / "tagged" parts.
vec.save('tagged/vectorizer/{}_{}_{}_{}_pos.pkl'.format(algorithm, language, "thread_texts","tagged"))

# Document/token matrix for the same texts (sliced by row into train/test further down)
# and the id -> token lookup that is handed to the topic model.
document_term_matrix = vec.get_document_token_matrix(texts)
id2token = vec.get_id2token_mapping()

In [5]:
# Shuffle the rows with a fixed seed so the split below is reproducible.
# NOTE: this rebinds `document_term_matrix`, so re-running the cell shuffles
# the already shuffled matrix again.
document_term_matrix = skutil.shuffle(document_term_matrix, random_state=1)
num_docs, num_terms = document_term_matrix.shape

# The leading `train_percentage` share of rows becomes the training set,
# everything after that boundary is held out as the test set.
train = int(num_docs * train_percentage)
train_document_term_matrix = document_term_matrix[:train, :]
test_document_term_matrix = document_term_matrix[train:, :]

In [6]:
# Train the topic model on the training split (the test split is passed along
# to the wrapper as well) and persist it.
#
# The algorithm name is taken from the config value `algorithm` rather than a
# second hard-coded 'lda' literal, so the model that is trained always matches
# the algorithm name embedded in the saved file name.
#
# kappa / tau_0 from the config are forwarded as `decay` / `offset`.
# NOTE(review): the captured training log reports rho=1.000000 and
# topic diff=inf on every pass — presumably related to decay=0 / offset=0;
# confirm that this is intended.
model = TopicModel(
    algorithm,
    num_topics,
    train_document_term_matrix,
    id2token,
    alpha=alpha,
    eta=eta,
    iterations=iterations,
    passes=passes,
    chunksize=chunksize,
    test_document_term_matrix=test_document_term_matrix,
    decay=kappa,
    offset=tau_0,
)
# NOTE(review): the 'lda' directory component of this path is still fixed;
# only the file name follows `algorithm`.
model.save('tagged/topic_models/lda/{}_{}_{}_{}_{}.pkl'.format(algorithm, language, "thread_texts","tagged",num_topics))

2018-11-11 03:10:15,352 : INFO : using autotuned alpha, starting with [0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.009090909, 0.0090

2018-11-11 03:11:39,676 : INFO : topic #57 (0.008): 0.000*"picking" + 0.000*"pic" + 0.000*"pile" + 0.000*"piggy" + 0.000*"pig" + 0.000*"piece" + 0.000*"pie" + 0.000*"picture" + 0.000*"picky" + 0.000*"pickle"
2018-11-11 03:11:39,677 : INFO : topic #14 (0.008): 0.053*"<url>" + 0.051*"organic" + 0.034*"and" + 0.031*"what" + 0.031*"define" + 0.030*"regulation" + 0.024*"method" + 0.020*"denmark" + 0.018*"here" + 0.018*"can"
2018-11-11 03:11:39,677 : INFO : topic #64 (0.013): 0.039*"and" + 0.034*"not" + 0.030*"food" + 0.029*"organic" + 0.022*"that" + 0.016*"have" + 0.013*"for" + 0.011*"can" + 0.010*"eat" + 0.010*"with"
2018-11-11 03:11:39,678 : INFO : topic #81 (0.015): 0.045*"and" + 0.024*"not" + 0.021*"have" + 0.019*"that" + 0.014*"for" + 0.012*"this" + 0.011*"food" + 0.010*"gmo" + 0.009*"with" + 0.009*"monsanto"
2018-11-11 03:11:39,679 : INFO : topic #32 (0.016): 0.043*"and" + 0.039*"organic" + 0.022*"that" + 0.020*"not" + 0.018*"food" + 0.016*"have" + 0.014*"for" + 0.010*"more" + 0.010*"

2018-11-11 03:12:48,175 : INFO : topic #29 (0.007): 0.000*"picking" + 0.000*"pic" + 0.000*"pile" + 0.000*"piggy" + 0.000*"pig" + 0.000*"piece" + 0.000*"pie" + 0.000*"picture" + 0.000*"picky" + 0.000*"pickle"
2018-11-11 03:12:48,175 : INFO : topic #74 (0.007): 0.000*"picking" + 0.000*"pic" + 0.000*"pile" + 0.000*"piggy" + 0.000*"pig" + 0.000*"piece" + 0.000*"pie" + 0.000*"picture" + 0.000*"picky" + 0.000*"pickle"
2018-11-11 03:12:48,176 : INFO : topic #64 (0.018): 0.040*"and" + 0.037*"not" + 0.033*"food" + 0.028*"organic" + 0.023*"that" + 0.018*"have" + 0.013*"for" + 0.013*"eat" + 0.012*"can" + 0.010*"all"
2018-11-11 03:12:48,177 : INFO : topic #32 (0.023): 0.045*"organic" + 0.041*"and" + 0.026*"that" + 0.023*"food" + 0.021*"not" + 0.015*"have" + 0.015*"for" + 0.013*"pesticide" + 0.011*"more" + 0.009*"use"
2018-11-11 03:12:48,178 : INFO : topic #81 (0.023): 0.046*"and" + 0.022*"not" + 0.020*"have" + 0.018*"that" + 0.014*"for" + 0.012*"monsanto" + 0.012*"food" + 0.012*"this" + 0.011*"gmo

2018-11-11 03:13:59,647 : INFO : topic #74 (0.006): 0.000*"picking" + 0.000*"pic" + 0.000*"pile" + 0.000*"piggy" + 0.000*"pig" + 0.000*"piece" + 0.000*"pie" + 0.000*"picture" + 0.000*"picky" + 0.000*"pickle"
2018-11-11 03:13:59,647 : INFO : topic #64 (0.027): 0.042*"and" + 0.039*"not" + 0.032*"food" + 0.025*"organic" + 0.024*"that" + 0.020*"have" + 0.013*"eat" + 0.013*"can" + 0.013*"for" + 0.011*"all"
2018-11-11 03:13:59,648 : INFO : topic #32 (0.029): 0.047*"organic" + 0.040*"and" + 0.028*"that" + 0.025*"food" + 0.022*"not" + 0.015*"have" + 0.015*"for" + 0.013*"pesticide" + 0.011*"more" + 0.009*"there"
2018-11-11 03:13:59,649 : INFO : topic #81 (0.032): 0.048*"and" + 0.021*"not" + 0.020*"have" + 0.017*"that" + 0.015*"monsanto" + 0.014*"for" + 0.013*"food" + 0.012*"gmo" + 0.011*"this" + 0.009*"all"
2018-11-11 03:13:59,654 : INFO : topic diff=inf, rho=1.000000
2018-11-11 03:14:02,786 : INFO : Epoch 8: perplexity estimate: 343.61542951542305
2018-11-11 03:14:02,976 : INFO : PROGRESS: pas

2018-11-11 03:15:02,307 : INFO : topic #32 (0.035): 0.047*"organic" + 0.040*"and" + 0.029*"that" + 0.025*"food" + 0.022*"not" + 0.015*"have" + 0.015*"for" + 0.012*"pesticide" + 0.011*"more" + 0.010*"there"
2018-11-11 03:15:02,308 : INFO : topic #64 (0.039): 0.042*"and" + 0.040*"not" + 0.028*"food" + 0.025*"that" + 0.021*"organic" + 0.020*"have" + 0.014*"can" + 0.013*"eat" + 0.013*"for" + 0.012*"all"
2018-11-11 03:15:02,309 : INFO : topic #81 (0.040): 0.049*"and" + 0.020*"not" + 0.020*"have" + 0.017*"monsanto" + 0.016*"that" + 0.014*"for" + 0.013*"gmo" + 0.013*"food" + 0.010*"this" + 0.009*"all"
2018-11-11 03:15:02,313 : INFO : topic diff=inf, rho=1.000000
2018-11-11 03:15:05,368 : INFO : Epoch 11: perplexity estimate: 318.18324954855257
2018-11-11 03:15:05,560 : INFO : PROGRESS: pass 12, at document #2948/2948
2018-11-11 03:15:22,043 : INFO : optimized alpha [0.012697421, 0.008120117, 0.012171058, 0.008043272, 0.025795177, 0.007706168, 0.005707177, 0.007464083, 0.010191458, 0.008583672

2018-11-11 03:16:03,419 : INFO : topic #81 (0.047): 0.051*"and" + 0.020*"have" + 0.019*"not" + 0.019*"monsanto" + 0.015*"that" + 0.014*"gmo" + 0.013*"for" + 0.013*"food" + 0.009*"this" + 0.009*"with"
2018-11-11 03:16:03,419 : INFO : topic #64 (0.057): 0.043*"and" + 0.041*"not" + 0.025*"that" + 0.024*"food" + 0.021*"have" + 0.018*"organic" + 0.013*"can" + 0.013*"all" + 0.013*"for" + 0.012*"eat"
2018-11-11 03:16:03,424 : INFO : topic diff=inf, rho=1.000000
2018-11-11 03:16:06,303 : INFO : Epoch 14: perplexity estimate: 300.8442114517344
2018-11-11 03:16:06,501 : INFO : PROGRESS: pass 15, at document #2948/2948
2018-11-11 03:16:23,082 : INFO : optimized alpha [0.016881619, 0.008503523, 0.0145648755, 0.008685212, 0.031873442, 0.0077985125, 0.0053691105, 0.007497821, 0.011208643, 0.009041764, 0.008012327, 0.013012124, 0.0075929104, 0.015577474, 0.0065414156, 0.026760327, 0.018516568, 0.020314096, 0.020368824, 0.033259366, 0.027976153, 0.006413581, 0.012780644, 0.007345943, 0.008987421, 0.00

2018-11-11 03:17:06,739 : INFO : topic #64 (0.078): 0.044*"and" + 0.041*"not" + 0.025*"that" + 0.022*"food" + 0.021*"have" + 0.015*"organic" + 0.013*"can" + 0.013*"all" + 0.013*"for" + 0.012*"people"
2018-11-11 03:17:06,750 : INFO : topic diff=inf, rho=1.000000
2018-11-11 03:17:10,092 : INFO : Epoch 17: perplexity estimate: 287.83092489695
2018-11-11 03:17:10,218 : INFO : PROGRESS: pass 18, at document #2948/2948
2018-11-11 03:17:26,095 : INFO : optimized alpha [0.022043886, 0.008927968, 0.017511025, 0.009487653, 0.037722338, 0.007939632, 0.0051235775, 0.007597018, 0.012290926, 0.009560026, 0.008428539, 0.01499963, 0.007959411, 0.01872616, 0.0066302815, 0.031416304, 0.021095498, 0.022743983, 0.023091907, 0.041027155, 0.03180566, 0.0063926126, 0.015308451, 0.007673318, 0.010571134, 0.010442015, 0.010801045, 0.006650528, 0.00790362, 0.0044660624, 0.010684156, 0.010332493, 0.04832001, 0.013696816, 0.012042753, 0.013135162, 0.013815422, 0.01609828, 0.00812265, 0.010041066, 0.0063068424, 0.

2018-11-11 03:18:00,314 : INFO : topic #64 (0.102): 0.044*"and" + 0.041*"not" + 0.025*"that" + 0.022*"have" + 0.020*"food" + 0.013*"all" + 0.013*"for" + 0.013*"people" + 0.013*"can" + 0.012*"organic"
2018-11-11 03:18:00,319 : INFO : topic diff=inf, rho=1.000000
2018-11-11 03:18:02,724 : INFO : Epoch 20: perplexity estimate: 278.052978673085
2018-11-11 03:18:02,863 : INFO : PROGRESS: pass 21, at document #2948/2948
2018-11-11 03:18:16,673 : INFO : optimized alpha [0.027683055, 0.0093848035, 0.020832526, 0.010428414, 0.042693608, 0.008090936, 0.0049339705, 0.0077488758, 0.0134883225, 0.0100339325, 0.00886174, 0.017115768, 0.008391864, 0.022264637, 0.006829761, 0.03568973, 0.023527524, 0.025118874, 0.025590379, 0.04872428, 0.03549612, 0.0064326697, 0.018236194, 0.008059749, 0.012367708, 0.01154524, 0.012189315, 0.006701394, 0.008231163, 0.004173447, 0.012640129, 0.011229119, 0.054100033, 0.015373451, 0.013543058, 0.014642677, 0.0153037, 0.018914089, 0.00851915, 0.010938542, 0.0062702587, 

2018-11-11 03:18:50,503 : INFO : topic #64 (0.127): 0.044*"and" + 0.041*"not" + 0.025*"that" + 0.022*"have" + 0.018*"food" + 0.014*"all" + 0.013*"people" + 0.013*"for" + 0.012*"can" + 0.012*"what"
2018-11-11 03:18:50,508 : INFO : topic diff=inf, rho=1.000000
2018-11-11 03:18:52,922 : INFO : Epoch 23: perplexity estimate: 269.8529561362004
2018-11-11 03:18:53,051 : INFO : PROGRESS: pass 24, at document #2948/2948
2018-11-11 03:19:07,291 : INFO : optimized alpha [0.03355688, 0.0098841665, 0.02438454, 0.0115265455, 0.04697821, 0.008272991, 0.004778639, 0.007939631, 0.014730568, 0.010512017, 0.0093207685, 0.019244226, 0.008867288, 0.02598949, 0.007109014, 0.038972147, 0.025872517, 0.027458722, 0.027727703, 0.055834718, 0.039173227, 0.00652008, 0.021283515, 0.0085309325, 0.014301714, 0.012684855, 0.013701455, 0.0067608235, 0.008654915, 0.0039244974, 0.014901385, 0.012164906, 0.05936706, 0.017071338, 0.015191731, 0.016212894, 0.016748937, 0.022121204, 0.008975176, 0.011893944, 0.006260875, 0

2018-11-11 03:19:40,352 : INFO : topic diff=inf, rho=1.000000
2018-11-11 03:19:42,804 : INFO : Epoch 26: perplexity estimate: 263.3018633073859
2018-11-11 03:19:42,941 : INFO : PROGRESS: pass 27, at document #2948/2948
2018-11-11 03:19:55,980 : INFO : optimized alpha [0.038640812, 0.010395775, 0.027883608, 0.012695678, 0.05064515, 0.008476858, 0.0046546482, 0.008158463, 0.015935177, 0.010988725, 0.009818863, 0.021384407, 0.00933111, 0.029583676, 0.007422798, 0.041633278, 0.027965225, 0.02961349, 0.029413285, 0.062207785, 0.04266133, 0.0066444725, 0.024160309, 0.009052859, 0.016333124, 0.013852304, 0.015224196, 0.0068321046, 0.009160779, 0.003709255, 0.01731181, 0.0131056, 0.064553335, 0.018690335, 0.017031971, 0.017786972, 0.018160226, 0.025822164, 0.00944112, 0.01292314, 0.006276952, 0.023353854, 0.0055379397, 0.008153388, 0.012386288, 0.0114375055, 0.007542155, 0.045946527, 0.011464755, 0.044644494, 0.015438027, 0.01978432, 0.0066462625, 0.014856972, 0.027455451, 0.02534372, 0.011731

2018-11-11 03:20:30,905 : INFO : Epoch 29: perplexity estimate: 257.6924619746793
2018-11-11 03:20:31,059 : INFO : PROGRESS: pass 30, at document #2948/2948
2018-11-11 03:20:44,197 : INFO : optimized alpha [0.042897798, 0.010878392, 0.031188441, 0.013892212, 0.05397993, 0.008686123, 0.0045603886, 0.008395611, 0.017072199, 0.01143129, 0.010334755, 0.02331561, 0.0097655, 0.033183474, 0.007746283, 0.04361839, 0.029739685, 0.031618882, 0.030636005, 0.06790246, 0.045527343, 0.006777644, 0.026910225, 0.009595207, 0.018361181, 0.015029725, 0.016680602, 0.0069484795, 0.009738171, 0.0035205747, 0.019715412, 0.01407786, 0.06907066, 0.020328697, 0.01905128, 0.019350158, 0.019511973, 0.030168584, 0.009918877, 0.013930438, 0.0063227755, 0.025058342, 0.005600125, 0.0084153535, 0.013157508, 0.012006335, 0.0079713445, 0.050173666, 0.012042634, 0.048465904, 0.016544165, 0.021348651, 0.006784153, 0.016485114, 0.030034993, 0.02818901, 0.012399886, 0.0035206033, 0.015162984, 0.03967358, 0.029749205, 0.011

2018-11-11 03:21:17,069 : INFO : PROGRESS: pass 33, at document #2948/2948
2018-11-11 03:21:29,183 : INFO : optimized alpha [0.04682801, 0.011312324, 0.034279253, 0.015103154, 0.05703709, 0.008877615, 0.0044790977, 0.008679292, 0.018156128, 0.011849184, 0.0108784055, 0.025053501, 0.010209304, 0.036331385, 0.008079962, 0.04530608, 0.031244544, 0.033256467, 0.031548835, 0.073046036, 0.04797736, 0.0069429465, 0.02961307, 0.010144031, 0.02026038, 0.016167816, 0.01813917, 0.007110156, 0.010321375, 0.0033533452, 0.02208803, 0.014960335, 0.073579155, 0.021888744, 0.021092886, 0.020888753, 0.020835478, 0.035507843, 0.010390187, 0.014887469, 0.006415133, 0.0266003, 0.005704066, 0.008698676, 0.014004879, 0.012599282, 0.008487354, 0.053749796, 0.012638766, 0.051662322, 0.01760219, 0.022837512, 0.0069858264, 0.018202335, 0.032399625, 0.030690927, 0.0130598815, 0.0033533722, 0.016059324, 0.042951766, 0.032203548, 0.012189036, 0.043455105, 0.0090201665, 0.20058161, 0.08041216, 0.006111047, 0.0245700

2018-11-11 03:22:12,430 : INFO : optimized alpha [0.050283305, 0.011683534, 0.03713317, 0.016250998, 0.05984234, 0.009065323, 0.004426366, 0.00899342, 0.01922228, 0.0122625, 0.011450992, 0.026624646, 0.010624728, 0.038958386, 0.00841066, 0.046966147, 0.032516196, 0.03460847, 0.032348245, 0.0776471, 0.049986742, 0.007119243, 0.032091364, 0.010677854, 0.022041436, 0.01727138, 0.01948473, 0.0073245284, 0.010905173, 0.003203788, 0.02427382, 0.01574044, 0.077895194, 0.023297705, 0.023072395, 0.022231469, 0.02194947, 0.041444916, 0.010877087, 0.015826117, 0.0065429183, 0.028013386, 0.0058207624, 0.009004615, 0.014879003, 0.013151096, 0.0090550585, 0.056863308, 0.01325029, 0.05472271, 0.018676931, 0.024224391, 0.0072426046, 0.019863963, 0.034529045, 0.033033613, 0.013693004, 0.0032038104, 0.016867347, 0.045367856, 0.034140233, 0.012682413, 0.04642258, 0.0093829, 0.22335868, 0.08501484, 0.0063412418, 0.026061809, 0.034523513, 0.031584576, 0.019254537, 0.013011491, 0.0076032663, 0.01587074, 0.0

2018-11-11 03:22:53,267 : INFO : topic #57 (0.003): 0.000*"picking" + 0.000*"pic" + 0.000*"pile" + 0.000*"piggy" + 0.000*"pig" + 0.000*"piece" + 0.000*"pie" + 0.000*"picture" + 0.000*"picky" + 0.000*"pickle"
2018-11-11 03:22:53,268 : INFO : topic #29 (0.003): 0.000*"picking" + 0.000*"pic" + 0.000*"pile" + 0.000*"piggy" + 0.000*"pig" + 0.000*"piece" + 0.000*"pie" + 0.000*"picture" + 0.000*"picky" + 0.000*"pickle"
2018-11-11 03:22:53,269 : INFO : topic #97 (0.118): 0.713*"food" + 0.087*"organic" + 0.037*"health" + 0.033*"what" + 0.021*"make" + 0.016*"for" + 0.016*"question" + 0.013*"how" + 0.011*"answer" + 0.010*"ask"
2018-11-11 03:22:53,270 : INFO : topic #90 (0.130): 0.572*"organic" + 0.040*"certify" + 0.035*"usda" + 0.027*"what" + 0.022*"can" + 0.021*"mean" + 0.020*"certification" + 0.018*"non" + 0.017*"that" + 0.016*"how"
2018-11-11 03:22:53,270 : INFO : topic #64 (0.248): 0.046*"and" + 0.041*"not" + 0.026*"that" + 0.023*"have" + 0.014*"people" + 0.014*"all" + 0.014*"for" + 0.013*"fo

2018-11-11 03:23:39,159 : INFO : topic #29 (0.003): 0.000*"picking" + 0.000*"pic" + 0.000*"pile" + 0.000*"piggy" + 0.000*"pig" + 0.000*"piece" + 0.000*"pie" + 0.000*"picture" + 0.000*"picky" + 0.000*"pickle"
2018-11-11 03:23:39,161 : INFO : topic #97 (0.122): 0.727*"food" + 0.078*"organic" + 0.036*"health" + 0.032*"what" + 0.021*"make" + 0.016*"question" + 0.015*"for" + 0.013*"how" + 0.011*"answer" + 0.011*"ask"
2018-11-11 03:23:39,162 : INFO : topic #90 (0.137): 0.580*"organic" + 0.039*"certify" + 0.033*"usda" + 0.028*"what" + 0.024*"can" + 0.020*"mean" + 0.019*"non" + 0.019*"certification" + 0.016*"how" + 0.016*"that"
2018-11-11 03:23:39,163 : INFO : topic #64 (0.275): 0.046*"and" + 0.041*"not" + 0.026*"that" + 0.024*"have" + 0.014*"people" + 0.014*"all" + 0.014*"for" + 0.013*"food" + 0.013*"what" + 0.013*"this"
2018-11-11 03:23:39,168 : INFO : topic diff=inf, rho=1.000000
2018-11-11 03:23:41,775 : INFO : Epoch 42: perplexity estimate: 240.67888132428573
2018-11-11 03:23:41,962 : INF

2018-11-11 03:24:27,088 : INFO : topic #97 (0.125): 0.739*"food" + 0.068*"organic" + 0.036*"health" + 0.032*"what" + 0.021*"make" + 0.017*"question" + 0.014*"for" + 0.012*"how" + 0.011*"answer" + 0.011*"ask"
2018-11-11 03:24:27,089 : INFO : topic #90 (0.144): 0.586*"organic" + 0.037*"certify" + 0.031*"usda" + 0.029*"what" + 0.025*"can" + 0.020*"non" + 0.020*"mean" + 0.018*"certification" + 0.017*"how" + 0.015*"that"
2018-11-11 03:24:27,091 : INFO : topic #64 (0.309): 0.046*"and" + 0.041*"not" + 0.026*"that" + 0.024*"have" + 0.015*"people" + 0.014*"all" + 0.014*"for" + 0.013*"what" + 0.013*"this" + 0.013*"food"
2018-11-11 03:24:27,095 : INFO : topic diff=inf, rho=1.000000
2018-11-11 03:24:29,692 : INFO : Epoch 45: perplexity estimate: 237.892912838895
2018-11-11 03:24:29,873 : INFO : PROGRESS: pass 46, at document #2948/2948
2018-11-11 03:24:42,408 : INFO : optimized alpha [0.058131266, 0.0126729915, 0.04361604, 0.020166775, 0.067074984, 0.009705569, 0.004368854, 0.010055888, 0.02215884

2018-11-11 03:25:14,370 : INFO : topic #90 (0.151): 0.592*"organic" + 0.036*"certify" + 0.030*"usda" + 0.029*"what" + 0.027*"can" + 0.021*"non" + 0.020*"mean" + 0.017*"how" + 0.017*"certification" + 0.015*"for"
2018-11-11 03:25:14,371 : INFO : topic #64 (0.353): 0.046*"and" + 0.041*"not" + 0.026*"that" + 0.024*"have" + 0.015*"people" + 0.014*"all" + 0.014*"for" + 0.013*"this" + 0.013*"what" + 0.012*"food"
2018-11-11 03:25:14,376 : INFO : topic diff=inf, rho=1.000000
2018-11-11 03:25:16,940 : INFO : Epoch 48: perplexity estimate: 235.39239767588336
2018-11-11 03:25:17,121 : INFO : PROGRESS: pass 49, at document #2948/2948
2018-11-11 03:25:30,603 : INFO : optimized alpha [0.05980393, 0.012933053, 0.04490267, 0.021414798, 0.06890236, 0.009919684, 0.0043704095, 0.010403, 0.02290601, 0.013796287, 0.014859757, 0.031749964, 0.012530878, 0.04721968, 0.010180529, 0.052398995, 0.036399823, 0.038480114, 0.03428843, 0.093061365, 0.05627717, 0.00806104, 0.039742813, 0.012888956, 0.028245365, 0.0211

2018-11-11 03:26:01,420 : INFO : topic #64 (0.420): 0.047*"and" + 0.041*"not" + 0.026*"that" + 0.024*"have" + 0.015*"people" + 0.015*"all" + 0.014*"for" + 0.013*"this" + 0.013*"what" + 0.012*"food"
2018-11-11 03:26:01,425 : INFO : topic diff=inf, rho=1.000000
2018-11-11 03:26:03,956 : INFO : Epoch 51: perplexity estimate: 233.20794730942376
2018-11-11 03:26:04,139 : INFO : PROGRESS: pass 52, at document #2948/2948
2018-11-11 03:26:16,365 : INFO : optimized alpha [0.06143852, 0.013164534, 0.046078555, 0.02275045, 0.070496984, 0.010136632, 0.0043732235, 0.01071914, 0.023565846, 0.014103847, 0.015684363, 0.032589078, 0.012934586, 0.048678506, 0.010577806, 0.0534688, 0.037096243, 0.03920727, 0.034591433, 0.095860735, 0.057279445, 0.0083368635, 0.040791065, 0.0133762555, 0.029383728, 0.021828411, 0.024100348, 0.009087723, 0.013819306, 0.0026089055, 0.032107163, 0.01834546, 0.09258389, 0.02858428, 0.030407894, 0.027565151, 0.027510539, 0.063678, 0.013143011, 0.019469108, 0.0075773313, 0.0327

2018-11-11 03:26:46,756 : INFO : topic diff=inf, rho=1.000000
2018-11-11 03:26:49,286 : INFO : Epoch 54: perplexity estimate: 231.16916053512506
2018-11-11 03:26:49,477 : INFO : PROGRESS: pass 55, at document #2948/2948
2018-11-11 03:27:01,337 : INFO : optimized alpha [0.06291101, 0.013364746, 0.0473472, 0.024260476, 0.072126076, 0.010344334, 0.00439216, 0.011064801, 0.024214698, 0.0144053595, 0.016473895, 0.033358417, 0.013344639, 0.04993504, 0.010937838, 0.05444954, 0.03773194, 0.039830055, 0.035065994, 0.098834954, 0.058470067, 0.008626406, 0.041734204, 0.013844252, 0.030394156, 0.022435568, 0.024751624, 0.009536853, 0.014269268, 0.0025244176, 0.03294504, 0.01875108, 0.095366575, 0.02938396, 0.031223558, 0.02834002, 0.028516144, 0.06615255, 0.013559258, 0.020024104, 0.0079619875, 0.03334109, 0.006796911, 0.011219494, 0.019344306, 0.01581667, 0.015360934, 0.06967678, 0.01668723, 0.069202736, 0.023535663, 0.030489624, 0.010668813, 0.027269531, 0.044495687, 0.040450983, 0.01728546, 0.0

2018-11-11 03:27:34,040 : INFO : PROGRESS: pass 58, at document #2948/2948
2018-11-11 03:27:45,898 : INFO : optimized alpha [0.0642498, 0.013560667, 0.048298594, 0.025936138, 0.07354763, 0.010534762, 0.0044171615, 0.011358095, 0.024837336, 0.014685249, 0.017193545, 0.03399926, 0.013747891, 0.05094958, 0.0112745, 0.055328898, 0.038346067, 0.040248815, 0.035546843, 0.10164816, 0.059512034, 0.0088938465, 0.042494904, 0.014263295, 0.03124845, 0.022987606, 0.0253606, 0.010029097, 0.014708831, 0.0024461383, 0.03369786, 0.019095488, 0.09726517, 0.030077746, 0.03198238, 0.029153898, 0.029391512, 0.0687632, 0.013990891, 0.02050709, 0.008344834, 0.033853456, 0.006954448, 0.011612928, 0.019848002, 0.016173616, 0.016689597, 0.07112892, 0.01706809, 0.070918985, 0.024165075, 0.031370886, 0.0114854155, 0.02807735, 0.045948148, 0.041035805, 0.017777536, 0.0024461404, 0.021537213, 0.052117754, 0.042021126, 0.015477916, 0.06084841, 0.011983233, 0.63928604, 0.10520898, 0.008610378, 0.03475521, 0.04127251