## Imports

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from scipy.stats import hmean
from multiprocessing import Pool

import tokenizers as t
import lda_implementation

In [3]:
# Load the 10K filtered restaurant reviews and the manually coded review sample.
filtered_10K = pd.read_csv(
    'datasets/10K_filtered_restaurant_reviews.csv', encoding='utf-8')
coded = pd.read_excel(
    'datasets/manually_coded_reviews.xlsx', encoding='utf-8')
coded.fillna(0, inplace=True)  # missing values become 0

# Fold the separate service/staff codes into single 0/1 "serv" flags:
# a flag is 1 when the sum of its two source codes is positive, else 0.
coded['serv'] = (coded['service'] + coded['staff']).apply(
    lambda total: int(total > 0))
coded['pos_serv'] = (coded['pos_service'] + coded['pos_staff']).apply(
    lambda total: int(total > 0))
coded['neg_serv'] = (coded['neg_service'] + coded['neg_staff']).apply(
    lambda total: int(total > 0))

# Remove the merged source columns together with the ambiance/cleanliness codes.
coded.drop(
    ['service', 'staff', 'pos_service', 'pos_staff', 'neg_service', 'neg_staff',
     'ambiance', 'cleanliness', 'pos_amb', u'neg_amb', 'pos_clean', 'neg_clean'],
    axis=1, inplace=True)

## Test a few different N-Topics, Alphas, and Betas
Find promising ranges on which to run a more targeted grid-search.

In [3]:
# One row per experiment: (n_topics, alpha, eta).
# Rows 1-3 vary n_topics, rows 4-7 vary alpha, rows 8-11 vary eta.
tests = [
    (10, .1,   .01),
    (50, .1,   .01),
    (75, .1,   .01),
    (20, .001, .01),
    (20, .01,  .01),
    (20, 1,    .01),
    (20, 10,   .01),
    (20, .1,   .0001),
    (20, .1,   .001),
    (20, .1,   .1),
    (20, .1,   1),
]
# Column views of the same experiments, kept under their original names.
n_topicss, alphas, etas = (list(column) for column in zip(*tests))

# Fit the vectorizer on the 10K review texts, then keep only the reviews
# whose text is not reported back in rev_badlines.
count_vec, rev_vec, rev_badlines = t.fitApplyVectorizer(
    filtered_10K.text, t.foodwordPolarityTokenizer)
reviews = filtered_10K.copy()
reviews = reviews[reviews.text.apply(lambda line: line not in rev_badlines)]

# Apply the already-fitted vectorizer to the manually coded reviews and
# filter them the same way using code_badlines.
code_vec, code_badlines = t.applyVectorizer(
    coded.text, t.foodwordPolarityTokenizer, count_vec)
code_rev = coded.copy()
code_rev = code_rev[code_rev.text.apply(lambda line: line not in code_badlines)]

def testParams((n_topics, alpha, eta)):
    imp = lda_implementation.compareToManualNoPrint(reviews.text, rev_vec, code_rev, code_vec,
                                             n_topics=n_topics, alpha=alpha, eta=eta)
    return ('n_topics: {}, alpha: {}, eta: {}'.format(str(n_topics), str(alpha), str(eta)), imp)

In [4]:
# Fit one LDA model per parameter setting in parallel worker processes.
p = Pool()
try:
    top_scores = p.map(testParams, tests)
    p.close()      # normal path: no more work, let the workers exit
except Exception:
    p.terminate()  # failure path: stop outstanding fits instead of leaking workers
    raise
finally:
    p.join()       # valid after either close() or terminate()

INFO:lda:n_documents: 9689
INFO:lda:n_documents: 9689
INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_documents: 9689
INFO:lda:n_words: 616730
INFO:lda:vocab_size: 19491
INFO:lda:n_topics: 50
INFO:lda:vocab_size: 19491
INFO:lda:vocab_size: 19491
INFO:lda:n_iter: 2000
INFO:lda:n_words: 616730
INFO:lda:n_topics: 10
INFO:lda:n_words: 616730
INFO:lda:n_topics: 75
INFO:lda:n_words: 616730
INFO:lda:n_iter: 2000
INFO:lda:n_iter: 2000
INFO:lda:n_topics: 20
INFO:lda:n_iter: 2000
INFO:lda:<0> log likelihood: -5669204
INFO:lda:<0> log likelihood: -7070915
INFO:lda:<0> log likelihood: -7597607
INFO:lda:<0> log likelihood: -7219311
INFO:lda:<1000> log likelihood: -4327697
INFO:lda:<1000> log likelihood: -4250552
INFO:lda:<1999> log likelihood: -4324852


pos_food [('topic_7', 'topic_8'), 0.73015873015873023]
neg_food ['topic_5', 0.84454470877768661]
pos_wait [('topic_3', 'topic_7'), 0.75317875841436055]
neg_wait ['topic_2', 0.79459459459459458]
pos_price [('topic_1', 'topic_7'), 0.69066666666666665]
neg_price [('topic_1', 'topic_5'), 0.84844444444444445]
pos_serv [('topic_7', 'topic_8'), 0.73920032147880255]
neg_serv ['topic_2', 0.86525877453896494]
Done with n=10, b=0.1, c=0.01


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 20
INFO:lda:n_iter: 2000
INFO:lda:<0> log likelihood: -6673795
INFO:lda:<1999> log likelihood: -4224183


pos_food [('topic_7', 'topic_11'), 0.80359788359788364]
neg_food ['topic_13', 0.84686901832102812]
pos_wait ['topic_9', 0.7808526551982049]
neg_wait [('topic_6', 'topic_12'), 0.80270270270270272]
pos_price ['topic_9', 0.71666666666666667]
neg_price [('topic_4', 'topic_13'), 0.81488888888888888]
pos_serv [('topic_0', 'topic_15'), 0.68665862969660441]
neg_serv [('topic_6', 'topic_12'), 0.85797144556811422]
Done with n=20, b=0.001, c=0.01


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 20
INFO:lda:n_iter: 2000
INFO:lda:<0> log likelihood: -6067374
INFO:lda:<1000> log likelihood: -4289222
INFO:lda:<1000> log likelihood: -4569624
INFO:lda:<1000> log likelihood: -4484293
INFO:lda:<1999> log likelihood: -4276647


pos_food [('topic_1', 'topic_10'), 0.73089947089947094]
neg_food ['topic_7', 0.83415367787804218]
pos_wait [('topic_0', 'topic_10'), 0.76215407629020193]
neg_wait ['topic_6', 0.79027027027027019]
pos_price [('topic_4', 'topic_13'), 0.73955555555555552]
neg_price [('topic_7', 'topic_13'), 0.86844444444444446]
pos_serv [('topic_10', 'topic_17'), 0.74422342776773154]
neg_serv ['topic_6', 0.87061273051754917]
Done with n=20, b=0.01, c=0.01


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 20
INFO:lda:n_iter: 2000
INFO:lda:<0> log likelihood: -5970800
INFO:lda:<1000> log likelihood: -4654142
INFO:lda:<1999> log likelihood: -4479748


pos_food [('topic_2', 'topic_3'), 0.7659259259259259]
neg_food ['topic_7', 0.80065627563576702]
pos_wait [('topic_3', 'topic_13'), 0.74794315632011965]
neg_wait [('topic_2', 'topic_5'), 0.83540540540540542]
pos_price ['topic_13', 0.70977777777777784]
neg_price [('topic_4', 'topic_7'), 0.81111111111111112]
pos_serv ['topic_3', 0.77054450472171976]
neg_serv [('topic_5', 'topic_7'), 0.8645151695419393]
Done with n=20, b=1, c=0.01


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 20
INFO:lda:n_iter: 2000
INFO:lda:<0> log likelihood: -6695097
INFO:lda:<1000> log likelihood: -4304792
INFO:lda:<1999> log likelihood: -4564949


pos_food [('topic_0', 'topic_4'), 0.75195767195767194]
neg_food [('topic_26', 'topic_45'), 0.85097074104457204]
pos_wait [('topic_10', 'topic_44'), 0.82572924457741204]
neg_wait [('topic_12', 'topic_25'), 0.80486486486486486]
pos_price [('topic_7', 'topic_44'), 0.80333333333333334]
neg_price [('topic_26', 'topic_49'), 0.87755555555555553]
pos_serv [('topic_7', 'topic_10'), 0.77446252762708467]
neg_serv [('topic_12', 'topic_37'), 0.83105294467578816]
Done with n=50, b=0.1, c=0.01


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 20
INFO:lda:n_iter: 2000
INFO:lda:<0> log likelihood: -6494768
INFO:lda:<1000> log likelihood: -4576058
INFO:lda:<1999> log likelihood: -4295323


pos_food [('topic_0', 'topic_19'), 0.7857142857142857]
neg_food [('topic_13', 'topic_17'), 0.76715887339349198]
pos_wait [('topic_16', 'topic_19'), 0.77075542258788332]
neg_wait [('topic_5', 'topic_13'), 0.76621621621621627]
pos_price ['topic_16', 0.76866666666666672]
neg_price [('topic_13', 'topic_16'), 0.70999999999999996]
pos_serv [('topic_11', 'topic_19'), 0.71930882057464329]
neg_serv [('topic_5', 'topic_13'), 0.85975609756097571]
Done with n=20, b=10, c=0.01


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 20
INFO:lda:n_iter: 2000
INFO:lda:<0> log likelihood: -6225326
INFO:lda:<1000> log likelihood: -4469440
INFO:lda:<1999> log likelihood: -4545494


pos_food [('topic_0', 'topic_11'), 0.74708994708994714]
neg_food [('topic_5', 'topic_18'), 0.77153404429860539]
pos_wait [('topic_5', 'topic_11'), 0.81039640987284967]
neg_wait [('topic_10', 'topic_15'), 0.8002702702702702]
pos_price [('topic_11', 'topic_19'), 0.69022222222222229]
neg_price [('topic_8', 'topic_18'), 0.77177777777777778]
pos_serv [('topic_0', 'topic_11'), 0.71338155515370705]
neg_serv ['topic_15', 0.82436049970255798]
Done with n=20, b=0.1, c=0.0001


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 20
INFO:lda:n_iter: 2000
INFO:lda:<0> log likelihood: -6489844
INFO:lda:<1000> log likelihood: -4460847
INFO:lda:<1999> log likelihood: -4446752


pos_food [('topic_3', 'topic_9'), 0.72042328042328052]
neg_food [('topic_5', 'topic_13'), 0.85233798195242005]
pos_wait [('topic_2', 'topic_9'), 0.78347045624532541]
neg_wait ['topic_15', 0.84351351351351345]
pos_price [('topic_7', 'topic_9'), 0.72999999999999998]
neg_price [('topic_7', 'topic_13'), 0.84911111111111104]
pos_serv [('topic_9', 'topic_17'), 0.71046815350612824]
neg_serv ['topic_15', 0.84250446162998216]
Done with n=20, b=0.1, c=0.001


INFO:lda:<1000> log likelihood: -4113494
INFO:lda:<1999> log likelihood: -4450745


pos_food [('topic_1', 'topic_15'), 0.7484656084656085]
neg_food ['topic_2', 0.85110746513535684]
pos_wait [('topic_0', 'topic_15'), 0.77262528047868362]
neg_wait [('topic_10', 'topic_11'), 0.82162162162162156]
pos_price [('topic_15', 'topic_19'), 0.76777777777777778]
neg_price [('topic_2', 'topic_19'), 0.83999999999999986]
pos_serv [('topic_9', 'topic_15'), 0.76270845891099048]
neg_serv [('topic_2', 'topic_10'), 0.84994051160023798]
Done with n=20, b=0.1, c=0.1


INFO:lda:<1999> log likelihood: -4648499


pos_food [('topic_57', 'topic_74'), 0.74888888888888894]
neg_food [('topic_0', 'topic_68'), 0.81501230516817069]
pos_wait [('topic_6', 'topic_53'), 0.83507853403141352]
neg_wait [('topic_30', 'topic_36'), 0.86108108108108106]
pos_price ['topic_6', 0.89955555555555555]
neg_price [('topic_40', 'topic_68'), 0.89644444444444449]
pos_serv [('topic_11', 'topic_41'), 0.77024311834438419]
neg_serv [('topic_36', 'topic_56'), 0.82882212968471147]
Done with n=75, b=0.1, c=0.01


INFO:lda:<1999> log likelihood: -4113036


pos_food [('topic_0', 'topic_18'), 0.81291005291005292]
neg_food [('topic_1', 'topic_11'), 0.6143013398960897]
pos_wait [('topic_3', 'topic_6'), 0.5718025430067315]
neg_wait ['topic_1', 0.72432432432432425]
pos_price [('topic_0', 'topic_8'), 0.74488888888888882]
neg_price [('topic_1', 'topic_8'), 0.60711111111111105]
pos_serv [('topic_0', 'topic_6'), 0.65029134016475787]
neg_serv ['topic_1', 0.83283759666864965]
Done with n=20, b=0.1, c=1


In [5]:
# Display the raw sweep results: one (label, per-attribute matches) tuple per parameter setting.
top_scores

[('n_topics: 10, alpha: 0.1, eta: 0.01',
  [('pos_food', [('topic_7', 'topic_8'), 0.73015873015873023]),
   ('neg_food', ['topic_5', 0.84454470877768661]),
   ('pos_wait', [('topic_3', 'topic_7'), 0.75317875841436055]),
   ('neg_wait', ['topic_2', 0.79459459459459458]),
   ('pos_price', [('topic_1', 'topic_7'), 0.69066666666666665]),
   ('neg_price', [('topic_1', 'topic_5'), 0.84844444444444445]),
   ('pos_serv', [('topic_7', 'topic_8'), 0.73920032147880255]),
   ('neg_serv', ['topic_2', 0.86525877453896494])]),
 ('n_topics: 50, alpha: 0.1, eta: 0.01',
  [('pos_food', [('topic_0', 'topic_4'), 0.75195767195767194]),
   ('neg_food', [('topic_26', 'topic_45'), 0.85097074104457204]),
   ('pos_wait', [('topic_10', 'topic_44'), 0.82572924457741204]),
   ('neg_wait', [('topic_12', 'topic_25'), 0.80486486486486486]),
   ('pos_price', [('topic_7', 'topic_44'), 0.80333333333333334]),
   ('neg_price', [('topic_26', 'topic_49'), 0.87755555555555553]),
   ('pos_serv', [('topic_7', 'topic_10'), 0.77

In [4]:
# Compare summary stats of parameters.
# Each top_scores entry is (label, [(attribute, [topic(s), score]), ...]);
# only the score of each attribute feeds the statistics.
for parameters, values in top_scores:
    vals = [match[1] for _attribute, match in values]
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print("{}: mean: {:.3f}, hmean: {:.3f}, min-max: {:.3f}-{:.3f}".format(
        parameters, np.mean(vals), hmean(vals), min(vals), max(vals)))

n_topics: 10, alpha: 0.1, eta: 0.01: mean: 0.783, hmean: 0.779, min-max: 0.691-0.865
n_topics: 50, alpha: 0.1, eta: 0.01: mean: 0.815, hmean: 0.813, min-max: 0.752-0.878
n_topics: 75, alpha: 0.1, eta: 0.01: mean: 0.832, hmean: 0.829, min-max: 0.749-0.900
n_topics: 20, alpha: 0.001, eta: 0.01: mean: 0.789, hmean: 0.785, min-max: 0.687-0.858
n_topics: 20, alpha: 0.01, eta: 0.01: mean: 0.793, hmean: 0.789, min-max: 0.731-0.871
n_topics: 20, alpha: 1, eta: 0.01: mean: 0.788, hmean: 0.785, min-max: 0.710-0.865
n_topics: 20, alpha: 10, eta: 0.01: mean: 0.768, hmean: 0.766, min-max: 0.710-0.860
n_topics: 20, alpha: 0.1, eta: 0.0001: mean: 0.766, hmean: 0.764, min-max: 0.690-0.824
n_topics: 20, alpha: 0.1, eta: 0.001: mean: 0.791, hmean: 0.787, min-max: 0.710-0.852
n_topics: 20, alpha: 0.1, eta: 0.1: mean: 0.802, hmean: 0.800, min-max: 0.748-0.851
n_topics: 20, alpha: 0.1, eta: 1: mean: 0.695, hmean: 0.683, min-max: 0.572-0.833


In [16]:
# Compare summary stats of parameters - negative attributes only.
# Each top_scores entry is (label, [(attribute, [topic(s), score]), ...]);
# an attribute counts as negative when the part before its first '_' is 'neg'.
for parameters, values in top_scores:
    vals = [match[1] for attribute, match in values
            if attribute.split('_')[0] == 'neg']
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print("{}: mean: {:.3f}, hmean: {:.3f}, min-max: {:.3f}-{:.3f}".format(
        parameters, np.mean(vals), hmean(vals), min(vals), max(vals)))

n_topics: 10, alpha: 0.1, eta: 0.01: mean: 0.838, hmean: 0.837, min-max: 0.795-0.865
n_topics: 50, alpha: 0.1, eta: 0.01: mean: 0.841, hmean: 0.840, min-max: 0.805-0.878
n_topics: 75, alpha: 0.1, eta: 0.01: mean: 0.850, hmean: 0.849, min-max: 0.815-0.896
n_topics: 20, alpha: 0.001, eta: 0.01: mean: 0.831, hmean: 0.830, min-max: 0.803-0.858
n_topics: 20, alpha: 0.01, eta: 0.01: mean: 0.841, hmean: 0.840, min-max: 0.790-0.871
n_topics: 20, alpha: 1, eta: 0.01: mean: 0.828, hmean: 0.827, min-max: 0.801-0.865
n_topics: 20, alpha: 10, eta: 0.01: mean: 0.776, hmean: 0.772, min-max: 0.710-0.860
n_topics: 20, alpha: 0.1, eta: 0.0001: mean: 0.792, hmean: 0.791, min-max: 0.772-0.824
n_topics: 20, alpha: 0.1, eta: 0.001: mean: 0.847, hmean: 0.847, min-max: 0.843-0.852
n_topics: 20, alpha: 0.1, eta: 0.1: mean: 0.841, hmean: 0.840, min-max: 0.822-0.851
n_topics: 20, alpha: 0.1, eta: 1: mean: 0.695, hmean: 0.683, min-max: 0.607-0.833


None of the settings achieves *great* scores on the positive attributes, and I believe identifying negative attributes is more important than identifying positive ones. The baseline (n_topics = 20, alpha = 0.1, eta = 0.01) did the best on these negative attributes (the average was 0.869). Therefore, I will grid-search around these baseline values.

In [17]:
# Candidate values for the grid search around the baseline setting.
n_topicss = [18, 20, 22]
alphas = [.08, .1, .2]
etas = [.008, .01, .02]

# Fit the vectorizer on the 10K review texts, then keep only the reviews
# whose text is not reported back in rev_badlines.
count_vec, rev_vec, rev_badlines = t.fitApplyVectorizer(
    filtered_10K.text, t.foodwordPolarityTokenizer)
reviews = filtered_10K.copy()
reviews = reviews[reviews.text.apply(lambda line: line not in rev_badlines)]

# Apply the fitted vectorizer to the manually coded reviews and filter them
# the same way using code_badlines.
code_vec, code_badlines = t.applyVectorizer(
    coded.text, t.foodwordPolarityTokenizer, count_vec)
code_rev = coded.copy()
code_rev = code_rev[code_rev.text.apply(lambda line: line not in code_badlines)]

def testParams((n_topics, alpha, eta)):
    imp = lda_implementation.compareToManualNoPrint(reviews.text, rev_vec, code_rev, code_vec,
                                             n_topics=n_topics, alpha=alpha, eta=eta)
    return ('n_topics: {}, alpha: {}, eta: {}'.format(str(n_topics), str(alpha), str(eta)), imp)

In [18]:
# Fit one LDA model per (n_topics, alpha, eta) combination in parallel;
# n_topics varies slowest and eta fastest.
p = Pool()
try:
    top_param_scores = p.map(
        testParams,
        [(n, a, b) for n in n_topicss for a in alphas for b in etas])
    p.close()      # normal path: no more work, let the workers exit
except Exception:
    p.terminate()  # failure path: stop outstanding fits instead of leaking workers
    raise
finally:
    p.join()       # valid after either close() or terminate()

INFO:lda:n_documents: 9689
INFO:lda:n_documents: 9689
INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_topics: 18
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_iter: 2000
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_words: 616730
INFO:lda:n_topics: 18
INFO:lda:n_topics: 18
INFO:lda:n_topics: 18
INFO:lda:n_iter: 2000
INFO:lda:n_iter: 2000
INFO:lda:n_iter: 2000
INFO:lda:<0> log likelihood: -6143325
INFO:lda:<0> log likelihood: -6196487
INFO:lda:<0> log likelihood: -6210136
INFO:lda:<0> log likelihood: -6254037
INFO:lda:<1000> log likelihood: -4380218
INFO:lda:<1000> log likelihood: -4395827
INFO:lda:<1000> log likelihood: -4401584
INFO:lda:<1000> log likelihood: -4449571
INFO:lda:<1999> log likelihood: -4385416
INFO:lda:<1999> log likelihood: -4382493
INFO:lda:<1999> log likelihood: -4399150
INFO:lda:<1999> log likelihood: -4442954


pos_food [('topic_3', 'topic_16'), 0.73724867724867715]
neg_food [('topic_2', 'topic_5'), 0.84276729559748442]
pos_wait [('topic_3', 'topic_7'), 0.79842931937172767]
neg_wait ['topic_10', 0.80972972972972979]
pos_price [('topic_3', 'topic_12'), 0.7082222222222222]
neg_price [('topic_5', 'topic_6'), 0.87688888888888883]
pos_serv [('topic_0', 'topic_3'), 0.73447860156720912]
neg_serv [('topic_5', 'topic_10'), 0.85707911957168359]
Done with n=18, b=0.08, c=0.008


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 18
INFO:lda:n_iter: 2000


pos_food [('topic_0', 'topic_17'), 0.70931216931216934]
neg_food ['topic_6', 0.83059885151763735]
pos_wait [('topic_4', 'topic_17'), 0.79693343305908748]
neg_wait ['topic_10', 0.81621621621621621]
pos_price [('topic_13', 'topic_17'), 0.78800000000000003]
neg_price [('topic_6', 'topic_16'), 0.85199999999999998]
pos_serv [('topic_5', 'topic_14'), 0.740506329113924]
neg_serv ['topic_10', 0.86481261154074962]
Done with n=18, b=0.08, c=0.02


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 18
INFO:lda:n_iter: 2000


pos_food [('topic_0', 'topic_8'), 0.70126984126984127]
neg_food ['topic_6', 0.83675143560295318]
pos_wait [('topic_4', 'topic_8'), 0.81413612565445026]
neg_wait ['topic_5', 0.81054054054054059]
pos_price [('topic_8', 'topic_14'), 0.76777777777777789]
neg_price [('topic_6', 'topic_7'), 0.85022222222222221]
pos_serv [('topic_8', 'topic_15'), 0.73367490456098061]
neg_serv [('topic_5', 'topic_6'), 0.86377156454491366]
Done with n=18, b=0.1, c=0.01


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 18
INFO:lda:n_iter: 2000


pos_food [('topic_4', 'topic_8'), 0.74878306878306888]
neg_food ['topic_6', 0.84960350013672403]
pos_wait [('topic_15', 'topic_17'), 0.75130890052356025]
neg_wait ['topic_5', 0.84432432432432436]
pos_price [('topic_13', 'topic_14'), 0.80488888888888888]
neg_price ['topic_6', 0.84755555555555551]
pos_serv ['topic_8', 0.78300180831826405]
neg_serv ['topic_5', 0.85916121356335518]
Done with n=18, b=0.2, c=0.008


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 18
INFO:lda:n_iter: 2000
INFO:lda:<0> log likelihood: -6225370
INFO:lda:<0> log likelihood: -6238803
INFO:lda:<0> log likelihood: -6167820
INFO:lda:<0> log likelihood: -6128092
INFO:lda:<1000> log likelihood: -4401956
INFO:lda:<1000> log likelihood: -4408797
INFO:lda:<1000> log likelihood: -4407397
INFO:lda:<1000> log likelihood: -4452335
INFO:lda:<1999> log likelihood: -4398115
INFO:lda:<1999> log likelihood: -4403718
INFO:lda:<1999> log likelihood: -4407179


pos_food [('topic_1', 'topic_15'), 0.70761904761904759]
neg_food [('topic_7', 'topic_11'), 0.76278370248837846]
pos_wait [('topic_3', 'topic_14'), 0.76963350785340312]
neg_wait [('topic_5', 'topic_10'), 0.81918918918918915]
pos_price [('topic_3', 'topic_12'), 0.72933333333333328]
neg_price [('topic_6', 'topic_7'), 0.86022222222222222]
pos_serv [('topic_3', 'topic_14'), 0.74372111713883871]
neg_serv [('topic_5', 'topic_10'), 0.8761154074955384]
Done with n=18, b=0.08, c=0.01


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 18
INFO:lda:n_iter: 2000


pos_food [('topic_8', 'topic_17'), 0.74888888888888894]
neg_food [('topic_2', 'topic_6'), 0.81473885698660098]
pos_wait [('topic_8', 'topic_16'), 0.82722513089005234]
neg_wait ['topic_5', 0.83405405405405408]
pos_price [('topic_8', 'topic_12'), 0.77711111111111109]
neg_price [('topic_2', 'topic_6'), 0.82933333333333337]
pos_serv [('topic_8', 'topic_15'), 0.72654209363070121]
neg_serv [('topic_5', 'topic_10'), 0.85187388459250446]
Done with n=18, b=0.1, c=0.008


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 20
INFO:lda:n_iter: 2000


pos_food [('topic_8', 'topic_16'), 0.75312169312169308]
neg_food [('topic_6', 'topic_16'), 0.85165436149849605]
pos_wait [('topic_4', 'topic_9'), 0.77000747943156322]
neg_wait ['topic_5', 0.83378378378378371]
pos_price [('topic_13', 'topic_17'), 0.79444444444444451]
neg_price [('topic_3', 'topic_6'), 0.8368888888888889]
pos_serv [('topic_8', 'topic_9'), 0.76120152702431176]
neg_serv [('topic_5', 'topic_10'), 0.85098155859607372]
Done with n=18, b=0.1, c=0.02


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 20
INFO:lda:n_iter: 2000
INFO:lda:<1999> log likelihood: -4452389


pos_food [('topic_0', 'topic_3'), 0.7124867724867725]
neg_food [('topic_6', 'topic_16'), 0.85233798195242005]
pos_wait [('topic_4', 'topic_7'), 0.72961854899027667]
neg_wait ['topic_5', 0.80891891891891898]
pos_price [('topic_7', 'topic_13'), 0.80222222222222228]
neg_price [('topic_2', 'topic_6'), 0.83488888888888879]
pos_serv ['topic_3', 0.74984930681133222]
neg_serv [('topic_5', 'topic_9'), 0.85648423557406295]
Done with n=18, b=0.2, c=0.01


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 20
INFO:lda:n_iter: 2000
INFO:lda:<0> log likelihood: -6085775
INFO:lda:<0> log likelihood: -6327134
INFO:lda:<0> log likelihood: -6342830
INFO:lda:<0> log likelihood: -6267652
INFO:lda:<1000> log likelihood: -4468409
INFO:lda:<1000> log likelihood: -4422798
INFO:lda:<1000> log likelihood: -4402553
INFO:lda:<1000> log likelihood: -4429584
INFO:lda:<1999> log likelihood: -4463469


pos_food [('topic_1', 'topic_3'), 0.73195767195767203]
neg_food [('topic_5', 'topic_16'), 0.8192507519824993]
pos_wait [('topic_3', 'topic_17'), 0.75130890052356025]
neg_wait ['topic_10', 0.82162162162162156]
pos_price [('topic_12', 'topic_15'), 0.81066666666666665]
neg_price [('topic_5', 'topic_15'), 0.81933333333333325]
pos_serv [('topic_0', 'topic_3'), 0.76662648181635529]
neg_serv [('topic_5', 'topic_10'), 0.87834622248661509]
Done with n=18, b=0.2, c=0.02


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 20
INFO:lda:n_iter: 2000
INFO:lda:<0> log likelihood: -6358639
INFO:lda:<1999> log likelihood: -4413066
INFO:lda:<1999> log likelihood: -4398517
INFO:lda:<1999> log likelihood: -4427698


pos_food [('topic_2', 'topic_14'), 0.74211640211640217]
neg_food ['topic_7', 0.85329505058791355]
pos_wait [('topic_6', 'topic_14'), 0.7894540014958864]
neg_wait ['topic_15', 0.8464864864864865]
pos_price [('topic_13', 'topic_14'), 0.72111111111111115]
neg_price [('topic_7', 'topic_13'), 0.85888888888888892]
pos_serv [('topic_3', 'topic_14'), 0.74914607193088212]
neg_serv ['topic_15', 0.87418203450327181]
Done with n=20, b=0.1, c=0.008


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 20
INFO:lda:n_iter: 2000


pos_food [('topic_8', 'topic_15'), 0.74285714285714288]
neg_food ['topic_14', 0.83743505605687718]
pos_wait [('topic_9', 'topic_15'), 0.81039640987284967]
neg_wait [('topic_10', 'topic_16'), 0.84405405405405398]
pos_price [('topic_12', 'topic_15'), 0.74711111111111117]
neg_price [('topic_14', 'topic_18'), 0.85799999999999998]
pos_serv [('topic_3', 'topic_15'), 0.73467952581876639]
neg_serv ['topic_10', 0.87224866151100544]
Done with n=20, b=0.08, c=0.01


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 20
INFO:lda:n_iter: 2000


pos_food [('topic_5', 'topic_8'), 0.77089947089947086]
neg_food ['topic_6', 0.85985780694558378]
pos_wait [('topic_0', 'topic_11'), 0.77935676888556471]
neg_wait ['topic_10', 0.82702702702702702]
pos_price [('topic_13', 'topic_19'), 0.75111111111111106]
neg_price [('topic_6', 'topic_14'), 0.83355555555555549]
pos_serv [('topic_8', 'topic_17'), 0.75426964034558974]
neg_serv ['topic_10', 0.87447947650208213]
Done with n=20, b=0.1, c=0.02


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 20
INFO:lda:n_iter: 2000
INFO:lda:<0> log likelihood: -6311326
INFO:lda:<0> log likelihood: -6299156
INFO:lda:<0> log likelihood: -6237127
INFO:lda:<1000> log likelihood: -4409450
INFO:lda:<1000> log likelihood: -4419772
INFO:lda:<1000> log likelihood: -4407706
INFO:lda:<1000> log likelihood: -4461029
INFO:lda:<1999> log likelihood: -4406196


pos_food [('topic_1', 'topic_19'), 0.74730158730158724]
neg_food ['topic_18', 0.84837298331966093]
pos_wait [('topic_0', 'topic_19'), 0.7849663425579656]
neg_wait ['topic_15', 0.8424324324324326]
pos_price [('topic_12', 'topic_19'), 0.78200000000000003]
neg_price ['topic_18', 0.88955555555555543]
pos_serv [('topic_3', 'topic_19'), 0.73578460920233069]
neg_serv [('topic_10', 'topic_15'), 0.86258179654967282]
Done with n=20, b=0.08, c=0.008


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 20
INFO:lda:n_iter: 2000
INFO:lda:<0> log likelihood: -6221318
INFO:lda:<1999> log likelihood: -4414546
INFO:lda:<1999> log likelihood: -4407119


pos_food [('topic_5', 'topic_14'), 0.72359788359788357]
neg_food ['topic_13', 0.87284659557013944]
pos_wait [('topic_0', 'topic_19'), 0.77449513836948392]
neg_wait ['topic_12', 0.8472972972972973]
pos_price [('topic_15', 'topic_19'), 0.7877777777777778]
neg_price ['topic_13', 0.89266666666666661]
pos_serv [('topic_14', 'topic_17'), 0.75868997387984738]
neg_serv [('topic_10', 'topic_12'), 0.86377156454491377]
Done with n=20, b=0.1, c=0.01


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 22
INFO:lda:n_iter: 2000


pos_food [('topic_17', 'topic_19'), 0.72074074074074079]
neg_food ['topic_13', 0.87886245556467046]
pos_wait [('topic_0', 'topic_19'), 0.79992520568436798]
neg_wait [('topic_6', 'topic_15'), 0.81594594594594583]
pos_price [('topic_4', 'topic_19'), 0.7771111111111112]
neg_price ['topic_13', 0.86555555555555563]
pos_serv [('topic_14', 'topic_19'), 0.72222222222222232]
neg_serv [('topic_6', 'topic_15'), 0.86302795954788813]
Done with n=20, b=0.08, c=0.02


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 22
INFO:lda:n_iter: 2000
INFO:lda:<1999> log likelihood: -4453812


pos_food [('topic_8', 'topic_16'), 0.76582010582010585]
neg_food ['topic_7', 0.86614711512168452]
pos_wait [('topic_0', 'topic_8'), 0.81114435302916976]
neg_wait ['topic_15', 0.8532432432432433]
pos_price [('topic_8', 'topic_13'), 0.71711111111111114]
neg_price [('topic_7', 'topic_13'), 0.85822222222222222]
pos_serv [('topic_8', 'topic_11'), 0.74181233674904568]
neg_serv [('topic_5', 'topic_15'), 0.86332540154669835]
Done with n=20, b=0.2, c=0.008


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 22
INFO:lda:n_iter: 2000
INFO:lda:<0> log likelihood: -6454773
INFO:lda:<0> log likelihood: -6393536
INFO:lda:<0> log likelihood: -6404221
INFO:lda:<1000> log likelihood: -4468148
INFO:lda:<1000> log likelihood: -4417110
INFO:lda:<1000> log likelihood: -4421583
INFO:lda:<1000> log likelihood: -4437793
INFO:lda:<1999> log likelihood: -4463743


pos_food [('topic_1', 'topic_16'), 0.77449735449735446]
neg_food ['topic_5', 0.84905660377358494]
pos_wait [('topic_12', 'topic_16'), 0.78534031413612571]
neg_wait ['topic_6', 0.81918918918918915]
pos_price ['topic_18', 0.7546666666666666]
neg_price [('topic_5', 'topic_18'), 0.82911111111111113]
pos_serv [('topic_0', 'topic_16'), 0.75426964034558974]
neg_serv [('topic_6', 'topic_15'), 0.83938132064247473]
Done with n=20, b=0.2, c=0.01


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 20
INFO:lda:n_iter: 2000
INFO:lda:<0> log likelihood: -6177644
INFO:lda:<1999> log likelihood: -4412835


pos_food [('topic_1', 'topic_21'), 0.75301587301587314]
neg_food ['topic_20', 0.85643970467596398]
pos_wait [('topic_0', 'topic_21'), 0.83171278982797303]
neg_wait [('topic_2', 'topic_15'), 0.81837837837837846]
pos_price [('topic_18', 'topic_21'), 0.76177777777777778]
neg_price [('topic_18', 'topic_20'), 0.84555555555555562]
pos_serv ['topic_21', 0.72845087402049435]
neg_serv [('topic_2', 'topic_15'), 0.8596073765615706]
Done with n=22, b=0.08, c=0.008


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 22
INFO:lda:n_iter: 2000
INFO:lda:<1999> log likelihood: -4419841
INFO:lda:<1999> log likelihood: -4431232


pos_food [('topic_0', 'topic_10'), 0.72507936507936521]
neg_food ['topic_20', 0.84905660377358494]
pos_wait [('topic_3', 'topic_10'), 0.78833208676140609]
neg_wait ['topic_15', 0.83918918918918917]
pos_price [('topic_17', 'topic_18'), 0.68822222222222229]
neg_price [('topic_17', 'topic_20'), 0.85599999999999987]
pos_serv [('topic_10', 'topic_18'), 0.74954792043399632]
neg_serv ['topic_15', 0.86421772754312909]
Done with n=22, b=0.08, c=0.02


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 22
INFO:lda:n_iter: 2000


pos_food [('topic_0', 'topic_21'), 0.74899470899470899]
neg_food ['topic_20', 0.84837298331966093]
pos_wait [('topic_5', 'topic_12'), 0.76514584891548254]
neg_wait ['topic_15', 0.81270270270270273]
pos_price [('topic_12', 'topic_21'), 0.71777777777777785]
neg_price [('topic_3', 'topic_20'), 0.83933333333333338]
pos_serv [('topic_11', 'topic_21'), 0.74241510950371703]
neg_serv ['topic_15', 0.86867935752528258]
Done with n=22, b=0.1, c=0.01


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 22
INFO:lda:n_iter: 2000
INFO:lda:<0> log likelihood: -6438437
INFO:lda:<0> log likelihood: -6420557
INFO:lda:<0> log likelihood: -6359320
INFO:lda:<1000> log likelihood: -4488066
INFO:lda:<1000> log likelihood: -4420183
INFO:lda:<1000> log likelihood: -4434093
INFO:lda:<1000> log likelihood: -4445015
INFO:lda:<1999> log likelihood: -4485404


pos_food [('topic_5', 'topic_11'), 0.76582010582010573]
neg_food ['topic_4', 0.83866557287394028]
pos_wait [('topic_0', 'topic_11'), 0.76626776364996263]
neg_wait [('topic_10', 'topic_15'), 0.81135135135135128]
pos_price [('topic_14', 'topic_19'), 0.77244444444444438]
neg_price ['topic_4', 0.82511111111111113]
pos_serv [('topic_11', 'topic_17'), 0.73879847297568824]
neg_serv [('topic_10', 'topic_15'), 0.8576740035693039]
Done with n=20, b=0.2, c=0.02


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 22
INFO:lda:n_iter: 2000
INFO:lda:<0> log likelihood: -6322963
INFO:lda:<1999> log likelihood: -4413153


pos_food [('topic_0', 'topic_9'), 0.75164021164021166]
neg_food ['topic_8', 0.8632759092152037]
pos_wait [('topic_10', 'topic_12'), 0.78982797307404629]
neg_wait [('topic_5', 'topic_15'), 0.81837837837837835]
pos_price [('topic_6', 'topic_10'), 0.81311111111111112]
neg_price ['topic_8', 0.86066666666666669]
pos_serv [('topic_4', 'topic_9'), 0.73699015471167373]
neg_serv [('topic_5', 'topic_15'), 0.87581796549672819]
Done with n=22, b=0.08, c=0.01


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 22
INFO:lda:n_iter: 2000
INFO:lda:<1999> log likelihood: -4432508
INFO:lda:<1999> log likelihood: -4441346


pos_food [('topic_0', 'topic_10'), 0.74497354497354495]
neg_food [('topic_5', 'topic_20'), 0.81036368608148757]
pos_wait [('topic_10', 'topic_21'), 0.7935676888556471]
neg_wait ['topic_15', 0.82324324324324327]
pos_price [('topic_10', 'topic_14'), 0.76822222222222214]
neg_price [('topic_17', 'topic_20'), 0.86888888888888882]
pos_serv [('topic_9', 'topic_10'), 0.71147277476391402]
neg_serv [('topic_2', 'topic_15'), 0.87611540749553829]
Done with n=22, b=0.1, c=0.008
pos_food [('topic_0', 'topic_11'), 0.73523809523809525]
pos_wait [('topic_11', 'topic_21'), 0.7890800299177263]
neg_food ['topic_20', 0.87544435329505066]
neg_wait [('topic_8', 'topic_15'), 0.82162162162162167]
pos_price [('topic_11', 'topic_18'), 0.69599999999999995]
neg_price [('topic_18', 'topic_20'), 0.84755555555555562]
pos_serv [('topic_4', 'topic_11'), 0.77225236085995586]
neg_serv [('topic_8', 'topic_15'), 0.87001784651992864]
Done with n=22, b=0.1, c=0.02


INFO:lda:<0> log likelihood: -6261725
INFO:lda:<1000> log likelihood: -4485020
INFO:lda:<1000> log likelihood: -4498453
INFO:lda:<1999> log likelihood: -4480158


pos_food [('topic_0', 'topic_10'), 0.75798941798941799]
neg_food ['topic_20', 0.90347279190593377]
pos_wait [('topic_18', 'topic_21'), 0.80628272251308908]
neg_wait ['topic_3', 0.83864864864864863]
pos_price [('topic_17', 'topic_18'), 0.81066666666666665]
neg_price ['topic_20', 0.8793333333333333]
pos_serv [('topic_4', 'topic_10'), 0.76200522403054061]
neg_serv [('topic_3', 'topic_15'), 0.85693039857227837]
Done with n=22, b=0.2, c=0.008


INFO:lda:n_documents: 9689
INFO:lda:vocab_size: 19491
INFO:lda:n_words: 616730
INFO:lda:n_topics: 22
INFO:lda:n_iter: 2000
INFO:lda:<0> log likelihood: -6306627
INFO:lda:<1999> log likelihood: -4501634


pos_food [('topic_0', 'topic_1'), 0.76582010582010585]
neg_food ['topic_20', 0.8431774678698386]
pos_wait [('topic_1', 'topic_21'), 0.77337322363500371]
neg_wait [('topic_3', 'topic_8'), 0.85081081081081078]
pos_price [('topic_1', 'topic_18'), 0.74888888888888894]
neg_price [('topic_18', 'topic_20'), 0.86333333333333329]
pos_serv [('topic_1', 'topic_11'), 0.76913803496081989]
neg_serv ['topic_3', 0.87299226650803097]
Done with n=22, b=0.2, c=0.02


INFO:lda:<1000> log likelihood: -4491068
INFO:lda:<1999> log likelihood: -4485576


pos_food [('topic_0', 'topic_10'), 0.77322751322751326]
neg_food ['topic_7', 0.86067815149029259]
pos_wait [('topic_10', 'topic_12'), 0.76664173522812251]
neg_wait ['topic_3', 0.84243243243243238]
pos_price [('topic_12', 'topic_19'), 0.77355555555555555]
neg_price ['topic_7', 0.85733333333333328]
pos_serv [('topic_4', 'topic_10'), 0.78923046011653608]
neg_serv [('topic_3', 'topic_15'), 0.85916121356335518]
Done with n=22, b=0.2, c=0.01


In [19]:
# Display the collected grid-search results. As the output below shows, each
# entry is (parameter-label, [(category, [best topic(s), score]), ...]).
top_param_scores

[('n_topics: 18, alpha: 0.08, eta: 0.008',
  [('pos_food', [('topic_3', 'topic_16'), 0.73724867724867715]),
   ('neg_food', [('topic_2', 'topic_5'), 0.84276729559748442]),
   ('pos_wait', [('topic_3', 'topic_7'), 0.79842931937172767]),
   ('neg_wait', ['topic_10', 0.80972972972972979]),
   ('pos_price', [('topic_3', 'topic_12'), 0.7082222222222222]),
   ('neg_price', [('topic_5', 'topic_6'), 0.87688888888888883]),
   ('pos_serv', [('topic_0', 'topic_3'), 0.73447860156720912]),
   ('neg_serv', [('topic_5', 'topic_10'), 0.85707911957168359])]),
 ('n_topics: 18, alpha: 0.08, eta: 0.01',
  [('pos_food', [('topic_1', 'topic_15'), 0.70761904761904759]),
   ('neg_food', [('topic_7', 'topic_11'), 0.76278370248837846]),
   ('pos_wait', [('topic_3', 'topic_14'), 0.76963350785340312]),
   ('neg_wait', [('topic_5', 'topic_10'), 0.81918918918918915]),
   ('pos_price', [('topic_3', 'topic_12'), 0.72933333333333328]),
   ('neg_price', [('topic_6', 'topic_7'), 0.86022222222222222]),
   ('pos_serv', [(

In [20]:
# Summary statistics of the per-category scores for every parameter setting.
for parameters, values in top_param_scores:
    # Each entry of `values` is (category, [topic(s), score]); keep only the score.
    # The [:10] cap is there to ignore scores on ambiance and cleanliness -
    # they were too infrequent.
    vals = [value[1][1] for value in values][:10]
    # Single-argument print(...) behaves identically on Python 2 and Python 3,
    # unlike the bare print statement, which is a SyntaxError on Python 3.
    print("{}: mean: {:.3f}, hmean: {:.3f}, min-max: {:.3f}-{:.3f}".format(
        parameters, np.mean(vals), hmean(vals), min(vals), max(vals)))

n_topics: 18, alpha: 0.08, eta: 0.008: mean: 0.796, hmean: 0.791, min-max: 0.708-0.877
n_topics: 18, alpha: 0.08, eta: 0.01: mean: 0.784, hmean: 0.779, min-max: 0.708-0.876
n_topics: 18, alpha: 0.08, eta: 0.02: mean: 0.800, hmean: 0.797, min-max: 0.709-0.865
n_topics: 18, alpha: 0.1, eta: 0.008: mean: 0.801, hmean: 0.799, min-max: 0.727-0.852
n_topics: 18, alpha: 0.1, eta: 0.01: mean: 0.797, hmean: 0.793, min-max: 0.701-0.864
n_topics: 18, alpha: 0.1, eta: 0.02: mean: 0.807, hmean: 0.805, min-max: 0.753-0.852
n_topics: 18, alpha: 0.2, eta: 0.008: mean: 0.811, hmean: 0.809, min-max: 0.749-0.859
n_topics: 18, alpha: 0.2, eta: 0.01: mean: 0.793, hmean: 0.790, min-max: 0.712-0.856
n_topics: 18, alpha: 0.2, eta: 0.02: mean: 0.800, hmean: 0.797, min-max: 0.732-0.878
n_topics: 20, alpha: 0.08, eta: 0.008: mean: 0.812, hmean: 0.808, min-max: 0.736-0.890
n_topics: 20, alpha: 0.08, eta: 0.01: mean: 0.806, hmean: 0.802, min-max: 0.735-0.872
n_topics: 20, alpha: 0.08, eta: 0.02: mean: 0.805, hmean

In [22]:
# compare summary stats of parameters - negative only
for parameters, values in top_param_scores:
    # Each entry of `values` is (category, [topic(s), score]); keep the score of
    # every category whose name has the 'neg' prefix (e.g. 'neg_food').
    vals = [value[1][1] for value in values
            if value[0].split('_')[0] == 'neg']
    # Single-argument print(...) behaves identically on Python 2 and Python 3,
    # unlike the bare print statement, which is a SyntaxError on Python 3.
    print("{}: mean: {:.3f}, hmean: {:.3f}, min-max: {:.3f}-{:.3f}".format(
        parameters, np.mean(vals), hmean(vals), min(vals), max(vals)))

n_topics: 18, alpha: 0.08, eta: 0.008: mean: 0.847, hmean: 0.846, min-max: 0.810-0.877
n_topics: 18, alpha: 0.08, eta: 0.01: mean: 0.830, hmean: 0.827, min-max: 0.763-0.876
n_topics: 18, alpha: 0.08, eta: 0.02: mean: 0.841, hmean: 0.840, min-max: 0.816-0.865
n_topics: 18, alpha: 0.1, eta: 0.008: mean: 0.833, hmean: 0.832, min-max: 0.815-0.852
n_topics: 18, alpha: 0.1, eta: 0.01: mean: 0.840, hmean: 0.840, min-max: 0.811-0.864
n_topics: 18, alpha: 0.1, eta: 0.02: mean: 0.843, hmean: 0.843, min-max: 0.834-0.852
n_topics: 18, alpha: 0.2, eta: 0.008: mean: 0.850, hmean: 0.850, min-max: 0.844-0.859
n_topics: 18, alpha: 0.2, eta: 0.01: mean: 0.838, hmean: 0.838, min-max: 0.809-0.856
n_topics: 18, alpha: 0.2, eta: 0.02: mean: 0.835, hmean: 0.834, min-max: 0.819-0.878
n_topics: 20, alpha: 0.08, eta: 0.008: mean: 0.861, hmean: 0.860, min-max: 0.842-0.890
n_topics: 20, alpha: 0.08, eta: 0.01: mean: 0.853, hmean: 0.853, min-max: 0.837-0.872
n_topics: 20, alpha: 0.08, eta: 0.02: mean: 0.856, hmean

The baseline model continues to be the best model.

## Best LDA:
- Number of topics: 20
- Alpha: 0.1
- Beta (`eta` in the code and summaries above): 0.01