In [1]:
import io
import pandas as pd
import gensim

In [2]:
from stability import *
from stablelda import StableLDA

#### train a topic model of 25 topics on the stackexchange dataset using Stable LDA 

In [3]:
# Input corpus (bag-of-words) and vocabulary files.
bow_file = 'data/stackexchange.bow'
vocab_file = 'data/stackexchange.vocab'

# Model size settings; both are passed positionally to StableLDA.
num_topics = 25
num_words = 5000

# Hyperparameters forwarded unchanged to the StableLDA constructor.
alpha = 1
beta = 0.01
eta = 1000

# Training length and random seed.
epochs = 5
rand_seed = 42

# Output location handed to StableLDA. Keep the trailing slash: the training
# cell builds the theta/phi file paths by plain string concatenation.
output_dir = 'data/output/'

In [4]:
# Train Stable LDA with the settings from the configuration cell.
stablelda = StableLDA(num_topics, num_words, alpha, beta, eta, rand_seed, output_dir)
stablelda.train(bow_file, vocab_file, epochs)

# Read the theta/phi files back from output_dir together with the corpus and
# vocabulary, then wrap everything in a TopicModel for the later cells.
theta_file = output_dir + 'theta.dat'
phi_file = output_dir + 'phi.dat'
docs, vocab, theta, phi = load_topic_model_results(bow_file, vocab_file, theta_file, phi_file)
tm = TopicModel(num_topics, theta, phi, docs, vocab)

--------running Stable LDA model----------
--------- loading data ----------------
train -f data/stackexchange.bow -v data/stackexchange.vocab -c data/output/cluster.dat -z data/output/z.dat -t 25 -w 5000 -a 1 -b 0.01 -e 1000 -n 5 -r 42 -o data/output/


#### compute model perplexity

In [5]:
# Evaluate the fitted model on the loaded documents; the call's return value is
# the cell output (the perplexity shown below).
# NOTE(review): the helper is spelled `compute_perlexity`; presumably that matches
# its definition in `stability` -- confirm there before renaming it here.
compute_perlexity(docs, theta, phi)

compute likelihood
likelihood: -14973562.224153275
perplexity: 820.8899806129051


820.8899806129051

#### compute model coherence
We use Gensim's coherence method, so we first need to prepare `gensim_bow` and `id2word`.

In [6]:
# Presumably the 10 top-ranked words of each topic (confirm against TopicModel);
# `topics` is what the coherence cells below pass to compute_coherence.
topics = tm.get_top_n_words(10)

In [7]:
#### read in raw text data -- used for the window-based topic coherence measure
# Each line of the file becomes one document, tokenised on whitespace.
texts = []
with io.open(bow_file, 'r', encoding='utf-8') as f:
    for line in f.read().splitlines():
        texts.append(line.split())

In [8]:
#### prepare gensim_bow and id2word
# id2word: gensim Dictionary built from the tokenised documents.
id2word = gensim.corpora.Dictionary(texts)
# gensim_bow: every document converted through the dictionary's doc2bow.
gensim_bow = list(map(id2word.doc2bow, texts))

In [9]:
# Coherence of the extracted topics using the 'c_uci' measure.
c_uci_score = compute_coherence(gensim_bow, texts, id2word, topics, coherence_score='c_uci')
print('topic coherence c_uci', c_uci_score)

topic coherence c_uci 0.577670723812638


In [10]:
# Coherence of the extracted topics using the 'c_v' measure.
c_v_score = compute_coherence(gensim_bow, texts, id2word, topics, coherence_score='c_v')
print('topic coherence c_v', c_v_score)

topic coherence c_v 0.5747375848105956


In [18]:
# Print 10 words for every topic, one topic per line (see the output below).
tm.print_top_n_words(10)

damag kill attack enemi hit weapon target rang shield deal
block place activ spawn remov test line stand mob portal
good lot better best make usual fight pretti hard easi
time need start try second abl get turn got happen
like wai want know thing help possibl sure think problem
dragon dark soul heart defeat form king good super hunter
unit destroi forc defens oppon base tower tank hero air
skill armor spell hand equip magic enchant monster potion sword
strong level item drop class normal gold card chest pack
game new account updat avail origin access onlin old free
point charact complet end quest mission unlock order achiev main
bui upgrad pick store trade weapon cost monei resourc sell
area room wall water head near door path insid ground
code pre command minecraft type quot add count tag score
save version server file steam world mod creat instal download
set alt look right option map open kbd click allow
effect power increas number chanc speed mean health high stat
control connect x