Skip to content

Commit

Permalink
minor improvements and fixes in topicmod. examples
Browse files Browse the repository at this point in the history
  • Loading branch information
internaut committed Apr 19, 2023
1 parent ae39c02 commit e82a646
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 2 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ doc/source/data/corpus_norm.pickle
.coverage
examples/data/aclImdb_v1.tar.gz
venv
examples/data/topicmod_evaluate_*.png
9 changes: 9 additions & 0 deletions examples/topicmod_ap_nips_eval.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
"""
Topic model evaluation for AP and NIPS datasets (http://archive.ics.uci.edu/ml/datasets/Bag+of+Words).
Run as:
python topicmod_ap_nips_eval.py <dataset> <num. workers> <eta> <alpha numerator>
Where ``<dataset>`` is either ``data/ap.pickle`` or ``data/nips.pickle``, ``<num. workers>`` is the number of worker
processes to be used (should be >= 1 and <= the number of CPU cores in your machine), ``<eta>`` is the LDA eta (a.k.a. "beta")
parameter (should be in range (0, 1]) and ``<alpha numerator>`` is used for calculating the LDA alpha parameter as
``<alpha numerator> / K`` where K is the number of topics.
This example requires that you have installed tmtoolkit with the "lda" package.
pip install -U "tmtoolkit[lda]"
Expand Down
4 changes: 3 additions & 1 deletion examples/topicmod_lda.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@

if os.path.exists(corp_picklefile):
docs = load_corpus_from_picklefile(corp_picklefile)
docs.max_workers = 1.0
else:
docs = Corpus.from_builtin_corpus('en-NewsArticles', max_workers=1.0)
save_corpus_to_picklefile(docs, corp_picklefile)
Expand Down Expand Up @@ -89,6 +90,7 @@

if os.path.exists(corp_preproc_picklefile):
docs = load_corpus_from_picklefile(corp_preproc_picklefile)
docs.max_workers = 1.0
else:
remove_punctuation(docs)
corpus_retokenize(docs)
Expand Down Expand Up @@ -119,7 +121,7 @@

eval_res_picklefile = 'data/topicmod_lda_eval_res.pickle'

if os.path.exists(dtm_picklefile):
if os.path.exists(eval_res_picklefile):
eval_results = unpickle_file(eval_res_picklefile)
else:
const_params = {
Expand Down
8 changes: 7 additions & 1 deletion tmtoolkit/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -765,9 +765,15 @@ def check_context_size(context_size: Union[int, Tuple[int, int], List[int]]) ->


if find_spec('rpy2') is not None:
# silence R startup warnings
# silence R console writes (but store original functions for manual restoring as `rpy2_default_*`)
import rpy2.rinterface_lib.callbacks

rpy2_default_consolewrite_warnerror = rpy2.rinterface_lib.callbacks.consolewrite_warnerror
rpy2.rinterface_lib.callbacks.consolewrite_warnerror = (lambda *args: None)
rpy2_default_consolewrite_print = rpy2.rinterface_lib.callbacks.consolewrite_print
rpy2.rinterface_lib.callbacks.consolewrite_print = (lambda *args: None)
rpy2_default_showmessage = rpy2.rinterface_lib.callbacks.showmessage
rpy2.rinterface_lib.callbacks.showmessage = (lambda *args: None)

import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
Expand Down

0 comments on commit e82a646

Please sign in to comment.