In [4]:
from pathlib import Path
from pprint import pprint

from docembedder.hyperopt.utils import ModelHyperopt
from docembedder.models import TfidfEmbedder, D2VEmbedder, CountVecEmbedder, BPembEmbedder, BERTEmbedder

from hyperopt import space_eval


In [5]:
# General settings for the hyperparameter search.
# Input locations are named up front so they are easy to spot and adjust.
cpc_index_file = Path("/data/index_files/GPCPCs.txt")
patent_data_dir = Path("/data/patents/")

# NOTE(review): the meaning/units of window_size and the exact semantics of
# the year bounds are defined by ModelHyperopt — confirm against its docs.
hype = ModelHyperopt(
    year_start=1850,
    year_end=1950,
    window_size=10,
    n_jobs=1,
    preprocessors=None,
    cpc_fp=cpc_index_file,
    patent_dir=patent_data_dir,
)

In [None]:
# Run the optimization once per model, in a fixed order.
# Warning: this can take quite a while!
# NOTE(review): "bpemp" (rather than "bpemb") is the spelling used by the
# existing method call; it is kept verbatim — confirm against ModelHyperopt.
for model_name in ("tfidf", "d2v", "countvec", "bpemp", "bert"):
    # Look the method up lazily so each optimize_<model> call happens only
    # after the previous model has finished.
    run_optimization = getattr(hype, f"optimize_{model_name}")
    run_optimization(max_evals=10)

# raw results
pprint(hype.best)

In [None]:
# Print readable evaluation results: for every model, the entry stored in
# hype.best is passed through space_eval together with the hyper_space() of
# the matching embedder class.
# NOTE(review): the "bpemp" key spelling is kept exactly as the original
# lookup used it — confirm it matches the keys ModelHyperopt writes.
embedder_by_key = (
    ("tfidf", TfidfEmbedder),
    ("d2v", D2VEmbedder),
    ("countvec", CountVecEmbedder),
    ("bpemp", BPembEmbedder),
    ("bert", BERTEmbedder),
)

for best_key, embedder_cls in embedder_by_key:
    search_space = embedder_cls.hyper_space()
    pprint(space_eval(search_space, hype.best[best_key]))