In [2]:
# `%pylab inline` star-imports numpy and matplotlib.pylab into the namespace.
# It runs first so the explicit imports below take precedence over any
# same-named numpy/pylab symbol (numpy exports `load`, for example).
%pylab inline

import gc
import json
from itertools import islice, chain
from os import makedirs
from os.path import join

import pandas as pd
import seaborn as sns
from gensim.models import LdaModel, LsiModel

from constants import LDA_PATH, LSI_PATH, PARAMS, NBTOPICS, VERSIONS, DATASETS
from utils import load

# Notebook-wide pandas display settings: show wide/long frames without
# truncation and print floats with three decimals.
pd.set_option('display.max_columns', 42)
pd.set_option('display.max_rows', 2000)
pd.set_option('display.width', 800)
pd.set_option('display.precision', 3)

Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


In [3]:
def save_topics(model, model_path, topn=20):
    """Write the top terms of every topic of ``model`` to ``{model_path}.csv``.

    The CSV has one row per topic (in the order returned by
    ``model.show_topics``) and ``2 * topn`` columns: ``term0..term{topn-1}``
    followed by ``weight0..weight{topn-1}``.

    Parameters
    ----------
    model
        Object exposing ``show_topics(num_topics, num_words, formatted)`` that
        returns ``(topic_id, [(term, weight), ...])`` tuples when called with
        ``formatted=False``.
    model_path : str
        Path of the model file; ``.csv`` is appended for the output file.
    topn : int, default 20
        Number of terms requested per topic.
    """
    columns = [f'term{x}' for x in range(topn)] + [f'weight{x}' for x in range(topn)]
    topics = model.show_topics(num_topics=-1, num_words=topn, formatted=False)

    rows = []
    for topic in topics:
        pairs = list(topic[1])
        terms = [term for term, _ in pairs]
        weights = [weight for _, weight in pairs]
        # A topic may yield fewer than `topn` pairs. Pad terms and weights
        # separately so weights never slide into the term columns and the row
        # always matches `columns`.
        missing = topn - len(pairs)
        rows.append(terms + [None] * missing + weights + [float('nan')] * missing)

    topics = pd.DataFrame(rows, columns=columns)
    print(f'    saving topics to {model_path}.csv')
    topics.to_csv(f'{model_path}.csv')

In [4]:
# Post-process already trained LDA models:
#   * check which models exist on disk (a failed load is printed and skipped),
#   * optionally export each model's top terms to `<model path>.csv`,
#   * remove the training callbacks and store the slimmed model under `small/`.
# The callbacks take an unreasonable amount of space and are not needed anymore.

slize = None  # upper bound for the slices below; None processes everything
params = list(PARAMS) + [f'{p}_split' for p in PARAMS]
epochs = 30
do_save_topics = True

for dataset in islice(DATASETS.values(), slize):
    for version in VERSIONS[:1]:  # only the first entry of VERSIONS is processed
        for corpus in ['bow', 'tfidf']:
            for param in params[:slize]:
                for nb in NBTOPICS[:slize]:
                    gc.collect()
                    d = join(LDA_PATH, version, corpus, param)
                    f = f'{dataset}_LDAmodel_{param}_{nb}_{"ep"+str(epochs) if epochs else ""}'
                    df = join(d, f)
                    print(df, end='')
                    try:
                        model = LdaModel.load(df)
                        print('  -- ok')
                        if do_save_topics:
                            save_topics(model, df)
                    except Exception as e:
                        # Best effort: report the problem (typically a model
                        # that has not been trained yet) and carry on.
                        print('  -- ', e)
                    else:
                        # getattr: tolerate models that carry no `callbacks`
                        # attribute at all.
                        if getattr(model, 'callbacks', None) is not None:
                            model.callbacks = None
                            ds = join(d, 'small')
                            dsf = join(ds, f)
                            print('   saving ', dsf)
                            makedirs(ds, exist_ok=True)
                            model.save(dsf)
                            # Reload once to verify the slimmed file is readable.
                            print('   loading', dsf, end='')
                            try:
                                LdaModel.load(dsf)
                                print('  -- ok')
                            except Exception as e:
                                print('  -- ', e)
                        # Drop the reference so the collection below can free it.
                        del model
                    gc.collect()

../data/preprocessed/LDAmodel/noun/bow/a42/dewac_LDAmodel_a42_10_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/bow/a42/dewac_LDAmodel_a42_10_ep30'
../data/preprocessed/LDAmodel/noun/bow/a42/dewac_LDAmodel_a42_25_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/bow/a42/dewac_LDAmodel_a42_25_ep30'
../data/preprocessed/LDAmodel/noun/bow/a42/dewac_LDAmodel_a42_50_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/bow/a42/dewac_LDAmodel_a42_50_ep30'
../data/preprocessed/LDAmodel/noun/bow/a42/dewac_LDAmodel_a42_100_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/bow/a42/dewac_LDAmodel_a42_100_ep30'
../data/preprocessed/LDAmodel/noun/bow/b42/dewac_LDAmodel_b42_10_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/bow/b42/dewac_LDAmodel_b42_10_ep30'
../data/preprocessed/LDAmodel/noun/bow/b42/dewac_LDAmodel_b42_25_ep30  --  [Er

../data/preprocessed/LDAmodel/noun/tfidf/c42/dewac_LDAmodel_c42_25_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/tfidf/c42/dewac_LDAmodel_c42_25_ep30'
../data/preprocessed/LDAmodel/noun/tfidf/c42/dewac_LDAmodel_c42_50_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/tfidf/c42/dewac_LDAmodel_c42_50_ep30'
../data/preprocessed/LDAmodel/noun/tfidf/c42/dewac_LDAmodel_c42_100_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/tfidf/c42/dewac_LDAmodel_c42_100_ep30'
../data/preprocessed/LDAmodel/noun/tfidf/d42/dewac_LDAmodel_d42_10_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/tfidf/d42/dewac_LDAmodel_d42_10_ep30'
../data/preprocessed/LDAmodel/noun/tfidf/d42/dewac_LDAmodel_d42_25_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/tfidf/d42/dewac_LDAmodel_d42_25_ep30'
../data/preprocessed/LDAmodel/noun/tfidf/d42/dewac_LDAmode

../data/preprocessed/LDAmodel/noun/bow/c42/dewac1_LDAmodel_c42_100_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/c42/dewac1_LDAmodel_c42_100_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/d42/dewac1_LDAmodel_d42_10_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/d42/dewac1_LDAmodel_d42_10_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/d42/dewac1_LDAmodel_d42_25_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/d42/dewac1_LDAmodel_d42_25_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/d42/dewac1_LDAmodel_d42_50_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/d42/dewac1_LDAmodel_d42_50_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/d42/dewac1_LDAmodel_d42_100_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/d42/dewac1_LDAmodel_d42_100_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/e42/dewac1_LDAmodel_e42_10_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmod

../data/preprocessed/LDAmodel/noun/tfidf/d42/dewac1_LDAmodel_d42_100_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/tfidf/d42/dewac1_LDAmodel_d42_100_ep30'
../data/preprocessed/LDAmodel/noun/tfidf/e42/dewac1_LDAmodel_e42_10_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/tfidf/e42/dewac1_LDAmodel_e42_10_ep30'
../data/preprocessed/LDAmodel/noun/tfidf/e42/dewac1_LDAmodel_e42_25_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/tfidf/e42/dewac1_LDAmodel_e42_25_ep30'
../data/preprocessed/LDAmodel/noun/tfidf/e42/dewac1_LDAmodel_e42_50_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/tfidf/e42/dewac1_LDAmodel_e42_50_ep30'
../data/preprocessed/LDAmodel/noun/tfidf/e42/dewac1_LDAmodel_e42_100_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/tfidf/e42/dewac1_LDAmodel_e42_100_ep30'
../data/preprocessed/LDAmodel/noun/tfidf/a42_s

../data/preprocessed/LDAmodel/noun/bow/a42_split/dewiki_LDAmodel_a42_split_10_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/a42_split/dewiki_LDAmodel_a42_split_10_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/a42_split/dewiki_LDAmodel_a42_split_25_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/a42_split/dewiki_LDAmodel_a42_split_25_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/a42_split/dewiki_LDAmodel_a42_split_50_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/a42_split/dewiki_LDAmodel_a42_split_50_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/a42_split/dewiki_LDAmodel_a42_split_100_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/a42_split/dewiki_LDAmodel_a42_split_100_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/b42_split/dewiki_LDAmodel_b42_split_10_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/b42_split/dewiki_LDAmodel_b42_split_10_ep30.csv
../data/

../data/preprocessed/LDAmodel/noun/tfidf/b42_split/dewiki_LDAmodel_b42_split_10_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/tfidf/b42_split/dewiki_LDAmodel_b42_split_10_ep30.csv
../data/preprocessed/LDAmodel/noun/tfidf/b42_split/dewiki_LDAmodel_b42_split_25_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/tfidf/b42_split/dewiki_LDAmodel_b42_split_25_ep30'
../data/preprocessed/LDAmodel/noun/tfidf/b42_split/dewiki_LDAmodel_b42_split_50_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/tfidf/b42_split/dewiki_LDAmodel_b42_split_50_ep30'
../data/preprocessed/LDAmodel/noun/tfidf/b42_split/dewiki_LDAmodel_b42_split_100_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/tfidf/b42_split/dewiki_LDAmodel_b42_split_100_ep30'
../data/preprocessed/LDAmodel/noun/tfidf/c42_split/dewiki_LDAmodel_c42_split_10_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/tfidf/

../data/preprocessed/LDAmodel/noun/bow/b42_split/Europarl_LDAmodel_b42_split_100_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/b42_split/Europarl_LDAmodel_b42_split_100_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/c42_split/Europarl_LDAmodel_c42_split_10_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/c42_split/Europarl_LDAmodel_c42_split_10_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/c42_split/Europarl_LDAmodel_c42_split_25_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/c42_split/Europarl_LDAmodel_c42_split_25_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/c42_split/Europarl_LDAmodel_c42_split_50_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/c42_split/Europarl_LDAmodel_c42_split_50_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/c42_split/Europarl_LDAmodel_c42_split_100_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/c42_split/Europarl_LDAmodel_c42_split

../data/preprocessed/LDAmodel/noun/tfidf/c42_split/Europarl_LDAmodel_c42_split_10_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/tfidf/c42_split/Europarl_LDAmodel_c42_split_10_ep30.csv
../data/preprocessed/LDAmodel/noun/tfidf/c42_split/Europarl_LDAmodel_c42_split_25_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/tfidf/c42_split/Europarl_LDAmodel_c42_split_25_ep30.csv
../data/preprocessed/LDAmodel/noun/tfidf/c42_split/Europarl_LDAmodel_c42_split_50_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/tfidf/c42_split/Europarl_LDAmodel_c42_split_50_ep30.csv
../data/preprocessed/LDAmodel/noun/tfidf/c42_split/Europarl_LDAmodel_c42_split_100_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/tfidf/c42_split/Europarl_LDAmodel_c42_split_100_ep30.csv
../data/preprocessed/LDAmodel/noun/tfidf/d42_split/Europarl_LDAmodel_d42_split_10_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/tfidf/d42_split/Europarl

../data/preprocessed/LDAmodel/noun/bow/d42_split/FAZ_LDAmodel_d42_split_10_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/d42_split/FAZ_LDAmodel_d42_split_10_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/d42_split/FAZ_LDAmodel_d42_split_25_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/d42_split/FAZ_LDAmodel_d42_split_25_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/d42_split/FAZ_LDAmodel_d42_split_50_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/d42_split/FAZ_LDAmodel_d42_split_50_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/d42_split/FAZ_LDAmodel_d42_split_100_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/d42_split/FAZ_LDAmodel_d42_split_100_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/e42_split/FAZ_LDAmodel_e42_split_10_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/e42_split/FAZ_LDAmodel_e42_split_10_ep30.csv
../data/preprocessed/LDAmodel/noun/bow

../data/preprocessed/LDAmodel/noun/tfidf/d42_split/FAZ_LDAmodel_d42_split_100_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/tfidf/d42_split/FAZ_LDAmodel_d42_split_100_ep30.csv
../data/preprocessed/LDAmodel/noun/tfidf/e42_split/FAZ_LDAmodel_e42_split_10_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/tfidf/e42_split/FAZ_LDAmodel_e42_split_10_ep30.csv
../data/preprocessed/LDAmodel/noun/tfidf/e42_split/FAZ_LDAmodel_e42_split_25_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/tfidf/e42_split/FAZ_LDAmodel_e42_split_25_ep30.csv
../data/preprocessed/LDAmodel/noun/tfidf/e42_split/FAZ_LDAmodel_e42_split_50_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/tfidf/e42_split/FAZ_LDAmodel_e42_split_50_ep30.csv
../data/preprocessed/LDAmodel/noun/tfidf/e42_split/FAZ_LDAmodel_e42_split_100_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/tfidf/e42_split/FAZ_LDAmodel_e42_split_100_ep30.csv
../data/preproce

../data/preprocessed/LDAmodel/noun/bow/e42_split/FOCUS_LDAmodel_e42_split_100_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/e42_split/FOCUS_LDAmodel_e42_split_100_ep30.csv
../data/preprocessed/LDAmodel/noun/tfidf/a42/FOCUS_LDAmodel_a42_10_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/tfidf/a42/FOCUS_LDAmodel_a42_10_ep30'
../data/preprocessed/LDAmodel/noun/tfidf/a42/FOCUS_LDAmodel_a42_25_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/tfidf/a42/FOCUS_LDAmodel_a42_25_ep30'
../data/preprocessed/LDAmodel/noun/tfidf/a42/FOCUS_LDAmodel_a42_50_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/tfidf/a42/FOCUS_LDAmodel_a42_50_ep30'
../data/preprocessed/LDAmodel/noun/tfidf/a42/FOCUS_LDAmodel_a42_100_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/tfidf/a42/FOCUS_LDAmodel_a42_100_ep30'
../data/preprocessed/LDAmodel/noun/tfidf/b42/FOC

../data/preprocessed/LDAmodel/noun/bow/a42/news_LDAmodel_a42_25_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/a42/news_LDAmodel_a42_25_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/a42/news_LDAmodel_a42_50_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/a42/news_LDAmodel_a42_50_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/a42/news_LDAmodel_a42_100_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/a42/news_LDAmodel_a42_100_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/b42/news_LDAmodel_b42_10_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/b42/news_LDAmodel_b42_10_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/b42/news_LDAmodel_b42_25_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/b42/news_LDAmodel_b42_25_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/b42/news_LDAmodel_b42_50_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/b42/news_LDA

../data/preprocessed/LDAmodel/noun/tfidf/d42/news_LDAmodel_d42_25_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/tfidf/d42/news_LDAmodel_d42_25_ep30'
../data/preprocessed/LDAmodel/noun/tfidf/d42/news_LDAmodel_d42_50_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/tfidf/d42/news_LDAmodel_d42_50_ep30'
../data/preprocessed/LDAmodel/noun/tfidf/d42/news_LDAmodel_d42_100_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/tfidf/d42/news_LDAmodel_d42_100_ep30'
../data/preprocessed/LDAmodel/noun/tfidf/e42/news_LDAmodel_e42_10_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/tfidf/e42/news_LDAmodel_e42_10_ep30'
../data/preprocessed/LDAmodel/noun/tfidf/e42/news_LDAmodel_e42_25_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/tfidf/e42/news_LDAmodel_e42_25_ep30'
../data/preprocessed/LDAmodel/noun/tfidf/e42/news_LDAmodel_e42_50_ep

../data/preprocessed/LDAmodel/noun/bow/d42/OnlineParticipation_LDAmodel_d42_100_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/d42/OnlineParticipation_LDAmodel_d42_100_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/e42/OnlineParticipation_LDAmodel_e42_10_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/e42/OnlineParticipation_LDAmodel_e42_10_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/e42/OnlineParticipation_LDAmodel_e42_25_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/e42/OnlineParticipation_LDAmodel_e42_25_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/e42/OnlineParticipation_LDAmodel_e42_50_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/e42/OnlineParticipation_LDAmodel_e42_50_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/e42/OnlineParticipation_LDAmodel_e42_100_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/e42/OnlineParticipation_LDAmodel_e42_100_ep30.

../data/preprocessed/LDAmodel/noun/tfidf/e42/OnlineParticipation_LDAmodel_e42_25_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/tfidf/e42/OnlineParticipation_LDAmodel_e42_25_ep30'
../data/preprocessed/LDAmodel/noun/tfidf/e42/OnlineParticipation_LDAmodel_e42_50_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/tfidf/e42/OnlineParticipation_LDAmodel_e42_50_ep30'
../data/preprocessed/LDAmodel/noun/tfidf/e42/OnlineParticipation_LDAmodel_e42_100_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/tfidf/e42/OnlineParticipation_LDAmodel_e42_100_ep30'
../data/preprocessed/LDAmodel/noun/tfidf/a42_split/OnlineParticipation_LDAmodel_a42_split_10_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/tfidf/a42_split/OnlineParticipation_LDAmodel_a42_split_10_ep30.csv
../data/preprocessed/LDAmodel/noun/tfidf/a42_split/OnlineParticipation_LDAmodel_a42_split_25_ep30  -- ok
    saving topics

../data/preprocessed/LDAmodel/noun/bow/d42/PoliticalSpeeches_LDAmodel_d42_100_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/d42/PoliticalSpeeches_LDAmodel_d42_100_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/e42/PoliticalSpeeches_LDAmodel_e42_10_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/e42/PoliticalSpeeches_LDAmodel_e42_10_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/e42/PoliticalSpeeches_LDAmodel_e42_25_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/e42/PoliticalSpeeches_LDAmodel_e42_25_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/e42/PoliticalSpeeches_LDAmodel_e42_50_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/e42/PoliticalSpeeches_LDAmodel_e42_50_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/e42/PoliticalSpeeches_LDAmodel_e42_100_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/e42/PoliticalSpeeches_LDAmodel_e42_100_ep30.csv
../data/preproce

../data/preprocessed/LDAmodel/noun/tfidf/e42/PoliticalSpeeches_LDAmodel_e42_50_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/tfidf/e42/PoliticalSpeeches_LDAmodel_e42_50_ep30'
../data/preprocessed/LDAmodel/noun/tfidf/e42/PoliticalSpeeches_LDAmodel_e42_100_ep30  --  [Errno 2] No such file or directory: '../data/preprocessed/LDAmodel/noun/tfidf/e42/PoliticalSpeeches_LDAmodel_e42_100_ep30'
../data/preprocessed/LDAmodel/noun/tfidf/a42_split/PoliticalSpeeches_LDAmodel_a42_split_10_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/tfidf/a42_split/PoliticalSpeeches_LDAmodel_a42_split_10_ep30.csv
../data/preprocessed/LDAmodel/noun/tfidf/a42_split/PoliticalSpeeches_LDAmodel_a42_split_25_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/tfidf/a42_split/PoliticalSpeeches_LDAmodel_a42_split_25_ep30.csv
../data/preprocessed/LDAmodel/noun/tfidf/a42_split/PoliticalSpeeches_LDAmodel_a42_split_50_ep30  -- ok
    saving topics to ..

../data/preprocessed/LDAmodel/noun/bow/e42/speeches_LDAmodel_e42_50_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/e42/speeches_LDAmodel_e42_50_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/e42/speeches_LDAmodel_e42_100_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/e42/speeches_LDAmodel_e42_100_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/a42_split/speeches_LDAmodel_a42_split_10_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/a42_split/speeches_LDAmodel_a42_split_10_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/a42_split/speeches_LDAmodel_a42_split_25_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/a42_split/speeches_LDAmodel_a42_split_25_ep30.csv
../data/preprocessed/LDAmodel/noun/bow/a42_split/speeches_LDAmodel_a42_split_50_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/bow/a42_split/speeches_LDAmodel_a42_split_50_ep30.csv
../data/preprocessed/LDAmodel/noun/b

../data/preprocessed/LDAmodel/noun/tfidf/a42_split/speeches_LDAmodel_a42_split_10_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/tfidf/a42_split/speeches_LDAmodel_a42_split_10_ep30.csv
../data/preprocessed/LDAmodel/noun/tfidf/a42_split/speeches_LDAmodel_a42_split_25_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/tfidf/a42_split/speeches_LDAmodel_a42_split_25_ep30.csv
../data/preprocessed/LDAmodel/noun/tfidf/a42_split/speeches_LDAmodel_a42_split_50_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/tfidf/a42_split/speeches_LDAmodel_a42_split_50_ep30.csv
../data/preprocessed/LDAmodel/noun/tfidf/a42_split/speeches_LDAmodel_a42_split_100_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/tfidf/a42_split/speeches_LDAmodel_a42_split_100_ep30.csv
../data/preprocessed/LDAmodel/noun/tfidf/b42_split/speeches_LDAmodel_b42_split_10_ep30  -- ok
    saving topics to ../data/preprocessed/LDAmodel/noun/tfidf/b42_split/speeches

In [None]:
# LSI: check which LSI models exist on disk and optionally export each model's
# top terms to `<model path>.csv`. A failed load is printed and skipped.
slize = None  # upper bound for the slices below; None processes everything
do_save_topics = True

for version in VERSIONS[:slize]:
    for corpus in ['bow', 'tfidf']:
        for dataset in islice(DATASETS.values(), slize):
            # tuple(...) keeps the concatenation valid whatever sequence type
            # NBTOPICS is.
            for nb in tuple(NBTOPICS) + (200,):
                gc.collect()
                d = join(LSI_PATH, version, corpus)
                f = f'{dataset}_LSImodel_{nb}'
                df = join(d, f)
                print(df, end='')
                try:
                    model = LsiModel.load(df)
                    print('  -- ok')
                    if do_save_topics:
                        save_topics(model, df)
                except Exception as e:
                    # Best effort: report the problem and continue.
                    print('  -- ', e)
                else:
                    # Drop the reference so the collection below can free the
                    # model before the next one is loaded.
                    del model
                gc.collect()

In [None]:
from topic_reranking import TopicsLoader

# Settings for loading the stored topics of one dataset.
dataset = 'O'  # presumably a dataset key/abbreviation from DATASETS — TODO confirm
param_ids = PARAMS
nbs_topics = NBTOPICS
version = 'noun'
corpus = 'bow'

# NOTE(review): the meaning of each keyword is defined by
# topic_reranking.TopicsLoader, which is not visible in this notebook.
tl = TopicsLoader(
    dataset=dataset,
    param_ids=param_ids, 
    nbs_topics=nbs_topics,
    version=version,
    corpus_type=corpus, 
    topn=20, 
    filter_bad_terms=True,
    include_weights=True
)

In [None]:
# Show the `topics` attribute of the loader as this cell's output.
tl.topics