In [116]:
from pathlib import Path
import pickle
import os

from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from navec import Navec

import pandas as pd
import nltk
import torch

from deepxml.dataset import MultiLabelDataset
from deepxml.evaluation import get_p_1, get_p_5, get_p_10, get_n_1, get_n_5, get_n_10
from deepxml.models import Model

from models.preprocessing import texts_preprocessing
from models.lda_correction_network import LDACorrectionNet, CorNetLDACorrectionNet, LDACorrectionNetLarge, CorNetLDACorrectionNetLarge
from models.lda_encoders import LDAEmbeddings

nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /home/daniil/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /home/daniil/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [3]:
torch.cuda.is_available()

True

## Preprocessing Data

In [4]:
df = pd.read_parquet("../data/habr_posts_dataset.parquet")

In [5]:
df.head()

Unnamed: 0,post_id,author,title,tags,text
0,807711,Kaspersky_Lab,Security Week 2416: уязвимость в серверных мат...,"[Блог компании «Лаборатория Касперского», Инфо...",На прошлой неделе исследователи компании Binar...
1,807709,markshevchenko,Вычислительные выражения: Подробнее про типы-о...,"[.NET, Функциональное программирование, F#]",В предыдущем посте мы познакомились с концепци...
2,807707,ru_vds,Угадай местоположение льдины с арктическим ЦОД...,"[Блог компании RUVDS.com, Хостинг, Системное а...","Как вы наверняка знаете, 12 апреля RUVDS успеш..."
3,807705,shaddyk,Запустили проект с НСИС по повышению качества ...,"[Блог компании HFLabs, Открытые данные, IT-ком...",НСИС — оператор единой автоматизированной инфо...
4,807703,VokaMut,Тестируем AI на создании прикладного приложения,"[Веб-разработка, Искусственный интеллект, Natu...","Всем привет, я Григорий Тумаков, CTO в Моризо ..."


In [6]:
# Hold out 20% of the posts for testing, then 20% of the remaining 80% for
# validation: the final proportions are 64% train / 16% val / 20% test.
# Both calls pass a fixed random_state so the split is repeatable.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
train_df, val_df = train_test_split(train_df, test_size=0.2, random_state=42)

In [7]:
train_texts = texts_preprocessing(train_df["text"].to_list())
val_texts = texts_preprocessing(val_df["text"].to_list())
test_texts = texts_preprocessing(test_df["text"].to_list())

In [8]:
# The label vocabulary is fitted on the training tags only and then reused for
# val/test, so all three indicator matrices share the same columns.
# sparse_output=True asks the binarizer for sparse matrices.
# NOTE(review): tags that appear only in val/test are not part of the fitted
# vocabulary; scikit-learn's transform() is documented to ignore such classes
# with a warning — confirm this is acceptable for the reported metrics.
mlb = MultiLabelBinarizer(sparse_output=True)
train_labels = mlb.fit_transform(train_df["tags"].to_list())
val_labels = mlb.transform(val_df["tags"].to_list())
test_labels = mlb.transform(test_df["tags"].to_list())



In [9]:
results_df = pd.DataFrame(columns=["model_name", "P@1", "P@5", "P@10", "N@1", "N@5", "N@10", "time", "size"])

### LDA Embeddings (num_topics=num_labels)

In [10]:
lda_embs = LDAEmbeddings(train_labels.shape[1])

In [11]:
%%time
# The LDA embedder is fitted on the training texts only; the validation and
# test texts are passed through transform() of the already-fitted object.
train_embs = lda_embs.fit_transform(train_texts)
val_embs = lda_embs.transform(val_texts)
test_embs = lda_embs.transform(test_texts)

CPU times: user 8min 37s, sys: 14min 8s, total: 22min 45s
Wall time: 3min 56s


In [12]:
# Mini-batches of 8 samples for every split; only the training loader shuffles.
train_loader = DataLoader(MultiLabelDataset(train_embs, train_labels), batch_size=8, shuffle=True)
val_loader = DataLoader(MultiLabelDataset(val_embs, val_labels), batch_size=8, shuffle=False)
test_loader = DataLoader(MultiLabelDataset(test_embs, test_labels), batch_size=8, shuffle=False)

In [13]:
model = Model(network=LDACorrectionNet,
              emb_size=300, num_labels=train_labels.shape[1], num_topics=train_labels.shape[1])

In [14]:
%%time
model.train(train_loader, val_loader, optim_params={"lr":1e-2})

	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at ../torch/csrc/utils/python_arg_parser.cpp:1578.)
  exp_avg.mul_(beta1).add_(1 - beta1, grad)


0 800 train loss: 0.0937443 valid loss: 0.0331649 P@5: 0.07286 N@5: 0.10954 early stop: 0
0 1600 train loss: 0.0317167 valid loss: 0.0307127 P@5: 0.07371 N@5: 0.10701 early stop: 0
0 2400 train loss: 0.0297573 valid loss: 0.0301985 P@5: 0.08114 N@5: 0.11558 early stop: 0
1 400 train loss: 0.0302892 valid loss: 0.0299662 P@5: 0.08114 N@5: 0.11212 early stop: 0
1 1200 train loss: 0.0301741 valid loss: 0.0300037 P@5: 0.06229 N@5: 0.09790 early stop: 0
1 2000 train loss: 0.0293873 valid loss: 0.0299897 P@5: 0.07514 N@5: 0.11006 early stop: 0
1 2800 train loss: 0.0300107 valid loss: 0.0299394 P@5: 0.07257 N@5: 0.10384 early stop: 0
2 800 train loss: 0.0297223 valid loss: 0.0299269 P@5: 0.07629 N@5: 0.11371 early stop: 0
2 1600 train loss: 0.0296573 valid loss: 0.0299116 P@5: 0.07686 N@5: 0.11344 early stop: 0
2 2400 train loss: 0.0301420 valid loss: 0.0299866 P@5: 0.08343 N@5: 0.11638 early stop: 0
3 400 train loss: 0.0297882 valid loss: 0.0299826 P@5: 0.07600 N@5: 0.11478 early stop: 0
3 1

In [15]:
test_res = model.predict(test_loader)

Predict:   0%|          | 0/110 [00:00<?, ?it/s]

                                                

In [16]:
# Score the second element of the predict() result against the test labels.
# The metric order mirrors the P@1, P@5, P@10, N@1, N@5, N@10 columns that this
# list is spliced into when the results row is built.
metrics = [
    metric(test_res[1], test_labels)
    for metric in (get_p_1, get_p_5, get_p_10, get_n_1, get_n_5, get_n_10)
]
metrics

[0.3447488584474886,
 0.20753424657534247,
 0.13858447488584474,
 0.3447488584474886,
 0.33397402221971173,
 0.3848880820359636]

In [17]:
def save_model(lda_embs, model, name, models_dir="../data/models"):
    """Pickle a trained model and its LDA embedder into ``<models_dir>/<name>/``.

    Args:
        lda_embs: Fitted LDA embedding object; written to ``lda_embs.pickle``.
        model: Trained model object; written to ``model.pickle``.
        name: Name of the sub-directory that holds this experiment's files.
        models_dir: Root directory for saved models. Defaults to the location
            the notebook has always used (``../data/models``).

    Existing pickle files in the target directory are overwritten.
    """
    target_dir = Path(models_dir) / name
    # os.mkdir raised FileExistsError when the cell was re-run and
    # FileNotFoundError when the parent directory did not exist yet.
    target_dir.mkdir(parents=True, exist_ok=True)
    for filename, obj in (("model.pickle", model), ("lda_embs.pickle", lda_embs)):
        with open(target_dir / filename, "wb") as f:
            pickle.dump(obj, f)

In [25]:
# NOTE(review): "num_labrls" looks like a typo for "num_labels" (the spelling
# used in the other saved-model names). It is left unchanged because the string
# is the on-disk directory name; rename it only together with any code that
# loads from this path.
save_model(lda_embs, model, "LDA(num_topics=num_labrls)")

In [18]:
# Row for the plain LDACorrectionNet run evaluated above. The label used to be
# "CorNetLDACorrectionNet", which mislabelled this run and duplicated the name
# of the CorNet row appended later.
lda_row = pd.DataFrame(
    [["LDACorrectionNet", *metrics, "3min 56s + 2min 57s", "1.7 Gb + 7.8 Mb"]],
    columns=["model_name", "P@1", "P@5", "P@10", "N@1", "N@5", "N@10", "time", "size"],
)
# results_df is still empty the first time this cell runs; concatenating onto
# an empty frame is deprecated in recent pandas, so start the table from the
# row itself in that case.
results_df = lda_row if results_df.empty else pd.concat([results_df, lda_row])

  results_df = pd.concat([results_df,


In [85]:
model = Model(network=CorNetLDACorrectionNet,
              emb_size=300, num_labels=train_labels.shape[1], num_topics=train_labels.shape[1])

In [86]:
%%time
model.train(train_loader, val_loader, optim_params={"lr":1e-2})

0 800 train loss: 0.0688915 valid loss: 0.0309646 P@5: 0.07771 N@5: 0.11519 early stop: 0
0 1600 train loss: 0.0304047 valid loss: 0.0303425 P@5: 0.07886 N@5: 0.11676 early stop: 0
0 2400 train loss: 0.0302113 valid loss: 0.0304084 P@5: 0.06743 N@5: 0.08246 early stop: 0
1 400 train loss: 0.0303187 valid loss: 0.0306888 P@5: 0.06371 N@5: 0.09674 early stop: 0
1 1200 train loss: 0.0306011 valid loss: 0.0302712 P@5: 0.07114 N@5: 0.10336 early stop: 0
1 2000 train loss: 0.0298866 valid loss: 0.0303687 P@5: 0.06800 N@5: 0.10002 early stop: 0
1 2800 train loss: 0.0306319 valid loss: 0.0300972 P@5: 0.07600 N@5: 0.11225 early stop: 0
2 800 train loss: 0.0300871 valid loss: 0.0300718 P@5: 0.07800 N@5: 0.11066 early stop: 0
2 1600 train loss: 0.0298875 valid loss: 0.0302885 P@5: 0.06886 N@5: 0.09713 early stop: 0
2 2400 train loss: 0.0310915 valid loss: 0.0302050 P@5: 0.06743 N@5: 0.10119 early stop: 0
3 400 train loss: 0.0297334 valid loss: 0.0302089 P@5: 0.07829 N@5: 0.10966 early stop: 0
3 1

In [87]:
test_res = model.predict(test_loader)

                                                            

In [88]:
# Same metric order as the P@1 ... N@10 columns of the results table.
metrics = [
    metric(test_res[1], test_labels)
    for metric in (get_p_1, get_p_5, get_p_10, get_n_1, get_n_5, get_n_10)
]
metrics

[0.2888127853881279,
 0.17990867579908676,
 0.12773972602739725,
 0.2888127853881279,
 0.2868693589903636,
 0.3424568924843919]

In [None]:
# NOTE(review): "num_labrls" looks like a typo for "num_labels" (the spelling
# used in the other saved-model names). It is left unchanged because the string
# is the on-disk directory name; rename it only together with any code that
# loads from this path.
save_model(lda_embs, model, "LDA(num_topics=num_labrls) + CorNet")

In [89]:
# Append the CorNetLDACorrectionNet test metrics to the comparison table.
# NOTE(review): the first term of the time string is "3min 17s", while the other
# rows built on the num_topics=num_labels LDA fit report "3min 56s" — confirm
# which value is intended.
results_df = pd.concat([results_df, 
                        pd.DataFrame([["CorNetLDACorrectionNet"]+metrics+["3min 17s + 2min 11s"]+["1.7 Gb + 10 Mb"]], 
                                     columns=["model_name", "P@1", "P@5", "P@10", "N@1", "N@5", "N@10", "time", "size"])])

### LDA(num_topics=300)

In [99]:
%%time
lda_embs = LDAEmbeddings(300)

train_embs = lda_embs.fit_transform(train_texts)
val_embs = lda_embs.transform(val_texts)
test_embs = lda_embs.transform(test_texts)

train_loader = DataLoader(MultiLabelDataset(train_embs, train_labels),
                          8, shuffle=True)
val_loader = DataLoader(MultiLabelDataset(val_embs, val_labels),
                          8, shuffle=False)
test_loader = DataLoader(MultiLabelDataset(test_embs, test_labels),
                          8, shuffle=False)

CPU times: user 2min 52s, sys: 4min 24s, total: 7min 16s
Wall time: 1min 17s


In [100]:
model = Model(network=LDACorrectionNet, 
              emb_size=300, num_labels=train_labels.shape[1], num_topics=300)

In [101]:
%%time
model.train(train_loader, val_loader)

0 800 train loss: 0.0922166 valid loss: 0.0330804 P@5: 0.08343 N@5: 0.11780 early stop: 0
0 1600 train loss: 0.0319736 valid loss: 0.0307707 P@5: 0.07629 N@5: 0.11121 early stop: 0
0 2400 train loss: 0.0303546 valid loss: 0.0301470 P@5: 0.08029 N@5: 0.11029 early stop: 0
1 400 train loss: 0.0299066 valid loss: 0.0301522 P@5: 0.08057 N@5: 0.11473 early stop: 0
1 1200 train loss: 0.0297687 valid loss: 0.0300357 P@5: 0.07514 N@5: 0.11416 early stop: 0
1 2000 train loss: 0.0298948 valid loss: 0.0298915 P@5: 0.08114 N@5: 0.10794 early stop: 0
1 2800 train loss: 0.0302858 valid loss: 0.0299329 P@5: 0.07114 N@5: 0.10819 early stop: 0
2 800 train loss: 0.0293881 valid loss: 0.0301162 P@5: 0.07600 N@5: 0.10943 early stop: 0
2 1600 train loss: 0.0292202 valid loss: 0.0300098 P@5: 0.07914 N@5: 0.10702 early stop: 0
2 2400 train loss: 0.0305586 valid loss: 0.0299587 P@5: 0.06914 N@5: 0.10423 early stop: 0
3 400 train loss: 0.0308814 valid loss: 0.0298849 P@5: 0.08143 N@5: 0.11231 early stop: 0
3 1

In [104]:
test_res = model.predict(test_loader)

Predict:   0%|          | 0/110 [00:00<?, ?it/s]

                                                          

In [105]:
# Same metric order as the P@1 ... N@10 columns of the results table.
metrics = [
    metric(test_res[1], test_labels)
    for metric in (get_p_1, get_p_5, get_p_10, get_n_1, get_n_5, get_n_10)
]
metrics

[0.3139269406392694,
 0.18881278538812785,
 0.12990867579908677,
 0.3139269406392694,
 0.3007988194140575,
 0.3532950178017858]

In [96]:
save_model(lda_embs, model, "LDA(num_topics=300)")

In [106]:
# Record the test metrics of the 300-topic LDACorrectionNet run.
results_df = pd.concat([
    results_df,
    pd.DataFrame(
        [["LDACorrectionNet(num_topics=300)", *metrics, "1min 17s + 1min 35s", "836 Mb + 8 Mb"]],
        columns=["model_name", "P@1", "P@5", "P@10", "N@1", "N@5", "N@10", "time", "size"],
    ),
])

## LDACorrectionNetLarge

In [107]:
# lda_embs was replaced by the 300-topic embedder above, so the
# one-topic-per-label embedder is fitted again here and the loaders rebuilt.
lda_embs = LDAEmbeddings(train_labels.shape[1])

train_embs = lda_embs.fit_transform(train_texts)
val_embs, test_embs = lda_embs.transform(val_texts), lda_embs.transform(test_texts)

train_loader = DataLoader(MultiLabelDataset(train_embs, train_labels), batch_size=8, shuffle=True)
val_loader = DataLoader(MultiLabelDataset(val_embs, val_labels), batch_size=8, shuffle=False)
test_loader = DataLoader(MultiLabelDataset(test_embs, test_labels), batch_size=8, shuffle=False)

In [109]:
model = Model(network=LDACorrectionNetLarge,
              emb_size=600, num_labels=train_labels.shape[1], num_topics=train_labels.shape[1], hidden_states=[900, 1500, 900])

In [110]:
model.train(train_loader, val_loader, opt_params={"lr": 1e-4}, nb_epoch=50)

0 800 train loss: 0.1989133 valid loss: 0.0463999 P@5: 0.07171 N@5: 0.10479 early stop: 0
0 1600 train loss: 0.0580269 valid loss: 0.0364868 P@5: 0.06029 N@5: 0.07257 early stop: 0
0 2400 train loss: 0.0440270 valid loss: 0.0334983 P@5: 0.06829 N@5: 0.09986 early stop: 0
1 400 train loss: 0.0364868 valid loss: 0.0314669 P@5: 0.08771 N@5: 0.12554 early stop: 0
1 1200 train loss: 0.0316054 valid loss: 0.0312331 P@5: 0.07200 N@5: 0.11445 early stop: 0
1 2000 train loss: 0.0324573 valid loss: 0.0310655 P@5: 0.08771 N@5: 0.12237 early stop: 0
1 2800 train loss: 0.0317667 valid loss: 0.0304204 P@5: 0.08971 N@5: 0.13921 early stop: 0
2 800 train loss: 0.0303106 valid loss: 0.0302631 P@5: 0.09029 N@5: 0.12829 early stop: 0
2 1600 train loss: 0.0303209 valid loss: 0.0299626 P@5: 0.09029 N@5: 0.14422 early stop: 0
2 2400 train loss: 0.0303481 valid loss: 0.0297050 P@5: 0.09486 N@5: 0.14484 early stop: 0
3 400 train loss: 0.0298457 valid loss: 0.0295077 P@5: 0.09829 N@5: 0.14074 early stop: 0
3 1

In [111]:
test_res = model.predict(test_loader)

Predict:   0%|          | 0/110 [00:00<?, ?it/s]

                                                          

In [112]:
# Same metric order as the P@1 ... N@10 columns of the results table.
metrics = [
    metric(test_res[1], test_labels)
    for metric in (get_p_1, get_p_5, get_p_10, get_n_1, get_n_5, get_n_10)
]
metrics

[0.3698630136986301,
 0.21484018264840182,
 0.13926940639269406,
 0.3698630136986301,
 0.3510672088705668,
 0.3984654584139152]

In [113]:
save_model(lda_embs, model, "LargeLDA(num_topics=num_labels)")

In [114]:
# Record the test metrics of the LDACorrectionNetLarge run.
results_df = pd.concat([
    results_df,
    pd.DataFrame(
        [["LDACorrectionNetLarge", *metrics, "3min 56s + 4min 48s", "1.7 Gb + 97 Mb"]],
        columns=["model_name", "P@1", "P@5", "P@10", "N@1", "N@5", "N@10", "time", "size"],
    ),
])

In [117]:
model = Model(network=CorNetLDACorrectionNetLarge,
              emb_size=600, num_labels=train_labels.shape[1], num_topics=train_labels.shape[1], hidden_states=[900, 1500, 900])

In [118]:
model.train(train_loader, val_loader, opt_params={"lr": 1e-4}, nb_epoch=50)

0 800 train loss: 0.1602747 valid loss: 0.0383697 P@5: 0.07029 N@5: 0.09374 early stop: 0
0 1600 train loss: 0.0405256 valid loss: 0.0339828 P@5: 0.06657 N@5: 0.09521 early stop: 0
0 2400 train loss: 0.0369752 valid loss: 0.0315858 P@5: 0.07600 N@5: 0.11549 early stop: 0
1 400 train loss: 0.0323199 valid loss: 0.0306272 P@5: 0.08686 N@5: 0.12361 early stop: 0
1 1200 train loss: 0.0312475 valid loss: 0.0301715 P@5: 0.08486 N@5: 0.13393 early stop: 0
1 2000 train loss: 0.0309072 valid loss: 0.0299855 P@5: 0.09429 N@5: 0.14447 early stop: 0
1 2800 train loss: 0.0306725 valid loss: 0.0294978 P@5: 0.10543 N@5: 0.16383 early stop: 0
2 800 train loss: 0.0297433 valid loss: 0.0293379 P@5: 0.11029 N@5: 0.17198 early stop: 0
2 1600 train loss: 0.0293589 valid loss: 0.0293193 P@5: 0.10029 N@5: 0.15905 early stop: 0
2 2400 train loss: 0.0294809 valid loss: 0.0287468 P@5: 0.11429 N@5: 0.18663 early stop: 0
3 400 train loss: 0.0287873 valid loss: 0.0287276 P@5: 0.12086 N@5: 0.18757 early stop: 0
3 1

In [119]:
test_res = model.predict(test_loader)

Predict:   0%|          | 0/110 [00:00<?, ?it/s]

                                                           

In [120]:
# Same metric order as the P@1 ... N@10 columns of the results table.
metrics = [
    metric(test_res[1], test_labels)
    for metric in (get_p_1, get_p_5, get_p_10, get_n_1, get_n_5, get_n_10)
]
metrics

[0.3550228310502283,
 0.20821917808219179,
 0.138013698630137,
 0.3550228310502283,
 0.3415911468457437,
 0.39143371266886173]

In [None]:
save_model(lda_embs, model, "LargeLDA(num_topics=num_labels) + CorNet")

In [121]:
# Record the test metrics of the CorNetLDACorrectionNetLarge run.
results_df = pd.concat([
    results_df,
    pd.DataFrame(
        [["CorNetLDACorrectionNetLarge", *metrics, "3min 56s + 4min 56s", "1.7 Gb + 103 Mb"]],
        columns=["model_name", "P@1", "P@5", "P@10", "N@1", "N@5", "N@10", "time", "size"],
    ),
])

### Add initialization from Navec embeddings

In [122]:
navec = Navec.load("../data/navec_hudlit_v1_12B_500K_300d_100q.tar")

In [123]:
topics_embs = lda_embs.get_mean_topic_embandings(navec)

100%|██████████| 548/548 [03:38<00:00,  2.51it/s]


In [124]:
model = Model(network=LDACorrectionNet,
              emb_size=300, num_labels=train_labels.shape[1], num_topics=train_labels.shape[1], init_embs=topics_embs)

In [125]:
%%time
model.train(train_loader, val_loader, opt_params={"lr": 1e-3})

0 800 train loss: 0.0948657 valid loss: 0.0332008 P@5: 0.08343 N@5: 0.11876 early stop: 0
0 1600 train loss: 0.0315414 valid loss: 0.0307066 P@5: 0.07514 N@5: 0.11036 early stop: 0
0 2400 train loss: 0.0308744 valid loss: 0.0303563 P@5: 0.07257 N@5: 0.10521 early stop: 0
1 400 train loss: 0.0293636 valid loss: 0.0301267 P@5: 0.07200 N@5: 0.10399 early stop: 0
1 1200 train loss: 0.0301600 valid loss: 0.0299820 P@5: 0.07200 N@5: 0.10711 early stop: 0
1 2000 train loss: 0.0294593 valid loss: 0.0299008 P@5: 0.08457 N@5: 0.11592 early stop: 0
1 2800 train loss: 0.0302023 valid loss: 0.0298478 P@5: 0.07314 N@5: 0.10820 early stop: 0
2 800 train loss: 0.0297940 valid loss: 0.0300501 P@5: 0.08229 N@5: 0.11867 early stop: 0
2 1600 train loss: 0.0301253 valid loss: 0.0299185 P@5: 0.07571 N@5: 0.10692 early stop: 0
2 2400 train loss: 0.0299702 valid loss: 0.0299361 P@5: 0.06200 N@5: 0.08502 early stop: 0
3 400 train loss: 0.0293192 valid loss: 0.0299867 P@5: 0.07429 N@5: 0.10828 early stop: 0
3 1

In [126]:
test_res = model.predict(test_loader)

                                                

In [127]:
# Same metric order as the P@1 ... N@10 columns of the results table.
metrics = [
    metric(test_res[1], test_labels)
    for metric in (get_p_1, get_p_5, get_p_10, get_n_1, get_n_5, get_n_10)
]
metrics

[0.3641552511415525,
 0.20684931506849316,
 0.14132420091324202,
 0.3641552511415525,
 0.3372622402061743,
 0.39297114342816125]

In [None]:
save_model(lda_embs, model, "LDA(num_topics=num_labels) + navek_init")

In [128]:
# Record the test metrics of the LDACorrectionNet run that used init_embs.
results_df = pd.concat([
    results_df,
    pd.DataFrame(
        [["LDACorrectionNet with init", *metrics, "3min 56s + 3min 39s + 1min 30s", "1.7 Gb + 8 Mb"]],
        columns=["model_name", "P@1", "P@5", "P@10", "N@1", "N@5", "N@10", "time", "size"],
    ),
])

In [138]:
# Large network initialised from the Navec-derived topic embeddings.
# NOTE(review): emb_size is 300 here, whereas the earlier LDACorrectionNetLarge
# runs without init_embs used emb_size=600 — presumably reduced to match the
# 300-d Navec vectors (the archive name contains "300d"); confirm, since it
# means the two "Large" rows in the results table differ in more than the
# initialisation.
model = Model(network=LDACorrectionNetLarge,
              emb_size=300, num_labels=train_labels.shape[1], num_topics=train_labels.shape[1], hidden_states=[900, 1500, 900],
              init_embs=topics_embs)

In [139]:
%%time
model.train(train_loader, val_loader, opt_params={"lr": 1e-4}, nb_epoch=50)

0 800 train loss: 0.1099526 valid loss: 0.0304924 P@5: 0.07000 N@5: 0.10580 early stop: 0
0 1600 train loss: 0.0307205 valid loss: 0.0305396 P@5: 0.07429 N@5: 0.11198 early stop: 0
0 2400 train loss: 0.0305679 valid loss: 0.0305632 P@5: 0.06714 N@5: 0.07909 early stop: 0
1 400 train loss: 0.0308407 valid loss: 0.0301646 P@5: 0.07600 N@5: 0.10585 early stop: 0
1 1200 train loss: 0.0303319 valid loss: 0.0304293 P@5: 0.07143 N@5: 0.10702 early stop: 0
1 2000 train loss: 0.0303538 valid loss: 0.0301880 P@5: 0.06486 N@5: 0.09751 early stop: 0
1 2800 train loss: 0.0307772 valid loss: 0.0304892 P@5: 0.06600 N@5: 0.09786 early stop: 0
2 800 train loss: 0.0308837 valid loss: 0.0302520 P@5: 0.08114 N@5: 0.10625 early stop: 0
2 1600 train loss: 0.0296426 valid loss: 0.0302876 P@5: 0.07857 N@5: 0.11881 early stop: 0
2 2400 train loss: 0.0305746 valid loss: 0.0302469 P@5: 0.05886 N@5: 0.09439 early stop: 0
3 400 train loss: 0.0299799 valid loss: 0.0301903 P@5: 0.06114 N@5: 0.08823 early stop: 0
3 1

In [140]:
test_res = model.predict(test_loader)

                                                           

In [141]:
# Same metric order as the P@1 ... N@10 columns of the results table.
metrics = [
    metric(test_res[1], test_labels)
    for metric in (get_p_1, get_p_5, get_p_10, get_n_1, get_n_5, get_n_10)
]
metrics

[0.3162100456621005,
 0.18812785388127853,
 0.12990867579908677,
 0.3162100456621005,
 0.3005792302593037,
 0.3526834525810236]

In [143]:
save_model(lda_embs, model, "LargeLDA(num_topics=num_labels) + navek_init")

In [144]:
# Record the test metrics of the LDACorrectionNetLarge run that used init_embs.
results_df = pd.concat([
    results_df,
    pd.DataFrame(
        [["LDACorrectionNetLarge with init", *metrics, "3min 56s + 3min 39s + 4min 23s", "1.7 Gb + 97 Mb"]],
        columns=["model_name", "P@1", "P@5", "P@10", "N@1", "N@5", "N@10", "time", "size"],
    ),
])

In [151]:
results_df

Unnamed: 0,model_name,P@1,P@5,P@10,N@1,N@5,N@10,time,size
0,LDACorrectionNet,0.344749,0.207534,0.138584,0.344749,0.333974,0.384888,3min 56s + 2min 57s,1.7 Gb + 7.8 Mb
0,CorNetLDACorrectionNet,0.288813,0.179909,0.12774,0.288813,0.286869,0.342457,3min 17s + 2min 11s,1.7 Gb + 10 Mb
0,LDACorrectionNet(num_topics=300),0.313927,0.188813,0.129909,0.313927,0.300799,0.353295,1min 17s + 1min 35s,836 Mb + 8 Mb
0,LDACorrectionNetLarge,0.369863,0.21484,0.139269,0.369863,0.351067,0.398465,3min 56s + 4min 48s,1.7 Gb + 97 Mb
0,CorNetLDACorrectionNetLarge,0.355023,0.208219,0.138014,0.355023,0.341591,0.391434,3min 56s + 4min 56s,1.7 Gb + 103 Mb
0,LDACorrectionNet with init,0.364155,0.206849,0.141324,0.364155,0.337262,0.392971,3min 56s + 3min 39s + 1min 30s,1.7 Gb + 8 Mb
0,LDACorrectionNetLarge with init,0.31621,0.188128,0.129909,0.31621,0.300579,0.352683,3min 56s + 3min 39s + 4min 23s,1.7 Gb + 97 Mb
