In [1]:
# Mount Google Drive inside the Colab VM at /content/drive.
# The call is interactive: it prints an OAuth URL and waits for an authorization code.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [2]:
# Install CatBoost into the runtime; the recorded run resolved to catboost 0.14.2.
# NOTE(review): the version is not pinned, so a re-run may install a different release.
! pip install catboost

Collecting catboost
[?25l  Downloading https://files.pythonhosted.org/packages/2f/c4/f130237b24efd1941cb685da12496675a90045129b66774751f1bf629dfd/catboost-0.14.2-cp36-none-manylinux1_x86_64.whl (60.6MB)
[K     |████████████████████████████████| 60.6MB 1.8MB/s 
Installing collected packages: catboost
Successfully installed catboost-0.14.2


In [3]:
import pandas as pd
import numpy as np
from progressbar import progressbar as pb

# Labelled question dataset: question text, four answer options,
# the number of the correct option and the topic label.
labels_csv_path = '/content/drive/My Drive/Embeddings/big_data_labels.csv'
data = pd.read_csv(labels_csv_path, index_col=False)

# Show the number of questions, then preview the first rows.
print(len(data))
data.head()

13073


Unnamed: 0,0,1,2,3,4,5,label
0,Совещательный орган при императоре в начале XX в,Сенат,Государственный совет,Консилия министров,Верховный тайный совет,2,История
1,Министр внутренних дел с 1904 г либерал автор ...,Лорис Меликов,Святополк Мирский,Милюков,Витте,2,История
2,Какое из названных событий произошло 27 января...,подрыв флагманского корабля Петропавловск,высадка японских войск на Ляодунском полуострове,героический бой Варяга и Корейца,заключение Портсмутского мирного договора,3,История
3,Что из названного относится к результатам перв...,появление многопартийности,формирование конституционной монархии,ликвидация сословного строя,введение рабочего контроля за производством,1,История
4,Творчество поэтов Гумилева Ахматовой Мандельшт...,акмеизму,футуризму,импрессионизму,реализму,1,История


In [0]:
import pickle

# Directory (on the mounted Drive) that holds the pre-computed embedding pickles.
EMBEDDINGS_DIR = '/content/drive/My Drive/Embeddings'


def _load_embedding_array(prefix):
    '''
    Load `<prefix>_embed_big.pickle` from EMBEDDINGS_DIR and return it as a NumPy array.

    prefix - file name prefix of the embedding model ('elmo', 'bert', 'fasttext', 'rusvec')

    NOTE(review): pickle.load can execute arbitrary code while unpickling,
    so only point this at files from a trusted source.
    '''
    path = '{}/{}_embed_big.pickle'.format(EMBEDDINGS_DIR, prefix)
    with open(path, 'rb') as f:
        return np.array(pickle.load(f))


# One array per embedding model, loaded in the same order as before.
elmo_embs = _load_embedding_array('elmo')
bert_embs = _load_embedding_array('bert')
fasttext_embs = _load_embedding_array('fasttext')
rusvec_embs = _load_embedding_array('rusvec')

In [0]:
# Embedding arrays for the full dataset, keyed by the model name used in the results table.
embeddings = {
    'RusVectores': rusvec_embs,
    'FastText': fasttext_embs,
    'ELMO': elmo_embs,
    'BERT': bert_embs,
}

In [0]:
from catboost import CatBoostClassifier

def get_samples(data, embeddings, train_part, random_seed):

    '''
    Build (question | answer) feature rows with binary targets and split them
    into train and test parts.

    For every question the question vector (q_vec) is concatenated with each of
    its four answer vectors (a1_vec, ..., a4_vec), giving four rows
    (q_vec | a1_vec), ..., (q_vec | a4_vec), where | is concatenation.
    A row's target is 1 when its answer number equals the correct-answer number
    stored in positional column 5 of `data`, otherwise 0.

    The split is made per question, so the four rows of one question always
    land in the same part and stay consecutive.

    data - source dataset; positional column 5 holds the correct answer number (1-4)
    embeddings - array of shape (n_questions, >=5, dim): slot 0 is the question
                 vector, slots 1-4 are the answer vectors
    train_part - fraction of questions that goes to the training part
    random_seed - seed that makes the split reproducible

    return:

    X_train, X_test - lists of 1-D float64 arrays of length 2 * dim
    y_train, y_test - lists of 0/1 ints aligned with X_train / X_test

    Training questions appear in the order drawn by the seeded generator
    (the same draw as np.random.seed(random_seed) + np.random.choice);
    test questions appear in ascending index order.

    raises:

    ValueError - if `embeddings` is not 3-D with at least 5 slots per question,
                 or if len(data) differs from the number of embedded questions

    A question whose correct-answer number is outside 1-4 gets four zero
    targets; a warning is issued because such groups distort the evaluation.
    '''

    import warnings

    n_options = 4

    embeddings = np.asarray(embeddings)
    if embeddings.ndim != 3 or embeddings.shape[1] < n_options + 1:
        raise ValueError(
            'embeddings must have shape (n_questions, >=5, dim), got {}'.format(embeddings.shape))

    n_questions = embeddings.shape[0]
    if len(data) != n_questions:
        raise ValueError(
            'data has {} rows but embeddings describe {} questions'.format(len(data), n_questions))

    # Binary targets: target[i, j] == 1 iff option j + 1 is the correct answer of question i.
    correct = np.array([int(value) for value in data.iloc[:, 5]], dtype=int)
    target = (correct[:, None] == np.arange(1, n_options + 1)[None, :]).astype(int)
    if not target.any(axis=1).all():
        warnings.warn('some questions have a correct-answer number outside 1-4; '
                      'their four targets are all 0')
    target = target.reshape(-1)

    # Feature rows: row i * 4 + (j - 1) is (q_vec_i | a_j_vec_i).
    question_vecs = np.repeat(embeddings[:, :1, :], n_options, axis=1)
    answer_vecs = embeddings[:, 1:n_options + 1, :]
    predata = np.concatenate((question_vecs, answer_vecs), axis=2)
    predata = predata.reshape(n_questions * n_options, -1).astype(np.float64)

    # A private generator gives the same draw as seeding the global one,
    # without changing the global NumPy random state for the caller.
    rng = np.random.RandomState(random_seed)
    all_inds = np.arange(n_questions)
    train_size = int(len(all_inds) * train_part)
    train_inds = rng.choice(all_inds, size=train_size, replace=False)
    test_inds = np.setdiff1d(all_inds, train_inds)

    # Expand question indices into the indices of their four consecutive rows.
    offsets = np.arange(n_options)
    train_rows = (train_inds[:, None] * n_options + offsets).ravel()
    test_rows = (test_inds[:, None] * n_options + offsets).ravel()

    X_train = list(predata[train_rows])
    X_test = list(predata[test_rows])
    y_train = target[train_rows].tolist()
    y_test = target[test_rows].tolist()

    return X_train, X_test, y_train, y_test

In [0]:
from sklearn.metrics import accuracy_score, precision_score, f1_score

def approach_boosting(data, embeddings, train_part=0.7, random_seed=4, iterations=1000,
                      task_type='GPU', verbose=100):

    '''
    Train a CatBoost classifier on (question | answer) rows and score how often
    the highest-probability answer of each question is the correct one.

    The classifier predicts, for every (question | answer) row, the probability
    that the answer is correct. For each test question the answer with the
    highest probability is taken as the prediction (ties go to the first such
    answer) and compared with the position of the true answer.

    data - source dataset with the correct-answer labels
    embeddings - vector representation of the source dataset
    train_part - fraction of questions used for training
    random_seed - seed for the split and for CatBoost, for reproducibility
    iterations - number of boosting iterations
    task_type - CatBoost processing unit, 'GPU' (default) or 'CPU'
    verbose - CatBoost logging period in iterations

    return:

    accuracy, precision_macro, precision_micro, f1_macro, f1_micro -
    metrics of the per-question answer choice on the test part
    '''

    X_train, X_test, y_train, y_test = get_samples(data, embeddings, train_part, random_seed)

    clf = CatBoostClassifier(iterations=iterations, verbose=verbose,
                             random_seed=random_seed, task_type=task_type)
    clf.fit(X_train, y_train)

    # Column 1 of predict_proba is the probability of class 1 ("answer is correct").
    positive_probs = np.asarray(clf.predict_proba(X_test))[:, 1]

    # Rows come in consecutive groups of four per question; pick the best answer per group.
    y_pred_splitted = positive_probs.reshape(-1, 4).argmax(axis=1)
    y_test_splitted = np.asarray(y_test).reshape(-1, 4).argmax(axis=1)

    accuracy = accuracy_score(y_test_splitted, y_pred_splitted)
    precision_macro = precision_score(y_test_splitted, y_pred_splitted, average='macro')
    precision_micro = precision_score(y_test_splitted, y_pred_splitted, average='micro')
    f1_macro = f1_score(y_test_splitted, y_pred_splitted, average='macro')
    f1_micro = f1_score(y_test_splitted, y_pred_splitted, average='micro')

    return accuracy, precision_macro, precision_micro, f1_macro, f1_micro

In [0]:
# Evaluate every embedding model on the full dataset; pb shows a progress bar.
results_dict = dict()
for key, embed in pb(embeddings.items()):
    scores = approach_boosting(data, embed)
    results_dict[key] = scores

                                                                               N/A% (0 of 4) |                          | Elapsed Time: 0:00:00 ETA:  --:--:--

Learning rate set to 0.046409
0:	learn: 0.6805634	total: 160ms	remaining: 2m 39s
100:	learn: 0.5500172	total: 8.04s	remaining: 1m 11s
200:	learn: 0.5400951	total: 16s	remaining: 1m 3s
300:	learn: 0.5306998	total: 24.2s	remaining: 56.3s
400:	learn: 0.5216918	total: 32.5s	remaining: 48.6s
500:	learn: 0.5138138	total: 40.8s	remaining: 40.6s
600:	learn: 0.5070031	total: 49.1s	remaining: 32.6s
700:	learn: 0.5009823	total: 57.3s	remaining: 24.4s
800:	learn: 0.4952416	total: 1m 5s	remaining: 16.2s
900:	learn: 0.4899072	total: 1m 13s	remaining: 8.05s
999:	learn: 0.4851906	total: 1m 21s	remaining: 0us


                                                                                25% (1 of 4) |######                    | Elapsed Time: 0:01:30 ETA:   0:04:31

Learning rate set to 0.046409
0:	learn: 0.6802794	total: 154ms	remaining: 2m 34s
100:	learn: 0.5481732	total: 8.66s	remaining: 1m 17s
200:	learn: 0.5386997	total: 16.9s	remaining: 1m 7s
300:	learn: 0.5300076	total: 25.2s	remaining: 58.5s
400:	learn: 0.5219475	total: 33.6s	remaining: 50.2s
500:	learn: 0.5152320	total: 41.9s	remaining: 41.7s
600:	learn: 0.5090543	total: 49.9s	remaining: 33.1s
700:	learn: 0.5032669	total: 57.8s	remaining: 24.6s
800:	learn: 0.4985347	total: 1m 5s	remaining: 16.3s
900:	learn: 0.4938598	total: 1m 13s	remaining: 8.04s
999:	learn: 0.4898375	total: 1m 20s	remaining: 0us


                                                                                50% (2 of 4) |#############             | Elapsed Time: 0:03:01 ETA:   0:03:01

Learning rate set to 0.046409
0:	learn: 0.6804658	total: 313ms	remaining: 5m 12s
100:	learn: 0.5471387	total: 18.8s	remaining: 2m 47s
200:	learn: 0.5357623	total: 37.2s	remaining: 2m 27s
300:	learn: 0.5249379	total: 55.6s	remaining: 2m 9s
400:	learn: 0.5143562	total: 1m 14s	remaining: 1m 50s
500:	learn: 0.5047168	total: 1m 32s	remaining: 1m 32s
600:	learn: 0.4956423	total: 1m 50s	remaining: 1m 13s
700:	learn: 0.4876021	total: 2m 8s	remaining: 55s
800:	learn: 0.4794081	total: 2m 27s	remaining: 36.6s
900:	learn: 0.4721379	total: 2m 45s	remaining: 18.1s
999:	learn: 0.4650397	total: 3m 3s	remaining: 0us


                                                                                75% (3 of 4) |###################       | Elapsed Time: 0:06:35 ETA:   0:03:33

Learning rate set to 0.046409
0:	learn: 0.6803929	total: 254ms	remaining: 4m 14s
100:	learn: 0.5466689	total: 14.8s	remaining: 2m 11s
200:	learn: 0.5354614	total: 29.5s	remaining: 1m 57s
300:	learn: 0.5244740	total: 44.5s	remaining: 1m 43s
400:	learn: 0.5139998	total: 59.4s	remaining: 1m 28s
500:	learn: 0.5039488	total: 1m 14s	remaining: 1m 13s
600:	learn: 0.4945090	total: 1m 28s	remaining: 59s
700:	learn: 0.4860371	total: 1m 43s	remaining: 44.2s
800:	learn: 0.4779166	total: 1m 58s	remaining: 29.4s
900:	learn: 0.4702382	total: 2m 12s	remaining: 14.6s
999:	learn: 0.4625377	total: 2m 27s	remaining: 0us


                                                                               100% (4 of 4) |##########################| Elapsed Time: 0:09:26 Time:  0:09:26


In [0]:
# One column per embedding model, one row per metric
# (row order follows the tuple returned by approach_boosting).
metric_names = ['Accuracy', 'Precision Macro', 'Precision Micro', 'F1 Macro', 'F1 Micro']
results = pd.DataFrame(results_dict, index=metric_names, columns=list(embeddings))
results

Unnamed: 0,RusVectores,FastText,ELMO,BERT
Accuracy,0.329169,0.327129,0.338858,0.351351
Precision Macro,0.32954,0.325897,0.336947,0.349796
Precision Micro,0.329169,0.327129,0.338858,0.351351
F1 Macro,0.326962,0.325663,0.337002,0.34959
F1 Micro,0.329169,0.327129,0.338858,0.351351


## История

In [32]:
# Row positions of the questions whose topic (positional column 6) is 'История'.
topic_column = data.iloc[:, 6]
hist_inds = [pos for pos in range(len(data)) if topic_column.iloc[pos] == 'История']
len(hist_inds)

2498

In [0]:
# Restrict every embedding array to the rows listed in hist_inds.
elmo_embs_hist, bert_embs_hist, fasttext_embs_hist, rusvec_embs_hist = [
    embs[hist_inds] for embs in (elmo_embs, bert_embs, fasttext_embs, rusvec_embs)
]

In [0]:
# Topic-specific inputs: embedding arrays keyed by model name and the matching dataset rows.
embeddings = {
    'RusVectores': rusvec_embs_hist,
    'FastText': fasttext_embs_hist,
    'ELMO': elmo_embs_hist,
    'BERT': bert_embs_hist,
}
is_history = data['label'] == 'История'
data2 = data[is_history]

In [35]:
# Evaluate every embedding model on the topic subset held in data2 / embeddings.
results_dict = dict()
for key, embed in pb(embeddings.items()):
    scores = approach_boosting(data2, embed)
    results_dict[key] = scores

                                                                               N/A% (0 of 4) |                          | Elapsed Time: 0:00:00 ETA:  --:--:--

Learning rate set to 0.02905
0:	learn: 0.6851034	total: 120ms	remaining: 1m 59s
100:	learn: 0.5289331	total: 6.36s	remaining: 56.6s
200:	learn: 0.5024105	total: 12.5s	remaining: 49.5s
300:	learn: 0.4808585	total: 18.5s	remaining: 42.9s
400:	learn: 0.4619006	total: 24.5s	remaining: 36.7s
500:	learn: 0.4438671	total: 30.7s	remaining: 30.5s
600:	learn: 0.4276957	total: 36.9s	remaining: 24.5s
700:	learn: 0.4130709	total: 43s	remaining: 18.3s
800:	learn: 0.4004616	total: 49.1s	remaining: 12.2s
900:	learn: 0.3892117	total: 55.2s	remaining: 6.06s
999:	learn: 0.3787768	total: 1m 1s	remaining: 0us


                                                                                25% (1 of 4) |######                    | Elapsed Time: 0:01:03 ETA:   0:03:10

Learning rate set to 0.02905
0:	learn: 0.6849351	total: 120ms	remaining: 2m
100:	learn: 0.5306489	total: 6.38s	remaining: 56.8s
200:	learn: 0.5051870	total: 12.5s	remaining: 49.5s
300:	learn: 0.4812036	total: 18.6s	remaining: 43.3s
400:	learn: 0.4605313	total: 24.9s	remaining: 37.2s
500:	learn: 0.4426284	total: 31.1s	remaining: 31s
600:	learn: 0.4263762	total: 37.2s	remaining: 24.7s
700:	learn: 0.4112654	total: 43.4s	remaining: 18.5s
800:	learn: 0.3968733	total: 49.6s	remaining: 12.3s
900:	learn: 0.3860772	total: 55.7s	remaining: 6.12s
999:	learn: 0.3758094	total: 1m 1s	remaining: 0us


                                                                                50% (2 of 4) |#############             | Elapsed Time: 0:02:07 ETA:   0:02:08

Learning rate set to 0.02905
0:	learn: 0.6849596	total: 216ms	remaining: 3m 35s
100:	learn: 0.5283354	total: 13.7s	remaining: 2m 1s
200:	learn: 0.4981417	total: 27s	remaining: 1m 47s
300:	learn: 0.4715241	total: 40.3s	remaining: 1m 33s
400:	learn: 0.4483165	total: 53.4s	remaining: 1m 19s
500:	learn: 0.4248592	total: 1m 6s	remaining: 1m 6s
600:	learn: 0.4055588	total: 1m 19s	remaining: 53s
700:	learn: 0.3855858	total: 1m 33s	remaining: 39.8s
800:	learn: 0.3678012	total: 1m 46s	remaining: 26.5s
900:	learn: 0.3516896	total: 2m	remaining: 13.2s
999:	learn: 0.3372302	total: 2m 13s	remaining: 0us


                                                                                75% (3 of 4) |###################       | Elapsed Time: 0:04:28 ETA:   0:02:20

Learning rate set to 0.02905
0:	learn: 0.6851277	total: 192ms	remaining: 3m 11s
100:	learn: 0.5253374	total: 11.2s	remaining: 1m 39s
200:	learn: 0.4931971	total: 22s	remaining: 1m 27s
300:	learn: 0.4651860	total: 32.8s	remaining: 1m 16s
400:	learn: 0.4390760	total: 43.6s	remaining: 1m 5s
500:	learn: 0.4141975	total: 54.5s	remaining: 54.3s
600:	learn: 0.3939520	total: 1m 5s	remaining: 43.3s
700:	learn: 0.3738714	total: 1m 16s	remaining: 32.5s
800:	learn: 0.3557548	total: 1m 27s	remaining: 21.6s
900:	learn: 0.3387592	total: 1m 38s	remaining: 10.8s
999:	learn: 0.3236450	total: 1m 48s	remaining: 0us


                                                                               100% (4 of 4) |##########################| Elapsed Time: 0:06:22 Time:  0:06:22


In [36]:
# One column per embedding model, one row per metric
# (row order follows the tuple returned by approach_boosting).
metric_names = ['Accuracy', 'Precision Macro', 'Precision Micro', 'F1 Macro', 'F1 Micro']
results = pd.DataFrame(results_dict, index=metric_names, columns=list(embeddings))
results

Unnamed: 0,RusVectores,FastText,ELMO,BERT
Accuracy,0.365333,0.381333,0.418667,0.381333
Precision Macro,0.369743,0.380915,0.417673,0.380186
Precision Micro,0.365333,0.381333,0.418667,0.381333
F1 Macro,0.365686,0.38118,0.417663,0.380241
F1 Micro,0.365333,0.381333,0.418667,0.381333


## Медицина

In [38]:
# Row positions of the questions whose topic (positional column 6) is 'Медицина'.
topic_column = data.iloc[:, 6]
med_inds = [pos for pos in range(len(data)) if topic_column.iloc[pos] == 'Медицина']
len(med_inds)

4013

In [0]:
# Restrict every embedding array to the rows listed in med_inds.
elmo_embs_med, bert_embs_med, fasttext_embs_med, rusvec_embs_med = [
    embs[med_inds] for embs in (elmo_embs, bert_embs, fasttext_embs, rusvec_embs)
]

In [0]:
# Topic-specific inputs: embedding arrays keyed by model name and the matching dataset rows.
embeddings = {
    'RusVectores': rusvec_embs_med,
    'FastText': fasttext_embs_med,
    'ELMO': elmo_embs_med,
    'BERT': bert_embs_med,
}
is_medicine = data['label'] == 'Медицина'
data2 = data[is_medicine]

In [42]:
# Evaluate every embedding model on the topic subset held in data2 / embeddings.
results_dict = dict()
for key, embed in pb(embeddings.items()):
    scores = approach_boosting(data2, embed)
    results_dict[key] = scores

                                                                               N/A% (0 of 4) |                          | Elapsed Time: 0:00:00 ETA:  --:--:--

Learning rate set to 0.033224
0:	learn: 0.6837295	total: 117ms	remaining: 1m 56s
100:	learn: 0.5372886	total: 6.19s	remaining: 55.1s
200:	learn: 0.5194180	total: 12.3s	remaining: 49.1s
300:	learn: 0.5040901	total: 18.5s	remaining: 43s
400:	learn: 0.4900079	total: 24.6s	remaining: 36.7s
500:	learn: 0.4776556	total: 30.7s	remaining: 30.5s
600:	learn: 0.4672518	total: 36.6s	remaining: 24.3s
700:	learn: 0.4579710	total: 42.6s	remaining: 18.2s
800:	learn: 0.4499700	total: 48.6s	remaining: 12.1s
900:	learn: 0.4421158	total: 54.6s	remaining: 6s
999:	learn: 0.4362464	total: 1m	remaining: 0us


                                                                                25% (1 of 4) |######                    | Elapsed Time: 0:01:03 ETA:   0:03:10

Learning rate set to 0.033224
0:	learn: 0.6830421	total: 127ms	remaining: 2m 7s
100:	learn: 0.5299416	total: 6.38s	remaining: 56.8s
200:	learn: 0.5132769	total: 12.7s	remaining: 50.4s
300:	learn: 0.4979740	total: 18.9s	remaining: 43.9s
400:	learn: 0.4849765	total: 25s	remaining: 37.4s
500:	learn: 0.4718881	total: 31.2s	remaining: 31.1s
600:	learn: 0.4617322	total: 37.3s	remaining: 24.8s
700:	learn: 0.4533239	total: 43.4s	remaining: 18.5s
800:	learn: 0.4453585	total: 49.4s	remaining: 12.3s
900:	learn: 0.4373665	total: 55.5s	remaining: 6.09s
999:	learn: 0.4310827	total: 1m 1s	remaining: 0us


                                                                                50% (2 of 4) |#############             | Elapsed Time: 0:02:08 ETA:   0:02:10

Learning rate set to 0.033224
0:	learn: 0.6832684	total: 245ms	remaining: 4m 4s
100:	learn: 0.5298807	total: 13.6s	remaining: 2m 1s
200:	learn: 0.5077096	total: 27.5s	remaining: 1m 49s
300:	learn: 0.4885661	total: 41.1s	remaining: 1m 35s
400:	learn: 0.4707930	total: 54.7s	remaining: 1m 21s
500:	learn: 0.4544966	total: 1m 8s	remaining: 1m 7s
600:	learn: 0.4392199	total: 1m 21s	remaining: 54.2s
700:	learn: 0.4259985	total: 1m 34s	remaining: 40.4s
800:	learn: 0.4127461	total: 1m 48s	remaining: 26.9s
900:	learn: 0.4010111	total: 2m 2s	remaining: 13.4s
999:	learn: 0.3906017	total: 2m 15s	remaining: 0us


                                                                                75% (3 of 4) |###################       | Elapsed Time: 0:04:34 ETA:   0:02:25

Learning rate set to 0.033224
0:	learn: 0.6833790	total: 223ms	remaining: 3m 42s
100:	learn: 0.5286229	total: 10.9s	remaining: 1m 37s
200:	learn: 0.5074035	total: 22s	remaining: 1m 27s
300:	learn: 0.4881421	total: 33.1s	remaining: 1m 16s
400:	learn: 0.4694696	total: 44.2s	remaining: 1m 6s
500:	learn: 0.4527081	total: 55.1s	remaining: 54.8s
600:	learn: 0.4368857	total: 1m 5s	remaining: 43.8s
700:	learn: 0.4224451	total: 1m 16s	remaining: 32.7s
800:	learn: 0.4091307	total: 1m 27s	remaining: 21.7s
900:	learn: 0.3969599	total: 1m 37s	remaining: 10.8s
999:	learn: 0.3852951	total: 1m 48s	remaining: 0us


                                                                               100% (4 of 4) |##########################| Elapsed Time: 0:06:30 Time:  0:06:30


In [43]:
# One column per embedding model, one row per metric
# (row order follows the tuple returned by approach_boosting).
metric_names = ['Accuracy', 'Precision Macro', 'Precision Micro', 'F1 Macro', 'F1 Micro']
results = pd.DataFrame(results_dict, index=metric_names, columns=list(embeddings))
results

Unnamed: 0,RusVectores,FastText,ELMO,BERT
Accuracy,0.343854,0.366279,0.353821,0.355482
Precision Macro,0.345052,0.35727,0.345469,0.346969
Precision Micro,0.343854,0.366279,0.353821,0.355482
F1 Macro,0.33889,0.354801,0.344378,0.345893
F1 Micro,0.343854,0.366279,0.353821,0.355482


## Биология

In [44]:
# Row positions of the questions whose topic (positional column 6) is 'Биология'.
topic_column = data.iloc[:, 6]
bio_inds = [pos for pos in range(len(data)) if topic_column.iloc[pos] == 'Биология']
len(bio_inds)

2184

In [0]:
# Restrict every embedding array to the rows listed in bio_inds.
elmo_embs_bio, bert_embs_bio, fasttext_embs_bio, rusvec_embs_bio = [
    embs[bio_inds] for embs in (elmo_embs, bert_embs, fasttext_embs, rusvec_embs)
]

In [0]:
# Topic-specific inputs: embedding arrays keyed by model name and the matching dataset rows.
embeddings = {
    'RusVectores': rusvec_embs_bio,
    'FastText': fasttext_embs_bio,
    'ELMO': elmo_embs_bio,
    'BERT': bert_embs_bio,
}
is_biology = data['label'] == 'Биология'
data2 = data[is_biology]

In [49]:
# Evaluate every embedding model on the topic subset held in data2 / embeddings.
results_dict = dict()
for key, embed in pb(embeddings.items()):
    scores = approach_boosting(data2, embed)
    results_dict[key] = scores

                                                                               N/A% (0 of 4) |                          | Elapsed Time: 0:00:00 ETA:  --:--:--

Learning rate set to 0.027965
0:	learn: 0.6852050	total: 108ms	remaining: 1m 48s
100:	learn: 0.5335535	total: 5.39s	remaining: 48s
200:	learn: 0.5113256	total: 10.7s	remaining: 42.5s
300:	learn: 0.4923918	total: 16s	remaining: 37.2s
400:	learn: 0.4754719	total: 21.4s	remaining: 31.9s
500:	learn: 0.4599823	total: 26.7s	remaining: 26.6s
600:	learn: 0.4485528	total: 31.9s	remaining: 21.2s
700:	learn: 0.4371943	total: 37.1s	remaining: 15.8s
800:	learn: 0.4274641	total: 42.3s	remaining: 10.5s
900:	learn: 0.4181635	total: 47.2s	remaining: 5.19s
999:	learn: 0.4097267	total: 52.2s	remaining: 0us


                                                                                25% (1 of 4) |######                    | Elapsed Time: 0:00:54 ETA:   0:02:43

Learning rate set to 0.027965
0:	learn: 0.6851259	total: 61ms	remaining: 1m
100:	learn: 0.5339947	total: 5.34s	remaining: 47.5s
200:	learn: 0.5119066	total: 10.6s	remaining: 42.1s
300:	learn: 0.4948597	total: 15.8s	remaining: 36.7s
400:	learn: 0.4789539	total: 21s	remaining: 31.4s
500:	learn: 0.4630959	total: 26.3s	remaining: 26.2s
600:	learn: 0.4516279	total: 31.5s	remaining: 20.9s
700:	learn: 0.4402598	total: 36.7s	remaining: 15.6s
800:	learn: 0.4306493	total: 41.8s	remaining: 10.4s
900:	learn: 0.4222861	total: 46.9s	remaining: 5.15s
999:	learn: 0.4148059	total: 51.7s	remaining: 0us


                                                                                50% (2 of 4) |#############             | Elapsed Time: 0:01:48 ETA:   0:01:48

Learning rate set to 0.027965
0:	learn: 0.6853504	total: 201ms	remaining: 3m 21s
100:	learn: 0.5298028	total: 11.7s	remaining: 1m 43s
200:	learn: 0.5020177	total: 23s	remaining: 1m 31s
300:	learn: 0.4801375	total: 34.1s	remaining: 1m 19s
400:	learn: 0.4584921	total: 45.2s	remaining: 1m 7s
500:	learn: 0.4389511	total: 56.2s	remaining: 56s
600:	learn: 0.4226232	total: 1m 6s	remaining: 44.4s
700:	learn: 0.4068348	total: 1m 17s	remaining: 33.3s
800:	learn: 0.3928195	total: 1m 28s	remaining: 22.1s
900:	learn: 0.3795334	total: 1m 39s	remaining: 11s
999:	learn: 0.3676927	total: 1m 50s	remaining: 0us


                                                                                75% (3 of 4) |###################       | Elapsed Time: 0:03:45 ETA:   0:01:56

Learning rate set to 0.027965
0:	learn: 0.6852233	total: 168ms	remaining: 2m 47s
100:	learn: 0.5277987	total: 9.33s	remaining: 1m 23s
200:	learn: 0.4995235	total: 18.4s	remaining: 1m 13s
300:	learn: 0.4762838	total: 27.3s	remaining: 1m 3s
400:	learn: 0.4535095	total: 36.4s	remaining: 54.3s
500:	learn: 0.4317710	total: 45.4s	remaining: 45.2s
600:	learn: 0.4130740	total: 54.4s	remaining: 36.1s
700:	learn: 0.3942606	total: 1m 3s	remaining: 27.1s
800:	learn: 0.3792282	total: 1m 12s	remaining: 17.9s
900:	learn: 0.3644930	total: 1m 21s	remaining: 8.91s
999:	learn: 0.3512521	total: 1m 29s	remaining: 0us


                                                                               100% (4 of 4) |##########################| Elapsed Time: 0:05:19 Time:  0:05:19


In [50]:
# One column per embedding model, one row per metric
# (row order follows the tuple returned by approach_boosting).
metric_names = ['Accuracy', 'Precision Macro', 'Precision Micro', 'F1 Macro', 'F1 Micro']
results = pd.DataFrame(results_dict, index=metric_names, columns=list(embeddings))
results

Unnamed: 0,RusVectores,FastText,ELMO,BERT
Accuracy,0.300305,0.28811,0.333841,0.292683
Precision Macro,0.303651,0.288704,0.337126,0.292623
Precision Micro,0.300305,0.28811,0.333841,0.292683
F1 Macro,0.299034,0.287634,0.334117,0.292425
F1 Micro,0.300305,0.28811,0.333841,0.292683
