In [1]:
from models import SentenceEmbedders, utils
from models import experiments

### Tests

In [2]:
bert_tests = utils.load_pkl('bert-embeddings/bert_tests_allLayers_maxlen200.pkl')

In [6]:
mc_experiment = experiments.ExperimentResult(log = ['embeddings_model', 
                                                    'classifier',
                                                    'layers', 
                                                    'pooling', 
                                                    'accuracy', 
                                                    'f1'])
df = experiments.run_inputs_experiments(mc_experiment,
                                        bert_tests,
                                        classifier='CatBoost',
                                        problem_type='multiple-choice',
                                        strategy=['mean', 'max'], 
                                        single_layers=[i for i in range(1, 13)],
                                        layers=[(1, 12), (1, 6), (7, 12), (1, 3), (4, 6), (6, 9), (9, 12)])

In [7]:
print('index of the best accuracy score: ', df['accuracy'].idxmax())
print('index of the best f1: ', df['f1'].idxmax())

index of the best accuracy score:  23
index of the best f1:  23


In [8]:
df

Unnamed: 0,embeddings_model,classifier,layers,pooling,accuracy,f1
0,bert-as-service,CatBoost,1,single_layer,0.318,0.318
1,bert-as-service,CatBoost,2,single_layer,0.32,0.32
2,bert-as-service,CatBoost,3,single_layer,0.338,0.338
3,bert-as-service,CatBoost,4,single_layer,0.332,0.332
4,bert-as-service,CatBoost,5,single_layer,0.317,0.317
5,bert-as-service,CatBoost,6,single_layer,0.322,0.322
6,bert-as-service,CatBoost,7,single_layer,0.312,0.311
7,bert-as-service,CatBoost,8,single_layer,0.329,0.329
8,bert-as-service,CatBoost,9,single_layer,0.329,0.328
9,bert-as-service,CatBoost,10,single_layer,0.319,0.319


### News

In [2]:
bert_news_1 = utils.load_pkl('bert-embeddings/bert_news_LAYERS_1-6_maxlen200.pkl')
bert_news_2 = utils.load_pkl('bert-embeddings/bert_news_LAYERS_7-12_maxlen200.pkl')

In [9]:
news_experiment = experiments.ExperimentResult(log = ['embeddings_model', 
                                                      'classifier',
                                                      'layers', 
                                                      'pooling', 
                                                      'accuracy', 
                                                      'f1'])
df_news_1 = experiments.run_inputs_experiments(news_experiment,
                                        bert_news_1,
                                        classifier='CatBoost',
                                        problem_type='multiple-choice',
                                        strategy=['mean', 'max'], 
                                        single_layers=[i for i in range(1, 7)],
                                        layers=[(1, 6), (1, 3), (4, 6)])

In [10]:
news_experiment = experiments.ExperimentResult(log = ['embeddings_model', 
                                                      'classifier',
                                                      'layers', 
                                                      'pooling', 
                                                      'accuracy', 
                                                      'f1'])
df_news_2 = experiments.run_inputs_experiments(news_experiment,
                                        bert_news_2,
                                        classifier='CatBoost',
                                        problem_type='multiple-choice',
                                        strategy=['mean', 'max'], 
                                        single_layers=[i for i in range(1, 7)],
                                        layers=[(1, 6), (1, 3), (4, 6)])

In [5]:
df_news_2['layers'] = df_news_2['layers'].apply(lambda x: (x[0] + 6, x[1] + 6) if isinstance(x, tuple) else x + 6)

In [6]:
import pandas as pd
df_news = pd.concat([df_news_1[:6], df_news_2[:6], df_news_1[6:], df_news_2[6:]])
df_news.reset_index(inplace=True)

In [7]:
print('index of the best accuracy score: ', df_news['accuracy'].idxmax())
print('index of the best f1: ', df_news['f1'].idxmax())

index of the best accuracy score:  18
index of the best f1:  18


In [8]:
df_news

Unnamed: 0,index,embeddings_model,classifier,layers,pooling,accuracy,f1
0,0,bert-as-service,CatBoost,1,single_layer,0.414,0.414
1,1,bert-as-service,CatBoost,2,single_layer,0.447,0.447
2,2,bert-as-service,CatBoost,3,single_layer,0.465,0.465
3,3,bert-as-service,CatBoost,4,single_layer,0.47,0.47
4,4,bert-as-service,CatBoost,5,single_layer,0.498,0.498
5,5,bert-as-service,CatBoost,6,single_layer,0.485,0.485
6,0,bert-as-service,CatBoost,7,single_layer,0.467,0.467
7,1,bert-as-service,CatBoost,8,single_layer,0.489,0.489
8,2,bert-as-service,CatBoost,9,single_layer,0.461,0.461
9,3,bert-as-service,CatBoost,10,single_layer,0.482,0.482


### Paraphrase

In [9]:
bert_paraphrase = utils.load_pkl('bert-embeddings/bert_paraphrase_allLayers_maxlen200.pkl')

In [12]:
paraphrase_experiment = experiments.ExperimentResult(log = ['embeddings_model', 
                                                            'classifier',
                                                            'layers', 
                                                            'pooling', 
                                                            'accuracy', 
                                                            'f1'])
df = experiments.run_inputs_experiments(paraphrase_experiment,
                                        bert_paraphrase,
                                        classifier='CatBoost',
                                        problem_type='paraphrase',
                                        strategy=['mean', 'max'], 
                                        single_layers=[i for i in range(1, 13)],
                                        layers=[(1, 12), (1, 6), (7, 12), (1, 3), (4, 6), (6, 9), (9, 12)])

In [13]:
print('index of the best accuracy score: ', df['accuracy'].idxmax())
print('index of the best f1: ', df['f1'].idxmax())

index of the best accuracy score:  24
index of the best f1:  2


In [14]:
df

Unnamed: 0,embeddings_model,classifier,layers,pooling,accuracy,f1
0,bert-as-service,CatBoost,1,single_layer,0.762,0.708
1,bert-as-service,CatBoost,2,single_layer,0.764,0.711
2,bert-as-service,CatBoost,3,single_layer,0.776,0.732
3,bert-as-service,CatBoost,4,single_layer,0.767,0.717
4,bert-as-service,CatBoost,5,single_layer,0.768,0.725
5,bert-as-service,CatBoost,6,single_layer,0.762,0.716
6,bert-as-service,CatBoost,7,single_layer,0.776,0.73
7,bert-as-service,CatBoost,8,single_layer,0.752,0.698
8,bert-as-service,CatBoost,9,single_layer,0.765,0.712
9,bert-as-service,CatBoost,10,single_layer,0.749,0.696
