# Setup
### Imports

In [1]:
# Make the parent directory importable. Presumably the project packages imported
# below (modeling, toolbox, run_baselines) live there -- verify against the repo layout.
# This must run before the project imports further down.
import sys
sys.path.append('../')
del sys  # only needed for the path tweak above; drop the name from the notebook namespace

# Reload imported modules automatically before each cell is executed, so that
# edits to the project's code are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

import modeling.models as models
from toolbox.utils import to_class_name, load_task, get_pretrained_model
from toolbox.parameters import SCORES_NAMES, MODELS_RANDOM_SEED
from toolbox.paths import MODELING_TASK_FOR_BASELINES_PATH, PRETRAINED_MODELS_PATH, TENSORBOARD_LOGS_BASELINES_PATH
from run_baselines import play_baseline

# Relative path forwarded as `root=` to load_task, get_pretrained_model and the
# model constructors in the cells below.
root = "../"

### Parameters

In [2]:
# Run configuration for this notebook. The values are unpacked into individual
# variables in the following cell.
args = dict(
    task="context_free",
    batch_size=64,
    cross_validation=False,
    short=False,
    experiment="context_free_0",
)

In [3]:
# Pull every run setting out of `args` into its own variable.
task_name, batch_size, cross_validation, short, experiment_name = (
    args[key]
    for key in ('task', 'batch_size', 'cross_validation', 'short', 'experiment')
)

### Load the data

In [5]:
# Load the modeling task using the settings chosen in the parameters section.
task = load_task(
    task_name=task_name,
    batch_size=batch_size,
    cross_validation=cross_validation,
    short=short,
    folder_path=MODELING_TASK_FOR_BASELINES_PATH,
    root=root,
)

Task loaded from ../results/modeling_task/baselines_split/contextfree_bs64.pkl.



# Run the baselines
## Basic baselines
### Models

In [6]:
# Basic baselines, listed in snake_case and converted with to_class_name into
# the names that are later looked up as attributes of `modeling.models`.
model_names = [
    to_class_name(snake_case_name)
    for snake_case_name in (
        "random",
        "frequency",
        "summaries_count",
        "summaries_unique_count",
        "summaries_overlap",
        "activated_summaries",
        "context_count",
        "context_unique_count",
        "summaries_context_count",
        "summaries_context_unique_count",
        "summaries_context_overlap",
    )
]

### Run the models

In [7]:
# Instantiate each basic baseline by name and run it on the task.
# These baselines take no pretrained model, hence the two None arguments.
for model_name in model_names:
    print(model_name)

    # Resolve the class from its name, then build the model.
    model_class = getattr(models, model_name)
    model = model_class(
        scores_names=SCORES_NAMES,
        relevance_level=task.relevance_level,
        pretrained_model=None,
        pretrained_model_dim=None,
        tensorboard_logs_path=TENSORBOARD_LOGS_BASELINES_PATH,
        experiment_name=experiment_name,
        random_seed=MODELS_RANDOM_SEED,
        root=root,
    )

    play_baseline(task=task, model=model)

Random
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.00558 (+/-0.01336)
recall_at_10: 0.00533 (+/-0.04221)
reciprocal_best_rank: 0.01057 (+/-0.03406)
reciprocal_average_rank: 0.00155 (+/-0.00190)
ndcg_at_10: 0.00311 (+/-0.02539)

Frequency
Learning answers counts...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.13992 (+/-0.18235)
recall_at_10: 0.25031 (+/-0.28810)
reciprocal_best_rank: 0.27200 (+/-0.34137)
reciprocal_average_rank: 0.03507 (+/-0.11731)
ndcg_at_10: 0.19070 (+/-0.23355)

SummariesCount
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.05354 (+/-0.12194)
recall_at_10: 0.08283 (+/-0.18825)
reciprocal_best_rank: 0.11762 (+/-0.26353)
reciprocal_average_rank: 0.00801 (+/-0.04121)
ndcg_at_10: 0.06908 (+/-0.16317)

SummariesUniqueCount
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.03181 (+/-0.08206)
recall_at_10: 0.04794 (+/-0.13366)
reciprocal_best_rank: 0.07589 (+/-0.20926)
reciprocal_average_rank: 0.00413 (+/-0.01740)
ndcg_at_10: 0.03919 (+/-0.11654)

SummariesOverlap
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.04749 (+/-0.11233)
recall_at_10: 0.07000 (+/-0.17056)
reciprocal_best_rank: 0.10889 (+/-0.25618)
reciprocal_average_rank: 0.00644 (+/-0.03317)
ndcg_at_10: 0.06143 (+/-0.15278)

ActivatedSummaries
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.03080 (+/-0.08252)
recall_at_10: 0.05420 (+/-0.16242)
reciprocal_best_rank: 0.05868 (+/-0.15879)
reciprocal_average_rank: 0.00715 (+/-0.03867)
ndcg_at_10: 0.03665 (+/-0.11556)

ContextCount
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.04078 (+/-0.11213)
recall_at_10: 0.06512 (+/-0.17035)
reciprocal_best_rank: 0.08587 (+/-0.22051)
reciprocal_average_rank: 0.00745 (+/-0.04666)
ndcg_at_10: 0.05208 (+/-0.14482)

ContextUniqueCount
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.03092 (+/-0.08196)
recall_at_10: 0.05694 (+/-0.15815)
reciprocal_best_rank: 0.06932 (+/-0.19038)
reciprocal_average_rank: 0.00544 (+/-0.02730)
ndcg_at_10: 0.04124 (+/-0.11822)

SummariesContextCount
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.06633 (+/-0.13654)
recall_at_10: 0.10578 (+/-0.21777)
reciprocal_best_rank: 0.14618 (+/-0.29431)
reciprocal_average_rank: 0.01060 (+/-0.04929)
ndcg_at_10: 0.08705 (+/-0.18314)

SummariesContextUniqueCount
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.03491 (+/-0.08658)
recall_at_10: 0.05465 (+/-0.15528)
reciprocal_best_rank: 0.08024 (+/-0.21765)
reciprocal_average_rank: 0.00548 (+/-0.02059)
ndcg_at_10: 0.04272 (+/-0.12445)

SummariesContextOverlap
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.04377 (+/-0.09942)
recall_at_10: 0.07098 (+/-0.17367)
reciprocal_best_rank: 0.10168 (+/-0.24347)
reciprocal_average_rank: 0.00657 (+/-0.02548)
ndcg_at_10: 0.05667 (+/-0.14267)



## Embedding baselines
### Models

In [8]:
# Embedding-based baselines, listed in snake_case and converted with
# to_class_name into the names looked up as attributes of `modeling.models`.
model_names = [
    to_class_name(snake_case_name)
    for snake_case_name in (
        "summaries_average_embedding",
        "summaries_overlap_average_embedding",
        "context_average_embedding",
        "summaries_context_average_embedding",
        "summaries_context_overlap_average_embedding",
    )
]

### Pretrained model

In [9]:
# Fetch the pretrained "word2vec" model together with its dimension; both are
# handed to the embedding baselines when they are instantiated.
word_embedding, word_embedding_dim = get_pretrained_model(
    pretrained_model_name="word2vec",
    folder_path=PRETRAINED_MODELS_PATH,
    root=root,
)

Word2Vec embedding loaded.



### Run the models

In [10]:
# Instantiate each embedding baseline by name, giving it the word embedding
# loaded above, and run it on the task.
for model_name in model_names:
    print(model_name)

    # Resolve the class from its name, then build the model.
    model_class = getattr(models, model_name)
    model = model_class(
        scores_names=SCORES_NAMES,
        relevance_level=task.relevance_level,
        pretrained_model=word_embedding,
        pretrained_model_dim=word_embedding_dim,
        tensorboard_logs_path=TENSORBOARD_LOGS_BASELINES_PATH,
        experiment_name=experiment_name,
        random_seed=MODELS_RANDOM_SEED,
        root=root,
    )

    play_baseline(task=task, model=model)

SummariesAverageEmbedding
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.07567 (+/-0.16339)
recall_at_10: 0.11259 (+/-0.23749)
reciprocal_best_rank: 0.13237 (+/-0.26999)
reciprocal_average_rank: 0.02211 (+/-0.08810)
ndcg_at_10: 0.08923 (+/-0.20029)

SummariesOverlapAverageEmbedding
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.09888 (+/-0.18520)
recall_at_10: 0.15614 (+/-0.27600)
reciprocal_best_rank: 0.17499 (+/-0.31296)
reciprocal_average_rank: 0.02791 (+/-0.09596)
ndcg_at_10: 0.12350 (+/-0.22850)

ContextAverageEmbedding
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.05171 (+/-0.12502)
recall_at_10: 0.08281 (+/-0.20065)
reciprocal_best_rank: 0.09942 (+/-0.23242)
reciprocal_average_rank: 0.01172 (+/-0.05311)
ndcg_at_10: 0.06359 (+/-0.16303)

SummariesContextAverageEmbedding
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.07654 (+/-0.16248)
recall_at_10: 0.12131 (+/-0.23946)
reciprocal_best_rank: 0.14094 (+/-0.28233)
reciprocal_average_rank: 0.02109 (+/-0.08930)
ndcg_at_10: 0.09479 (+/-0.20070)

SummariesContextOverlapAverageEmbedding
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.05561 (+/-0.12649)
recall_at_10: 0.08966 (+/-0.20696)
reciprocal_best_rank: 0.10563 (+/-0.23656)
reciprocal_average_rank: 0.01372 (+/-0.06178)
ndcg_at_10: 0.06754 (+/-0.16446)



## BART baselines
### Models

In [None]:
# BART-MNLI baselines, listed in snake_case and converted with to_class_name
# into the names looked up as attributes of `modeling.models`.
model_names = [
    to_class_name(snake_case_name)
    for snake_case_name in (
        "summaries_bart_mnli",
        "context_bart_mnli",
        "summaries_context_bart_mnli",
    )
]

### Pretrained model

In [13]:
# Fetch the pretrained "bart_mnli" model. The second element of the returned
# pair is discarded into `_`; the BART baselines are built with
# pretrained_model_dim=None.
bart_mnli, _ = get_pretrained_model(
    pretrained_model_name="bart_mnli",
    folder_path=PRETRAINED_MODELS_PATH,
    root=root,
)

loading archive file ../modeling/pretrained_models/bart.large.mnli
| dictionary: 50264 types
Registering classification head: mnli
Pretrained BART.mnli loaded.



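### Run the models

**FIXME:** the output saved below this cell is stale. It lists the embedding baselines (`SummariesAverageEmbedding`, ...) rather than the BART models selected above, and the cell has no execution count. Re-run this section to refresh the results.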

In [None]:
# Instantiate each BART baseline by name, giving it the pretrained BART-MNLI
# model (no embedding dimension is passed), and run it on the task.
for model_name in model_names:
    print(model_name)

    # Resolve the class from its name, then build the model.
    model_class = getattr(models, model_name)
    model = model_class(
        scores_names=SCORES_NAMES,
        relevance_level=task.relevance_level,
        pretrained_model=bart_mnli,
        pretrained_model_dim=None,
        tensorboard_logs_path=TENSORBOARD_LOGS_BASELINES_PATH,
        experiment_name=experiment_name,
        random_seed=MODELS_RANDOM_SEED,
        root=root,
    )

    play_baseline(task=task, model=model)

SummariesAverageEmbedding
Validation of the model...



HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


Validation Score: 0.07634
Scores evaluated on the validation set:
average_precision: 0.07634
recall_at_10: 0.11513
reciprocal_best_rank: 0.13253
reciprocal_average_rank: 0.02128
ndcg_at_10: 0.09071


SummariesOverlapAverageEmbedding
Validation of the model...



HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


Validation Score: 0.10399
Scores evaluated on the validation set:
average_precision: 0.10399
recall_at_10: 0.15489
reciprocal_best_rank: 0.19313
reciprocal_average_rank: 0.02827
ndcg_at_10: 0.12905


ContextAverageEmbedding
Validation of the model...



HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))