# Setup
### Imports

In [1]:
import sys
sys.path.append('../')
del sys

%reload_ext autoreload
%autoreload 2

import modeling.models as models
from toolbox.utils import to_class_name, load_task, get_pretrained_model
from toolbox.parameters import SCORES_NAMES, BASELINES_RANDOM_SEED
from toolbox.paths import MODELING_TASK_RESULTS_PATH, PRETRAINED_MODELS_PATH, TENSORBOARD_LOGS_PATH
from run_baselines import play_baseline

# Prefix prepended to the paths imported from toolbox.paths when they are used below
# (same relative location as the entry appended to sys.path above).
root = "../"

### Parameters

In [2]:
# Settings of this run, kept in one mapping; each entry is unpacked into its own
# variable in the following cell.
args = dict(
    task="context_free",
    valid_proportion=0.50,
    test_proportion=0.50,
    batch_size=64,
    cross_validation=False,
    short=False,
    experiment="notebook_test",
)

In [3]:
# Expose every setting under its own name. The 'task' and 'experiment' entries are
# bound to task_name and experiment_name; the other names match their keys.
(task_name,
 valid_proportion,
 test_proportion,
 batch_size,
 cross_validation,
 short,
 experiment_name) = (args[key] for key in ('task',
                                           'valid_proportion',
                                           'test_proportion',
                                           'batch_size',
                                           'cross_validation',
                                           'short',
                                           'experiment'))

### Load the data

In [5]:
# Load the task matching the parameters above; folder_path is the toolbox path for
# the modeling task results, prefixed with root.
task = load_task(
    task_name=task_name,
    valid_proportion=valid_proportion,
    test_proportion=test_proportion,
    batch_size=batch_size,
    cross_validation=cross_validation,
    short=short,
    folder_path=root + MODELING_TASK_RESULTS_PATH,
)

Task loaded from ../results/modeling_task/contextfree_00-50-50_bs64.pkl.



# Run the baselines
## Basic baselines
### Models

In [6]:
# Basic baselines, written in snake_case and converted with to_class_name so they can
# be looked up as attributes of the models module in the run cell.
model_names = [
    to_class_name(model_name)
    for model_name in (
        "random",
        "frequency",
        "summaries_count",
        "summaries_unique_count",
        "summaries_overlap",
        "activated_summaries",
        "context_count",
        "context_unique_count",
        "summaries_context_count",
        "summaries_context_unique_count",
        "summaries_context_overlap",
    )
]

### Run the models

In [9]:
# Build each basic baseline in turn (no pretrained model is passed) and give it to
# play_baseline together with the loaded task.
for model_name in model_names:
    print(model_name)

    model = getattr(models, model_name)(
        scores_names=SCORES_NAMES,
        relevance_level=task.relevance_level,
        pretrained_model=None,
        pretrained_model_dim=None,
        tensorboard_logs_path=root + TENSORBOARD_LOGS_PATH,
        experiment_name=experiment_name,
        random_seed=BASELINES_RANDOM_SEED,
    )

    play_baseline(task=task, model=model)

Random
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.00656 (+/-0.01645)
recall_at_10: 0.01086 (+/-0.06410)
reciprocal_best_rank: 0.01279 (+/-0.04189)
reciprocal_average_rank: 0.00157 (+/-0.00182)
ndcg_at_10: 0.00578 (+/-0.03459)

Frequency
Learning answers counts...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.13871 (+/-0.16313)
recall_at_10: 0.26007 (+/-0.28032)
reciprocal_best_rank: 0.28207 (+/-0.33272)
reciprocal_average_rank: 0.02976 (+/-0.08948)
ndcg_at_10: 0.19541 (+/-0.21750)

SummariesCount
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.05414 (+/-0.12075)
recall_at_10: 0.08484 (+/-0.19167)
reciprocal_best_rank: 0.11899 (+/-0.25907)
reciprocal_average_rank: 0.00781 (+/-0.03646)
ndcg_at_10: 0.07081 (+/-0.16391)

SummariesUniqueCount
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.03113 (+/-0.07896)
recall_at_10: 0.04929 (+/-0.13896)
reciprocal_best_rank: 0.07250 (+/-0.19717)
reciprocal_average_rank: 0.00469 (+/-0.02100)
ndcg_at_10: 0.03831 (+/-0.11311)

SummariesOverlap
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.04423 (+/-0.10854)
recall_at_10: 0.06821 (+/-0.17138)
reciprocal_best_rank: 0.10377 (+/-0.25450)
reciprocal_average_rank: 0.00696 (+/-0.03767)
ndcg_at_10: 0.05848 (+/-0.14888)

ActivatedSummaries
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.03478 (+/-0.08745)
recall_at_10: 0.05308 (+/-0.15377)
reciprocal_best_rank: 0.07412 (+/-0.19576)
reciprocal_average_rank: 0.00620 (+/-0.02654)
ndcg_at_10: 0.04094 (+/-0.12125)

ContextCount
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.04787 (+/-0.12723)
recall_at_10: 0.07289 (+/-0.18669)
reciprocal_best_rank: 0.10415 (+/-0.25750)
reciprocal_average_rank: 0.00809 (+/-0.04676)
ndcg_at_10: 0.06214 (+/-0.16440)

ContextUniqueCount
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.03238 (+/-0.08262)
recall_at_10: 0.05718 (+/-0.15401)
reciprocal_best_rank: 0.07741 (+/-0.21089)
reciprocal_average_rank: 0.00458 (+/-0.01787)
ndcg_at_10: 0.04391 (+/-0.12065)

SummariesContextCount
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.06734 (+/-0.13943)
recall_at_10: 0.10738 (+/-0.21417)
reciprocal_best_rank: 0.15018 (+/-0.29761)
reciprocal_average_rank: 0.01087 (+/-0.05422)
ndcg_at_10: 0.08963 (+/-0.18331)

SummariesContextUniqueCount
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.03252 (+/-0.07876)
recall_at_10: 0.05534 (+/-0.14403)
reciprocal_best_rank: 0.07552 (+/-0.19580)
reciprocal_average_rank: 0.00501 (+/-0.02313)
ndcg_at_10: 0.04106 (+/-0.11240)

SummariesContextOverlap
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.04035 (+/-0.09052)
recall_at_10: 0.06828 (+/-0.15886)
reciprocal_best_rank: 0.09713 (+/-0.22905)
reciprocal_average_rank: 0.00570 (+/-0.02498)
ndcg_at_10: 0.05350 (+/-0.12919)



## Embedding baselines
### Models

In [8]:
# Embedding baselines, written in snake_case and converted with to_class_name so they
# can be looked up as attributes of the models module in the run cell.
model_names = [
    to_class_name(model_name)
    for model_name in (
        "summaries_average_embedding",
        "summaries_overlap_average_embedding",
        "context_average_embedding",
        "summaries_context_average_embedding",
        "summaries_context_overlap_average_embedding",
    )
]

### Pretrained model

In [9]:
# Fetch the "word2vec" pretrained model; the call returns a pair that is unpacked into
# the embedding and its dimension, both passed to the embedding baselines below.
word_embedding, word_embedding_dim = get_pretrained_model(
    pretrained_model_name="word2vec",
    folder_path=root + PRETRAINED_MODELS_PATH,
)

Word2Vec embedding loaded.



### Run the models

In [10]:
# Build each embedding baseline in turn, passing the word embedding and its dimension,
# and give it to play_baseline together with the loaded task.
for model_name in model_names:
    print(model_name)

    model = getattr(models, model_name)(
        scores_names=SCORES_NAMES,
        relevance_level=task.relevance_level,
        pretrained_model=word_embedding,
        pretrained_model_dim=word_embedding_dim,
        tensorboard_logs_path=root + TENSORBOARD_LOGS_PATH,
        experiment_name=experiment_name,
        random_seed=BASELINES_RANDOM_SEED,
    )

    play_baseline(task=task, model=model)

SummariesAverageEmbedding
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.07567 (+/-0.16339)
recall_at_10: 0.11259 (+/-0.23749)
reciprocal_best_rank: 0.13237 (+/-0.26999)
reciprocal_average_rank: 0.02211 (+/-0.08810)
ndcg_at_10: 0.08923 (+/-0.20029)

SummariesOverlapAverageEmbedding
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.09888 (+/-0.18520)
recall_at_10: 0.15614 (+/-0.27600)
reciprocal_best_rank: 0.17499 (+/-0.31296)
reciprocal_average_rank: 0.02791 (+/-0.09596)
ndcg_at_10: 0.12350 (+/-0.22850)

ContextAverageEmbedding
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.05171 (+/-0.12502)
recall_at_10: 0.08281 (+/-0.20065)
reciprocal_best_rank: 0.09942 (+/-0.23242)
reciprocal_average_rank: 0.01172 (+/-0.05311)
ndcg_at_10: 0.06359 (+/-0.16303)

SummariesContextAverageEmbedding
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.07654 (+/-0.16248)
recall_at_10: 0.12131 (+/-0.23946)
reciprocal_best_rank: 0.14094 (+/-0.28233)
reciprocal_average_rank: 0.02109 (+/-0.08930)
ndcg_at_10: 0.09479 (+/-0.20070)

SummariesContextOverlapAverageEmbedding
Validation of the model...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.05561 (+/-0.12649)
recall_at_10: 0.08966 (+/-0.20696)
reciprocal_best_rank: 0.10563 (+/-0.23656)
reciprocal_average_rank: 0.01372 (+/-0.06178)
ndcg_at_10: 0.06754 (+/-0.16446)



## BART baselines
### Models

In [None]:
# BART baselines, written in snake_case and converted with to_class_name so they can
# be looked up as attributes of the models module in the run cell.
model_names = [
    to_class_name(model_name)
    for model_name in (
        "summaries_bart_mnli",
        "context_bart_mnli",
        "summaries_context_bart_mnli",
    )
]

### Pretrained model

In [13]:
# Fetch the "bart_mnli" pretrained model; only the first element of the returned pair
# is kept, the second one is discarded.
bart_mnli, _ = get_pretrained_model(
    pretrained_model_name="bart_mnli",
    folder_path=root + PRETRAINED_MODELS_PATH,
)

loading archive file ../modeling/pretrained_models/bart.large.mnli
| dictionary: 50264 types
Registering classification head: mnli
Pretrained BART.mnli loaded.



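### Run the models

> **Note:** the cell below has no execution count (`In [None]`), and the output saved under it prints the embedding baseline class names (e.g. `SummariesAverageEmbedding`) rather than the BART ones. That output therefore does not come from the BART models listed above; re-run the cell to refresh it.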

In [None]:
# Build each BART baseline in turn, passing the loaded BART model (no dimension is
# given), and give it to play_baseline together with the loaded task.
for model_name in model_names:
    print(model_name)

    model = getattr(models, model_name)(
        scores_names=SCORES_NAMES,
        relevance_level=task.relevance_level,
        pretrained_model=bart_mnli,
        pretrained_model_dim=None,
        tensorboard_logs_path=root + TENSORBOARD_LOGS_PATH,
        experiment_name=experiment_name,
        random_seed=BASELINES_RANDOM_SEED,
    )

    play_baseline(task=task, model=model)

SummariesAverageEmbedding
Validation of the model...



HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


Validation Score: 0.07634
Scores evaluated on the validation set:
average_precision: 0.07634
recall_at_10: 0.11513
reciprocal_best_rank: 0.13253
reciprocal_average_rank: 0.02128
ndcg_at_10: 0.09071


SummariesOverlapAverageEmbedding
Validation of the model...



HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


Validation Score: 0.10399
Scores evaluated on the validation set:
average_precision: 0.10399
recall_at_10: 0.15489
reciprocal_best_rank: 0.19313
reciprocal_average_rank: 0.02827
ndcg_at_10: 0.12905


ContextAverageEmbedding
Validation of the model...



HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))