# Setup
### Imports

In [2]:
# Put the parent directory on the import search path (presumably so that the
# project modules imported below -- modeling, toolbox, run_baselines -- can be
# resolved from there), then drop `sys` from the notebook namespace again.
import sys
sys.path.append('../')
del sys

%reload_ext autoreload
%autoreload 2

import modeling.models as models
from toolbox.utils import to_class_name, load_task
from toolbox.parameters import SCORES_NAMES, BASELINES_RANDOM_SEED
from toolbox.paths import MODELING_TASK_RESULTS_PATH, PRETRAINED_MODELS_PATH, TENSORBOARD_LOGS_PATH
from run_baselines import get_word2vec, play_baseline

# Relative prefix prepended to the project path constants used below
# (task results, tensorboard logs, pretrained models).
root = "../"

### Parameters

In [8]:
# Settings for this run, gathered in one mapping; the next cell unpacks them
# into individual variables.
args = dict(
    task="context_free_same_type",
    valid_proportion=0.25,
    test_proportion=0.25,
    ranking_size=32,
    batch_size=16,
    cross_validation=False,
    short=False,
    experiment=None,
)

In [9]:
# Unpack the settings mapping into one variable per option. Note that the
# 'task' and 'experiment' keys are bound to the longer names `task_name` and
# `experiment_name`.
(
    task_name,
    valid_proportion,
    test_proportion,
    ranking_size,
    batch_size,
    cross_validation,
    short,
    experiment_name,
) = (
    args[key]
    for key in (
        'task',
        'valid_proportion',
        'test_proportion',
        'ranking_size',
        'batch_size',
        'cross_validation',
        'short',
        'experiment',
    )
)

### Load the data

In [10]:
# Load the modeling task matching the settings above from the task results
# folder, located relative to this notebook through `root`.
task = load_task(
    task_name=task_name,
    valid_proportion=valid_proportion,
    test_proportion=test_proportion,
    ranking_size=ranking_size,
    batch_size=batch_size,
    cross_validation=cross_validation,
    short=short,
    folder_path=root + MODELING_TASK_RESULTS_PATH,
)

Task loaded from ../results/modeling_task/contextfreesametype_50-25-25_rs32_bs16.pkl.



# Run the baselines
## Basic baselines
### Models

In [11]:
# Basic baselines, listed by their snake_case identifiers and converted with
# to_class_name into the attribute names looked up on the `models` module.
model_names = [
    to_class_name(model_name)
    for model_name in (
        "random",
        "frequency",
        "summaries_count",
        "summaries_unique_count",
        "summaries_overlap",
        "activated_summaries",
        "context_count",
        "context_unique_count",
        "summaries_context_count",
        "summaries_context_unique_count",
        "summaries_context_overlap",
    )
]

### Run the models

In [12]:
# Instantiate each basic baseline from its class name and run it on the task.
# These baselines are built without any pretrained model.
for model_name in model_names:
    print(model_name)

    model_class = getattr(models, model_name)
    model = model_class(
        scores_names=SCORES_NAMES,
        relevance_level=task.relevance_level,
        pretrained_model=None,
        pretrained_model_dim=None,
        tensorboard_logs_path=root + TENSORBOARD_LOGS_PATH,
        experiment_name=experiment_name,
        random_seed=BASELINES_RANDOM_SEED,
    )

    play_baseline(task=task, model=model)

Random
Evaluation on the train_loader...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.18683 (+/-0.14092)
recall_at_10: 0.30759 (+/-0.28145)
reciprocal_best_rank: 0.26042 (+/-0.28801)
reciprocal_average_rank: 0.07259 (+/-0.05742)
ndcg_at_10: 0.19963 (+/-0.20058)

Evaluation on the valid_loader...


HBox(children=(FloatProgress(value=0.0, max=335.0), HTML(value='')))


average_precision: 0.18822 (+/-0.14156)
recall_at_10: 0.30993 (+/-0.27925)
reciprocal_best_rank: 0.26700 (+/-0.28845)
reciprocal_average_rank: 0.07424 (+/-0.06435)
ndcg_at_10: 0.20197 (+/-0.20103)

Frequency
Learning answers counts...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


Learning answers counts...


HBox(children=(FloatProgress(value=0.0, max=335.0), HTML(value='')))


Evaluation on the train_loader...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.58751 (+/-0.27316)
recall_at_10: 0.72921 (+/-0.28313)
reciprocal_best_rank: 0.78478 (+/-0.33351)
reciprocal_average_rank: 0.24159 (+/-0.21305)
ndcg_at_10: 0.66028 (+/-0.27743)

Evaluation on the valid_loader...


HBox(children=(FloatProgress(value=0.0, max=335.0), HTML(value='')))


average_precision: 0.57506 (+/-0.28160)
recall_at_10: 0.73441 (+/-0.28139)
reciprocal_best_rank: 0.76143 (+/-0.33987)
reciprocal_average_rank: 0.23969 (+/-0.20149)
ndcg_at_10: 0.65044 (+/-0.28110)

SummariesCount
Evaluation on the train_loader...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.32107 (+/-0.25113)
recall_at_10: 0.46609 (+/-0.32867)
reciprocal_best_rank: 0.45262 (+/-0.39041)
reciprocal_average_rank: 0.11416 (+/-0.11551)
ndcg_at_10: 0.36545 (+/-0.29349)

Evaluation on the valid_loader...


HBox(children=(FloatProgress(value=0.0, max=335.0), HTML(value='')))


average_precision: 0.31318 (+/-0.26009)
recall_at_10: 0.43856 (+/-0.34079)
reciprocal_best_rank: 0.44287 (+/-0.39674)
reciprocal_average_rank: 0.11840 (+/-0.13713)
ndcg_at_10: 0.34900 (+/-0.30720)

SummariesUniqueCount
Evaluation on the train_loader...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.29460 (+/-0.22668)
recall_at_10: 0.46409 (+/-0.32439)
reciprocal_best_rank: 0.41249 (+/-0.36534)
reciprocal_average_rank: 0.10709 (+/-0.10340)
ndcg_at_10: 0.34361 (+/-0.27382)

Evaluation on the valid_loader...


HBox(children=(FloatProgress(value=0.0, max=335.0), HTML(value='')))


average_precision: 0.28902 (+/-0.24626)
recall_at_10: 0.43097 (+/-0.33723)
reciprocal_best_rank: 0.39058 (+/-0.36918)
reciprocal_average_rank: 0.11314 (+/-0.12783)
ndcg_at_10: 0.32377 (+/-0.29305)

SummariesOverlap
Evaluation on the train_loader...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.30177 (+/-0.24245)
recall_at_10: 0.43872 (+/-0.32607)
reciprocal_best_rank: 0.43090 (+/-0.38773)
reciprocal_average_rank: 0.10783 (+/-0.11366)
ndcg_at_10: 0.34188 (+/-0.28944)

Evaluation on the valid_loader...


HBox(children=(FloatProgress(value=0.0, max=335.0), HTML(value='')))


average_precision: 0.28605 (+/-0.24087)
recall_at_10: 0.40947 (+/-0.34421)
reciprocal_best_rank: 0.40358 (+/-0.38101)
reciprocal_average_rank: 0.10945 (+/-0.12433)
ndcg_at_10: 0.31630 (+/-0.29410)

ActivatedSummaries
Evaluation on the train_loader...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.30403 (+/-0.23552)
recall_at_10: 0.46569 (+/-0.32729)
reciprocal_best_rank: 0.42068 (+/-0.37103)
reciprocal_average_rank: 0.11196 (+/-0.11664)
ndcg_at_10: 0.35109 (+/-0.28114)

Evaluation on the valid_loader...


HBox(children=(FloatProgress(value=0.0, max=335.0), HTML(value='')))


average_precision: 0.29713 (+/-0.25104)
recall_at_10: 0.43526 (+/-0.33953)
reciprocal_best_rank: 0.40497 (+/-0.37811)
reciprocal_average_rank: 0.11569 (+/-0.13693)
ndcg_at_10: 0.33260 (+/-0.29783)

ContextCount
Evaluation on the train_loader...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.27936 (+/-0.22475)
recall_at_10: 0.41147 (+/-0.31335)
reciprocal_best_rank: 0.40909 (+/-0.37698)
reciprocal_average_rank: 0.10055 (+/-0.10374)
ndcg_at_10: 0.31523 (+/-0.27296)

Evaluation on the valid_loader...


HBox(children=(FloatProgress(value=0.0, max=335.0), HTML(value='')))


average_precision: 0.28345 (+/-0.23635)
recall_at_10: 0.39547 (+/-0.32313)
reciprocal_best_rank: 0.41298 (+/-0.38842)
reciprocal_average_rank: 0.10330 (+/-0.11825)
ndcg_at_10: 0.31206 (+/-0.28547)

ContextUniqueCount
Evaluation on the train_loader...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.27568 (+/-0.22383)
recall_at_10: 0.41149 (+/-0.31464)
reciprocal_best_rank: 0.40006 (+/-0.37073)
reciprocal_average_rank: 0.10083 (+/-0.10474)
ndcg_at_10: 0.31156 (+/-0.27188)

Evaluation on the valid_loader...


HBox(children=(FloatProgress(value=0.0, max=335.0), HTML(value='')))


average_precision: 0.27783 (+/-0.22915)
recall_at_10: 0.39516 (+/-0.32311)
reciprocal_best_rank: 0.40719 (+/-0.38325)
reciprocal_average_rank: 0.10115 (+/-0.11131)
ndcg_at_10: 0.30748 (+/-0.28039)

SummariesContextCount
Evaluation on the train_loader...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.34807 (+/-0.25448)
recall_at_10: 0.50056 (+/-0.33052)
reciprocal_best_rank: 0.49315 (+/-0.39454)
reciprocal_average_rank: 0.12403 (+/-0.12396)
ndcg_at_10: 0.40012 (+/-0.29466)

Evaluation on the valid_loader...


HBox(children=(FloatProgress(value=0.0, max=335.0), HTML(value='')))


average_precision: 0.33870 (+/-0.26919)
recall_at_10: 0.46325 (+/-0.33430)
reciprocal_best_rank: 0.48178 (+/-0.40122)
reciprocal_average_rank: 0.12708 (+/-0.14914)
ndcg_at_10: 0.37829 (+/-0.31032)

SummariesContextUniqueCount
Evaluation on the train_loader...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.31336 (+/-0.23158)
recall_at_10: 0.49484 (+/-0.32395)
reciprocal_best_rank: 0.43919 (+/-0.37026)
reciprocal_average_rank: 0.11599 (+/-0.11293)
ndcg_at_10: 0.36940 (+/-0.27666)

Evaluation on the valid_loader...


HBox(children=(FloatProgress(value=0.0, max=335.0), HTML(value='')))


average_precision: 0.30822 (+/-0.24932)
recall_at_10: 0.45587 (+/-0.33071)
reciprocal_best_rank: 0.43142 (+/-0.38050)
reciprocal_average_rank: 0.11724 (+/-0.12778)
ndcg_at_10: 0.34978 (+/-0.29386)

SummariesContextOverlap
Evaluation on the train_loader...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.33248 (+/-0.24888)
recall_at_10: 0.49022 (+/-0.32432)
reciprocal_best_rank: 0.47524 (+/-0.39077)
reciprocal_average_rank: 0.11857 (+/-0.12112)
ndcg_at_10: 0.38540 (+/-0.29199)

Evaluation on the valid_loader...


HBox(children=(FloatProgress(value=0.0, max=335.0), HTML(value='')))


average_precision: 0.31321 (+/-0.24521)
recall_at_10: 0.46016 (+/-0.33304)
reciprocal_best_rank: 0.44899 (+/-0.38403)
reciprocal_average_rank: 0.11689 (+/-0.12650)
ndcg_at_10: 0.35773 (+/-0.29028)



## Embedding baselines
### Models

In [13]:
# Embedding-based baselines, listed by their snake_case identifiers and
# converted with to_class_name into the attribute names looked up on the
# `models` module.
model_names = [
    to_class_name(model_name)
    for model_name in (
        "summaries_average_embedding",
        "summaries_overlap_average_embedding",
        "context_average_embedding",
        "summaries_context_average_embedding",
        "summaries_context_overlap_average_embedding",
    )
]

### Pretrained model

In [14]:
# Fetch the word2vec embedding from the pretrained models folder; the second
# returned value is later passed to the models as `pretrained_model_dim`.
word_embedding, word_embedding_dim = get_word2vec(
    root + PRETRAINED_MODELS_PATH
)

Word2Vec embedding loaded.



### Run the models

In [15]:
# Instantiate each embedding baseline from its class name, handing it the
# word2vec embedding loaded above, and run it on the task.
for model_name in model_names:
    print(model_name)

    model_class = getattr(models, model_name)
    model = model_class(
        scores_names=SCORES_NAMES,
        relevance_level=task.relevance_level,
        pretrained_model=word_embedding,
        pretrained_model_dim=word_embedding_dim,
        tensorboard_logs_path=root + TENSORBOARD_LOGS_PATH,
        experiment_name=experiment_name,
        random_seed=BASELINES_RANDOM_SEED,
    )

    play_baseline(task=task, model=model)

SummariesAverageEmbedding
Evaluation on the train_loader...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.36688 (+/-0.28409)
recall_at_10: 0.52673 (+/-0.36948)
reciprocal_best_rank: 0.47857 (+/-0.39334)
reciprocal_average_rank: 0.14868 (+/-0.14804)
ndcg_at_10: 0.41243 (+/-0.32645)

Evaluation on the valid_loader...


HBox(children=(FloatProgress(value=0.0, max=335.0), HTML(value='')))


average_precision: 0.36138 (+/-0.28554)
recall_at_10: 0.51195 (+/-0.37084)
reciprocal_best_rank: 0.48076 (+/-0.39385)
reciprocal_average_rank: 0.15073 (+/-0.15925)
ndcg_at_10: 0.40486 (+/-0.33147)

SummariesOverlapAverageEmbedding
Evaluation on the train_loader...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.39951 (+/-0.29889)
recall_at_10: 0.54444 (+/-0.37120)
reciprocal_best_rank: 0.52858 (+/-0.40244)
reciprocal_average_rank: 0.16567 (+/-0.17594)
ndcg_at_10: 0.44621 (+/-0.33591)

Evaluation on the valid_loader...


HBox(children=(FloatProgress(value=0.0, max=335.0), HTML(value='')))


average_precision: 0.38106 (+/-0.30522)
recall_at_10: 0.50772 (+/-0.37740)
reciprocal_best_rank: 0.49513 (+/-0.39237)
reciprocal_average_rank: 0.16551 (+/-0.18520)
ndcg_at_10: 0.41682 (+/-0.34393)

ContextAverageEmbedding
Evaluation on the train_loader...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.33555 (+/-0.25027)
recall_at_10: 0.51887 (+/-0.34266)
reciprocal_best_rank: 0.45548 (+/-0.37537)
reciprocal_average_rank: 0.13013 (+/-0.12210)
ndcg_at_10: 0.38873 (+/-0.29417)

Evaluation on the valid_loader...


HBox(children=(FloatProgress(value=0.0, max=335.0), HTML(value='')))


average_precision: 0.32722 (+/-0.25867)
recall_at_10: 0.48353 (+/-0.35100)
reciprocal_best_rank: 0.44108 (+/-0.38425)
reciprocal_average_rank: 0.12993 (+/-0.13491)
ndcg_at_10: 0.36990 (+/-0.30321)

SummariesContextAverageEmbedding
Evaluation on the train_loader...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.36129 (+/-0.27428)
recall_at_10: 0.53323 (+/-0.35243)
reciprocal_best_rank: 0.47609 (+/-0.38725)
reciprocal_average_rank: 0.14402 (+/-0.14181)
ndcg_at_10: 0.41135 (+/-0.31441)

Evaluation on the valid_loader...


HBox(children=(FloatProgress(value=0.0, max=335.0), HTML(value='')))


average_precision: 0.35403 (+/-0.27883)
recall_at_10: 0.52394 (+/-0.35645)
reciprocal_best_rank: 0.47990 (+/-0.39911)
reciprocal_average_rank: 0.14163 (+/-0.14563)
ndcg_at_10: 0.40377 (+/-0.31906)

SummariesContextOverlapAverageEmbedding
Evaluation on the train_loader...


HBox(children=(FloatProgress(value=0.0, max=670.0), HTML(value='')))


average_precision: 0.34506 (+/-0.25933)
recall_at_10: 0.52742 (+/-0.35222)
reciprocal_best_rank: 0.46210 (+/-0.37734)
reciprocal_average_rank: 0.13755 (+/-0.13607)
ndcg_at_10: 0.39817 (+/-0.30343)

Evaluation on the valid_loader...


HBox(children=(FloatProgress(value=0.0, max=335.0), HTML(value='')))


average_precision: 0.33413 (+/-0.26309)
recall_at_10: 0.49134 (+/-0.35264)
reciprocal_best_rank: 0.45293 (+/-0.38732)
reciprocal_average_rank: 0.13269 (+/-0.13359)
ndcg_at_10: 0.37797 (+/-0.30741)

