# Notebook to test ranking model proposals by Out-of-bag errors

In [1]:
%load_ext autoreload
%autoreload 2

## Load basic resources

In [2]:
from data import load_data

train, val, test = load_data()

In [3]:
full = train.append(val)

## Load all ensemble models validation proposals and ground truths

In [3]:
ensemble_model_paths = [
    'good_models/BART-base-ensemble-submission-48',
    'good_models/t5-small-ensemble-submission-60',
    'good_models/t5-base-ensemble-submission-55',
    'good_models/pegasus-large-ensemble-submission-58',
    'good_models/BART-large-ensemble-submission-56'
]

In [4]:
from glob import glob
import os
from postprocessing import read_proposals_files
from model import dcg

def load_ensemble_data(ensemble_model_path):
    ensemble_data = []
    # Load out-of-bag prediction files
    val_pred_files = glob(f"{ensemble_model_path}/predictions_*_val.csv")
    for val_pred_file in val_pred_files:
        # Extract validation fold number
        file_name = os.path.split(val_pred_file)[-1]
        data_fold = int(file_name.split("_")[1])
        # Load validation data fold
        _, fold_val, _ = load_data(data_fold)
        # Load proposals
        val_proposals = read_proposals_files([f"{ensemble_model_path}/{file_name}"])
        # Measure DCG score
        dcg_score = dcg(val_proposals, fold_val["name"])
        # Load test proposals
        test_proposals = read_proposals_files([f"{ensemble_model_path}/submission_{data_fold}.csv"])
        # Create ensemble info structure
        ensemble_data.append({
            "model_path": ensemble_model_path,
            "fold": data_fold,
            "oob_proposals": val_proposals,
            "oob_dcg": dcg_score,
            "test_proposals": test_proposals
        })
    return ensemble_data

In [5]:
from itertools import chain

ensemble_data = list(chain(*[
    load_ensemble_data(ensemble_model_path)
    for ensemble_model_path in ensemble_model_paths
]))

In [6]:
len(ensemble_data)

78

In [7]:
[d["oob_dcg"] for d in ensemble_data]

[27.761482866596822,
 22.134362187021228,
 25.814318058725792,
 26.981717149603675,
 28.835125475015253,
 26.460635257033598,
 22.607797450353527,
 27.964762896600803,
 24.566714074530875,
 24.74551765960943,
 30.87277479208958,
 24.38583933787897,
 26.5844666945208,
 24.83720755480822,
 28.760236525061966,
 22.924842271307828,
 22.93968224657691,
 26.699268316030754,
 25.418444915364347,
 23.275639519164766,
 23.36701903028415,
 19.124995777689005,
 17.33391424056596,
 18.814274810623626,
 19.47798738813132,
 18.722060513858928,
 19.191616045342823,
 18.35910275299725,
 18.145305489009328,
 18.375858838476226,
 18.299430598735746,
 18.82863061300133,
 18.62724804631559,
 21.3528799826526,
 20.00362355750168,
 21.558577743607195,
 21.58592444688766,
 20.30133551727355,
 22.103184957856335,
 21.27770670407128,
 21.431230620186394,
 20.40517947046983,
 20.16359026213937,
 21.51988114063897,
 21.29552923637249,
 23.314498715528305,
 19.770129975261934,
 23.199355809765372,
 21.12812979393

## Generate ensemble proposals, ordering them by OOB DCG score

In [12]:
sorted_models = sorted(ensemble_data, key=lambda model: model["oob_dcg"], reverse=True)

In [13]:
def combine_proposals(models):
    proposals = [[] for _ in range(len(models[0]["test_proposals"]))]
    for model in models:
        proposals = [p + added for p, added in zip(proposals, model["test_proposals"])]
    return proposals

In [14]:
from ranker import majority_ranking

majority_ranked_proposals = majority_ranking(combine_proposals(sorted_models))

In [15]:
majority_ranked_proposals

[['knit dress with lace trim',
  'lace-trimmed dress',
  'knit dress with lace detail',
  'lace dress trf',
  'knit dress with lace',
  'combined lace dress',
  'knit dress with lace trf',
  'camisole dress',
  'combined lace dress trf',
  'knit dress with matching lace'],
 ['pleated dress trf',
  'printed dress trf',
  'floral print dress trf',
  'flowing dress trf',
  'pleated midi dress',
  'floral print dress',
  'flowing dress with pleats',
  'textured dress with pleats',
  'pleated dress',
  'oversized dress trf'],
 ['nautical cap',
  'striped nautical cap',
  'nautical cap with peak',
  'textured nautical cap',
  'check nautical cap',
  'sporty nautical cap',
  'faux leather nautical cap',
  'limited edition nautical cap',
  'seashell nautical cap',
  'herringbone nautical cap'],
 ['textured nautical cap',
  'nautical cap',
  'limited edition nautical cap',
  'technical nautical cap',
  'faux leather nautical cap',
  'nautical cap with strap',
  'striped nautical cap',
  'combin

In [16]:
from postprocessing import save_submission

submission_name = "submission_xx"
save_submission(majority_ranked_proposals, submission_name)
save_submission(majority_ranked_proposals, submission_name, zip=False)

## Generate ensemble proposals weighing them by OOB DCG score

In [17]:
from collections import defaultdict

def rank_proposals_weights(proposals_lists, weights, proposals_limit=10):
    """Joins and ranks several proposals lists. Each proposal list can have a different weight.
    
    The top proposals with largest weight in the mix are returned, ordered by weight.
    """
    # Mix all proposals for the same input, summing up model weights
    mixed_proposals = [defaultdict(lambda: 0) for _ in range(len(proposals_lists[0]))]
    for proposals_list, weight in zip(proposals_lists, weights):
        for i, proposals in enumerate(proposals_list):
            for key in proposals:
                mixed_proposals[i][key] += weight
    # Sorte proposals by weights, select top
    sorted_proposals = [
        sorted(mixed_proposal, key=lambda x: mixed_proposal[x], reverse=True)[:proposals_limit]
        for mixed_proposal in mixed_proposals
    ]
    return sorted_proposals

In [18]:
dcg_ranked_proposals = rank_proposals_weights(
    [d["test_proposals"] for d in ensemble_data],
    [d["oob_dcg"] for d in ensemble_data], 
)
dcg_ranked_proposals

[['knit dress with lace trim',
  'lace-trimmed dress',
  'lace dress trf',
  'knit dress with lace trf',
  'combined lace dress',
  'knit dress with lace detail',
  'camisole dress',
  'knit dress with lace',
  'combined lace dress trf',
  'contrast lace dress'],
 ['pleated dress trf',
  'printed dress trf',
  'flowing dress trf',
  'floral print dress trf',
  'pleated midi dress',
  'floral print dress',
  'flowing dress with pleats',
  'pleated midi dress trf',
  'textured dress with pleats',
  'pleated dress'],
 ['nautical cap',
  'striped nautical cap',
  'nautical cap with peak',
  'textured nautical cap',
  'check nautical cap',
  'sporty nautical cap',
  'faux leather nautical cap',
  'limited edition nautical cap',
  'herringbone nautical cap',
  'seashell nautical cap'],
 ['textured nautical cap',
  'nautical cap',
  'technical nautical cap',
  'limited edition nautical cap',
  'faux leather nautical cap',
  'nautical cap with strap',
  'striped nautical cap',
  'combined naut

In [19]:
from postprocessing import save_submission

submission_name = "submission_66"
save_submission(dcg_ranked_proposals, submission_name)
save_submission(dcg_ranked_proposals, submission_name, zip=False)

## Generate ensemble proposals weighing models by OOB DCG score, proposals inside models by DCG rank

In [25]:
from collections import defaultdict
from ranker import add_dcg_weights

def rank_proposals_weights_dcg_local(proposals_lists, weights, proposals_limit=10):
    """Joins and ranks several proposals lists. Each proposal list can have a different weight.
    
    Within each list a local DCG ranking weighing is performed.
    
    The top proposals with largest weight in the mix are returned, ordered by weight.
    """
    # Mix all proposals for the same input, summing up model weights
    mixed_proposals = [defaultdict(lambda: 0) for _ in range(len(proposals_lists[0]))]
    for proposals_list, weight in zip(proposals_lists, weights):
        for i, proposals in enumerate(proposals_list):
            weighted_proposals = add_dcg_weights(proposals)
            for local_weight, key in weighted_proposals:
                mixed_proposals[i][key] += weight * local_weight
    # Sorte proposals by weights, select top
    sorted_proposals = [
        sorted(mixed_proposal, key=lambda x: mixed_proposal[x], reverse=True)[:proposals_limit]
        for mixed_proposal in mixed_proposals
    ]
    return sorted_proposals

In [26]:
dcg_local_ranked_proposals = rank_proposals_weights_dcg_local(
    [d["test_proposals"] for d in ensemble_data],
    [d["oob_dcg"] for d in ensemble_data], 
)
dcg_local_ranked_proposals

[['lace dress trf',
  'lace-trimmed dress',
  'knit dress with lace trim',
  'combined lace dress',
  'knit dress with lace',
  'knit dress with lace detail',
  'pleated camisole dress',
  'knit dress with lace trf',
  'strappy camisole dress',
  'contrast lace dress trf'],
 ['flowing dress trf',
  'pleated midi dress',
  'printed dress trf',
  'pleated dress trf',
  'floral print dress trf',
  'oversized dress trf',
  'flowing dress with pleats',
  'long pleated dress trf',
  'long dress with pleats trf',
  'floral print dress'],
 ['nautical cap',
  'nautical cap with peak',
  'faux leather nautical cap',
  'striped nautical cap',
  'textured nautical cap',
  'check nautical cap',
  'sporty nautical cap',
  'limited edition nautical cap',
  'herringbone nautical cap',
  'printed nautical cap'],
 ['technical nautical cap',
  'nautical cap',
  'faux leather nautical cap',
  'textured nautical cap',
  'limited edition nautical cap',
  'combined nautical cap',
  'nautical cap with strap',

In [28]:
from postprocessing import save_submission

submission_name = "submission_XX"
save_submission(dcg_local_ranked_proposals, submission_name)
save_submission(dcg_local_ranked_proposals, submission_name, zip=False)

## Bayes-optimized weights over ensemble OOB errors

In [8]:
ensemble_data[0].keys()

dict_keys(['model_path', 'fold', 'oob_proposals', 'oob_dcg', 'test_proposals'])

Recover all validation folds

In [9]:
all_folds = {model["fold"] for model in ensemble_data}
all_folds

{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21}

In [10]:
from data import load_train_val_idx

val_idxs = [load_train_val_idx(data_fold)[1] for data_fold in all_folds]

In [11]:
from data import load_full_train

full = load_full_train()

For each validation pattern, find which folds contribute to it

In [12]:
pattern_fold_contributers = [
    [fold for fold in all_folds if pattern in val_idxs[fold]]
    for pattern in range(len(full))
]

In [13]:
pattern_fold_contributers

[[10],
 [13, 16],
 [8, 11],
 [11, 17],
 [6, 9, 19],
 [15, 18],
 [17, 18],
 [1, 20],
 [7],
 [8, 18, 19],
 [0, 4, 10, 16, 17, 18],
 [8, 9, 20],
 [12, 18],
 [2, 5],
 [6, 13, 20],
 [16],
 [4, 15, 18, 19],
 [],
 [2, 5],
 [5, 6],
 [15],
 [4, 5, 6, 15, 21],
 [0, 5, 6, 10, 15],
 [17],
 [6],
 [11],
 [3],
 [8, 9, 19],
 [19],
 [7, 16],
 [9, 13, 16],
 [5, 14, 20],
 [0, 1, 14, 19],
 [2, 9, 11, 14, 16],
 [11, 15],
 [19],
 [19],
 [7, 19],
 [14],
 [0, 6, 11],
 [8, 16],
 [10, 13],
 [10],
 [10, 15],
 [0, 21],
 [15, 17, 20],
 [],
 [5, 18],
 [11, 12],
 [4, 5, 21],
 [12],
 [9, 19],
 [16, 19],
 [1],
 [3],
 [2, 4, 6, 18],
 [9, 16, 20],
 [1, 9],
 [],
 [1, 3, 9, 14],
 [12, 16, 19, 21],
 [17],
 [3, 8, 12, 19],
 [3, 7, 20],
 [6],
 [15, 18],
 [10, 12],
 [14],
 [3, 20, 21],
 [0, 2],
 [6, 7, 9],
 [7],
 [17, 19, 20, 21],
 [3, 6, 7, 16],
 [0, 2, 5],
 [11, 20],
 [6, 9],
 [18],
 [5, 12, 14],
 [14],
 [3, 7],
 [16],
 [6, 19, 20],
 [4, 7, 16],
 [2],
 [9],
 [13, 14],
 [4, 7],
 [4, 7],
 [1, 8, 21],
 [6, 20],
 [4, 6, 16, 21]

For each validation patterns, find which models contribute to it

In [14]:
pattern_model_constributers = [
    [
        model_id for model_id in range(len(ensemble_data))
        if ensemble_data[model_id]["fold"] in pattern_folds
    ]
    for pattern_folds in pattern_fold_contributers
]

In [15]:
pattern_model_constributers

[[8, 27, 39, 50, 64],
 [11, 20, 67, 77],
 [4, 5, 24, 25, 36, 37, 48, 49, 60, 61],
 [4, 18, 24, 36, 48, 60, 75],
 [1, 6, 14, 22, 31, 34, 43, 46, 53, 57, 62, 71],
 [3, 15, 59, 72],
 [3, 18, 59, 75],
 [7, 16, 26, 38, 63, 73],
 [0, 21, 33, 45, 56],
 [3, 5, 6, 25, 37, 49, 59, 61, 62],
 [2,
  3,
  8,
  18,
  19,
  20,
  23,
  27,
  32,
  35,
  39,
  44,
  47,
  50,
  55,
  58,
  59,
  64,
  75,
  76,
  77],
 [5, 14, 16, 25, 31, 37, 43, 49, 53, 61, 71, 73],
 [3, 17, 54, 59, 74],
 [9, 13, 28, 30, 40, 42, 51, 65, 69],
 [1, 11, 16, 22, 34, 46, 57, 67, 73],
 [20, 77],
 [2, 3, 6, 15, 23, 35, 47, 58, 59, 62, 72],
 [],
 [9, 13, 28, 30, 40, 42, 51, 65, 69],
 [1, 9, 22, 28, 34, 40, 46, 51, 57, 65],
 [15, 72],
 [1, 2, 9, 15, 22, 23, 28, 34, 35, 40, 46, 47, 51, 57, 58, 65, 70, 72],
 [1,
  8,
  9,
  15,
  19,
  22,
  27,
  28,
  32,
  34,
  39,
  40,
  44,
  46,
  50,
  51,
  55,
  57,
  64,
  65,
  72,
  76],
 [18, 75],
 [1, 22, 34, 46, 57],
 [4, 24, 36, 48, 60],
 [10, 29, 41, 52, 66],
 [5, 6, 14, 25, 3

Check this all makes sense

In [16]:
#for pattern_id in range(len(full)):
#    folds = pattern_fold_contributers[pattern_id]
#    for fold in folds:
#        _, val, _ = load_data(fold)
#        print(pattern_id, fold)
#        assert full.iloc[pattern_id]["description"] in val["description"].values

Create structure with all OOB suggestions for each pattern, indexed by model

In [17]:
oob_suggestions_by_model = [
    {
        model: ensemble_data[model]["oob_proposals"][list(val_idxs[ensemble_data[model]["fold"]]).index(pattern_idx)]
        for model in models
    }
    for pattern_idx, models in enumerate(pattern_model_constributers)
]

In [18]:
oob_suggestions_by_model[0]

{8: ['braided maxi tote bag',
  'braided tote bag with studs',
  'textured braided tote bag',
  'braided tote bag',
  'braided studded tote bag',
  'studded braided tote bag',
  'fabric tote bag with studs',
  'strappy braided tote bag',
  'contrast braided tote bag',
  'plaited braided tote bag'],
 27: ['contrasting braided tote bag',
  'braided metallic tote bag',
  'textured braided tote bag',
  'limited edition braided tote bag',
  'braided tote bag',
  'striped braided tote bag',
  'braided contrast tote bag',
  'braided tote bag with purse',
  'contrast braided tote bag',
  'braided tote bag with pocket'],
 39: ['braided maxi tote bag',
  'contrasting braided tote bag',
  'tote bag with braided exterior',
  'textured braided tote bag',
  'limited edition braided tote bag',
  'braided tote bag',
  'striped braided tote bag',
  'studded braided tote bag',
  'contrast braided tote bag',
  'braided tote bag with purse'],
 50: ['contrasting braided tote bag',
  'braided tote bag with 

Cost function for given combination weights

In [19]:
from collections import defaultdict

def rank_proposals_weights_dicts(proposals_dicts, model_weights, proposals_limit=10):
    """Joins and ranks several proposals dictionaries. Each model can have a different weight.
    
    The top proposals with largest weight in the mix are returned, ordered by weight.
    """
    # Mix all proposals for the same input, summing up model weights
    mixed_proposals = []
    for proposals_dict in proposals_dicts:
        mixed_proposals_pattern = defaultdict(lambda: 0)
        for model, proposals in proposals_dict.items():
            for key in proposals:
                mixed_proposals_pattern[key] += model_weights[model]
        mixed_proposals.append(mixed_proposals_pattern)
    # Sorte proposals by weights, select top
    sorted_proposals = [
        sorted(mixed_proposal, key=lambda x: mixed_proposal[x], reverse=True)[:proposals_limit]
        for mixed_proposal in mixed_proposals
    ]
    return sorted_proposals

In [20]:
def ensemble_error(model_weights, dataset, oob_suggestions_by_model):
    # Get weighted OOB proposals
    proposals = rank_proposals_weights_dicts(oob_suggestions_by_model, model_weights)
    # Measure error (reverse of DCG score)
    return -dcg(proposals, dataset["name"])

In [21]:
ensemble_error([1]*len(ensemble_data), full, oob_suggestions_by_model)

-36.46807544480735

Bayesian optimizer of weights

In [22]:
%%time
from functools import partial
from skopt import gp_minimize

results = gp_minimize(
    partial(ensemble_error, dataset=full, oob_suggestions_by_model=oob_suggestions_by_model),
    dimensions=[
        (0.0, 1.0)
        for _ in range(len(ensemble_data))
    ],
    n_calls=100
)

CPU times: user 2h 13min 2s, sys: 21min 12s, total: 2h 34min 15s
Wall time: 26min 50s


In [23]:
results.x_iters

[[0.19768414005444396,
  0.35164196718440127,
  0.7543446543178076,
  0.5167851222850365,
  0.8788193630266589,
  0.41063268038419243,
  0.39396552963573095,
  0.7600648517059748,
  0.26521131092778133,
  0.12697811516033233,
  0.13000598098569685,
  0.6391463742266374,
  0.384499507651043,
  0.25278672227540216,
  0.7638477662079921,
  0.9624019492065626,
  0.8292110852131215,
  0.017682234943924627,
  0.6838390389527459,
  0.38745445743602236,
  0.13339220929079112,
  0.4578337925943595,
  0.5787771719462222,
  0.9965742046145127,
  0.2317754618746542,
  0.24523603803414,
  0.9508942955918543,
  0.4893936266555483,
  0.2096568242560048,
  0.33952850362505843,
  0.4424329835612637,
  0.9294095338826855,
  0.3874008434718349,
  0.36718106467667305,
  0.7112295288738811,
  0.5370117066228903,
  0.5775293283506092,
  0.4333377347368177,
  0.5307443274576201,
  0.25116307440437396,
  0.14403997524524528,
  0.9571429749829949,
  0.5787297302708015,
  0.013213512658581731,
  0.4046031861934

In [24]:
results.func_vals

array([-35.20254288, -35.23347487, -35.35800739, -35.78885296,
       -35.67250636, -35.94069076, -35.69290886, -35.39481182,
       -35.72317709, -35.25471675, -36.17078102, -33.96009786,
       -34.82576007, -35.8483648 , -35.11774112, -36.3995146 ,
       -35.84256815, -36.14935815, -36.4677001 , -35.38140171,
       -36.38124299, -36.32686678, -35.73560528, -36.26121822,
       -34.96182668, -35.36300089, -34.72585937, -35.79709041,
       -35.83311131, -35.80331626, -36.60241082, -36.58316705,
       -36.53009093, -35.89787888, -35.7968937 , -36.89945192,
       -36.68851954, -36.05520197, -36.19856159, -36.43113124,
       -36.82232227, -36.45400752, -35.67862765, -36.30144588,
       -36.25490949, -36.72810643, -36.6505402 , -36.80917615,
       -35.73389795, -36.65655024, -36.53497148, -36.1669762 ,
       -36.37732211, -36.17955681, -35.91274629, -36.31808798,
       -36.09936893, -36.31501996, -36.58350353, -35.5544287 ,
       -36.11697471, -36.40590126, -36.74545681, -36.70

In [25]:
results.x

[1.0,
 0.44905033170217057,
 0.07391955500713727,
 0.895629084345445,
 1.0,
 1.0,
 0.7973486349949753,
 0.2800602864509074,
 1.0,
 0.7133686961242285,
 1.0,
 0.7104050555030641,
 0.7025912130335986,
 1.0,
 0.9249284164569438,
 0.05709121708268447,
 0.9248540280567108,
 1.0,
 0.3509268115035282,
 1.0,
 0.5503115198404648,
 0.5972887690530478,
 0.13186155781802247,
 0.8165550385143643,
 0.5934594834937214,
 0.4415125272927783,
 0.4019846382341568,
 0.22126263777771094,
 0.0,
 0.0,
 0.16066618355622822,
 0.269677511940521,
 0.45810331944558974,
 0.11747012779548988,
 0.4621081497895053,
 0.9150847843148545,
 0.5237407271342682,
 0.0,
 0.22815700822757648,
 0.952784251227543,
 0.27526578054847567,
 0.15580058510375908,
 0.2191232507825923,
 0.07688253134767964,
 0.4886466508919586,
 0.25695264950776436,
 0.0,
 0.38546801922347024,
 0.44474166948827404,
 0.9177824894215402,
 0.8374591696768897,
 0.38954497709160213,
 0.24002450027219438,
 0.5995042438558787,
 0.820763681390257,
 0.433564532

In [26]:
results.fun

-37.07570559719059

Save evaluated points

In [28]:
import pickle

with open("oob_optimization.pkl", "wb") as f:
    pickle.dump(results, f)

Best result: -37.0757

Generate submission with best mix weights

In [32]:
from collections import defaultdict

def rank_proposals_weights(proposals_lists, weights, proposals_limit=10):
    """Joins and ranks several proposals lists. Each proposal list can have a different weight.
    
    The top proposals with largest weight in the mix are returned, ordered by weight.
    """
    # Mix all proposals for the same input, summing up model weights
    mixed_proposals = [defaultdict(lambda: 0) for _ in range(len(proposals_lists[0]))]
    for proposals_list, weight in zip(proposals_lists, weights):
        for i, proposals in enumerate(proposals_list):
            for key in proposals:
                mixed_proposals[i][key] += weight
    # Sorte proposals by weights, select top
    sorted_proposals = [
        sorted(mixed_proposal, key=lambda x: mixed_proposal[x], reverse=True)[:proposals_limit]
        for mixed_proposal in mixed_proposals
    ]
    return sorted_proposals

In [33]:
bayes_search_proposals = rank_proposals_weights(
    [d["test_proposals"] for d in ensemble_data],
    [w for w in results.x], 
)
bayes_search_proposals

[['knit dress with lace trim',
  'lace-trimmed dress',
  'knit dress with lace trf',
  'combined lace dress',
  'lace dress trf',
  'camisole dress',
  'combined lace dress trf',
  'knit dress with lace detail',
  'camisole dress trf',
  'knit dress with lace'],
 ['pleated dress trf',
  'printed dress trf',
  'flowing dress trf',
  'floral print dress trf',
  'pleated midi dress',
  'floral print dress',
  'pleated midi dress trf',
  'flowing dress with pleats',
  'pleated dress',
  'long pleated dress trf'],
 ['nautical cap',
  'striped nautical cap',
  'check nautical cap',
  'nautical cap with peak',
  'textured nautical cap',
  'sporty nautical cap',
  'herringbone nautical cap',
  'premium nautical cap',
  'faux leather nautical cap',
  'nautical cap with cardboard packaging'],
 ['textured nautical cap',
  'nautical cap',
  'technical nautical cap',
  'limited edition nautical cap',
  'combined nautical cap',
  'faux leather nautical cap',
  'sporty nautical cap',
  'herringbone n

In [35]:
from postprocessing import save_submission

submission_name = "submission_69"
save_submission(bayes_search_proposals, submission_name)
save_submission(bayes_search_proposals, submission_name, zip=False)

## Try something similar with scipy

In [39]:
from scipy.optimize import differential_evolution

evolution_results = differential_evolution(
    partial(ensemble_error, dataset=full, oob_suggestions_by_model=oob_suggestions_by_model), 
    bounds=[
        (0.0, 1.0)
        for _ in range(len(ensemble_data))
    ],
    disp=True,
    polish=True,
    maxiter=10, 
    workers=1
)

TODO: something fails in the call to partial. Check jupyter logs

## Simple weighing by OOB scores observed during training

In [42]:
ensemble_info = [
    ('good_models/BART-base-ensemble-submission-48', 29.38),
    ('good_models/t5-base-ensemble-submission-55', 29.08), 
    ('good_models/BART-large-ensemble-submission-56', 30.29), 
    ('good_models/pegasus-large-ensemble-submission-58', 29.79),
    ('good_models/t5-small-ensemble-submission-60', 26.68),
    ('good_models/BART-large-ensemble-submission-71', 30.1), 
    #('good_models/BART-large-ensemble-submission-77', 29.76),  # This model was trained with bad validation data
    ('good_models/BART-large-ensemble-submission-80', 28.62)
]

ensemble_model_paths = [x[0] for x in ensemble_info]
ensemble_scores = {x[0]: x[1] for x in ensemble_info}  # Path to score dictionary

In [43]:
from glob import glob
from model import dcg
import os
from postprocessing import read_proposals_files
import re

def load_oob_scores(ensemble_model_path):
    # Load scores from file
    with open(f"{ensemble_model_path}/oob_scores.txt") as f:
        oob_scores = [float(score[:-1]) for score in f.readlines()]    
    return oob_scores

def load_ensemble_data(ensemble_model_path):
    ensemble_data = []
    # Load OOB scores
    oob_scores = load_oob_scores(ensemble_model_path)
    # Load test files
    pred_files = glob(f"{ensemble_model_path}/submission_*.csv")
    for pred_file in pred_files:
        # Extract validation fold number
        file_name = os.path.split(pred_file)[-1]
        data_fold = int(re.split("_|\.", file_name)[1])
        # Load test proposals
        test_proposals = read_proposals_files([pred_file])
        # Create ensemble info structure
        ensemble_data.append({
            "model_path": ensemble_model_path,
            "fold": data_fold,
            "oob_dcg": oob_scores[data_fold],
            "test_proposals": test_proposals
        })
    return ensemble_data

In [44]:
from itertools import chain

ensemble_data = list(chain(*[
    load_ensemble_data(ensemble_model_path)
    for ensemble_model_path in ensemble_model_paths
]))

In [45]:
len(ensemble_data)

94

In [46]:
from collections import defaultdict

def rank_proposals_weights(proposals_lists, weights, proposals_limit=10):
    """Joins and ranks several proposals lists. Each proposal list can have a different weight.
    
    The top proposals with largest weight in the mix are returned, ordered by weight.
    """
    # Mix all proposals for the same input, summing up model weights
    mixed_proposals = [defaultdict(lambda: 0) for _ in range(len(proposals_lists[0]))]
    for proposals_list, weight in zip(proposals_lists, weights):
        for i, proposals in enumerate(proposals_list):
            for key in proposals:
                mixed_proposals[i][key] += weight
    # Sorte proposals by weights, select top
    sorted_proposals = [
        sorted(mixed_proposal, key=lambda x: mixed_proposal[x], reverse=True)[:proposals_limit]
        for mixed_proposal in mixed_proposals
    ]
    return sorted_proposals

In [None]:
weights = [d["oob_dcg"] for d in ensemble_data]

FIXME: trying with ensemble optimal weights for independent classification models, doesn't work because we get negative weights

In [47]:
#import numpy as np
#weights = [np.log((d["oob_dcg"] / 100) / (1 - (d["oob_dcg"]/100))) for d in ensemble_data]
#weights = np.array(weights) / np.abs(min(weights))
#weights

Another option: combine ensemble level test scores with OOB model scores

In [48]:
#weights = [d["oob_dcg"] * ensemble_scores[d["model_path"]] for d in ensemble_data]
#weights = [ensemble_scores[d["model_path"]] for d in ensemble_data]
weights

In [50]:
dcg_ranked_proposals = rank_proposals_weights(
    [d["test_proposals"] for d in ensemble_data],
    #[d["oob_dcg"] for d in ensemble_data], 
    weights
)
dcg_ranked_proposals

[['knit dress with lace trim',
  'lace dress trf',
  'lace-trimmed dress',
  'knit dress with lace trf',
  'combined lace dress',
  'knit dress with lace detail',
  'camisole dress',
  'knit dress with lace',
  'contrast lace dress',
  'contrast lace dress trf'],
 ['pleated dress trf',
  'printed dress trf',
  'flowing dress trf',
  'floral print dress trf',
  'floral print dress',
  'pleated midi dress',
  'flowing dress with pleats',
  'pleated dress',
  'textured dress with pleats',
  'oversized dress trf'],
 ['nautical cap',
  'striped nautical cap',
  'textured nautical cap',
  'check nautical cap',
  'nautical cap with peak',
  'sporty nautical cap',
  'faux leather nautical cap',
  'limited edition nautical cap',
  'n nautical cap',
  'seashell nautical cap'],
 ['textured nautical cap',
  'nautical cap',
  'technical nautical cap',
  'limited edition nautical cap',
  'faux leather nautical cap',
  'check nautical cap',
  'nautical cap with strap',
  'combined nautical cap',
  's

In [10]:
from postprocessing import save_submission

submission_name = "submission_xx"
save_submission(dcg_ranked_proposals, submission_name)
save_submission(dcg_ranked_proposals, submission_name, zip=False)

### Add names from train strategy

In [51]:
from postprocessing import names_from_train

leakage_model_proposals = names_from_train(test["description"], full)
leakage_model_proposals

[[],
 ['animal print dress'],
 [],
 [],
 [],
 [],
 [],
 [],
 ['stretch top with wide straps'],
 ['stretch top with wide straps'],
 ['stretch top with wide straps'],
 [],
 ['strappy stretch top'],
 [],
 [],
 [],
 [],
 [],
 [],
 ['basic v-neck sweater'],
 ['turtleneck sweater'],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 ['faded-effect t-shirt', 'cropped tie-dye t-shirt trf'],
 ['striped rustic t-shirt with long sleeves',
  'cotton t-shirt',
  'strp l\\s 04',
  'striped cotton t-shirt',
  'premium quality long sleeve t-shirt',
  'tie-dye t-shirt',
  'textured t-shirt',
  'striped t-shirt',
  'long sleeve t-shirt trf',
  'ribbed long sleeve t-shirt',
  'long sleeve faded t-shirt'],
 [],
 [],
 [],
 [],
 ['double buckle belt with chain'],
 [],
 [],
 [],
 [],
 ['loose-fitting textured dress trf'],
 [],
 [],
 [],
 [],
 [],
 ['knit henley sweater'],
 ['jacket with bejewelled fringe'],
 [],
 [],
 ['knit tie-dye sweater',
  'purl-knit sweater trf',
  'purl-knit sweater',
  'textured sweater',
  'o

In [52]:
sum(len(x) > 0 for x in leakage_model_proposals) / len(leakage_model_proposals)

0.15405968077723803

In [53]:
sum(len(x) > 1 for x in leakage_model_proposals) / len(leakage_model_proposals)

0.009021512838306732

Compute some DCG score for this strategy as follows: this strategy only applies to 15.4% of the test data, and it roughly improves test DCG results in 2.2 points.

In [54]:
leaked_score = 2.2 / 15.4 * 100
leaked_score

14.285714285714288

In [57]:
dcg_ranked_leaked_proposals = rank_proposals_weights(
    [d["test_proposals"] for d in ensemble_data] + [leakage_model_proposals],
    #[d["oob_dcg"] for d in ensemble_data] + [1e6], 
    weights + [1e6], 
)
dcg_ranked_leaked_proposals

[['knit dress with lace trim',
  'lace dress trf',
  'lace-trimmed dress',
  'knit dress with lace trf',
  'combined lace dress',
  'knit dress with lace detail',
  'camisole dress',
  'knit dress with lace',
  'contrast lace dress',
  'contrast lace dress trf'],
 ['animal print dress',
  'pleated dress trf',
  'printed dress trf',
  'flowing dress trf',
  'floral print dress trf',
  'floral print dress',
  'pleated midi dress',
  'flowing dress with pleats',
  'pleated dress',
  'textured dress with pleats'],
 ['nautical cap',
  'striped nautical cap',
  'textured nautical cap',
  'check nautical cap',
  'nautical cap with peak',
  'sporty nautical cap',
  'faux leather nautical cap',
  'limited edition nautical cap',
  'n nautical cap',
  'seashell nautical cap'],
 ['textured nautical cap',
  'nautical cap',
  'technical nautical cap',
  'limited edition nautical cap',
  'faux leather nautical cap',
  'check nautical cap',
  'nautical cap with strap',
  'combined nautical cap',
  'st

In [59]:
from postprocessing import save_submission

submission_name = "submission_88"
save_submission(dcg_ranked_leaked_proposals, submission_name)
save_submission(dcg_ranked_leaked_proposals, submission_name, zip=False)

# TODOs

* If that doesn't work well, try with https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.differential_evolution.html#scipy.optimize.differential_evolution in multithreading.