In [2]:
import os
import sys
import itertools
import pickle
from glob import glob
from tqdm import tqdm_notebook as tqdm

import numpy as np
import pandas as pd
from scipy.stats import spearmanr

from matplotlib import pyplot as plt
from matplotlib_venn import venn2, venn3
import seaborn as sns

import torch
from torch import nn, optim
from transformers import BertConfig, BertTokenizer, BertModel, BertForMaskedLM#, BertLayer, BertEmbeddings
from transformers.modeling_bert import BertLayer, BertEmbeddings

In [3]:
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

# re-load functions
%load_ext autoreload
%autoreload 2

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))
%config InlineBackend.figure_formats = {'png', 'retina'}

In [4]:
# Device string. NOTE(review): no visible cell reads DEVICE — presumably kept
# for model loading elsewhere; confirm or remove.
DEVICE = 'cpu'

In [5]:
import sys
import pickle
from functools import partial
from glob import glob

import numpy as np
import pandas as pd
import scipy as sp
import torch
from scipy.stats import spearmanr
from tqdm import tqdm

class OptimizedRounder(object):
    """
    Learn bin thresholds that turn continuous predictions into discrete labels
    so that the Spearman rank correlation with the ground truth is maximized.

    Adapted from a Quadratic Weighted Kappa rounder:
    https://www.kaggle.com/naveenasaithambi/optimizedrounder-improved

    Usage: ``set_labels(labels)`` -> ``fit(X, y, initial_coef)`` ->
    ``predict(X, coefficients())``.
    """

    def __init__(self):
        # Placeholder; fit() replaces it with the scipy optimization result.
        self.coef_ = 0
        # Values assigned to the bins, lowest bin first; set via set_labels().
        self.labels = None

    def _require_labels(self):
        """Return the configured labels or fail with an actionable message."""
        # getattr keeps instances that were created without the attribute usable.
        labels = getattr(self, 'labels', None)
        if labels is None:
            raise ValueError(
                'labels are not set; call set_labels() before fit()/predict().')
        return labels

    def _spearmanr_loss(self, coef, X, y, labels):
        """
        Negative Spearman correlation obtained with the given thresholds
        :param coef: A list of thresholds (sorted internally) used for rounding
        :param X: The raw predictions
        :param y: The ground truth labels
        :param labels: Values assigned to the bins, lowest bin first
        :return: ``-rho``; 0.0 when rho is undefined (NaN)
        """
        X_p = pd.cut(X, [-np.inf] + list(np.sort(coef)) + [np.inf],
                     labels=labels)
        rho = spearmanr(y, np.asarray(X_p)).correlation
        if np.isnan(rho):
            # Every prediction fell into one bin (or an input is constant), so
            # the correlation is undefined. Score it as "no correlation" rather
            # than handing NaN to the simplex comparisons of the optimizer.
            return 0.0
        return -rho

    def fit(self, X, y, initial_coef):
        """
        Optimize rounding thresholds with Nelder-Mead
        :param X: The raw predictions
        :param y: The ground truth labels
        :param initial_coef: Starting thresholds, one fewer than there are labels
        :return: self
        :raises ValueError: if set_labels() has not been called
        """
        # Import the submodule explicitly instead of relying on `sp.optimize`
        # having been loaded as a side effect of another import.
        from scipy.optimize import minimize

        labels = self._require_labels()
        loss_partial = partial(self._spearmanr_loss, X=X, y=y, labels=labels)
        self.coef_ = minimize(loss_partial, initial_coef, method='nelder-mead')
        return self

    def predict(self, X, coef):
        """
        Make predictions with specified thresholds
        :param X: The raw predictions
        :param coef: A list of thresholds that will be used for rounding
        :return: the result of ``pd.cut`` with the configured labels
        :raises ValueError: if set_labels() has not been called
        """
        labels = self._require_labels()
        return pd.cut(X, [-np.inf] + list(np.sort(coef)) + [np.inf],
                      labels=labels)

    def coefficients(self):
        """
        Return the optimized thresholds (requires a prior call to fit())
        """
        return self.coef_['x']

    def set_labels(self, labels):
        """Set the values assigned to the bins, lowest bin first."""
        self.labels = labels

In [6]:
# sys.path.append('../scripts/')
# from get_optR3 import compute_spearmanr, get_opt_y_pred

import os
import pickle
import sys
from functools import partial
from glob import glob

import numpy as np
import pandas as pd
import scipy as sp
import torch
from scipy.stats import spearmanr
from tqdm import tqdm


class histogramBasedCoefInitializer:
    """
    Derive starting thresholds for a rounder from the label histogram.

    ``fit`` stores the cumulative counts of the distinct labels (ascending
    label order). ``predict`` sorts the predictions and, for every cumulative
    count except the last, returns the midpoint between the two sorted
    predictions on either side of that position.
    """

    def __init__(self):
        self.bins = None

    def fit(self, labels):
        """Record cumulative per-label counts of ``labels`` and return self."""
        counts_by_label = pd.Series(labels).value_counts().sort_index()
        self.bins = counts_by_label.cumsum().values
        return self

    def predict(self, preds):
        """
        Return the list of initial thresholds for ``preds``.
        Raises Exception when fit() has not been called yet.
        """
        ordered = sorted(preds)
        if self.bins is None:
            raise Exception('plz fit at first.')
        return [(ordered[cut - 1] + ordered[cut]) / 2
                for cut in self.bins[:-1]]


class OptimizedRounder(object):
    """
    Learn bin thresholds that turn continuous predictions into discrete labels
    so that the Spearman rank correlation with the ground truth is maximized.

    Adapted from a Quadratic Weighted Kappa rounder:
    https://www.kaggle.com/naveenasaithambi/optimizedrounder-improved

    Usage: ``set_labels(labels)`` -> ``fit(X, y, initial_coef)`` ->
    ``predict(X, coefficients())``.
    """

    def __init__(self):
        # Placeholder; fit() replaces it with the scipy optimization result.
        self.coef_ = 0
        # Values assigned to the bins, lowest bin first; set via set_labels().
        self.labels = None

    def _require_labels(self):
        """Return the configured labels or fail with an actionable message."""
        # getattr keeps instances that were created without the attribute usable.
        labels = getattr(self, 'labels', None)
        if labels is None:
            raise ValueError(
                'labels are not set; call set_labels() before fit()/predict().')
        return labels

    def _spearmanr_loss(self, coef, X, y, labels):
        """
        Negative Spearman correlation obtained with the given thresholds
        :param coef: A list of thresholds (sorted internally) used for rounding
        :param X: The raw predictions
        :param y: The ground truth labels
        :param labels: Values assigned to the bins, lowest bin first
        :return: ``-rho``; 0.0 when rho is undefined (NaN)
        """
        X_p = pd.cut(X, [-np.inf] + list(np.sort(coef)) + [np.inf],
                     labels=labels)
        rho = spearmanr(y, np.asarray(X_p)).correlation
        if np.isnan(rho):
            # Every prediction fell into one bin (or an input is constant), so
            # the correlation is undefined. Score it as "no correlation" rather
            # than handing NaN to the simplex comparisons of the optimizer.
            return 0.0
        return -rho

    def fit(self, X, y, initial_coef):
        """
        Optimize rounding thresholds with Nelder-Mead
        :param X: The raw predictions
        :param y: The ground truth labels
        :param initial_coef: Starting thresholds, one fewer than there are labels
        :return: self
        :raises ValueError: if set_labels() has not been called
        """
        # Import the submodule explicitly instead of relying on `sp.optimize`
        # having been loaded as a side effect of another import.
        from scipy.optimize import minimize

        labels = self._require_labels()
        loss_partial = partial(self._spearmanr_loss, X=X, y=y, labels=labels)
        self.coef_ = minimize(loss_partial, initial_coef, method='nelder-mead')
        return self

    def predict(self, X, coef):
        """
        Make predictions with specified thresholds
        :param X: The raw predictions
        :param coef: A list of thresholds that will be used for rounding
        :return: the result of ``pd.cut`` with the configured labels
        :raises ValueError: if set_labels() has not been called
        """
        labels = self._require_labels()
        return pd.cut(X, [-np.inf] + list(np.sort(coef)) + [np.inf],
                      labels=labels)

    def coefficients(self):
        """
        Return the optimized thresholds (requires a prior call to fit())
        """
        return self.coef_['x']

    def set_labels(self, labels):
        """Set the values assigned to the bins, lowest bin first."""
        self.labels = labels


def compute_spearmanr(trues, preds):
    """
    Column-wise Spearman correlation between two 2-D arrays.

    A constant prediction column has no defined rank correlation. Such a
    column is patched before scoring by overwriting one element with the
    column's true min (when the constant equals the true max) or its true max
    (otherwise). The patch is applied to a copy, so ``preds`` is never
    modified.

    :param trues: ground truth, indexed as (samples, columns)
    :param preds: predictions with the same layout as ``trues``
    :return: list with one correlation per column
    """
    rhos = []
    for col_trues, col_pred in zip(trues.T, preds.T):
        if len(np.unique(col_pred)) == 1:
            # `col_pred` is a view into the caller's array; copy before patching.
            col_pred = np.array(col_pred, copy=True)
            if col_pred[0] == np.max(col_trues):
                col_pred[np.argmin(col_pred)] = np.min(col_trues)
            else:
                col_pred[np.argmax(col_pred)] = np.max(col_trues)
        rhos.append(spearmanr(col_trues, col_pred).correlation)
    return rhos


def get_best_ckpt(ckpts):
    """
    Return the checkpoint path with the highest validation metric.

    The metric is parsed from the file name: the part after the last '/' is
    split on '_' and the token at index 5 is read as a float.
    """
    records = [
        {'ckpt': path,
         'val_metric': float(path.split('/')[-1].split('_')[5])}
        for path in ckpts
    ]
    ranked = pd.DataFrame(records).sort_values('val_metric', ascending=False)
    return ranked.ckpt.iloc[0]


def get_snapshot_info_df(base_dir, num_folds=5):
    """
    Collect metadata of every ``*.pth`` checkpoint under ``base_dir/<fold>/``.

    Metadata is parsed from the file name split on '_': token 1 is the fold,
    token 3 the epoch, token 4 the validation loss and token 5 the validation
    metric.

    :param base_dir: directory holding one sub-directory per fold (0, 1, ...)
    :param num_folds: number of fold sub-directories to scan
    :return: DataFrame with columns ckpt_filename, fold, epoch, val_loss,
             val_metric and rank (ascending rank of val_metric within a fold)
    :raises FileNotFoundError: when no checkpoint file is found at all
    """
    res_dicts = []
    for fold in tqdm(list(range(num_folds))):
        # glob order is arbitrary; sort it so the row order is reproducible.
        for ckpt in sorted(glob(f'{base_dir}/{fold}/*.pth')):
            tokens = os.path.basename(ckpt).split('_')
            res_dicts.append({
                'ckpt_filename': ckpt,
                'fold': int(tokens[1]),
                'epoch': int(tokens[3]),
                'val_loss': float(tokens[4]),
                'val_metric': float(tokens[5]),
            })
    if not res_dicts:
        # Without this guard the groupby below fails with KeyError: 'fold'.
        raise FileNotFoundError(
            f'no *.pth checkpoints found under {base_dir}/<fold>/ '
            f'for folds 0..{num_folds - 1}')
    res_df = pd.DataFrame(res_dicts)
    res_df['rank'] = res_df.groupby(['fold']).val_metric.rank()
    return res_df


def get_opt_y_pred(y_true, y_pred, num_labels):
    """
    Fit one OptimizedRounder per label column and round the predictions.

    For each of the first ``num_labels`` columns, the rounder's labels are the
    sorted unique true values, its starting thresholds come from
    histogramBasedCoefInitializer, and the column is then rounded with the
    fitted thresholds.

    :return: (list of fitted rounders, rounded predictions as an array whose
              columns correspond to the label columns)
    """
    rounders, rounded_columns = [], []
    for col in range(num_labels):
        col_true = y_true[:, col]
        col_pred = y_pred[:, col]

        rounder = OptimizedRounder()
        rounder.set_labels(np.sort(np.unique(col_true)))
        start_thresholds = (
            histogramBasedCoefInitializer().fit(col_true).predict(col_pred))
        rounder.fit(col_pred, col_true, start_thresholds)

        rounders.append(rounder)
        rounded_columns.append(
            rounder.predict(col_pred, rounder.coefficients()))

    return rounders, np.asarray(rounded_columns).T


def opt(BASE_PATH, num_labels=30, snapshot_num=2):
    """
    Build the out-of-fold snapshot ensemble for one experiment directory, fit
    per-label rounding thresholds on it, and persist the artifacts.

    Steps:
      1. List checkpoints with get_snapshot_info_df(BASE_PATH).
      2. For each of the 5 folds, load the ``snapshot_num`` highest-ranked
         checkpoints and keep their model state dict plus validation
         targets / predictions, reordered by the argsort of ``val_qa_ids``.
      3. Average the snapshot predictions per fold and concatenate the folds.
      4. Fit one OptimizedRounder per label column and round the predictions.
      5. Write optRs.pkl, snapshot_dicts.pkl and one pickle per state dict
         under BASE_PATH, then print raw and rounded Spearman scores.

    :param BASE_PATH: experiment directory holding one sub-directory per fold
    :param num_labels: number of label columns to optimize
    :param snapshot_num: number of top checkpoints used per fold
    :return: per-column Spearman scores of the rounded predictions
    """
    snapshot_df = get_snapshot_info_df(BASE_PATH)

    snapshot_dicts = {}
    state_dict_dicts = {}
    for fold in tqdm(list(range(5))):
        snapshot_dict = {}
        state_dict_dict = {}
        # After sort + reset_index, `i` is the 0-based position of the
        # checkpoint in descending `rank` order within this fold.
        for i, row in snapshot_df.query(f'fold == {fold}').sort_values(
                'rank', ascending=False).reset_index(drop=True).iterrows():
            if i >= snapshot_num:
                continue
            ckpt = torch.load(row['ckpt_filename'])
            state_dict_dict[i] = ckpt['model_state_dict']
            # Both branches reorder by sorted qa id; the first snapshot creates
            # the lists, later snapshots append to them.
            if i == 0:
                qa_ids = ckpt['val_qa_ids']
                qa_ids_argsort = np.argsort(qa_ids)
                snapshot_dict['y_trues'] = [
                    ckpt['val_y_trues'][qa_ids_argsort]]
                snapshot_dict['y_preds'] = [
                    ckpt['val_y_preds'][qa_ids_argsort]]
            else:
                qa_ids = ckpt['val_qa_ids']
                qa_ids_argsort = np.argsort(qa_ids)
                snapshot_dict['y_trues'].append(
                    ckpt['val_y_trues'][qa_ids_argsort])
                snapshot_dict['y_preds'].append(
                    ckpt['val_y_preds'][qa_ids_argsort])
        snapshot_dicts[fold] = snapshot_dict
        state_dict_dicts[fold] = state_dict_dict

    # Predictions: mean over the snapshots of a fold, folds stacked row-wise.
    y_preds = np.concatenate(
        [np.average(snapshot_dicts[fold]['y_preds'][:snapshot_num], axis=0)
         for fold in range(5)])
    # Targets: taken from the first snapshot of each fold only.
    y_trues = np.concatenate(
        [snapshot_dicts[fold]['y_trues'][0]
         for fold in range(5)])

    reses = []
    optRs = []

    for i in tqdm(list(range(num_labels))):
        y_pred = y_preds[:, i]
        y_true = y_trues[:, i]

        # Remembered so a degenerate (constant) rounding can be patched below.
        y_pred_argmax = np.argmax(y_pred)
        y_pred_argmin = np.argmin(y_pred)

        optR = OptimizedRounder()
        labels = np.sort(np.unique(y_true))
        optR.set_labels(labels)
        initer = histogramBasedCoefInitializer().fit(y_true)
        initial_coef = initer.predict(y_pred)
        optR.fit(y_pred, y_true, initial_coef=initial_coef)
        optRs.append(optR)
        res = optR.predict(y_pred, optR.coefficients())

        # A constant rounded column has no defined Spearman correlation, so one
        # element is overwritten to break the tie.
        # NOTE(review): when `res` is constant its single unique value always
        # equals res[y_pred_argmax], so the else branch looks unreachable —
        # confirm the intended condition.
        if len(np.unique(res)) == 1:
            if np.unique(res) == res[y_pred_argmax]:
                res[y_pred_argmin] = np.min(y_true)
            else:
                res[y_pred_argmax] = np.max(y_true)

        reses.append(res)
    reses = np.asarray(reses).T

    with open(f'{BASE_PATH}/optRs.pkl', 'wb') as fout:
        pickle.dump(optRs, fout)
    with open(f'{BASE_PATH}/snapshot_dicts.pkl', 'wb') as fout:
        pickle.dump(snapshot_dicts, fout)
    if not os.path.exists(f'{BASE_PATH}/state_dicts'):
        os.mkdir(f'{BASE_PATH}/state_dicts')
    # One file per (fold, rank). This indexes state_dict_dicts[fold][rank], so
    # every fold must have provided at least `snapshot_num` checkpoints.
    for fold in range(5):
        for rank in range(snapshot_num):
            with open(f'{BASE_PATH}/state_dicts/fold_{fold}_rank_{rank}_state_dict.pkl', 'wb') as fout:
                pickle.dump(state_dict_dicts[fold][rank], fout)
    # with open(f'{BASE_PATH}/state_dict_dicts.pkl', 'wb') as fout:
    #     pickle.dump(state_dict_dicts, fout)

    original_score = compute_spearmanr(y_trues, y_preds)
    print(f'original_score: {original_score}')
    # Same label as the line above, but this one prints the mean.
    print(f'original_score: {np.mean(original_score)}')

    res_score = compute_spearmanr(y_trues, reses)
    print(f'res_score: {res_score}')
    print(f'res_score_mean: {np.mean(res_score)}')

    return res_score

In [7]:
class histogramBasedCoefInitializer:
    """
    Derive starting thresholds for a rounder from the label histogram.

    ``fit`` stores the cumulative counts of the distinct labels (ascending
    label order). ``predict`` sorts the predictions and, for every cumulative
    count except the last, returns the midpoint between the two sorted
    predictions on either side of that position.
    """

    def __init__(self):
        self.bins = None

    def fit(self, labels):
        """Record cumulative per-label counts of ``labels`` and return self."""
        label_counts = pd.Series(labels).value_counts().sort_index()
        self.bins = label_counts.cumsum().values
        return self

    def predict(self, preds):
        """
        Return the list of initial thresholds for ``preds``.
        Raises Exception when fit() has not been called yet.
        """
        ordered = sorted(preds)
        if self.bins is None:
            raise Exception('plz fit at first.')
        thresholds = []
        for boundary in self.bins[:-1]:
            lower, upper = ordered[boundary - 1], ordered[boundary]
            thresholds.append((lower + upper) / 2)
        return thresholds

## snapshot 済みの model を load

In [8]:
CKPT_DIR = '../mnt/checkpoints'


def _load_snapshot_dicts(exp_id):
    """Unpickle ``snapshot_dicts.pkl`` from the experiment directory ``exp_id`` under CKPT_DIR."""
    # pickle executes code on load: only point this at files you produced yourself.
    with open(f'{CKPT_DIR}/{exp_id}/snapshot_dicts.pkl', 'rb') as fin:
        return pickle.load(fin)


# One experiment id per (backbone, question/answer) pair.
bert_question_dict = _load_snapshot_dicts('e078')
bert_answer_dict = _load_snapshot_dicts('e079')

roberta_question_dict = _load_snapshot_dicts('e080')
roberta_answer_dict = _load_snapshot_dicts('e081')

# gpt2 (e072 question / e073 answer) is currently disabled.
# gpt2_question_dict = _load_snapshot_dicts('e072')
# gpt2_answer_dict = _load_snapshot_dicts('e073')

xlnet_question_dict = _load_snapshot_dicts('e082')
xlnet_answer_dict = _load_snapshot_dicts('e083')

In [87]:
# Scratch check: slicing with start 1 and step 2 picks every second element.
np.arange(10)[1::2]

array([1, 3, 5, 7, 9])

In [180]:
def _get_y_trues_and_y_preds_from_snapshot_dicts(snapshot_dicts, single, avg):
    y_trues, y_preds = [], []
    for fold in range(5):
        if single:
            y_trues.append(snapshot_dicts[fold]['y_trues'][0])
            y_preds.append(snapshot_dicts[fold]['y_preds'][0])
        else:
            if avg:
                y_trues.append(np.average(snapshot_dicts[fold]['y_trues'], axis=0))
                y_preds.append(np.average(snapshot_dicts[fold]['y_preds'], axis=0))
            else:
                y_trues.append(np.concatenate(snapshot_dicts[fold]['y_trues'], axis=1))
                y_preds.append(np.concatenate(snapshot_dicts[fold]['y_preds'], axis=1))
    y_trues = np.concatenate(y_trues)
    y_preds = np.concatenate(y_preds)
    return y_trues, y_preds

def get_y_trues_and_y_preds_from_QA_snapshota_dicts(Q_snapshot_dicts, A_snapshot_dicts, single=False, avg=True, model_num=2):
    q_y_trues, q_y_preds = _get_y_trues_and_y_preds_from_snapshot_dicts(Q_snapshot_dicts, single, avg)
    a_y_trues, a_y_preds = _get_y_trues_and_y_preds_from_snapshot_dicts(A_snapshot_dicts, single, avg)
    if avg:
        y_trues = np.concatenate([q_y_trues, a_y_trues], axis=1)
        y_preds = np.concatenate([q_y_preds, a_y_preds], axis=1)
    else:
        y_trues = np.concatenate([np.concatenate([q_y_trues[:, i*21:(i+1)*21], a_y_trues[:, i*9:(i+1)*9]], axis=1) for i in range(model_num)], axis=1)
        y_preds = np.concatenate([np.concatenate([q_y_preds[:, i*21:(i+1)*21], a_y_preds[:, i*9:(i+1)*9]], axis=1) for i in range(model_num)], axis=1)
        # y_preds = np.concatenate([q_y_preds[:, i*21:(i+1)*21] for i in range(model_num)] + [a_y_preds[:, i*9:(i+1)*9] for i in range(model_num)], axis=1)
    return y_trues, y_preds

In [65]:
# Snapshot-averaged validation targets / predictions per backbone
# (function defaults: single=False, avg=True).
bert_y_trues, bert_y_preds = get_y_trues_and_y_preds_from_QA_snapshota_dicts(bert_question_dict, bert_answer_dict)
roberta_y_trues, roberta_y_preds = get_y_trues_and_y_preds_from_QA_snapshota_dicts(roberta_question_dict, roberta_answer_dict)
# gpt2 is disabled (its snapshot dicts are not loaded).
# gpt2_y_trues, gpt2_y_preds = get_y_trues_and_y_preds_from_QA_snapshota_dicts(gpt2_question_dict, gpt2_answer_dict)
xlnet_y_trues, xlnet_y_preds = get_y_trues_and_y_preds_from_QA_snapshota_dicts(xlnet_question_dict, xlnet_answer_dict)

In [66]:
# Same extraction using only the first snapshot of every fold (single=True).
single_bert_y_trues, single_bert_y_preds = get_y_trues_and_y_preds_from_QA_snapshota_dicts(bert_question_dict, bert_answer_dict, single=True)
single_roberta_y_trues, single_roberta_y_preds = get_y_trues_and_y_preds_from_QA_snapshota_dicts(roberta_question_dict, roberta_answer_dict, single=True)
# gpt2 is disabled (its snapshot dicts are not loaded).
# single_gpt2_y_trues, single_gpt2_y_preds = get_y_trues_and_y_preds_from_QA_snapshota_dicts(gpt2_question_dict, gpt2_answer_dict, single=True)
single_xlnet_y_trues, single_xlnet_y_preds = get_y_trues_and_y_preds_from_QA_snapshota_dicts(xlnet_question_dict, xlnet_answer_dict, single=True)

In [12]:
# Looks good: check that all three backbones were evaluated against identical targets.
(bert_y_trues == roberta_y_trues).all(), (bert_y_trues == xlnet_y_trues).all()

(True, True)

In [13]:
# Counter-check: do the predictions of different backbones coincide element-wise?
(bert_y_preds == roberta_y_preds).all(), (bert_y_preds == xlnet_y_preds).all()

(False, False)

## stacking
 - elastic net
 - lasso
 - mlp
 - lgbm

In [193]:
# Re-extract with avg=False: snapshots are kept side by side along the column
# axis instead of being averaged.
# NOTE: this rebinds the *_y_trues / *_y_preds names assigned by the averaged extraction.
bert_y_trues, bert_y_preds = get_y_trues_and_y_preds_from_QA_snapshota_dicts(bert_question_dict, bert_answer_dict, avg=False)
roberta_y_trues, roberta_y_preds = get_y_trues_and_y_preds_from_QA_snapshota_dicts(roberta_question_dict, roberta_answer_dict, avg=False)
xlnet_y_trues, xlnet_y_preds = get_y_trues_and_y_preds_from_QA_snapshota_dicts(xlnet_question_dict, xlnet_answer_dict, avg=False)

In [194]:
# Layout check: the first 30 target columns equal the remaining ones.
(bert_y_trues[:, :30] == bert_y_trues[:, 30:]).all(), (roberta_y_trues[:, :30] == roberta_y_trues[:, 30:]).all(), (xlnet_y_trues[:, :30] == xlnet_y_trues[:, 30:]).all(), 

(True, True, True)

In [195]:
# First snapshot only, extracted through the avg=False code path.
# NOTE: this rebinds the single_* names assigned by the earlier single=True extraction.
single_bert_y_trues, single_bert_y_preds = get_y_trues_and_y_preds_from_QA_snapshota_dicts(bert_question_dict, bert_answer_dict, single=True, avg=False)
single_roberta_y_trues, single_roberta_y_preds = get_y_trues_and_y_preds_from_QA_snapshota_dicts(roberta_question_dict, roberta_answer_dict, single=True, avg=False)
single_xlnet_y_trues, single_xlnet_y_preds = get_y_trues_and_y_preds_from_QA_snapshota_dicts(xlnet_question_dict, xlnet_answer_dict, single=True, avg=False)

In [197]:
# Consistency also holds for the single-snapshot targets: they equal the first 30 stacked columns.
(bert_y_trues[:, :30] == single_bert_y_trues).all(), (roberta_y_trues[:, :30] == single_roberta_y_trues).all(), (xlnet_y_trues[:, :30] == single_xlnet_y_trues).all(), 

(True, True, True)

## 混ぜてみる
 - 4 つ全て混ぜてみる
 - 3/4 の snapshot 混ぜてみる？

In [15]:
def blend_and_evaluate(y_trues, y_preds_list, eval_func, weights=None, num_labels=30):
    """
    Blend several prediction matrices and score the blend before and after
    threshold optimization.

    :param y_trues: ground-truth matrix
    :param y_preds_list: sequence of prediction matrices of identical shape
    :param eval_func: callable ``(y_trues, y_preds) -> per-column scores``
    :param weights: optional blend weights, one per entry of ``y_preds_list``;
                    ``None`` or an empty sequence means a plain mean
    :param num_labels: number of label columns passed to get_opt_y_pred
    :return: (raw scores, scores after rounding, fitted rounders)
    """
    # `if weights:` raises for a numpy array (ambiguous truth value); test for
    # None / emptiness explicitly. np.average treats weights=None as a plain mean.
    if weights is not None and len(weights) == 0:
        weights = None
    y_preds = np.average(y_preds_list, axis=0, weights=weights)

    eval_scores = eval_func(y_trues, y_preds)
    optRs, opt_y_preds = get_opt_y_pred(y_trues, y_preds, num_labels=num_labels)
    opt_eval_scores = eval_func(y_trues, opt_y_preds)
    print(f'original_score: {np.mean(eval_scores)}')
    print(f'opt_score: {np.mean(opt_eval_scores)}')
    return eval_scores, opt_eval_scores, optRs

#### 試しに single, top-2 snapshots を見てみる

In [16]:
# Score each backbone's single-snapshot predictions on its own.
# The bert targets are passed for every backbone.
blend_and_evaluate(single_bert_y_trues, [single_bert_y_preds,  ], compute_spearmanr)
blend_and_evaluate(single_bert_y_trues, [single_roberta_y_preds, ], compute_spearmanr)
# blend_and_evaluate(single_bert_y_trues, [single_gpt2_y_preds, ], compute_spearmanr)
blend_and_evaluate(single_bert_y_trues, [single_xlnet_y_preds], compute_spearmanr)
# Completion marker for this cell.
print('fini!')

original_score: 0.39658756843296844
opt_score: 0.4240074936181189


  c /= stddev[:, None]
  c /= stddev[None, :]
  return (a < x) & (x < b)
  return (a < x) & (x < b)
  cond2 = cond0 & (x <= _a)


original_score: 0.3952086168898242
opt_score: 0.417814664868746
original_score: 0.39510187276130454
opt_score: 0.42243226674838147
fini!


In [17]:
# Same per-backbone scoring, now with the multi-snapshot prediction arrays.
blend_and_evaluate(single_bert_y_trues, [bert_y_preds,  ], compute_spearmanr)
blend_and_evaluate(single_bert_y_trues, [roberta_y_preds, ], compute_spearmanr)
# blend_and_evaluate(single_bert_y_trues, [gpt2_y_preds, ], compute_spearmanr)
blend_and_evaluate(single_bert_y_trues, [xlnet_y_preds], compute_spearmanr)
# Completion marker for this cell.
print('fini!')

original_score: 0.40437836760234475
opt_score: 0.4374389165251154
original_score: 0.40003269950864506
opt_score: 0.4242294402147104
original_score: 0.4032963844416211
opt_score: 0.427509974207624
fini!


#### all blends がどうなるかを見てみる

In [19]:
# Equal-weight blend of the three single-snapshot backbones (the gpt2 variant below is disabled).
# eval_scores, opt_eval_scores = blend_and_evaluate(single_bert_y_trues, [single_bert_y_preds,  single_roberta_y_preds, single_gpt2_y_preds, single_xlnet_y_preds], compute_spearmanr)
blend_and_evaluate(single_bert_y_trues, [single_bert_y_preds,  single_roberta_y_preds, single_xlnet_y_preds], compute_spearmanr)

original_score: 0.416481507046034
opt_score: 0.4491590833618151


([0.38905841394488094,
  0.6247808168032952,
  0.417437113437622,
  0.31287558919364283,
  0.36729001884264845,
  0.4320202417398792,
  0.3582580403543549,
  0.503933657224245,
  0.6036936477697188,
  0.09076789352774699,
  0.4882347355350011,
  0.7574907022006736,
  0.3685668751054396,
  0.18640940498197578,
  0.36225255465264955,
  0.4620301559770703,
  0.7840110446317836,
  0.3762973591422292,
  0.6851066282982686,
  0.06541704528211391,
  0.5096632287321945,
  0.26551775067893324,
  0.43252527040059774,
  0.1653135536360063,
  0.18447019226235326,
  0.3518189178180389,
  0.7604002686216906,
  0.28715523797183545,
  0.6915254508721946,
  0.21012340174193572],
 [0.3897376858369403,
  0.6248090602495397,
  0.49905186862391104,
  0.3105616628800056,
  0.3496754909976338,
  0.48729649612980774,
  0.3672386637173782,
  0.5174580611366218,
  0.6042530137894996,
  0.11817112469368243,
  0.46877103795952424,
  0.7702509397362571,
  0.5688303470123413,
  0.2918178318167146,
  0.6262717343310

## 2, 2, 1 じゃ disk 足りなかった...

In [20]:
# Compare the three blends in which exactly one backbone contributes its
# multi-snapshot predictions and the other two their single snapshot.
blend_and_evaluate(bert_y_trues, [single_bert_y_preds,  single_roberta_y_preds, xlnet_y_preds], compute_spearmanr)
blend_and_evaluate(bert_y_trues, [bert_y_preds,  single_roberta_y_preds, single_xlnet_y_preds], compute_spearmanr)
blend_and_evaluate(bert_y_trues, [single_bert_y_preds,  roberta_y_preds, single_xlnet_y_preds], compute_spearmanr)
# Completion marker for this cell.
print('fini!')

original_score: 0.41745646679452425
opt_score: 0.4494514724953275
original_score: 0.41786576946688975
opt_score: 0.4513792627190148
original_score: 0.4181574464924081
opt_score: 0.44796109262403244
fini!


#### 2, 1, 1 で行くことにするので optR を取得

In [21]:
# Chosen blend (multi-snapshot bert + single roberta + single xlnet); keep the fitted rounders for export.
eval_scores, opt_eval_scores, optRs = blend_and_evaluate(bert_y_trues, [bert_y_preds,  single_roberta_y_preds, single_xlnet_y_preds], compute_spearmanr)

original_score: 0.41786576946688975
opt_score: 0.4513792627190148


In [23]:
# Create the export directory; exist_ok keeps the cell re-runnable
# (a plain `mkdir` errors once the directory exists).
os.makedirs('../mnt/datasets/sub_e078_e083_bert2_roberta1_xlnet1', exist_ok=True)

In [24]:
# Persist the fitted rounders of the chosen blend next to the submission data.
with open('../mnt/datasets/sub_e078_e083_bert2_roberta1_xlnet1/optRs.pkl', 'wb') as fout:
    pickle.dump(optRs, fout)

In [27]:
# Per-column raw scores as one comma-separated string (easy to copy elsewhere).
','.join(([str(i) for i in eval_scores]))

'0.3888662513004357,0.6264747430024324,0.417198052103926,0.31356995786700836,0.36621009889416495,0.43161328159188245,0.35960846883953307,0.5044099886093536,0.604524708512999,0.09185984413637717,0.4871602561126112,0.7588073461879603,0.3687724113728429,0.1878256807431471,0.3623045162839472,0.46221497917924714,0.7845698434998972,0.37561304685622554,0.685135875113699,0.06569265158283247,0.509277164422869,0.26846500559030767,0.44030220154571237,0.17059028253021058,0.18652642117523097,0.35943031009217935,0.7627075286902857,0.28993040895144306,0.6928979913635044,0.21341376785442578'

In [28]:
# Per-column scores after threshold optimization, same format.
','.join(([str(i) for i in opt_eval_scores]))

'0.38647035222851883,0.6251840213259875,0.503341694761477,0.3140683242310781,0.37238351698553224,0.48891838338012195,0.3688643054450728,0.5174029320574922,0.6075334223395167,0.11234184006252432,0.4734302164363926,0.7724553926978851,0.5679733516339525,0.3086234975886461,0.6273452824025416,0.6104216226094872,0.7913164255678906,0.35227648410197876,0.6743979217445293,0.21251606433106468,0.5092991566008891,0.25989741044634646,0.4419292131037434,0.172812412462297,0.1891229575649494,0.360249882795264,0.7579315945321048,0.2690066667578972,0.6818714797543032,0.21199205562095982'

In [84]:
# Raw scores dumped again at a later execution count.
# NOTE(review): `eval_scores` was presumably rebound by another run in between — confirm which blend this reflects.
','.join([str(i) for i in eval_scores])

'0.38753764855932404,0.624591236065633,0.41612521851465645,0.31631905154404777,0.36958084656834655,0.4333397143525736,0.35647225646179814,0.5053740180720495,0.6024717179943407,0.09343879770080012,0.48788970013367006,0.7554768784733894,0.37043040207462113,0.19175643232185877,0.3632398123928754,0.4644149810472098,0.7882689049397114,0.37508934129137833,0.6875082798335962,0.06859634383678749,0.5058311174919892,0.26776035706473816,0.42468236885314736,0.18264774434316508,0.1960057923938459,0.3571316272381341,0.7629198053679217,0.29516358912502183,0.6885185616941287,0.1986028427408061'

In [85]:
# Optimized scores dumped again at a later execution count.
# NOTE(review): `opt_eval_scores` was presumably rebound in between — confirm which blend this reflects.
','.join([str(i) for i in opt_eval_scores])

'0.38610697100222874,0.6220378810356756,0.4803108598080848,0.3087254707253665,0.3767010478726585,0.48153256901696284,0.3623044669561681,0.5109740248841105,0.6079582135843575,0.10998964284517494,0.4870548021086867,0.7644745305398798,0.5468575104750577,0.2850693099375624,0.6439619945737882,0.6181433940398664,0.7950887499733366,0.3480114972488913,0.6853066014210485,0.36239255378317503,0.503890831161848,0.26595541365451547,0.4260248516788567,0.18501720906281013,0.20022371905506964,0.3569060052051175,0.7579715572197582,0.27970705751496816,0.6821275873309829,0.20174712128915573'

#### weights どうすればよいか見てみる

In [86]:
# Scratch check of the weight grid values produced by np.arange.
np.arange(0, 1.1, 0.1)

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ])

In [None]:
# Coarse grid search over blend weights for (single bert, roberta, single xlnet)
# in steps of 0.1, with the three weights summing to 1.
# Integer step counters replace the float np.arange grid. Every weight is then
# exactly k / 10, and the third weight can never become a tiny negative number
# through round-off (e.g. 1 - 0.30000000000000004 - 0.7000000000000001 < 0).
GRID_STEPS = 10
for i_step in range(GRID_STEPS + 1):
    for j_step in range(GRID_STEPS + 1 - i_step):
        weights = [i_step / GRID_STEPS,
                   j_step / GRID_STEPS,
                   (GRID_STEPS - i_step - j_step) / GRID_STEPS]
        print(f' ---------- {weights} -----------')
        # NOTE: rebinds eval_scores / opt_eval_scores / optRs on every iteration.
        eval_scores, opt_eval_scores, optRs = blend_and_evaluate(
            bert_y_trues,
            [single_bert_y_preds, roberta_y_preds, single_xlnet_y_preds],
            compute_spearmanr,
            weights=weights)

 ---------- [0.0, 0.0, 1.0] -----------
original_score: 0.3906094115371919
opt_score: 0.4208016538440192
 ---------- [0.0, 0.1, 0.9] -----------
original_score: 0.39690428620399504
opt_score: 0.42660201555875216
 ---------- [0.0, 0.2, 0.8] -----------
original_score: 0.40194251860786706
opt_score: 0.4322132022423592
 ---------- [0.0, 0.30000000000000004, 0.7] -----------
original_score: 0.4058057699680523
opt_score: 0.4386623102560342
 ---------- [0.0, 0.4, 0.6] -----------
original_score: 0.40850785092762126
opt_score: 0.44218874682162584
 ---------- [0.0, 0.5, 0.5] -----------
original_score: 0.41002182412980964
opt_score: 0.4462993778180665
 ---------- [0.0, 0.6000000000000001, 0.3999999999999999] -----------
original_score: 0.41026148164290954
opt_score: 0.4472571871083271
 ---------- [0.0, 0.7000000000000001, 0.29999999999999993] -----------
original_score: 0.4092935254165069
opt_score: 0.4447945948297066
 ---------- [0.0, 0.8, 0.19999999999999996] -----------
original_score: 0.40

## まとめ
 - (ちょっと気に入らんけど) bert * 2, roberta * 2, xlnet * 1 の average が良さそう (* 1, * 1, * 1, * 1 よりも)
 - 多分もう一つ別の良いモデルを入れられれば伸びる