In [None]:
import pandas as pd
import numpy as np
import os
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
import pickle
import torch.nn as nn
import torch
from scipy.special import softmax
from sklearn.metrics import mean_squared_error, roc_auc_score

In [None]:
# Directory holding one pickled out-of-fold (OOF) prediction frame per model.
PATH = 'oofs/'
FILES = os.listdir(PATH)

# Every directory entry whose name contains 'pkl', sorted so that model
# indices are stable from run to run.
OOF = np.sort([name for name in FILES if 'pkl' in name])

# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
# Each frame is ordered by 'oid'; later cells combine the frames row by row.
OOF_CSV = [
    pd.read_pickle(PATH + oof_name).sort_values(by=['oid'])
    for oof_name in OOF
]

print('We have %i oof files...' % len(OOF))
print()
print(OOF)

In [None]:
# Number of pred_<i> columns read from every OOF frame.
N_CLASSES = 13

n_rows, n_models = len(OOF_CSV[0]), len(OOF)
pred_cols = [f"pred_{i}" for i in range(N_CLASSES)]
ref_oids = OOF_CSV[0]["oid"].values

# x[row, model, class] -> raw model output, y[row, model] -> label stored in that model's file.
x = np.zeros((n_rows, n_models, N_CLASSES))
y = np.zeros((n_rows, n_models))
models = {}  # model index -> OOF file name
for k in range(n_models):
    oof_k = OOF_CSV[k]

    # Frames are combined purely by row position, so every file has to hold
    # the same oids in the same order; otherwise the blend mixes different samples.
    if len(oof_k) != n_rows or not np.array_equal(oof_k["oid"].values, ref_oids):
        raise ValueError(
            "OOF file %s does not cover the same oids as %s" % (OOF[k], OOF[0])
        )

    models[k] = OOF[k]
    x[:, k, :] = oof_k[pred_cols].values
    y[:, k] = oof_k["category"].values

    # Later cells score a blend of several models against a single model's
    # label column, which is only meaningful when all label columns agree.
    if not np.array_equal(y[:, k], y[:, 0]):
        raise ValueError(
            "OOF file %s has different 'category' labels than %s" % (OOF[k], OOF[0])
        )

# Normalise the class axis so each model contributes a probability vector.
x = softmax(x, axis=-1)

In [None]:
def custom_score(y_trues, y_preds):
    """Return (correct - incorrect) / n for arg-max class predictions.

    Each sample contributes +1 when the arg-max of its score row equals the
    true label and -1 otherwise, so the result lies in [-1, 1].

    Args:
        y_trues: 1-D sequence of true class indices, length n.
        y_preds: array-like of per-class scores with shape (n, n_classes);
            the predicted class is the arg-max over the last axis.

    Returns:
        The metric as a Python float.

    Raises:
        ValueError: if ``y_trues`` is not 1-D or its length differs from the
            number of prediction rows (a plain ``zip`` would silently truncate
            and still divide by ``len(y_trues)``).
        ZeroDivisionError: if there are no samples.
    """
    y_trues = np.asarray(y_trues)
    pred_labels = np.asarray(np.argmax(y_preds, axis=-1))

    if y_trues.ndim != 1 or pred_labels.shape != y_trues.shape:
        raise ValueError(
            "y_trues must be 1-D and match the number of prediction rows: "
            "got labels %s vs predicted labels %s"
            % (y_trues.shape, pred_labels.shape)
        )

    n_samples = len(y_trues)
    n_correct = int(np.count_nonzero(y_trues == pred_labels))
    # correct - incorrect == n_correct - (n_samples - n_correct)
    return (2 * n_correct - n_samples) / n_samples

In [None]:
# Score every model on its own out-of-fold predictions (axis 1 of x indexes models).
# NOTE: the name `all` shadows the built-in; it is kept because a later cell reads it.
all = [custom_score(y[:, idx], x[:, idx, :]) for idx in range(x.shape[1])]
for idx, model_score in enumerate(all):
    print('Model %s has OOF score = %.4f' % (models[idx], model_score))

# Seed the ensemble with the highest-scoring single model (argmax, since
# a larger score is better). w will collect one blend weight per model added later.
m = [np.argmax(all)]
w = []

In [None]:
# Best single-model score; the greedy search must beat this to add a model.
old = np.asarray(all).max()

In [None]:
# Number of grid steps for the blend weight; candidates are j / RES for j in range(RES).
RES = 1000
# A candidate's weight scan is abandoned once more than this many steps failed to improve.
PATIENCE = 1000
# Minimum score gain required to accept another model into the ensemble.
TOL = 0.0
# When False, a model that is already part of the ensemble is not tried again.
DUPLICATES = False

In [None]:
# Announce the starting point of the greedy search, followed by one blank line.
start_idx = m[0]
print(
    'Ensemble custom_metric = %.4f by beginning with model %i %s'
    % (old, start_idx, models[start_idx]),
    end='\n\n',
)

In [None]:
# Greedy forward selection. Each round blends one more model into the current
# ensemble, picking the (model, weight) pair with the highest custom_score, and
# stops as soon as the best achievable gain is not larger than TOL.
for kk in range(len(OOF)):

    # BUILD CURRENT ENSEMBLE
    # Replay the accepted blends in order: adding model k with weight w[i]
    # scales everything blended before it by (1 - w[i]).
    md = x[:, m[0], :]
    for i, k in enumerate(m[1:]):
        md = w[i] * x[:, k] + (1 - w[i]) * md

    # FIND MODEL TO ADD
    # custom_score lies in [-1, 1], so the running bests start at -inf, not 0.
    # Starting at 0 would accept "model 0 with weight 0" at a made-up score of 0
    # whenever every real score is negative.
    mx = -np.inf; mx_k = None; mx_w = 0
    print('Searching for best model to add... ')

    # TRY ADDING EACH MODEL
    for k in range(x.shape[1]):  # iterate over models
        print(k, ', ', end='')
        if not DUPLICATES and (k in m): continue

        # EVALUATE ADDING MODEL K OVER THE WEIGHT GRID
        candidate = x[:, k, :]
        bst_j = 0; bst = -np.inf; ct = 0
        for j in range(RES):  # iterate over blend weights j / RES (exactly 1 is never tried)
            weight = j / RES
            tmp = weight * candidate + (1 - weight) * md
            # Scored against the label column of candidate k's own file.
            score = custom_score(y[:, k], tmp)
            if score > bst:
                bst = score
                bst_j = weight
            else: ct += 1  # counts every non-improving step; never reset
            if ct > PATIENCE: break
        print(bst)
        if bst > mx:
            mx = bst
            mx_k = k
            mx_w = bst_j

    # STOP IF THERE IS NO CANDIDATE OR THE INCREASE IS NOT ABOVE TOL
    inc = mx - old
    if mx_k is None or inc <= TOL:
        print(); print('No increase. Stopping.')
        break

    # DISPLAY RESULTS
    print()
    print('Ensemble score = %.4f after adding model %i with weight %.3f. Increase of %.4f'%(mx,mx_k,mx_w,inc))
    print()

    # Accept the winner: remember its score, its index and its blend weight.
    old = mx; m.append(mx_k); w.append(mx_w)

In [None]:
def foo(weights):
    """Convert sequential blend weights into final per-model weights.

    Entry ``i`` is multiplied by ``(1 - weights[j])`` for every later index
    ``j > i``; the last entry is returned unchanged. Formally
    ``result[i] = weights[i] * prod(1 - weights[j] for j > i)``.

    Args:
        weights: sequence of numbers, ordered from the first blended model
            to the last.

    Returns:
        A new list with the converted weights. The argument itself is left
        untouched, so calling this on a list that is still needed elsewhere
        is safe.
    """
    final = list(weights)
    # Position i only reads positions after it, and those have not been
    # rewritten yet when i is processed, so one working copy is enough.
    for i in range(len(final) - 1):
        for later_weight in final[i + 1:]:
            final[i] *= 1 - later_weight
    return final

In [None]:
# Until converted, w holds one sequential blend weight per model added after the
# seed model, i.e. it is exactly one entry shorter than m. The seed model enters
# with weight 1. Checking the length makes re-running this cell a no-op instead
# of converting already-converted weights a second time.
if len(w) == len(m) - 1:
    w = foo([1] + w)

In [None]:
# Report the chosen models as "<index>_<file name>", their weights and the final score.
selected = [f"{idx}_{models[idx]}" for idx in m]
print('We are using models', selected)
print('with weights', w)
print('and achieve ensemble score = %.4f' % old)

In [None]:
# Sanity check: rebuild the ensemble directly from the final per-model weights
# and score it.
x_all = np.zeros(x[:, 0].shape)
for model, weight in zip(m, w):
    x_all += x[:, model] * weight

# custom_score takes the labels first and the per-class scores second.
print(custom_score(y[:, 0], x_all))