# cvやlbのスコアを考慮したアンサンブル
スコアを考慮しつつ、ランダムな重み付けでアンサンブルする。  
スコアはファイル名の先頭に記載する。  
例: `0.152_submission.csv`

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import copy
import glob
import random
from random import random as rd
import gc

In [2]:
# Collect every distinct pressure value present in the training data, in
# ascending order. find_nearest() snaps predictions onto these values.
df_train = pd.read_csv("../input/ventilator-pressure-prediction/train.csv")
unique_pressures = pd.unique(df_train["pressure"])
sorted_pressures = np.sort(unique_pressures)
total_pressures_len = sorted_pressures.size

def find_nearest(prediction):
    """Return the entry of ``sorted_pressures`` closest to ``prediction``.

    Predictions outside the range of ``sorted_pressures`` are clamped to its
    last or first entry. When the two neighbours are equally distant, the
    upper one is returned.
    """
    pos = np.searchsorted(sorted_pressures, prediction)
    # Past the right end: nothing larger exists, so clamp to the maximum.
    if pos == total_pressures_len:
        return sorted_pressures[-1]
    # Before the left end: clamp to the minimum.
    if pos == 0:
        return sorted_pressures[0]
    below = sorted_pressures[pos - 1]
    above = sorted_pressures[pos]
    if abs(below - prediction) < abs(above - prediction):
        return below
    return above

def set_seed(seed = 42):
    """Seed NumPy's and Python's global RNGs and return a seeded RandomState.

    Also stores ``seed`` in the ``PYTHONHASHSEED`` environment variable.
    """
    np.random.seed(seed)
    seeded_state = np.random.RandomState(seed)
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    return seeded_state

def _score_from_path(path):
    """Return the score written at the start of a file name, as a float.

    Accepts names such as ``0.152_submission.csv`` as well as score-only
    names such as ``0.152.csv``.
    """
    prefix = os.path.basename(path).split("_", 1)[0]
    try:
        return float(prefix)
    except ValueError:
        # Score-only names keep their extension in the prefix ("0.152.csv").
        stem = os.path.splitext(prefix)[0]
    try:
        return float(stem)
    except ValueError:
        raise ValueError(f"cannot read a score from file name {path!r}") from None


def wc(input_list):
    """Blend prediction files into one array, weighted by their scores.

    Each file's score is read from the start of its name (lower is treated as
    better). File ``i`` gets the base weight
    ``(total - score_i) / ((n - 1) * total)``, so the base weights sum to 1.
    The first file then receives an extra 0.1, taken from the last file. For
    two files this equals ``score_1 / total + 0.1`` and its complement. Every
    file in the list contributes to the blend.

    Args:
        input_list: Paths of CSV files that contain a ``pressure`` column.
            The list itself is left unmodified.

    Returns:
        The ``pressure`` values of the single file, or the weighted sum of the
        ``pressure`` arrays when several files are given.

    Raises:
        ValueError: If ``input_list`` is empty or a score cannot be parsed.
        ZeroDivisionError: If several files are given and their scores sum
            to zero.
    """
    if not input_list:
        raise ValueError("wc() needs at least one prediction file")

    scores = []
    predictions = []
    for path in input_list:
        # Progress output: the file name without directory and extension.
        print(os.path.basename(os.path.splitext(path)[0]))
        scores.append(_score_from_path(path))
        predictions.append(pd.read_csv(path)["pressure"].to_numpy())

    if len(predictions) == 1:
        return predictions[0]

    total = sum(scores)
    others = len(scores) - 1
    weights = [(total - score) / (others * total) for score in scores]
    # Shift a fixed 0.1 of weight from the last file to the first one.
    weights[0] += 0.1
    weights[-1] -= 0.1
    return sum(pred * w for pred, w in zip(predictions, weights))

def g(dp):
    """Blend all prediction files under ``dp`` with random weights and save the result.

    File names are expected to start with the public LB score, optionally
    followed by a name, e.g. ``0.335_LSTM_baseline``.

    Steps:
      1. Sort the file paths and group them two by two; a leftover file joins
         the last group.
      2. Collapse each group into one prediction with ``wc``.
      3. ``500 // file_count`` times (at least once), seed the RNGs with the
         iteration number, draw one random weight per group, normalise the
         weights, sort them in descending order so the largest weight goes to
         the first group, and compute the weighted sum.
      4. Take the element-wise median over all weighted sums, map every value
         through ``find_nearest`` and write the CSV into ``./rwb_submissions/``.

    The output file name ends with the last path component of ``dp``.

    Args:
        dp: Directory containing the prediction files to blend.

    Raises:
        ValueError: If ``dp`` contains no files.
    """
    files = sorted(glob.iglob(f'{dp}/*'))
    file_count = len(files)
    if file_count == 0:
        raise ValueError(f"no prediction files found in {dp!r}")

    # Guard against zero iterations when there are more than 500 files.
    loop_time = max(1, 500 // file_count)
    # A single file still forms one group.
    splits = max(1, file_count // 2)
    groups = [files[2 * k: 2 * k + 2] for k in range(splits)]
    # The last group takes everything that remains (two or three files, or
    # the only file).
    groups[-1] = files[2 * (splits - 1):]

    # Turn each group into one blended prediction.
    blended = [wc(group) for group in groups]

    # Repeat many times so the median settles on a stable value.
    pred_list = []
    for seed in range(loop_time):
        set_seed(seed)
        weights = [rd() for _ in blended]
        weight_sum = sum(weights)
        weights = sorted((w / weight_sum for w in weights), reverse=True)
        pred_list.append(sum(pred * w for pred, w in zip(blended, weights)))

    output_dir = './rwb_submissions/'
    os.makedirs(output_dir, exist_ok=True)
    output = pd.read_csv("../input/ventilator-pressure-prediction/sample_submission.csv")
    output["pressure"] = np.median(np.vstack(pred_list), axis=0)
    output["pressure"] = output["pressure"].apply(find_nearest)
    # Name the output after the blended directory rather than a global.
    run_name = os.path.basename(os.path.normpath(dp))
    output.to_csv(output_dir + f'rwb_{loop_time}loops_submission_{run_name}.csv', index=False)

In [3]:
# Blend every submission stored under ./submissions/<input_dir>.
input_dir = 'all'
submissions_path = f'./submissions/{input_dir}'
g(submissions_path)

0.144_median_submission_GB_VPP_Whoppity_dub_dub
0.148_Random_Weights_Blending_Tool_submission
0.148_submission_GB_VPP_To_Infinity_and_Beyond!
0.149_submission_Ventilator_LSTM_Model
0.149_submission_median_1015_lstm_hidden=1024_6layer-ver2_mod-feat3_dropout=0.0
0.149_submission_median_1016_lstm_hidden=1024_6layer-ver2_mod-feat7_dropout=0.0
0.149_submission_median_1017_lstm_hidden=1024_6layer-ver5_mod-feat3_dropout=0.0
0.149_submission_median_1018_lstm_hidden=1024_6layer-ver6_mod-feat3_dropout=0.0
0.149_submission_median_1020_lstm_hidden=1024_6layer-ver5_mod-feat11_dropout=0.0
0.149_submission_median_1020_lstm_hidden=1024_6layer-ver6_mod-feat11_dropout=0.0
0.150_submission_median_1014_lstm_hidden=1024_6layer-ver2_mod-feat3
0.150_submission_median_1016_lstm_hidden=1024_6layer-ver2_mod-feat7_dropout=0.0_noemb
0.150_submission_median_1018_lstm_hidden=1024_6layer-ver7_mod-feat3_dropout=0.0
0.151_submission_median_1012_lstm_hidden=1024_5layer_mod-feat3
0.151_submission_median_1014_lstm_hidden