In [20]:
import os
import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import LabelEncoder

In [23]:
def softmax(x):
    """Compute softmax values for each set of scores in x.

    Normalisation runs along axis 0, so for a 2-D input each column is
    treated as one set of scores and sums to 1 in the result.

    The maximum along axis 0 is subtracted before exponentiating. This does
    not change the mathematical result, but it keeps ``np.exp`` from
    overflowing to ``inf`` (which would turn the quotient into ``nan``) when
    the scores are large.

    Args:
        x: array-like of scores.

    Returns:
        numpy.ndarray with the same shape as ``x``.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; np.max would raise on an empty reduction axis.
        return np.exp(x)
    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)  # computed once and reused for the denominator
    return exps / np.sum(exps, axis=0, keepdims=True)

In [24]:
# Collect the softmax-normalised predictions of every model run in the ensemble.
log_ids = [11, 15, 17, 19, 20]
pred_list = []
for log_id in log_ids:
    scores = np.load('../logs/log.%03d/predictions.npy' % log_id)
    # softmax() normalises along axis 0, so transpose in and out to
    # normalise each row of the loaded array instead.
    probs = softmax(scores.T).T
    print(log_id, np.argmax(probs, axis=1))  # predicted class index of this individual model
    pred_list.append(probs)

11 [32 18  3 ... 39  3 33]
15 [17 18  3 ... 39  3 16]
17 [32 18  3 ... 22  3 15]
19 [24 18  3 ...  6  3 15]
20 [23 18  3 ...  6  3 27]


In [25]:
# Number of per-model prediction arrays that were collected
len(pred_list)

5

In [26]:
# Shape of the first model's prediction array
pred_list[0].shape

(9400, 41)

In [27]:
# Start the ensemble accumulator as an array of ones with the same shape and
# dtype as a single model's predictions (ones are the identity for a product).
prediction = np.ones_like(pred_list[0])

In [28]:
# Check the shape of the freshly initialised accumulator
prediction.shape

(9400, 41)

In [29]:
# Ensemble: geometric mean of the per-model probabilities.
#
# The mean is computed directly from pred_list instead of multiplying into
# `prediction` in place, so re-running this cell always gives the same result
# (the in-place product compounded on every re-run). Averaging in log space
# also avoids the underflow to 0 that a running product of small
# probabilities can hit, particularly in float32.
stacked_preds = np.stack(pred_list)  # one leading axis entry per model
with np.errstate(divide='ignore'):
    # log(0) = -inf is intended: a class with probability 0 in any model
    # gets an ensemble probability of exp(-inf) = 0, same as the product would.
    mean_log_pred = np.log(stacked_preds).mean(axis=0)
prediction = np.exp(mean_log_pred)

In [30]:
# Predicted class index of the ensemble model for each row
np.argmax(prediction, axis=1)

array([32, 18,  3, ...,  6,  3, 27])

In [31]:
# Check the shape of the ensembled predictions
prediction.shape

(9400, 41)

In [32]:
# Load the data lists as DataFrames
test_df = pd.read_csv('../data/sample_submission.csv')
train_df = pd.read_csv('../data/train.csv')

# Convert the DataFrame's string labels to integer class indices.
# fit() derives the sorted set of distinct labels itself, so the column can
# be passed to it directly.
le = LabelEncoder().fit(train_df['label'])
train_df['label_idx'] = le.transform(train_df['label'])

class_names = le.classes_
num_classes = len(class_names)
print(class_names)

['Acoustic_guitar' 'Applause' 'Bark' 'Bass_drum' 'Burping_or_eructation'
 'Bus' 'Cello' 'Chime' 'Clarinet' 'Computer_keyboard' 'Cough' 'Cowbell'
 'Double_bass' 'Drawer_open_or_close' 'Electric_piano' 'Fart'
 'Finger_snapping' 'Fireworks' 'Flute' 'Glockenspiel' 'Gong'
 'Gunshot_or_gunfire' 'Harmonica' 'Hi-hat' 'Keys_jangling' 'Knock'
 'Laughter' 'Meow' 'Microwave_oven' 'Oboe' 'Saxophone' 'Scissors'
 'Shatter' 'Snare_drum' 'Squeak' 'Tambourine' 'Tearing' 'Telephone'
 'Trumpet' 'Violin_or_fiddle' 'Writing']


In [33]:
# Pick the three highest-scoring classes for each row
prediction_tensor = torch.from_numpy(prediction)
indices = prediction_tensor.topk(3)[1]  # indices only; shape (N, 3)

# Map the class indices back to label names and join each row's three
# names into one space-separated string.
top3_names = le.classes_[indices]
predicted_labels = list(map(' '.join, top3_names))

# Fill the label column of the sample submission and write it out
test_df['label'] = predicted_labels
test_df.to_csv('submission.csv', index=False)