In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
import copy
from scipy.stats import mode

In [2]:
# Load the merged label table and split its rows by the value of "source".
label = pd.read_csv("merged.csv")
source_year = label["source"]
label_2019 = label.loc[source_year.eq(2019)]
label_2020 = label.loc[source_year.eq(2020)]

# Rebuild the table with every 2019 row first, followed by the 2020 rows.
label = pd.concat([label_2019, label_2020])

In [3]:
# Model directories to ensemble; each directory holds that model's
# per-fold prediction arrays and validation-index CSVs.
models = [
    "seresnext50",
    "seresnext101",
    "efficientnet_b3",
    "efficientnet_b5",
    "ese_vovnet39b",
]

# Seed embedded in each model's validation-fold file names (paired by position).
seeds = [
    1997,
    1996,
    123,
    42,
    323,
]

# Number of fold files read per model.
N_FOLDS = 5

# zip() silently drops trailing entries on a length mismatch, so fail loudly.
assert len(models) == len(seeds), "each model needs exactly one seed"

predictions = []

for model, seed in zip(models, seeds):

    # Start from the 2020 rows with every label reset to 0; rows that no fold
    # file covers keep that 0.
    prediction = label_2020.copy()
    prediction["label"] = 0

    file_path = model + "/"

    for fold in range(N_FOLDS):
        prediction_fold = np.load(file_path + "predict_fold_{}.npy".format(fold))
        # The "Unnamed: 0" column is used as row labels into the 2020 frame
        # (presumably the saved index of the validation split -- confirm).
        index_fold = pd.read_csv(
            file_path + "val_fold_{}_seed_{}.csv".format(fold, seed)
        )["Unnamed: 0"].values
        prediction.loc[index_fold, "label"] = prediction_fold

    predictions.append(prediction["label"].values)

# Majority vote across models. The frame is built explicitly from label_2020
# instead of reusing whatever the last loop iteration left in `prediction`.
# np.ravel yields a 1-D result whether mode() returns shape (1, n) or (n,),
# which differs between SciPy versions.
prediction = label_2020.copy()
prediction["label"] = np.ravel(mode(predictions, axis=0)[0])

# Accuracy of the ensemble on the 2020 rows only.
# `np.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
print(accuracy_score(np.round(prediction["label"].values).astype(int), label_2020["label"].values))

# Prepend the untouched 2019 rows so the row order matches `label`.
prediction = pd.concat([label_2019, prediction], axis=0)

# Accuracy over 2019 + 2020; the 2019 rows carry their original labels.
print(accuracy_score(np.round(prediction["label"].values).astype(int), label["label"].values))

prediction.head()

0.902182548955461
0.9205300527774614


Unnamed: 0,image_id,label,source
21397,train-cmd-1418.jpg,3,2019
21398,train-cbsd-154.jpg,1,2019
21399,train-cbsd-498.jpg,1,2019
21400,train-cmd-136.jpg,3,2019
21401,train-cmd-1226.jpg,3,2019


In [4]:
# Work on a copy so the original label table stays untouched.
label_pseudo = copy.deepcopy(label)

# Alternative (disabled): soft blend of predicted and original labels.
# label_pseudo["pseudo_label"] = 0.4 * prediction["label"] + 0.6 * label["label"]

# Assignment aligns on the row index, so each row receives the prediction
# stored under the same index label.
label_pseudo["pseudo_label"] = prediction["label"]

# `np.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
label_pseudo["pseudo_label"] = np.round(label_pseudo["pseudo_label"]).astype(int)

label_pseudo.head()

Unnamed: 0,image_id,label,source,pseudo_label
21397,train-cmd-1418.jpg,3,2019,3
21398,train-cbsd-154.jpg,1,2019,1
21399,train-cbsd-498.jpg,1,2019,1
21400,train-cmd-136.jpg,3,2019,3
21401,train-cmd-1226.jpg,3,2019,3


In [5]:
# Fraction of rows where the pseudo label equals the original label.
pseudo_accuracy = accuracy_score(
    label_pseudo["pseudo_label"].values,
    label_pseudo["label"].values,
)
print(pseudo_accuracy)

0.9205300527774614


In [6]:
# Write the table with the added "pseudo_label" column; the row index is not saved.
output_path = "merged_pseudo.csv"
label_pseudo.to_csv(output_path, index=False)