In [6]:
import os
import gc

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import rankdata

In [11]:
def ensemble_predictions(predictions, weights, type_="linear"):
    """Blend several prediction arrays into one with a weighted mean.

    Parameters
    ----------
    predictions : sequence of array-like
        One entry per model. Entries are stacked by ``np.average`` along
        axis 0, so they must all share the same shape.
    weights : sequence of float
        One weight per entry of ``predictions``. Must sum to 1 (checked with
        ``np.isclose``).
    type_ : {"linear", "harmonic", "geometric", "rank"}, default "linear"
        Which mean to compute:

        * ``"linear"``    - weighted arithmetic mean.
        * ``"harmonic"``  - reciprocal of the weighted mean of reciprocals.
          A prediction equal to 0 produces a division by zero.
        * ``"geometric"`` - exponential of the weighted mean of logs.
          Predictions must be strictly positive for the log to be finite.
        * ``"rank"``      - weighted mean of ``scipy.stats.rankdata`` ranks,
          divided by ``len(result) + 1``.

    Returns
    -------
    numpy.ndarray
        The blended predictions.

    Raises
    ------
    AssertionError
        If ``weights`` does not sum to 1.
    ValueError
        If ``type_`` is not one of the supported names.
    """
    assert np.isclose(np.sum(weights), 1.0), "weights must sum to 1"

    # Coerce once so the arithmetic below also works for plain Python lists.
    preds = [np.asarray(p, dtype=float) for p in predictions]

    if type_ == "linear":
        return np.average(preds, weights=weights, axis=0)

    if type_ == "harmonic":
        mean_reciprocal = np.average(
            [1.0 / p for p in preds], weights=weights, axis=0
        )
        return 1.0 / mean_reciprocal

    if type_ == "geometric":
        # np.average already divides by the sum of the weights, so no further
        # normalisation is needed before exponentiating.
        mean_log = np.average([np.log(p) for p in preds], weights=weights, axis=0)
        return np.exp(mean_log)

    if type_ == "rank":
        mean_rank = np.average(
            [rankdata(p) for p in preds], weights=weights, axis=0
        )
        # Ranks start at 1, so dividing by (n + 1) keeps every score strictly
        # between 0 and 1 for n ranked values.
        return mean_rank / (len(mean_rank) + 1)

    # Previously an unknown type_ fell through to `return res` with `res`
    # unbound, surfacing as a confusing UnboundLocalError.
    raise ValueError(
        f"Unknown ensemble type {type_!r}; expected one of "
        "'linear', 'harmonic', 'geometric', 'rank'."
    )

In [7]:
# Competition tables: training rows, test rows, and the sample-submission
# template that the final cells fill in and write back out.
# NOTE(review): train_df and test_df are not referenced by the visible cells.
train_df = pd.read_csv(filepath_or_buffer="../input/train.csv")
test_df = pd.read_csv(filepath_or_buffer="../input/test.csv")
sample_df = pd.read_csv(
    filepath_or_buffer="../input/atmaCup5__sample_submission.csv"
)

In [10]:
# Saved prediction arrays from two earlier runs. The file names suggest a
# baseline and a pseudo-labeling variant, presumably with their validation
# scores embedded - confirm against the runs that produced them.
sub1 = np.load(file="../output/preds.0.86263.npy")
sub2 = np.load(file="../output/preds.0.85306_pseudo_labeling.npy")

In [13]:
# Equal-weight rank blend of the two saved prediction arrays.
ens_sub = ensemble_predictions(
    predictions=[sub1, sub2],
    weights=[0.5, 0.5],
    type_="rank",
)

In [14]:
# Put the rank-blended scores into the "target" column of the submission
# template (adds the column, or replaces it if it is already there).
# This mutates sample_df in place.
sample_df["target"] = ens_sub

In [15]:
# Write the filled-in submission frame to disk, omitting the row index.
sample_df.to_csv(
    path_or_buf="../output/ensemble.csv",
    index=False,
)