In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
def simulate_ensemble(dfs, key):
    """Average one prediction column element-wise across several frames.

    Parameters
    ----------
    dfs : iterable of pandas.DataFrame
        Frames that each contain the column ``key``.
    key : str
        Name of the column to average.

    Returns
    -------
    numpy.ndarray
        Mean of the ``key`` columns taken across the frames (axis 0 of the
        stacked columns).
    """
    per_frame = []
    for frame in dfs:
        per_frame.append(frame[key].values)
    # One stacked entry per frame; reducing over axis 0 averages the frames.
    stacked = np.stack(per_frame)
    return stacked.mean(axis=0)

def _mean_abs_deviation(pred, target):
    """Return the L1 norm of ``pred - target`` divided by ``len(target)``."""
    return np.linalg.norm(pred - target, 1) / len(target)


def combine_predictions(names, key, set_name="validation", verbose=True,
                        output_dir="../output", version=0):
    """Average the predictions of several runs and score the ensemble.

    For every entry of ``names`` the file
    ``{output_dir}/{name}/version_{version}/predictions/{set_name}_pred.csv``
    is read. Column ``key`` is averaged across the files with
    ``simulate_ensemble`` and compared against column ``"y"`` of the first
    file.

    When exactly two names are given, a scatter plot of the two runs'
    residuals (``key - y``) is shown, independent of ``verbose``.

    Parameters
    ----------
    names : iterable of str
        Run directory names below ``output_dir``.
    key : str
        Prediction column to ensemble.
    set_name : str, default "validation"
        Prefix of the prediction file (``{set_name}_pred.csv``).
    verbose : bool, default True
        If true, print each run's MAD followed by the ensemble MAD.
    output_dir : str, default "../output"
        Root directory that holds the run directories.
    version : int or str, default 0
        Value inserted into the ``version_{version}`` path component.

    Returns
    -------
    tuple
        ``(mad, y_hat)``: the ensemble's mean absolute deviation and the
        averaged prediction array.

    Raises
    ------
    ValueError
        If ``names`` is empty.
    """
    # Materialise once so generators work and len() below is valid.
    names = list(names)
    if not names:
        raise ValueError("combine_predictions needs at least one run name")

    paths = [
        f"{output_dir}/{name}/version_{version}/predictions/{set_name}_pred.csv"
        for name in names
    ]
    dfs = [pd.read_csv(path) for path in paths]
    y_hat = simulate_ensemble(dfs, key)
    # NOTE(review): the ensemble is scored against the targets of the first
    # file only; this assumes every file lists the same samples in the same
    # order, which is not checked here.
    mad = _mean_abs_deviation(y_hat, dfs[0]["y"])

    if len(names) == 2:
        # Residual-vs-residual view of the two runs.
        fig, ax = plt.subplots()
        ax.scatter(dfs[0][key] - dfs[0]["y"], dfs[1][key] - dfs[1]["y"])
        ax.set_xlabel(f"{names[0]}: {key} - y")
        ax.set_ylabel(f"{names[1]}: {key} - y")
        ax.set_title(f"Residuals on {set_name} set")
        plt.show()

    if verbose:
        print("mad wo ensembles:")
        for name, df in zip(names, dfs):
            single_mad = _mean_abs_deviation(df[key], df["y"])
            print(f"{name}:  {single_mad:.4f}")
        print(f"ensemble MAD: {mad:.4f}")

    return mad, y_hat

In [None]:
# Two-run ensemble on the default ("validation") predictions, column
# `y_hat_reg`. With exactly two runs the residual scatter plot is shown too.
mad, y_hat = combine_predictions(
    names=[
        "masked_effnet_highRes_fancy_aug",
        "masked_effnet_fancy_aug",
    ],
    key="y_hat_reg",
)

In [None]:
# Two-run ensemble of the `effnet_highRes` and `effnet_base` runs on the
# default ("validation") predictions, column `y_hat_reg`.
mad, y_hat = combine_predictions(
    names=[
        "effnet_highRes",
        "effnet_base",
    ],
    key="y_hat_reg",
)

In [None]:
# Two-run ensemble: `masked_effnet_fancy_aug` with its `shallow` counterpart,
# default ("validation") predictions, column `y_hat_reg`.
mad, y_hat = combine_predictions(
    names=[
        "masked_effnet_fancy_aug",
        "masked_effnet_shallow_fancy_aug",
    ],
    key="y_hat_reg",
)

In [None]:
# Two-run ensemble: `masked_effnet_fancy_aug` with its `super_shallow`
# counterpart, default ("validation") predictions, column `y_hat_reg`.
mad, y_hat = combine_predictions(
    names=[
        "masked_effnet_fancy_aug",
        "masked_effnet_super_shallow_fancy_aug",
    ],
    key="y_hat_reg",
)

In [None]:
# Two-run ensemble of the `effnet-b4` runs on the validation predictions,
# this time using the `y_hat_reg_tta` column.
mad, y_hat = combine_predictions(
    names=[
        "masked_effnet-b4_fancy_aug",
        "masked_effnet-b4_shallow_fancy_aug",
    ],
    key="y_hat_reg_tta",
    set_name="validation",
)

In [None]:
# Two-run ensemble mixing an `incept` run with an `effnet` run on the default
# ("validation") predictions, column `y_hat_reg_tta`.
mad, y_hat = combine_predictions(
    names=[
        "masked_incept_batchsize_128_fancy_aug",
        "masked_effnet_fancy_aug",
    ],
    key="y_hat_reg_tta",
)

In [None]:
# Five-run ensemble of `masked_*` runs scored on the validation predictions.
# Commented-out entries are currently excluded from the ensemble.
mad, y_hat = combine_predictions(
    names=[
        # "masked_effnet_base",
        "masked_effnet_fancy_aug",
        # "masked_incept_shallow_fancy_aug",
        # "masked_effnet_batchsize_128_fancy_aug",
        # "masked_incept_batchsize_128_fancy_aug",
        # "masked_effnet_shallow_fancy_aug",
        # "masked_effnet_fancy_aug_swa",
        "masked_effnet_super_shallow_fancy_aug",
        "masked_effnet_highRes_fancy_aug",
        "masked_effnet-b4_shallow_pretr_fancy_aug",
        "masked_incept_batchsize_128_fancy_aug",
    ],
    key="y_hat_reg",
    set_name="validation",
)

In [None]:
# Five-run ensemble of `masked_*` runs scored on the test predictions.
# The averaged prediction is kept as `y_hat_m` for the final blend.
mad, y_hat_m = combine_predictions(
    names=[
        "masked_effnet_fancy_aug",
        "masked_effnet_super_shallow_fancy_aug",
        "masked_effnet_highRes_fancy_aug",
        "masked_effnet-b4_shallow_pretr_fancy_aug",
        "masked_incept_batchsize_128_fancy_aug",
    ],
    key="y_hat_reg",
    set_name="test",
)

In [None]:
# Three-run ensemble of runs without the `masked_` prefix, scored on the
# validation predictions. Commented-out entries are currently excluded.
mad, y_hat = combine_predictions(
    names=[
        "effnet_fancy_aug",
        # "effnet_fancy_aug_swa",
        "effnet_super_shallow_fancy_aug",
        # "masked_effnet_highRes_fancy_aug",  # Todo high res
        "effnet-b4_shallow_pretr_fancy_aug",
        # "incept_batchsize_128_fancy_aug",
    ],
    key="y_hat_reg",
    set_name="validation",
)

In [None]:
# Ensemble of runs without the `masked_` prefix, scored on the test
# predictions. The averaged prediction is kept as `y_hat_u` for the final blend.
# NOTE(review): `effnet_fancy_aug_swa` is active here but commented out in the
# validation cell for the same runs - confirm the test ensemble is meant to
# differ from the validated one.
mad, y_hat_u = combine_predictions(
    names=[
        "effnet_fancy_aug",
        "effnet_fancy_aug_swa",
        "effnet_super_shallow_fancy_aug",
        # "masked_effnet_highRes_fancy_aug",  # Todo high res
        "effnet-b4_shallow_pretr_fancy_aug",
    ],
    key="y_hat_reg",
    set_name="test",
)

In [None]:
# Equal-weight blend of the two test-set ensembles (`y_hat_m`, `y_hat_u`).
y_hat = 0.5 * (y_hat_m + y_hat_u)
# Targets come from the `effnet_base` test prediction file.
# NOTE(review): assumes its rows line up with the files behind `y_hat_m` and
# `y_hat_u` - TODO confirm.
y = pd.read_csv(
    "../output/effnet_base/version_0/predictions/test_pred.csv"
)["y"]
# Mean absolute deviation of the blend: L1 norm of the residual over n.
np.linalg.norm(y_hat - y, ord=1) / len(y)