In [1]:
import os
import h5py
import numpy as np

In [2]:
# Best model results so far: one HDF5 file name per model, grouped by tier.

# Directory the file names below are resolved against.
models_path = "./outputs/"

# tier 3
mp1, mp2, mp3 = (
    "tier3_Unet_19.hdf5",
    "tier3_Unet_21.hdf5",
    "tier3_Unet_24.hdf5",
)

# tier 5
mp4, mp5 = (
    "tier5_Unet_19.hdf5",
    "tier5_Unet_23.hdf5",
)

# tier 7
mp6, mp7, mp8 = (
    "tier7_Linknet_25.hdf5",
    "tier7_Linknet_27.hdf5",
    "tier7_Unet_24.hdf5",
)

In [3]:
# Take the top-level group names from the first tier-3 file. The cells further
# down iterate over `f_keys` for every model file, so this assumes all files
# share the same set of groups -- TODO confirm.
with h5py.File(os.path.join(models_path, mp1), mode="r") as f:
    f_keys = [group_name for group_name in f.keys()]

In [4]:
def average(key_instance, model_list):
    """Element-wise arithmetic mean of one group's ``intensity`` across files.

    Parameters
    ----------
    key_instance : str
        Name of the top-level group to read in every file.
    model_list : iterable of str
        File names, each resolved against the module-level ``models_path``.

    Returns
    -------
    numpy.ndarray
        Mean over the model axis, cast to ``float16``.
    """

    def read_intensity(file_name):
        # Materialise the dataset as an in-memory array before the file closes.
        with h5py.File(os.path.join(models_path, file_name), mode="r") as source:
            return np.array(source[key_instance]["intensity"])

    per_model = [read_intensity(file_name) for file_name in model_list]

    # axis=0 is the model axis of the stacked list, so the mean is per element.
    return np.mean(per_model, axis=0).astype("float16")

def geometric_mean(key_instance, model_list, how="log"):
    """Element-wise geometric mean of one group's ``intensity`` across files.

    Parameters
    ----------
    key_instance : str
        Name of the top-level group to read in every file.
    model_list : iterable of str
        File names, each resolved against the module-level ``models_path``.
    how : {"log", "prod"}, default "log"
        ``"prod"``: n-th root of the element-wise product; non-finite results
        (e.g. the root of a negative product) are replaced by 0.
        ``"log"``: ``expm1(mean(log1p(x)))``, i.e. the geometric mean of
        ``x + 1`` minus 1, which tolerates zeros; negative results are
        clamped to 0. Inputs below -1 yield NaN, which is left in place.

    Returns
    -------
    numpy.ndarray
        The geometric mean, cast to ``float16``.

    Raises
    ------
    ValueError
        If ``how`` is not supported or ``model_list`` is empty.
    """
    # Validate before touching any file; an unknown mode used to surface as an
    # UnboundLocalError at the return statement.
    if how not in ("prod", "log"):
        raise ValueError(f"how must be 'prod' or 'log', got {how!r}")

    array_list = []
    for m in model_list:
        with h5py.File(os.path.join(models_path, m), mode="r") as f:
            array_list.append(np.array(f[key_instance]["intensity"]))

    if not array_list:
        raise ValueError("model_list must contain at least one file name")

    # Do all intermediate arithmetic in float64. In a narrow input dtype such
    # as float16 the product overflows to inf (and was then zeroed), and the
    # log/exp round trip loses precision.
    stacked = np.asarray(array_list, dtype="float64")

    if how == "prod":
        # n-th root of the element-wise product over the model axis.
        result = np.prod(stacked, axis=0) ** (1.0 / len(array_list))
        # Keep the original guard: anything non-finite becomes 0.
        result = np.where(np.isfinite(result), result, 0)
    else:
        # log1p/expm1 are the numerically stable forms of log(x+1), exp(y)-1.
        result = np.expm1(np.mean(np.log1p(stacked), axis=0))
        result = np.where(result < 0, 0, result)

    return result.astype("float16")

def get_max(key_instance, model_list):
    """Element-wise maximum of one group's ``intensity`` across files.

    Parameters
    ----------
    key_instance : str
        Name of the top-level group to read in every file.
    model_list : iterable of str
        File names, each resolved against the module-level ``models_path``.

    Returns
    -------
    numpy.ndarray
        Maximum over the model axis, cast to ``float16``.
    """

    def iter_intensities():
        # One file is open at a time; the dataset is copied into memory
        # before the file is closed.
        for file_name in model_list:
            path = os.path.join(models_path, file_name)
            with h5py.File(path, mode="r") as source:
                yield np.array(source[key_instance]["intensity"])

    per_model = list(iter_intensities())

    # axis=0 is the model axis of the stacked list, so the max is per element.
    return np.max(per_model, axis=0).astype("float16")

In [None]:
# set 1
# max of best 3 from tier3
nickname = "tier3"
how = "max"
best_models = [mp1, mp2, mp3]

# Pick the combining function once, before the output file is created, so an
# unsupported `how` fails fast instead of leaving `data` undefined or stale.
combiners = {"mean": average, "max": get_max, "geomean": geometric_mean}
if how not in combiners:
    raise ValueError(f"unsupported how={how!r}; expected one of {sorted(combiners)}")
combine = combiners[how]

# Write into the same directory the model files are read from.
output_path = os.path.join(models_path, f"{nickname}_{how}_of_{len(best_models)}.hdf5")

# mode="w" makes the cell re-runnable: with mode="a", create_group raises as
# soon as the file from a previous run already contains the group.
with h5py.File(output_path, mode="w") as output_file:

    for key in f_keys:

        data = combine(key, best_models)

        # write to file
        output_file.create_group(key).create_dataset('intensity', data=data)

In [6]:
# set 2
# max of best 2 from tier5
nickname = "tier5"
how = "max"
best_models = [mp4, mp5]

# Pick the combining function once, before the output file is created, so an
# unsupported `how` fails fast instead of leaving `data` undefined or stale.
combiners = {"mean": average, "max": get_max, "geomean": geometric_mean}
if how not in combiners:
    raise ValueError(f"unsupported how={how!r}; expected one of {sorted(combiners)}")
combine = combiners[how]

# Write into the same directory the model files are read from.
output_path = os.path.join(models_path, f"{nickname}_{how}_of_{len(best_models)}.hdf5")

# mode="w" makes the cell re-runnable: with mode="a", create_group raises as
# soon as the file from a previous run already contains the group.
with h5py.File(output_path, mode="w") as output_file:

    for key in f_keys:

        data = combine(key, best_models)

        # write to file
        output_file.create_group(key).create_dataset('intensity', data=data)

In [7]:
# set 3
# max of best 3 from tier7
nickname = "tier7"
how = "max"
best_models = [mp6, mp7, mp8]

# Pick the combining function once, before the output file is created, so an
# unsupported `how` fails fast instead of leaving `data` undefined or stale.
combiners = {"mean": average, "max": get_max, "geomean": geometric_mean}
if how not in combiners:
    raise ValueError(f"unsupported how={how!r}; expected one of {sorted(combiners)}")
combine = combiners[how]

# Write into the same directory the model files are read from.
output_path = os.path.join(models_path, f"{nickname}_{how}_of_{len(best_models)}.hdf5")

# mode="w" makes the cell re-runnable: with mode="a", create_group raises as
# soon as the file from a previous run already contains the group.
with h5py.File(output_path, mode="w") as output_file:

    for key in f_keys:

        data = combine(key, best_models)

        # write to file
        output_file.create_group(key).create_dataset('intensity', data=data)

In [8]:
# set 4
# final submission
# mean of maxes of tiers 3, 5, and 7

# These names follow the "{nickname}_{how}_of_{n}.hdf5" pattern used by the
# set 1-3 cells, so those cells must have been run first.
bs1 = "tier3_max_of_3.hdf5"
bs2 = "tier5_max_of_2.hdf5"
bs3 = "tier7_max_of_3.hdf5"

nickname = "tiers357max"
how = "mean"
best_models = [bs1, bs2, bs3]

# Pick the combining function once, before the output file is created, so an
# unsupported `how` fails fast instead of leaving `data` undefined or stale.
combiners = {"mean": average, "max": get_max, "geomean": geometric_mean}
if how not in combiners:
    raise ValueError(f"unsupported how={how!r}; expected one of {sorted(combiners)}")
combine = combiners[how]

# The inputs are read through models_path, so write the result there as well.
output_path = os.path.join(models_path, f"{nickname}_{how}(final).hdf5")

# mode="w" makes the cell re-runnable: with mode="a", create_group raises as
# soon as the file from a previous run already contains the group.
with h5py.File(output_path, mode="w") as output_file:

    for key in f_keys:

        data = combine(key, best_models)

        # write to file
        output_file.create_group(key).create_dataset('intensity', data=data)