In [1]:
import numpy as np
import scipy.stats
from scipy.special import softmax

In [2]:
def agreement(predictions: np.ndarray, reference: np.ndarray) -> float:
    """Return the fraction of entries whose arg-max class matches the reference.

    Both arrays are reduced with ``argmax`` over their last axis and the
    resulting class indices are compared element-wise. The result is the mean
    of those matches (a value in [0, 1]), not a single 0/1 flag.

    Args:
        predictions: Per-class scores or probabilities, classes on the last axis.
        reference: Reference scores with exactly the same shape as ``predictions``.

    Returns:
        Mean of the element-wise arg-max matches; 1.0 when every entry agrees.

    Raises:
        ValueError: If ``predictions`` and ``reference`` differ in shape.
    """
    predictions = np.asarray(predictions)
    reference = np.asarray(reference)
    # Without this check a mismatched-but-broadcastable reference (for example a
    # single row) would be compared against every prediction and scored silently.
    if predictions.shape != reference.shape:
        raise ValueError(
            f"shape mismatch: predictions {predictions.shape} "
            f"vs reference {reference.shape}"
        )
    return (predictions.argmax(axis=-1) == reference.argmax(axis=-1)).mean()


def total_variation_distance(predictions: np.ndarray, reference: np.ndarray) -> float:
    """Return the mean total variation distance between paired distributions.

    For every pair the absolute differences are summed over the last axis
    (an L1 distance); the result is averaged over all pairs and halved.

    Args:
        predictions: Array whose last axis holds one distribution per entry.
        reference: Reference array with exactly the same shape as ``predictions``.

    Returns:
        Half of the mean L1 distance between corresponding entries.

    Raises:
        ValueError: If ``predictions`` and ``reference`` differ in shape.
    """
    predictions = np.asarray(predictions)
    reference = np.asarray(reference)
    # Subtraction broadcasts, so a mismatched reference (for example one row)
    # would otherwise produce a plausible-looking but meaningless number.
    if predictions.shape != reference.shape:
        raise ValueError(
            f"shape mismatch: predictions {predictions.shape} "
            f"vs reference {reference.shape}"
        )
    return np.abs(predictions - reference).sum(axis=-1).mean() / 2.


def w2_distance(predictions: np.array, reference: np.array):
    """Return the negated mean Wasserstein distance between paired sample rows.

    Row ``i`` of ``predictions`` is compared with row ``i`` of ``reference``
    via ``scipy.stats.wasserstein_distance``; the per-row distances are
    averaged and the sign of the mean is flipped before returning.

    NOTE(review): SciPy documents ``wasserstein_distance`` as the first
    (p=1) Wasserstein distance, so despite this function's name the value is
    not a W-2 distance -- confirm which metric is intended.

    Both arrays must have the same number of rows, and ``predictions`` must
    hold exactly 1000 samples per row; otherwise an assertion fails.
    """
    NUM_SAMPLES_REQUIRED = 1000
    assert predictions.shape[0] == reference.shape[0], "wrong predictions shape"
    assert predictions.shape[1] == NUM_SAMPLES_REQUIRED, "wrong number of samples"

    row_distances = []
    for pred_row, ref_row in zip(predictions, reference):
        row_distances.append(scipy.stats.wasserstein_distance(pred_row, ref_row))
    return -np.mean(row_distances)

In [3]:
# Load the reference ("ground truth") arrays that submissions are scored against.
# The files are read with genfromtxt's default (whitespace) delimiter.
cifar_gt, medmnist_gt, uci_gt = (
    np.genfromtxt(csv_path)
    for csv_path in (
        "eval_data/cifar_probs.csv",
        "eval_data/medmnist_probs.csv",
        "eval_data/uci_samples.csv",
    )
)

### CIFAR-10

In [4]:
# Replace the placeholder below with your own predictions, e.g.
# predictions = np.genfromtxt("submitted_zip/cifar_probs.csv")
# Placeholder: softmax-normalised Gaussian noise of shape (50000, 10). The
# generator is seeded so that re-running the notebook reproduces the same scores.
predictions = softmax(np.random.default_rng(0).standard_normal((50000, 10)), axis=-1)

In [5]:
# Score against the CIFAR-10 reference twice: on the first 10000 rows
# (printed as "Public") and on every row (printed as "Full").
agreements = tuple(
    agreement(predictions[rows], cifar_gt[rows])
    for rows in (slice(10000), slice(None))
)
tvs = tuple(
    total_variation_distance(predictions[rows], cifar_gt[rows])
    for rows in (slice(10000), slice(None))
)
print(f"Public Agreement: {agreements[0]}\n"
      f"Full Agreement: {agreements[1]}\n")
print(f"Public TV: {tvs[0]}\n"
      f"Full TV: {tvs[1]}\n")

Public Agreement: 0.1005
Full Agreement: 0.0992

Public TV: 0.8069905759986005
Full TV: 0.7489844201069392



### MedMNIST

In [6]:
# Replace the placeholder below with your own predictions, e.g.
# predictions = np.genfromtxt("submitted_zip/medmnist_probs.csv")
# Placeholder: softmax-normalised Gaussian noise of shape (2000, 7). The
# generator is seeded so that re-running the notebook reproduces the same scores.
predictions = softmax(np.random.default_rng(0).standard_normal((2000, 7)), axis=-1)

In [7]:
# Score against the MedMNIST reference twice: on the first 1000 rows
# (printed as "Public") and on every row (printed as "Full").
agreements = tuple(
    agreement(predictions[rows], medmnist_gt[rows])
    for rows in (slice(1000), slice(None))
)
tvs = tuple(
    total_variation_distance(predictions[rows], medmnist_gt[rows])
    for rows in (slice(1000), slice(None))
)
print(f"Public Agreement: {agreements[0]}\n"
      f"Full Agreement: {agreements[1]}\n")
print(f"Public TV: {tvs[0]}\n"
      f"Full TV: {tvs[1]}\n")

Public Agreement: 0.156
Full Agreement: 0.148

Public TV: 0.6947956197485198
Full TV: 0.6994507621011583



### UCI

In [8]:
# Replace the placeholder below with your own samples, e.g.
# samples = np.genfromtxt("submitted_zip/uci_samples.csv")
# Placeholder: standard-normal draws of shape (256, 1000). The generator is
# seeded so that re-running the notebook reproduces the same scores.
samples = np.random.default_rng(0).standard_normal((256, 1000))
samples.shape

(256, 1000)

In [9]:
# UCI: score the first 100 rows (printed as "Public") and then every row ("Full").
# w2_distance pairs the arrays row by row, so uci_gt is transposed before use
# (presumably it is stored with the per-row samples along its first axis -- confirm).
result_public = w2_distance(samples[:100], uci_gt.T[:100])
result_full = w2_distance(samples, uci_gt.T)
print(f"Public: {result_public}\nFull: {result_full}")

Public: -0.9814617912810042
Full: -0.9283811233060233
