In [1]:
from sklearn.datasets import make_blobs
import numpy as np
from keras.models import Sequential, Model
from keras_uncertainty.layers import StochasticDropout, SamplingSoftmax
from keras_uncertainty.models import DisentangledStochasticClassifier, StochasticClassifier
from keras.layers import Dense, Input, Dropout
from keras_uncertainty.utils import numpy_entropy


In [2]:
# Notebook-wide settings reused by the training and evaluation cells below.
NUM_SAMPLES = 100
BATCH_SIZE = 256

# Toy 2-D dataset: 1000 points drawn around two fixed cluster centres.
X, y = make_blobs(
    centers=[[-1.5, 1.5], [0, -1.5]],
    n_features=2,
    n_samples=1000,
    random_state=0,
)

In [3]:
def uncertainty(probs):
    """Return the entropy of `probs` computed along its last axis.

    Thin wrapper around `keras_uncertainty.utils.numpy_entropy`.
    """
    entropy = numpy_entropy(probs, axis=-1)
    return entropy


def two_head_model(trunk_model, num_classes=2, num_samples=100):
    """Put two dense heads and a sampling softmax on top of `trunk_model`.

    One head emits linear logits; the other is softplus-activated (so its
    output is non-negative). Both feed a `SamplingSoftmax` layer configured
    with `variance_type="linear_std"`.

    Args:
        trunk_model: Keras model applied to a 2-feature input tensor.
        num_classes: Number of units in each head.
        num_samples: Forwarded to `SamplingSoftmax`.

    Returns:
        A `(train_model, pred_model)` pair sharing the same layers.
        `train_model` outputs the `SamplingSoftmax` result and is compiled
        with sparse categorical cross-entropy and Adam; `pred_model` outputs
        the two raw heads and is returned uncompiled.
    """
    inputs = Input(shape=(2,))
    features = trunk_model(inputs)

    mean_head = Dense(num_classes, activation="linear")(features)
    spread_head = Dense(num_classes, activation="softplus")(features)

    sampler = SamplingSoftmax(num_samples=num_samples, variance_type="linear_std")
    sampled_probs = sampler([mean_head, spread_head])

    train_model = Model(inputs, sampled_probs, name="train_model")
    pred_model = Model(inputs, [mean_head, spread_head], name="pred_model")

    train_model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer="adam",
        metrics=["accuracy"],
    )

    return train_model, pred_model


def train_stochastic_model(trunk_model, x_train, y_train, epochs=300):
    """Fit the two-head model built on `trunk_model` and wrap it for prediction.

    Args:
        trunk_model: Feature extractor passed to `two_head_model`.
        x_train: Training inputs.
        y_train: Training labels.
        epochs: Number of training epochs.

    Returns:
        A `DisentangledStochasticClassifier` around the prediction model,
        created with `epi_num_samples=NUM_SAMPLES`.
    """
    trainable, predictor = two_head_model(trunk_model)
    trainable.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=epochs, verbose=2)

    return DisentangledStochasticClassifier(predictor, epi_num_samples=NUM_SAMPLES)

def eval_disentangled_model(disentangled_model, samples):
    """Return entropies of the model's aleatoric and epistemic outputs.

    `disentangled_model.predict` must return three values; the first is
    ignored here, the second and third are passed through `uncertainty`.
    NOTE(review): those two outputs are presumably per-class probabilities
    rather than standard deviations - confirm against keras_uncertainty.

    Returns:
        `(ale_entropy, epi_entropy)`.
    """
    _, ale_out, epi_out = disentangled_model.predict(samples, batch_size=BATCH_SIZE)

    return uncertainty(ale_out), uncertainty(epi_out)

def train_disentangling_dropout_model(x_train, y_train, prob=0.5):
    """Train the two-head disentangling model on an MC-Dropout trunk.

    The trunk is two 32-unit ReLU layers, each followed by a
    `StochasticDropout(prob)` layer; training runs for 50 epochs.

    Returns:
        Whatever `train_stochastic_model` returns for that trunk.
    """
    trunk = Sequential(
        [
            Dense(32, activation="relu", input_shape=(2,)),
            StochasticDropout(prob),
            Dense(32, activation="relu"),
            StochasticDropout(prob),
        ]
    )

    return train_stochastic_model(trunk, x_train, y_train, epochs=50)

In [4]:
# Train the multi-head disentangling model on the full toy dataset.
disentangled_dropout_model = train_disentangling_dropout_model(x_train=X, y_train=y)

In [7]:

# Evaluation grid: unit-spaced lattice over [-30, 30) in both dimensions.
min_x, max_x = [-30, -30], [30, 30]
res = 1

xx, yy = np.meshgrid(
    np.arange(min_x[0], max_x[0], res),
    np.arange(min_x[1], max_x[1], res),
)
# One row per grid point: (x, y).
domain = np.column_stack([xx.ravel(), yy.ravel()])

print(domain.shape)

# Evaluate on the flat list of points, then fold each result back onto the grid.
ale_entropy, epi_entropy = (
    entropy.reshape(xx.shape)
    for entropy in eval_disentangled_model(disentangled_dropout_model, domain)
)

In [8]:
import matplotlib.pyplot as plt
import matplotlib.pylab as pl
from matplotlib.colors import ListedColormap

# Semi-transparent copy of the "binary" colormap for the data overlay:
# sample every colour as RGBA, then set the alpha column to 0.7.
cmap = pl.cm.binary
my_cmap = cmap(np.arange(cmap.N))
my_cmap[..., -1] = 0.7
my_cmap = ListedColormap(my_cmap)

# Two stacked panels: aleatoric entropy on top, epistemic entropy below.
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(6, 10), squeeze=False)
ax_ale, ax_epi = axes[:, 0]

cf_ale = ax_ale.contourf(xx, yy, ale_entropy, antialiased=True)
cf_epi = ax_epi.contourf(xx, yy, epi_entropy, antialiased=True)

# Overlay the training points and strip the ticks on both panels.
for panel, panel_label in ((ax_ale, "Aleatoric"), (ax_epi, "Epistemic")):
    panel.scatter(X[:, 0], X[:, 1], c=y, cmap=my_cmap)
    panel.set_xticks([])
    panel.set_yticks([])
    panel.autoscale(False)
    panel.set_ylabel(panel_label)

ax_ale.set_title("Multi-Head Disentangled MC-Dropout")

plt.show()

In [323]:
def evaluate_entropy_dropout_model(entropy_model, samples, num_samples=NUM_SAMPLES):
    """Predict on `samples` and return the entropy of the predictions.

    Prints the shape of the prediction array before computing the entropy
    with `uncertainty`.

    Args:
        entropy_model: Object exposing `predict(samples, num_samples=...)`.
        samples: Inputs to evaluate.
        num_samples: Forwarded to `entropy_model.predict`.
    """
    predictions = entropy_model.predict(samples, num_samples=num_samples)
    print(predictions.shape)

    return uncertainty(predictions)

def train_entropy_dropout_model(x_train, y_train, prob=0.5, epochs=50, batch_size=None):
    """Train a single-head MC-Dropout softmax classifier.

    The network is two 32-unit ReLU layers, each followed by
    `StochasticDropout(prob)`, and a 2-unit softmax output layer.

    Args:
        x_train: Training inputs with 2 features.
        y_train: Integer class labels.
        prob: Dropout probability for both `StochasticDropout` layers.
        epochs: Number of training epochs.
        batch_size: Training batch size. `None` selects the notebook-wide
            `BATCH_SIZE`.

    Returns:
        A `StochasticClassifier` wrapping the trained model.
    """
    if batch_size is None:
        # Use the same batch size as the two-head model this baseline is
        # compared against; previously this fit silently used the Keras
        # default, so the two models saw a different number of updates.
        batch_size = BATCH_SIZE

    model = Sequential()
    model.add(Dense(32, activation="relu", input_shape=(2,)))
    model.add(StochasticDropout(prob))
    model.add(Dense(32, activation="relu"))
    model.add(StochasticDropout(prob))
    model.add(Dense(2, activation="softmax"))

    model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

    model.fit(x_train, y_train, verbose=2, epochs=epochs, batch_size=batch_size)

    return StochasticClassifier(model)


In [324]:
# Train the single-head MC-Dropout baseline on the full toy dataset.
entropy_model = train_entropy_dropout_model(x_train=X, y_train=y)

In [325]:
# Request NUM_SAMPLES sampled predictions for the evaluation grid and show
# the shape of the result.
individual_predictions = entropy_model.predict_samples(domain, num_samples=NUM_SAMPLES)
np.shape(individual_predictions)

In [347]:
from keras import activations

# Experiment: apply the entropy formulation directly to the logits instead of
# to the softmax probabilities.
# Known problem: the entropy formula no longer works on raw logits, because the
# logarithm of a negative value is NaN. No alternative formulation has been
# found so far.

# Replace the output layer's activation with a linear one so the model emits
# logits. This mutates `entropy_model` in place: every later prediction from it
# is a logit, not a probability.
entropy_model.model.layers[-1].activation = activations.linear
# NOTE(review): the recompile is presumably what makes Keras pick up the new
# activation - confirm for the Keras version in use.
entropy_model.model.compile(loss="sparse_categorical_crossentropy")
individual_predictions = entropy_model.predict_samples(domain, num_samples=NUM_SAMPLES)
# Keep a handle on the raw logits; `individual_predictions` is reassigned later.
original_individual_predictions = individual_predictions

In [348]:
import keras_uncertainty.backend as K

# Turn the saved logit samples back into probabilities with a softmax over
# the last axis, then convert the result to a NumPy array.
logit_tensor = K.constant(original_individual_predictions)
individual_predictions = K.softmax(logit_tensor, axis=-1).numpy()

In [349]:
def predictive_entropy(probs, axis=-1, eps=1e-6):
    """Entropy of the mean prediction.

    `probs` is averaged over axis 0 first; the entropy of that average is
    then summed along `axis`. `eps` is added inside the logarithm to avoid
    `log(0)`.
    """
    mean_probs = np.mean(probs, axis=0)
    weighted_log = mean_probs * np.log(mean_probs + eps)
    return -np.sum(weighted_log, axis=axis)

def expected_entropy(probs, eps=1e-6):
    """Mean of the per-sample entropies.

    The entropy is computed along the last axis of `probs` for every entry,
    then averaged over axis 0. `eps` is added inside the logarithm to avoid
    `log(0)`.
    """
    per_sample_entropy = -np.sum(probs * np.log(probs + eps), axis=-1)
    return np.mean(per_sample_entropy, axis=0)


# Original name, kept so existing callers keep working. Other cells refer to
# this quantity as `expected_entropy`, which previously was never defined.
shannon_entropy = expected_entropy

def mutual_information(probs):
    """Predictive entropy minus the mean per-sample entropy of `probs`."""
    total_entropy = predictive_entropy(probs)
    mean_sample_entropy = shannon_entropy(probs)
    return total_entropy - mean_sample_entropy

def logit_variance(logits):
    """Variance of `logits` over axis 0, averaged along the last axis."""
    variance_over_samples = np.var(logits, axis=0)
    return variance_over_samples.mean(axis=-1)

In [357]:
# Four stacked panels, one per uncertainty measure derived from the sampled
# predictions of the single-head MC-Dropout model.
fig, axes = plt.subplots(ncols=1, nrows=4, figsize=(6, 10), squeeze=False)
ax_pred, ax_exp, ax_mi, ax_var = axes[:, 0]

# Each measure is computed on the flat list of grid points and folded back
# onto the grid for contour plotting.
pred_entr = predictive_entropy(individual_predictions).reshape(xx.shape)
# `shannon_entropy` averages the per-sample entropies, i.e. it is the expected
# entropy; the name `expected_entropy` used here before was never defined.
exp_entr = shannon_entropy(individual_predictions).reshape(xx.shape)
mut_inf = mutual_information(individual_predictions).reshape(xx.shape)
# Standard deviation (not variance) across samples, averaged over classes.
pred_var = np.std(individual_predictions, axis=0).mean(axis=-1).reshape(xx.shape)

panels = [
    (ax_pred, pred_entr, "Predictive Entropy (mix)"),
    (ax_exp, exp_entr, "Expected Entropy (ale)"),
    (ax_mi, mut_inf, "Mutual Information (epi)"),
    # Label corrected: the plotted quantity is a standard deviation.
    (ax_var, pred_var, "Probability Std. Dev. (epi)"),
]

for ax, values, label in panels:
    ax.contourf(xx, yy, values, antialiased=True)
    ax.scatter(X[:, 0], X[:, 1], c=y, cmap=my_cmap)
    ax.get_xaxis().set_ticks([])
    ax.get_yaxis().set_ticks([])
    ax.autoscale(False)
    ax.set_ylabel(label)

ax_pred.set_title("Entropy disentangled MC-Dropout")

plt.show()

In [354]:
# Shape check: variance of the sampled predictions over axis 0.
np.shape(np.var(individual_predictions, axis=0))

In [174]:
from sklearn.metrics import accuracy_score  # was used below without being imported
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# Per-dataset-size results for the multi-head disentangling model ...
disentangling_accuracies = []
disentangling_aleatorics = []
disentangling_epistemics = []

# ... and for the single-head entropy model.
entropy_accuracies = []
entropy_aleatorics = []
entropy_epistemics = []

# Held-out set. It uses a different seed from the training sets: make_blobs
# calls with the same seed draw from the same random stream, so the test set
# previously shared points with the training data.
X_test, y_test = make_blobs(n_samples=500, n_features=2, centers=[[-1.5, 1.5],[0, -1.5]], random_state=1)
# 20 log-spaced training-set sizes from 2 to 1024 (truncated to int when used).
dataset_sizes = np.logspace(start=1, stop=10, base=2, num=20)

for dataset_size in tqdm(dataset_sizes):

    X_train, y_train = make_blobs(n_samples=int(dataset_size), n_features=2, centers=[[-1.5, 1.5],[0, -1.5]], random_state=0)

    # Train both models from scratch on the current training set.
    # Note: this rebinds the notebook-level `entropy_model`.
    disentangle_model = train_disentangling_dropout_model(X_train, y_train)
    entropy_model = train_entropy_dropout_model(X_train, y_train)

    pred_mean, pred_ale_std, pred_epi_std = disentangle_model.predict(X_test, batch_size=BATCH_SIZE)
    entropy_preds = entropy_model.predict_samples(X_test, num_samples=NUM_SAMPLES, batch_size=BATCH_SIZE)

    disentangling_accuracies.append(accuracy_score(y_test, pred_mean.argmax(axis=1)))
    disentangling_aleatorics.append(uncertainty(pred_ale_std).mean())
    disentangling_epistemics.append(uncertainty(pred_epi_std).mean())

    entropy_accuracies.append(accuracy_score(y_test, entropy_preds.mean(axis=0).argmax(axis=1)))
    # `shannon_entropy` is the mean per-sample (expected) entropy; the name
    # `expected_entropy` used here before was never defined.
    entropy_aleatorics.append(shannon_entropy(entropy_preds).mean())
    entropy_epistemics.append(mutual_information(entropy_preds).mean())


In [168]:
# NOTE(review): inspection-only cell - shows the current value of the sweep
# loop variable `dataset_size`; safe to delete.
dataset_size

In [183]:

# Same size grid as the training sweep, recomputed so this cell is self-contained.
dataset_sizes = np.logspace(start=1, stop=10, base=2, num=20)

# Accuracy of both models as a function of training-set size.
for curve, curve_label in (
    (disentangling_accuracies, "Disentangling model"),
    (entropy_accuracies, "Entropy model"),
):
    plt.plot(dataset_sizes, curve, label=curve_label)
plt.ylabel("Accuracy")
plt.xlabel("Dataset size")
plt.legend()
plt.show()


In [184]:

# Mean aleatoric uncertainty of both models as a function of training-set size.
for curve, curve_label in (
    (disentangling_aleatorics, "Disentangling model"),
    (entropy_aleatorics, "Entropy model"),
):
    plt.plot(dataset_sizes, curve, label=curve_label)
plt.ylabel("Aleatoric uncertainty")
plt.xlabel("Dataset size")
plt.legend()
plt.show()


In [187]:

# The result lists are plain Python lists. Convert them to arrays explicitly
# instead of relying on NumPy-scalar reflected operators to make
# `list / scalar` work (it raises TypeError for lists of plain floats).
disentangling_epi_curve = np.asarray(disentangling_epistemics)
entropy_epi_curve = np.asarray(entropy_epistemics)

# Each curve is scaled by its own maximum so the two models can share an axis.
plt.plot(dataset_sizes, disentangling_epi_curve / disentangling_epi_curve.max(), label="Disentangling model (normalised)")
plt.plot(dataset_sizes, entropy_epi_curve / entropy_epi_curve.max(), label="Entropy model (normalised)")
plt.ylabel("Epistemic uncertainty")
plt.xlabel("Dataset size")
plt.legend()
plt.show()


In [186]:

# Un-normalised epistemic uncertainty, entropy model only.
plt.plot(dataset_sizes, entropy_epistemics, label="Entropy model")
plt.gca().set(ylabel="Epistemic uncertainty", xlabel="Dataset size")
plt.legend()
plt.show()

In [191]:
# Both uncertainty components of the multi-head model on one axis.
for curve, curve_label in (
    (disentangling_aleatorics, "Aleatoric"),
    (disentangling_epistemics, "Epistemic"),
):
    plt.plot(dataset_sizes, curve, label=curve_label)
plt.title("Multi-head disentangled uncertainties vs. dataset size")
plt.legend()
plt.ylabel("Uncertainty")
plt.xlabel("Dataset size")


In [207]:
# Min-max normalise each curve to [0, 1] so both fit on one axis.
# The result lists are plain Python lists. Convert them to arrays explicitly
# instead of relying on NumPy-scalar reflected operators to make
# `list - scalar` work, and compute each shifted curve only once.
entropy_ale_shifted = np.asarray(entropy_aleatorics) - np.min(entropy_aleatorics)
entropy_epi_shifted = np.asarray(entropy_epistemics) - np.min(entropy_epistemics)

plt.plot(dataset_sizes, entropy_ale_shifted / entropy_ale_shifted.max(), label="Aleatoric")
plt.plot(dataset_sizes, entropy_epi_shifted / entropy_epi_shifted.max(), label="Epistemic")
plt.title("Entropy disentangled uncertainties vs. dataset size")
plt.legend()
plt.ylabel("Uncertainty (normalised)")
plt.xlabel("Dataset size")
