In [1]:
import numpy as np
from utils import prepare_data, create_mnist_cnn_model, eval_generator, eval_precommit_generator, sparse_mnist_generator_nonzero, eval_precommit_adversarial_generator, eval_optimal_adversary_generator

# Notebook-wide settings.
# NOTE(review): batch_size and epochs are not referenced by any cell visible in
# this notebook — presumably leftovers from the training notebooks; confirm before removing.
batch_size = 128
num_classes = 10  # passed to create_mnist_cnn_model when the 5k judge is built
epochs = 12

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [None]:
# Fetch the train/test arrays and the model input shape from the project helper in utils.
x_train, y_train, x_test, y_test, input_shape = prepare_data()

In [None]:
# Validation generator shared by every evaluation cell below.
val_data_generator = sparse_mnist_generator_nonzero(
    x_test,
    y_test,
    batch_size=x_test.shape[0],  # one batch covers the whole test set
    sparsity=6,  # presumably the number of pixels kept per image (see the judge notes below) — confirm in utils
    shuffle=False  # later cells line batches up with predictions on x_test; presumably this keeps that order — confirm
)

# Eval judge alone

- judge is a sparse MNIST classifier
- 6 non-zero pixels are randomly sampled from an input image

### Judge - 5k batches

In [67]:
# Build the judge network and restore the checkpoint for the "5k batches" judge.
judge = create_mnist_cnn_model(num_classes, input_shape)
judge.load_weights('model_sparse_mnist_generator_nonzero_5k.h5py')

In [68]:
# The judge only sees 6 randomly sampled pixels per image, so a single pass is noisy:
# repeat the evaluation and report the mean and variance of the accuracy.
accuracies = eval_generator(val_data_generator, judge, num_repetitions=10)
accuracy_mean = np.mean(accuracies)
accuracy_var = np.var(accuracies)
print('accuracy: %.2f%%' % (100 * accuracy_mean))
print('variance: %E' % accuracy_var)

accuracy: 52.99%
variance: 9.043600E-06


### Better judge - 30k batches

In [None]:
# Rebind `judge` to the "30k batches" checkpoint. When the notebook runs top to
# bottom, every later cell that reads `judge` sees this model.
# NOTE(review): built without arguments here, whereas the 5k cell passes
# (num_classes, input_shape) — presumably the defaults match; confirm in utils.
judge = create_mnist_cnn_model()
judge.load_weights('model_sparse_mnist_generator_nonzero_30k.h5py')

In [64]:
# Same repeated evaluation as above, for whichever model `judge` currently refers to
# (the 30k judge when cells run in order).
accuracies = eval_generator(val_data_generator, judge, num_repetitions=10)
accuracy_mean, accuracy_var = np.mean(accuracies), np.var(accuracies)
print('accuracy: %.2f%%' % (100 * accuracy_mean))
print('variance: %E' % accuracy_var)

accuracy: 55.51%
variance: 8.398400E-06


# Random pre-commit

### Judge - 5k batches

In [3]:
# This section reports the 5k judge, but by this point in a top-to-bottom run
# `judge` has been rebound to the 30k model. Load the 5k judge into its own
# variable so the cell is reproducible after Restart & Run All and does not
# disturb `judge` for the cells that follow.
judge_5k = create_mnist_cnn_model(num_classes, input_shape)
judge_5k.load_weights('model_sparse_mnist_generator_nonzero_5k.h5py')

# Random pre-commit evaluation, repeated to average out the random pixel sampling.
accuracies = eval_precommit_generator(val_data_generator, judge_5k, num_repetitions=10)
print('accuracy: %.2f%%' % (100 * np.mean(accuracies)))
print('variance: %E' % np.var(accuracies))

accuracy: 87.36%
variance: 7.724500E-06


### Better judge - 30k batches

In [5]:
# Random pre-commit evaluation with the model currently bound to `judge`
# (the 30k judge when cells run in order).
accuracies = eval_precommit_generator(val_data_generator, judge, num_repetitions=10)
accuracy_mean = np.mean(accuracies)
accuracy_var = np.var(accuracies)
print('accuracy: %.2f%%' % (100 * accuracy_mean))
print('variance: %E' % accuracy_var)

accuracy: 88.31%
variance: 8.738100E-06


# Adversarial precommit

- evaluate the best adversary, which was found in train_adversary.ipynb

### Judge - 5k batches

In [49]:
# Adversary network restored from the checkpoint whose file name refers to an
# Adam 1e-5 run (presumably the bold "adversarial_adam 1e-5" row of the results table).
adversary = create_mnist_cnn_model()
adversary.load_weights('model_mnist_1epoch_adam1e-5.h5py')

In [8]:
# This section reports the 5k judge, but by this point in a top-to-bottom run
# `judge` has been rebound to the 30k model. Load the 5k judge into its own
# variable so the cell is reproducible after Restart & Run All and does not
# disturb `judge` for the cells that follow.
judge_5k = create_mnist_cnn_model(num_classes, input_shape)
judge_5k.load_weights('model_sparse_mnist_generator_nonzero_5k.h5py')

# Adversarial pre-commit evaluation; the trailing 10 is passed positionally as in
# the original call (the other eval helpers name this argument num_repetitions).
accuracies = eval_precommit_adversarial_generator(x_test, val_data_generator, judge_5k, adversary, 10)
print('accuracy: %.2f%%' % (100 * np.mean(accuracies)))
print('variance: %E' % np.var(accuracies))

accuracy: 73.87%
variance: 7.136900E-06


### Better judge - 30k batches

In [9]:
# Adversarial pre-commit evaluation with the model currently bound to `judge`
# (the 30k judge when cells run in order).
accuracies = eval_precommit_adversarial_generator(
    x_test, val_data_generator, judge, adversary, 10
)
accuracy_mean, accuracy_var = np.mean(accuracies), np.var(accuracies)
print('accuracy: %.2f%%' % (100 * accuracy_mean))
print('variance: %E' % accuracy_var)

accuracy: 75.41%
variance: 1.368610E-05


# Optimal adversary - perfect knowledge of judge

- with perfect knowledge of the judge it's trivial to find an optimal adversarial pre-commit class
- choose judge's predicted categories as long as they are not true
- otherwise take the 2nd most probable class according to the judge and hope for a tie, which counts as a loss in our setting

### Judge - 5k batches

In [11]:
# This section reports the 5k judge, but by this point in a top-to-bottom run
# `judge` has been rebound to the 30k model. Load the 5k judge into its own
# variable so the cell is reproducible after Restart & Run All and does not
# disturb `judge` for the cells that follow.
judge_5k = create_mnist_cnn_model(num_classes, input_shape)
judge_5k.load_weights('model_sparse_mnist_generator_nonzero_5k.h5py')

# Optimal-adversary evaluation; the trailing 10 is passed positionally as in the
# original call (the other eval helpers name this argument num_repetitions).
accuracies = eval_optimal_adversary_generator(val_data_generator, judge_5k, 10)
print('accuracy: %.2f%%' % (100 * np.mean(accuracies)))
print('variance: %E' % np.var(accuracies))

accuracy: 52.89%
variance: 5.974100E-06


### Better judge - 30k batches

In [55]:
# Optimal-adversary evaluation with the model currently bound to `judge`
# (the 30k judge when cells run in order).
accuracies = eval_optimal_adversary_generator(val_data_generator, judge, 10)
accuracy_mean = np.mean(accuracies)
accuracy_var = np.var(accuracies)
print('accuracy: %.2f%%' % (100 * accuracy_mean))
print('variance: %E' % accuracy_var)

accuracy: 55.62%
variance: 3.117450E-05


# Conclusion

- much of the gain in the judge's accuracy can be explained by the pre-commit alone, without any actual debate between the 2 agents
- adversarial precommit indeed managed to decrease the judge's accuracy compared to random precommit
- on the other hand, debate seems to be a useful tool for mitigating the effect of an adversary

| precommit type | judge 5k | judge 30k |
|-----------------------|----------|----------|
| random | 87.36% | 88.31% |
| adversarial_top | 79.43% | 80.77% |
| adversarial_30k | 77.72% | 80.60% |
| adversarial_15k | 76.04% | 77.42% |
| adversarial_10k | 75.31% | 76.82% |
| adversarial_7.5k | 76.23% | 77.24% |
| adversarial_5k | 78.31% | 80.12% |
| adversarial_500 | 84.33% | 85.61% |
|-----------------------|----------|----------|
| adversarial_adam 1e-6 | 83.32% | 84.98% |
| adversarial_adam 5e-5 | 75.23% | 76.28% |
| **adversarial_adam 1e-5** | **73.87%** | **75.41%** |
| adversarial_adam 1e-4| 75.06% | 76.50% |
|-----------------------|----------|----------|
| perfect knowledge | 52.89% | 55.62% |
|-----------------------|----------|----------|
| none / baseline | 52.99% | 55.51% |

# Future work

### Judge as a means to resolve disagreement

In [345]:
def get_accuracy(y_pred, y_true):
    """Return the fraction of positions where `y_pred` equals `y_true`.

    Both arguments are compared elementwise with `==`; the number of matches
    is printed as a side effect before the ratio is returned.

    Args:
        y_pred: array of predicted labels.
        y_true: array of reference labels; its first dimension is the denominator.

    Returns:
        Number of matching entries divided by `y_true.shape[0]`.
    """
    matches = y_pred == y_true
    n_correct = matches.sum()
    print('correct: ', n_correct)
    n_total = y_true.shape[0]
    return n_correct / n_total

In [311]:
# Two full-image agents restored from different checkpoints.
# Agent A uses the same weights file that is loaded into `adversary` above.
super_agent_a = create_mnist_cnn_model()
super_agent_a.load_weights('model_mnist_1epoch_adam1e-5.h5py')

# Agent B comes from the checkpoint whose file name refers to an Adam 5e-5 run.
super_agent_b = create_mnist_cnn_model()
super_agent_b.load_weights('model_mnist_1epoch_adam5e-5.h5py')

In [312]:
# Draw one batch from the validation generator (configured with batch_size equal
# to the test-set size and shuffle=False).
# NOTE(review): the comparisons below assume this batch is row-aligned with x_test — confirm in utils.
data_x_sparse, data_y = next(val_data_generator)

# Ground-truth class index per sample (argmax over axis 1 of the label array).
true_categories = np.argmax(data_y, axis=1)

# The agents classify the full images; keep only their top class.
predictions_a = np.argmax(super_agent_a.predict(x_test), axis=1)
predictions_b = np.argmax(super_agent_b.predict(x_test), axis=1)

# The judge scores the sparse inputs; its per-class outputs are kept whole so the
# agents' claimed classes can be looked up later.
predictions_judge = judge.predict(data_x_sparse)

In [313]:
# Mask of samples where the two agents name different classes.
disagreement = predictions_a != predictions_b

# Each agent's claimed class on the disputed samples only.
claims_a = predictions_a[disagreement]
claims_b = predictions_b[disagreement]

# True where the judge gives agent A's class a strictly higher score than agent B's;
# ties therefore fall to agent B.
resolution = predictions_judge[disagreement, claims_a] > predictions_judge[disagreement, claims_b]

# Judge-resolved prediction per disputed sample.
res = np.where(resolution, claims_a, claims_b)

In [315]:
# Accuracy of the judge-resolved predictions on the samples where the agents disagree.
get_accuracy(res, true_categories[disagreement])

0.44427363566487316

In [316]:
# Accuracy of agent A alone on the disputed samples.
get_accuracy(predictions_a[disagreement], true_categories[disagreement])

0.0722521137586472

In [317]:
# Accuracy of agent B alone on the disputed samples.
get_accuracy(predictions_b[disagreement], true_categories[disagreement])

0.8262874711760184

In [318]:
# Agent A's accuracy over every sample in the batch.
get_accuracy(predictions_a, true_categories)

0.8387

In [319]:
# Agent B's accuracy over every sample in the batch.
get_accuracy(predictions_b, true_categories)

0.9368

In [320]:
# The judge's own top class per sample (argmax over axis 1 of its predictions).
predicted_category_judge = predictions_judge.argmax(axis=1)

In [321]:
# Accuracy of the judge's own top class on the disputed samples, for comparison
# with the judge-resolved result.
get_accuracy(predicted_category_judge[disagreement], true_categories[disagreement])

0.22982321291314373

In [314]:
# Number of samples on which the two agents disagree.
disagreement.sum()

1301

# Agents of the same power (just different seed)

In [322]:
# Rebind both agents to checkpoints that share the same "adam5e-5" file-name stem;
# agent A uses the "_2" variant (presumably the differently seeded run named in the
# heading — confirm against the training notebook).
super_agent_a = create_mnist_cnn_model()
super_agent_a.load_weights('model_mnist_1epoch_adam5e-5_2.h5py')

# Agent B loads the same weights file as agent B in the previous experiment.
super_agent_b = create_mnist_cnn_model()
super_agent_b.load_weights('model_mnist_1epoch_adam5e-5.h5py')

In [339]:
# Draw a fresh batch from the validation generator for the equal-power experiment.
# NOTE(review): the comparisons below assume this batch is row-aligned with x_test — confirm in utils.
data_x_sparse, data_y = next(val_data_generator)

# Ground-truth class index per sample (argmax over axis 1 of the label array).
true_categories = np.argmax(data_y, axis=1)

# Top class from each agent on the full images.
predictions_a = np.argmax(super_agent_a.predict(x_test), axis=1)
predictions_b = np.argmax(super_agent_b.predict(x_test), axis=1)

# Judge outputs on the sparse inputs, kept per class for the look-ups below.
predictions_judge = judge.predict(data_x_sparse)

In [340]:
# Mask of samples where the two agents name different classes.
disagreement = predictions_a != predictions_b

# Each agent's claimed class on the disputed samples only.
claims_a = predictions_a[disagreement]
claims_b = predictions_b[disagreement]

# True where the judge scores agent A's class strictly higher than agent B's;
# ties therefore fall to agent B.
resolution = predictions_judge[disagreement, claims_a] > predictions_judge[disagreement, claims_b]

# Judge-resolved prediction per disputed sample.
res = np.where(resolution, claims_a, claims_b)

In [347]:
# Accuracy of the judge-resolved predictions on the samples where the agents disagree.
get_accuracy(res, true_categories[disagreement])

correct:  137


0.37950138504155123

In [348]:
# Accuracy of agent A alone on the disputed samples.
get_accuracy(predictions_a[disagreement], true_categories[disagreement])

correct:  86


0.23822714681440443

In [349]:
# Accuracy of agent B alone on the disputed samples.
get_accuracy(predictions_b[disagreement], true_categories[disagreement])

correct:  225


0.6232686980609419

In [328]:
# Agent A's accuracy over every sample in the batch.
get_accuracy(predictions_a, true_categories)

0.9229

In [329]:
# Agent B's accuracy over every sample in the batch.
get_accuracy(predictions_b, true_categories)

0.9368

In [333]:
# The judge's own top class per sample (argmax over axis 1 of its predictions).
predicted_category_judge = predictions_judge.argmax(axis=1)

In [334]:
# Accuracy of the judge's own top class on the disputed samples, for comparison
# with the judge-resolved result.
get_accuracy(predicted_category_judge[disagreement], true_categories[disagreement])

0.21329639889196675

In [330]:
# Number of samples on which the two agents disagree.
disagreement.sum()

361

In [384]:
# Start from a copy of agent A's predictions and overwrite the disputed positions
# with the judge-resolved choices, giving one combined prediction per sample.
disputed_idx = np.flatnonzero(disagreement)
all_preds_combined = np.array(predictions_a)
all_preds_combined[disputed_idx] = res

In [385]:
# Overall accuracy when disagreements between the agents are settled by the judge.
get_accuracy(all_preds_combined, true_categories)

correct:  9280


0.928