In [1]:
import numpy as np
from utils import prepare_data, create_mnist_cnn_model, eval_generator, eval_precommit_generator, sparse_mnist_generator_nonzero, 

batch_size = 128
num_classes = 10
epochs = 12

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [None]:
x_train, y_train, x_test, y_test, input_shape = prepare_data()

In [None]:
val_data_generator = sparse_mnist_generator_nonzero(
    x_test,
    y_test,
    batch_size=x_test.shape[0],
    sparsity=6,
    shuffle=False
)

# Eval judge alone

- judge is a sparse MNIST classifier
- 6 non-zero pixels are randomly sampled from an input image

### Judge - 5k batches

In [67]:
judge = create_mnist_cnn_model(num_classes, input_shape)
judge.load_weights('model_sparse_mnist_generator_nonzero_5k.h5py')

In [68]:
# judge samples 6 pixels on random -> we need to see more runs and look at mean and variance
accuracies = eval_generator(val_data_generator, judge, num_repetitions=10)
print('accuracy: %.2f%%' % (100 * np.mean(accuracies)))
print('variance: %E' % np.var(accuracies))

accuracy: 52.99%
variance: 9.043600E-06


### Better judge - 30k batches

In [None]:
judge = create_mnist_cnn_model()
judge.load_weights('model_sparse_mnist_generator_nonzero_30k.h5py')

In [64]:
accuracies = eval_generator(val_data_generator, judge, num_repetitions=10)
print('accuracy: %.2f%%' % (100 * np.mean(accuracies)))
print('variance: %E' % np.var(accuracies))

accuracy: 55.51%
variance: 8.398400E-06


# Random pre-commit

### Judge - 5k batches

In [3]:
accuracies = eval_precommit_generator(val_data_generator, judge, num_repetitions=10)
print('accuracy: %.2f%%' % (100 * np.mean(accuracies)))
print('variance: %E' % np.var(accuracies))

accuracy: 87.36%
variance: 7.724500E-06


### Better judge - 30k batches

In [5]:
accuracies = eval_precommit_generator(val_data_generator, judge, num_repetitions=10)
print('accuracy: %.2f%%' % (100 * np.mean(accuracies)))
print('variance: %E' % np.var(accuracies))

accuracy: 88.31%
variance: 8.738100E-06


# Adversarial precommit

- take the best possible classifier of MNIST digits
- and use it's 2nd most probable class as the adversarial choice for pre-commit

### Judge - 5k batches

In [49]:
adversarial_model = create_mnist_cnn_model()
adversarial_model.load_weights('model_mnist_12epochs.h5py')

In [7]:
accuracies = eval_precommit_adversarial_generator(x_test, val_data_generator, judge, adversarial_model, 10)
print('accuracy: %.2f%%' % (100 * np.mean(accuracies)))
print('variance: %E' % np.var(accuracies))

accuracy: 79.43%
variance: 5.800400E-06


### Better judge - 30k batches

In [50]:
accuracies = eval_precommit_adversarial_generator(x_test, val_data_generator, judge, adversarial_model, 10)
print('accuracy: %.2f%%' % (100 * np.mean(accuracies)))
print('variance: %E' % np.var(accuracies))

accuracy: 80.77%
variance: 5.198500E-06


# Compare with not so little trained adversary

In [51]:
adversarial_model_1epoch = create_mnist_cnn_model()
adversarial_model_1epoch.load_weights('model_mnist_30ksamples.h5py')

In [175]:
predictions = adversarial_model_1epoch.predict(x_test)
predictions.sort(axis=1)

In [176]:
for i in range(9):
    print((predictions[:, i+1] - predictions[:, i]).mean())

5.8919548e-05
8.44388e-05
0.00015061667
0.00030424772
0.00067134533
0.0014521435
0.004261467
0.021739153
0.9300379


In [177]:
accuracies = eval_precommit_adversarial_generator(x_test, val_data_generator, judge, adversarial_model_1epoch, 10)
accuracies

[0.7768, 0.7786, 0.778, 0.7788, 0.7737, 0.7723, 0.7819, 0.7757, 0.7794, 0.7774]

In [178]:
np.mean(accuracies)

0.77726

In [179]:
np.var(accuracies)

7.076400000000022e-06

### Better judge (30k batches, 56%)

In [52]:
accuracies = eval_precommit_adversarial_generator(x_test, val_data_generator, judge, adversarial_model, 10)
print('accuracy: %.2f%%' % (100 * np.mean(accuracies)))
print('variance: %E' % np.var(accuracies))

accuracy: 80.60%
variance: 8.182900E-06


# Compare with not so well trained adversary (1epoch adam 1e-6)

In [29]:
adversarial_model_1epoch = create_mnist_cnn_model()
adversarial_model_1epoch.load_weights('model_mnist_1epoch_adam1e-6.h5py')

In [217]:
predictions = adversarial_model_1epoch.predict(x_test)
predictions.sort(axis=1)

In [218]:
for i in range(9):
    print((predictions[:, i+1] - predictions[:, i]).mean())

0.0057919375
0.0027946897
0.0021697192
0.0017012045
0.0017048022
0.0016837975
0.001756748
0.0025757554
0.003859542


In [219]:
accuracies = eval_precommit_adversarial_generator(x_test, val_data_generator, judge, adversarial_model_1epoch, 10)
accuracies

[0.8402,
 0.8337,
 0.8327,
 0.8316,
 0.8342,
 0.8296,
 0.8282,
 0.8332,
 0.8342,
 0.8344]

In [220]:
np.mean(accuracies)

0.8331999999999999

In [221]:
np.var(accuracies)

9.345999999999903e-06

### Better judge (30k batches, 56%)

In [36]:
accuracies = eval_precommit_adversarial_generator(x_test, val_data_generator, judge, adversarial_model_1epoch, 10)
print('accuracy: %.2f%%' % (100 * np.mean(accuracies)))
print('variance: %E' % np.var(accuracies))

accuracy: 84.98%
variance: 1.032250E-05


# Compare with not so well trained adversary (1epoch adam 1e-5)

In [28]:
adversarial_model_1epoch = create_mnist_cnn_model()
adversarial_model_1epoch.load_weights('model_mnist_1epoch_adam1e-5.h5py')

In [223]:
predictions = adversarial_model_1epoch.predict(x_test)
predictions.sort(axis=1)

In [224]:
for i in range(9):
    print((predictions[:, i+1] - predictions[:, i]).mean())

0.007946745
0.0075184875
0.0073646028
0.009293356
0.0102790715
0.015551019
0.025924066
0.050632354
0.3099722


In [225]:
accuracies = eval_precommit_adversarial_generator(x_test, val_data_generator, judge, adversarial_model_1epoch, 10)
accuracies

[0.7416,
 0.7402,
 0.7439,
 0.7388,
 0.7387,
 0.7363,
 0.7379,
 0.7353,
 0.7394,
 0.7348]

In [226]:
np.mean(accuracies)

0.73869

In [227]:
np.var(accuracies)

7.136900000000057e-06

# 1epoch of adam 1e-5 with improved judge 30k batches and 56%

In [22]:
adversarial_model_1epoch = create_mnist_cnn_model()
adversarial_model_1epoch.load_weights('model_mnist_1epoch_adam1e-5.h5py')

In [25]:
accuracies = eval_precommit_adversarial_generator(x_test, val_data_generator, judge, adversarial_model_1epoch, 10)
accuracies

[0.7492,
 0.7533,
 0.7464,
 0.7598,
 0.7544,
 0.7539,
 0.7546,
 0.7566,
 0.7575,
 0.7556]

In [26]:
np.mean(accuracies)

0.7541300000000001

In [27]:
np.var(accuracies)

1.3686100000000143e-05

# Compare with not so well trained adversary (1epoch adam 5e-5)

In [56]:
adversarial_model_1epoch = create_mnist_cnn_model()
adversarial_model_1epoch.load_weights('model_mnist_1epoch_adam5e-5.h5py')

In [235]:
predictions = adversarial_model_1epoch.predict(x_test)
predictions.sort(axis=1)

In [236]:
for i in range(9):
    print((predictions[:, i+1] - predictions[:, i]).mean())

0.00020120015
0.00028793706
0.0005427765
0.00092903955
0.0017772022
0.003246927
0.008957823
0.035949793
0.86433595


In [237]:
accuracies = eval_precommit_adversarial_generator(x_test, val_data_generator, judge, adversarial_model_1epoch, 10)
accuracies

[0.7547, 0.7506, 0.7519, 0.7528, 0.7489, 0.7464, 0.7517, 0.7571, 0.752, 0.7568]

In [238]:
np.mean(accuracies)

0.75229

In [239]:
np.var(accuracies)

9.916900000000071e-06

### Better judge (30k batches, 56%)

In [57]:
accuracies = eval_precommit_adversarial_generator(x_test, val_data_generator, judge, adversarial_model_1epoch, 10)
print('accuracy: %.2f%%' % (100 * np.mean(accuracies)))
print('variance: %E' % np.var(accuracies))

accuracy: 76.28%
variance: 1.054410E-05


# Compare with not so well trained adversary (1epoch adam 1e-4)

In [37]:
adversarial_model_1epoch = create_mnist_cnn_model()
adversarial_model_1epoch.load_weights('model_mnist_1epoch_adam1e-4.h5py')

In [229]:
predictions = adversarial_model_1epoch.predict(x_test)
predictions.sort(axis=1)

In [230]:
for i in range(9):
    print((predictions[:, i+1] - predictions[:, i]).mean())

0.00020377744
0.00028670774
0.0004772872
0.00090709276
0.0016539244
0.0031683906
0.008767544
0.035903785
0.8663495


In [231]:
accuracies = eval_precommit_adversarial_generator(x_test, val_data_generator, judge, adversarial_model_1epoch, 10)
accuracies

[0.75, 0.7513, 0.7528, 0.7493, 0.7477, 0.756, 0.7439, 0.7553, 0.7452, 0.7541]

In [232]:
np.mean(accuracies)

0.75056

In [233]:
np.var(accuracies)

1.533239999999999e-05

### Better judge (30k batches, 56%)

In [38]:
accuracies = eval_precommit_adversarial_generator(x_test, val_data_generator, judge, adversarial_model_1epoch, 10)
print('accuracy: %.2f%%' % (100 * np.mean(accuracies)))
print('variance: %E' % np.var(accuracies))

accuracy: 76.50%
variance: 2.174610E-05


# Compare with not so well trained adversary (15k samples)

In [39]:
adversarial_model_1epoch = create_mnist_cnn_model()
adversarial_model_1epoch.load_weights('model_mnist_15ksamples.h5py')

In [204]:
predictions = adversarial_model_1epoch.predict(x_test)
predictions.sort(axis=1)

In [205]:
for i in range(9):
    print((predictions[:, i+1] - predictions[:, i]).mean())

0.000103093334
0.00016556526
0.00026611346
0.0005115029
0.0009563338
0.0020637854
0.006247513
0.028183825
0.90365833


In [206]:
accuracies = eval_precommit_adversarial_generator(x_test, val_data_generator, judge, adversarial_model_1epoch, 10)
accuracies

[0.7588, 0.7614, 0.7554, 0.7685, 0.7638, 0.7681, 0.756, 0.7645, 0.7537, 0.7533]

In [207]:
np.mean(accuracies)

0.7603500000000001

In [208]:
np.var(accuracies)

2.9646499999999944e-05

### Better judge (30k batches, 56%)

In [40]:
accuracies = eval_precommit_adversarial_generator(x_test, val_data_generator, judge, adversarial_model_1epoch, 10)
print('accuracy: %.2f%%' % (100 * np.mean(accuracies)))
print('variance: %E' % np.var(accuracies))

accuracy: 77.42%
variance: 1.449610E-05


# Compare with not so well trained adversary (10k samples)

In [41]:
adversarial_model_1epoch = create_mnist_cnn_model()
adversarial_model_1epoch.load_weights('model_mnist_10ksamples.h5py')

In [169]:
predictions = adversarial_model_1epoch.predict(x_test)
predictions.sort(axis=1)

In [170]:
for i in range(9):
    print((predictions[:, i+1] - predictions[:, i]).mean())

0.00012411905
0.00020309819
0.00037283497
0.000741312
0.0013381963
0.0029087411
0.007879715
0.035496786
0.87621325


In [171]:
accuracies = eval_precommit_adversarial_generator(x_test, val_data_generator, judge, adversarial_model_1epoch, 10)
accuracies

[0.7579, 0.7578, 0.7483, 0.761, 0.7533, 0.752, 0.7501, 0.749, 0.7486, 0.7531]

In [172]:
np.mean(accuracies)

0.7531100000000001

In [173]:
np.var(accuracies)

1.7788900000000063e-05

### Better judge (30k batches, 56%)

In [42]:
accuracies = eval_precommit_adversarial_generator(x_test, val_data_generator, judge, adversarial_model_1epoch, 10)
print('accuracy: %.2f%%' % (100 * np.mean(accuracies)))
print('variance: %E' % np.var(accuracies))

accuracy: 76.82%
variance: 1.452840E-05


# Compare with not so well trained adversary (7.5k samples)

In [43]:
adversarial_model_1epoch = create_mnist_cnn_model()
adversarial_model_1epoch.load_weights('model_mnist_7500samples.h5py')

In [211]:
predictions = adversarial_model_1epoch.predict(x_test)
predictions.sort(axis=1)

In [212]:
for i in range(9):
    print((predictions[:, i+1] - predictions[:, i]).mean())

0.00026031732
0.00039710922
0.00069402973
0.0012838073
0.002340322
0.0048589576
0.013530184
0.06532739
0.777313


In [213]:
accuracies = eval_precommit_adversarial_generator(x_test, val_data_generator, judge, adversarial_model_1epoch, 10)
accuracies

[0.7656, 0.7574, 0.7604, 0.7619, 0.7589, 0.7648, 0.7674, 0.7593, 0.763, 0.7647]

In [214]:
np.mean(accuracies)

0.76234

In [215]:
np.var(accuracies)

9.772400000000029e-06

### Better judge (30k batches, 56%)

In [44]:
accuracies = eval_precommit_adversarial_generator(x_test, val_data_generator, judge, adversarial_model_1epoch, 10)
print('accuracy: %.2f%%' % (100 * np.mean(accuracies)))
print('variance: %E' % np.var(accuracies))

accuracy: 77.24%
variance: 1.531050E-05


# Compare with even less trained adversary (5k samples)

In [45]:
adversarial_model_1epoch = create_mnist_cnn_model()
adversarial_model_1epoch.load_weights('model_mnist_5ksamples.h5py')

In [198]:
predictions = adversarial_model_1epoch.predict(x_test)
predictions.sort(axis=1)

In [199]:
for i in range(9):
    print((predictions[:, i+1] - predictions[:, i]).mean())

0.0007173434
0.00081278256
0.0011765662
0.001964301
0.0031793755
0.0070958748
0.018775087
0.08195718
0.6960923


In [200]:
accuracies = eval_precommit_adversarial_generator(x_test, val_data_generator, judge, adversarial_model_1epoch, 10)
accuracies

[0.7862,
 0.7833,
 0.7875,
 0.7801,
 0.7819,
 0.7828,
 0.7862,
 0.7859,
 0.7769,
 0.7806]

In [201]:
np.mean(accuracies)

0.7831400000000001

In [202]:
np.var(accuracies)

1.016639999999997e-05

### Better judge (30k batches, 56%)

In [46]:
accuracies = eval_precommit_adversarial_generator(x_test, val_data_generator, judge, adversarial_model_1epoch, 10)
print('accuracy: %.2f%%' % (100 * np.mean(accuracies)))
print('variance: %E' % np.var(accuracies))

accuracy: 80.12%
variance: 2.667810E-05


# Even worse model

In [47]:
adversarial_model_1epoch = create_mnist_cnn_model()
adversarial_model_1epoch.load_weights('model_mnist_500samples.h5py')

In [152]:
predictions = adversarial_model_1epoch.predict(x_test)
predictions.sort(axis=1)

In [153]:
for i in range(9):
    print((predictions[:, i+1] - predictions[:, i]).mean())

0.019914161
0.008608694
0.010177943
0.00808178
0.010024752
0.01380032
0.019552795
0.032029033
0.047684215


In [154]:
accuracies = eval_precommit_adversarial_generator(x_test, val_data_generator, judge, adversarial_model_1epoch, 10)

In [155]:
accuracies

[0.8403, 0.8454, 0.8417, 0.8419, 0.8438, 0.846, 0.8466, 0.8438, 0.8438, 0.8393]

In [156]:
np.mean(accuracies)

0.8432599999999999

In [157]:
np.var(accuracies)

5.284399999999954e-06

### Better judge (30k batches, 56%)

In [48]:
accuracies = eval_precommit_adversarial_generator(x_test, val_data_generator, judge, adversarial_model_1epoch, 10)
print('accuracy: %.2f%%' % (100 * np.mean(accuracies)))
print('variance: %E' % np.var(accuracies))

accuracy: 85.61%
variance: 4.547600E-06


# Optimal adversary - perfect knowledge of judge

- trivial
- choose judge's predicted categories as long as they are not true
- otherwise take the 2nd most probable class according to the judge and hope for a tie, which is a loose in our setting

In [54]:
def eval_optimal_adversary_generator(val_data_generator, judge, num_repetitions):
    # calculate true categories
    data_x_sparse, data_y = next(val_data_generator)
    true_categories = data_y.argmax(axis=1)
    accuracies = []
    # we have a noisy judge, so we need repetitions to find variance
    for i in range(num_repetitions):
        data_x_sparse, _ = next(val_data_generator)
        
        predictions = judge.predict(data_x_sparse)
        
        adversary = predictions.argsort(axis=1)
        adversary_precommit = adversary[:, -1]
        
        equal_selections = adversary_precommit == true_categories
        adversary_precommit[equal_selections] = adversary[equal_selections, -2]
        
        acc = eval_judge(predictions, true_categories, adversary_precommit)
        accuracies.append(acc)
    
    return accuracies

In [185]:
accuracies = eval_optimal_adversary_generator(val_data_generator, judge, 10)
accuracies

[0.5318, 0.5293, 0.526, 0.533, 0.5296, 0.5278, 0.5264, 0.5256, 0.5315, 0.5283]

In [186]:
np.mean(accuracies)

0.52893

In [187]:
np.var(accuracies)

5.974100000000048e-06

### Better judge (30k batches, 56%)

In [55]:
accuracies = eval_optimal_adversary_generator(val_data_generator, judge, 10)
print('accuracy: %.2f%%' % (100 * np.mean(accuracies)))
print('variance: %E' % np.var(accuracies))

accuracy: 55.62%
variance: 3.117450E-05


# Conclusion

- much of the gain in judge's accuracy can be explained with the pre-commit only, without the actual debate between the 2 agents
- adversarial precommit indeed managed to decrease the judge's accuracy compared to random precommit
- on the other hand, debate seems to be a useful tool for mitigating the effect of adversary

| precommit type | judge 5k | judge 30k |
|-----------------------|----------|----------|
| random | 87.35% | 88.31% |
| adversarial_top | 79.43% | 80.77% |
| adversarial_30k | 77.72% | 80.60% |
| adversarial_15k | 76.04% | 77.42% |
| adversarial_10k | 75.31% | 76.82% |
| adversarial_7.5k | 76.23% | 77.24% |
| adversarial_5k | 78.31% | 80.12% |
| adversarial_500 | 84.33% | 85.61% |
|-----------------------|----------|----------|
| adversarial_adam 1e-6 | 83.32% | 84.98% |
| adversarial_adam 5e-5 | 75.23% | 76.28% 
| adversarial_adam 1e-5| 73.87% | 75.41% ||
| adversarial_adam 1e-4| 75.06% | 76.50% |
|-----------------------|----------|----------|
| perfect knowledge | 52.89% | 55.62% |
|-----------------------|----------|----------|
| none / baseline | 52.99% | 55.51% |

# Future work

### Judge as a means to resolve disagreement

In [345]:
def get_accuracy(y_pred, y_true):
    correct = (y_pred == y_true).sum()
    print('correct: ', correct)
    return correct / y_true.shape[0]

In [311]:
super_agent_a = create_mnist_cnn_model()
super_agent_a.load_weights('model_mnist_1epoch_adam1e-5.h5py')

super_agent_b = create_mnist_cnn_model()
super_agent_b.load_weights('model_mnist_1epoch_adam5e-5.h5py')

In [312]:
data_x_sparse, data_y = next(val_data_generator)

true_categories = data_y.argmax(axis=1)

predictions_a = super_agent_a.predict(x_test).argmax(axis=1)
predictions_b = super_agent_b.predict(x_test).argmax(axis=1)
predictions_judge = judge.predict(data_x_sparse)

In [313]:
disagreement = predictions_a != predictions_b

resolution = predictions_judge[disagreement, predictions_a[disagreement]] > predictions_judge[disagreement, predictions_b[disagreement]]

res = predictions_b[disagreement]
res[resolution] = predictions_a[disagreement][resolution]

In [315]:
get_accuracy(res, true_categories[disagreement])

0.44427363566487316

In [316]:
get_accuracy(predictions_a[disagreement], true_categories[disagreement])

0.0722521137586472

In [317]:
get_accuracy(predictions_b[disagreement], true_categories[disagreement])

0.8262874711760184

In [318]:
get_accuracy(predictions_a, true_categories)

0.8387

In [319]:
get_accuracy(predictions_b, true_categories)

0.9368

In [320]:
predicted_category_judge = predictions_judge.argmax(axis=1)

In [321]:
get_accuracy(predicted_category_judge[disagreement], true_categories[disagreement])

0.22982321291314373

In [314]:
disagreement.sum()

1301

# Agents of the same power (just different seed)

In [322]:
super_agent_a = create_mnist_cnn_model()
super_agent_a.load_weights('model_mnist_1epoch_adam5e-5_2.h5py')

super_agent_b = create_mnist_cnn_model()
super_agent_b.load_weights('model_mnist_1epoch_adam5e-5.h5py')

In [339]:
data_x_sparse, data_y = next(val_data_generator)

true_categories = data_y.argmax(axis=1)

predictions_a = super_agent_a.predict(x_test).argmax(axis=1)
predictions_b = super_agent_b.predict(x_test).argmax(axis=1)
predictions_judge = judge.predict(data_x_sparse)

In [340]:
disagreement = predictions_a != predictions_b

resolution = predictions_judge[disagreement, predictions_a[disagreement]] > predictions_judge[disagreement, predictions_b[disagreement]]

res = predictions_b[disagreement]
res[resolution] = predictions_a[disagreement][resolution]

In [347]:
get_accuracy(res, true_categories[disagreement])

correct:  137


0.37950138504155123

In [348]:
get_accuracy(predictions_a[disagreement], true_categories[disagreement])

correct:  86


0.23822714681440443

In [349]:
get_accuracy(predictions_b[disagreement], true_categories[disagreement])

correct:  225


0.6232686980609419

In [328]:
get_accuracy(predictions_a, true_categories)

0.9229

In [329]:
get_accuracy(predictions_b, true_categories)

0.9368

In [333]:
predicted_category_judge = predictions_judge.argmax(axis=1)

In [334]:
get_accuracy(predicted_category_judge[disagreement], true_categories[disagreement])

0.21329639889196675

In [330]:
disagreement.sum()

361

In [384]:
all_preds_combined = predictions_a.copy()
all_preds_combined[disagreement] = res

In [385]:
get_accuracy(all_preds_combined, true_categories)

correct:  9280


0.928