In [None]:
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams['font.size'] = 8
plt.rcParams['mathtext.fontset'] = 'cm'
%matplotlib inline

import tensorflow as tf

from utils import conv2d
from plotting import (angle_plot, corr_plot, stddev_angles, 
                      weight_dist, get_angles, kde_scipy, hist2d)

from bnn import BinaryNeuralNetwork, ClassifierGenerator
from data.load_data import load_cifar10, CIFAR10

# Quickly load changes from imported functions, e.g. from plotting.py
# %load_ext autoreload
# %autoreload 2

In [None]:
# load_cifar10 returns six values; the first four are discarded and the last
# two are bound to the test images and their labels.
(_, _, _, _, test_data, test_labels) = load_cifar10(train=False)

# Dataset tag interpolated into the output file names written further down.
data_set = CIFAR10

In [None]:
# Number of test examples kept for the analysis.
# Try 50 for faster plot generation.
n_imgs = 500

# Keep only the first n_imgs images together with their labels.
test_data, test_labels = test_data[:n_imgs], test_labels[:n_imgs]

In [None]:
mode = ['b', 't'][0]  # Binary vs ternary

# Params need to match those used for training the network (e.g. in run_bnn.py)

# Only the quantization settings, the checkpoint file and the reference angle
# differ between the two modes; the architecture below is shared.
if mode == 'b':
    quant_params = {'levels': 2, 'threshold': 0.}
    param_file = 'tmp/cifar-10_model_2_levels.ckpt'
    # Reference angle handed to angle_plot (as `exp`) further down.
    exp = np.arccos(np.sqrt(2/np.pi))
elif mode == 't':
    quant_params = {'levels': 3, 'threshold': 0.02}
    param_file = 'tmp/cifar-10_model_3_levels.ckpt'
    exp = 0.5956  # From  writeup/imgs/hd_figures.ipynb
else:
    raise ValueError('Invalid mode')

network_params = {
    'conv': [128, 128, 256, 256, 512, 512],
    'max_pool': [False, True, False, True, False, True],
    'filter_size': 3,
    'fc': [1024, 1024],
    'bin_acts': [True] * 6,
    'bin_weights': [True] * 6,
}
network_params.update(quant_params)

bnn = BinaryNeuralNetwork(
    in_dim=(32, 32, 3),
    out_dim=10,
    network_params=network_params,
    param_file=param_file
)

In [None]:
# Run the selected test examples through the network and unpack the three
# entries of the returned dict that the rest of the notebook reads.
tensors = bnn.inspect_network(test_data, test_labels)
conv_tensors, fc_tensors, x = (
    tensors[key] for key in ('conv_tensors', 'fc_tensors', 'x')
)

In [None]:
# Mean of the 'score' entry returned by bnn.classify on the selected test set.
# Single-argument print(...) behaves identically on Python 2 and Python 3.
classification = bnn.classify(test_data, test_labels)
print(classification['score'].mean())

In [None]:
# Separate the per-layer tensors (only convolutional in this case).
#   wc / wb: 'wc' and 'wb' entries (continuous and binary weights)
#   ac / ab: 'pre_bin_act' and 'post_bin_act' entries
# Both activation lists start with the network input x, so they hold one more
# element than the weight lists; zip(ac, wb) therefore pairs each weight
# tensor with the preceding element of the activation list.
wc, wb = [], []
ac, ab = [x], [x]
for layer in conv_tensors:
    wc.append(layer['wc'])
    wb.append(layer['wb'])
    ac.append(layer['pre_bin_act'])
    ab.append(layer['post_bin_act'])

In [None]:
# Empirical standard deviations of continuous weights, important for ternary network analysis.
# list(...) forces evaluation so the values (not a map object) are printed on
# Python 3 as well; on Python 2 the output is unchanged.
print(list(map(np.std, wc)))

In [None]:
# For every layer, list the distinct quantized weight values and the fraction
# of weights taking each value.
print('Proportion of weight values by layer')
for layer_wb in wb:
    vals, cts = np.unique(layer_wb.ravel(), return_counts=True)
    fractions = cts / (1.0 * cts.sum())
    # Explicit formatting keeps the "vals fractions" output identical on
    # Python 2 (where print(a, b) would print a tuple) and Python 3.
    print('{} {}'.format(vals, fractions))

In [None]:
# Shape of every entry of ac (network input first, then the per-layer
# 'pre_bin_act' tensors). A named loop variable is used instead of `_` so
# IPython's last-output variable is not overwritten.
for act in ac:
    print(act.shape)

In [None]:
# Computing dot products between continuous/binary weights and activations.
def _conv_per_layer(session, acts, weights):
    """Evaluate conv2d(act, w) in `session` for each zipped (act, w) pair."""
    return [session.run(conv2d(act, w)) for act, w in zip(acts, weights)]


# device_count={'GPU': 0} requests zero GPU devices for this session.
config = tf.ConfigProto(device_count={'GPU': 0})
tf.reset_default_graph()
with tf.Session(config=config) as sess:
    ac_dot_wb = _conv_per_layer(sess, ac, wb)
    ab_dot_wb = _conv_per_layer(sess, ab, wb)
    ac_dot_wc = _conv_per_layer(sess, ac, wc)
    ab_dot_wc = _conv_per_layer(sess, ab, wc)

In [None]:
# Per-layer comparison of the dot products of the post-binarization
# activations with the quantized weights (x axis) against those with the
# continuous weights (y axis).
# Raw strings keep the LaTeX backslashes literal without relying on `\c`
# being an unrecognised escape sequence; the resulting text is unchanged.
weight_bin_path = 'output/{}_weight_bin_mode_{}.pdf'.format(data_set, mode)
fig, axes = corr_plot(
    x_=ab_dot_wb, x_label=r'$A\cdot W^{}$'.format(mode),
    y_=ab_dot_wc, y_label=r'$A\cdot W^c$',
    save_path=weight_bin_path)

In [None]:
# Small single-layer 2-D histogram of A.W^{mode} against A.W^c.
fig, ax = plt.subplots(1, 1, figsize=(2, 2))
q = 2  # index into the per-layer lists of the layer shown
# The last argument is q + 1 -- presumably a 1-based layer number used by
# hist2d for labelling; TODO confirm against plotting.hist2d.
hist2d(fig, ax, ab_dot_wb[q].ravel(), ab_dot_wc[q].ravel(), q+1)
fontsize = 12
# Raw strings: the LaTeX backslashes stay literal, text is unchanged.
ax.set_xlabel(r'$A\cdot W^{}$'.format(mode), fontsize=fontsize)
ax.set_ylabel(r'$A\cdot W^c$', fontsize=fontsize)
ymin, ymax = ax.get_ylim()
if mode == 'b':
    # Grey out the lower-right and upper-left parts of the axes. xmin/xmax
    # are fractions of the axes width, so 0.5 is the horizontal midpoint of
    # the axes (which matches x == 0 only if the x range is symmetric).
    ax.axhspan(ymin, 0, xmin=0.5, alpha=0.5, color='gray', zorder=1)
    ax.axhspan(0, ymax, xmax=0.5, alpha=0.5, color='gray', zorder=1)
elif mode == 't':
    # Dashed lines through the origin.
    ax.axvline(0, c='black', ls='--')
    ax.axhline(0, c='black', ls='--')

ax.xaxis.set_ticks([])
ax.yaxis.set_ticks([])
# plt.tight_layout()
fn = 'output/{}_hist2d_small_mode_{}.pdf'.format(data_set, mode)
plt.savefig(fn, dpi=200)

In [None]:
# Per-layer comparison of the dot products of the quantized weights with the
# 'pre_bin_act' activations (x axis) against those with the 'post_bin_act'
# activations (y axis). The first entry of each list is skipped: ac and ab
# both start with the same network input x, so those two entries are identical.
# Raw strings keep the LaTeX backslashes literal; the text is unchanged.
activ_bin_path = 'output/{}_activ_bin_mode_{}.pdf'.format(data_set, mode)
fig, axes = corr_plot(
    x_=ac_dot_wb[1:], x_label=r'$A^c\cdot W^{}$'.format(mode),
    y_=ab_dot_wb[1:], y_label=r'$A^{}\cdot W^{}$'.format(mode, mode),
    save_path=activ_bin_path)

# Angle Plots

In [None]:
# Flatten the continuous and binary weights into 2-D arrays for the angle
# computation: all leading axes are collapsed and the result transposed, so
# each row holds the values belonging to one index of the last axis
# (presumably one output filter -- TODO confirm the weight layout).
c_vecs = [w.reshape(-1, w.shape[-1]).T for w in wc]
b_vecs = [w.reshape(-1, w.shape[-1]).T for w in wb]

In [None]:
# Draw the angle plot twice (zoom=True, then zoom=False). angle_plot is
# called with save_path=None and the figure is saved here, after the title
# has been set.
for zoom in (True, False):
    fig, ax = angle_plot(c_vecs, b_vecs, data_set='CIFAR10',
                         save_path=None, zoom=zoom, exp=exp)
    title = r'$\angle (w^{}, w^c)$ for {}'.format(mode, 'CIFAR10')
    ax.set_title(title)
    save_path = 'output/{}_angle_plot_zoom_{}_mode_{}.pdf'.format(
        data_set, zoom, mode)
    plt.savefig(save_path)

In [None]:
# Angles corresponding to the peaks by layer: evaluate a KDE of the angles
# returned by get_angles on a fixed grid over [0, pi/2) and print the grid
# point with the highest density.
x_grid = np.arange(0, np.pi/2., 0.001)  # identical for every layer, so built once
for c, b in zip(c_vecs, b_vecs):
    angles = get_angles(c, b)
    pdf = kde_scipy(angles, x_grid, bandwidth=0.01)
    idx = np.argmax(pdf)
    print(x_grid[idx])

In [None]:
# stddev_angles plot for the flattened weight vectors, written to a
# per-dataset, per-mode PDF. The data_set label argument is left empty.
w_sigmas_path = 'output/{}_mode_{}_w_sigmas.pdf'.format(data_set, mode)
stddev_angles(c_vecs, b_vecs, data_set='', save_path=w_sigmas_path)

In [None]:
# Distribution of the continuous weights: the first 10000 flattened values of
# each of the six layers are passed to weight_dist.
wcp = [wc[i].ravel()[0:10000] for i in range(6)]
wbp = [wb[i].ravel()[0:10000] for i in range(6)]

# Log y axis in binary mode, linear in ternary mode.
yscale = {'b': 'log', 't': 'linear'}[mode]
fig, ax = weight_dist(wcp, yscale=yscale)
if mode == 't':
    # Mark +/- the ternarization threshold with vertical lines.
    threshold = bnn.network_params['threshold']
    # The loop variable must not be called `x`: that name holds the network
    # input (tensors['x']) that the activation lists ac/ab are built from,
    # and overwriting it would corrupt any later re-run of that cell.
    for x_threshold in [-1 * threshold, threshold]:
        ax.axvline(x=x_threshold, c='black')
    ax.set_yticks([])
    plt.legend(loc='best', labelspacing=0.1, prop={'size': 8})

save_path = 'output/{}_mode_{}_weight_dist.pdf'.format(data_set, mode)
plt.savefig(save_path)

# Look at impact of binarization of weights on network performance

In [None]:
def test_w_bin_pattern(pattern=[True] * 6):
    bnn = BinaryNeuralNetwork(
        in_dim=(32, 32, 3), 
        out_dim=10, 
        network_params={
            'conv': [128, 128, 256, 256, 512, 512],
            'max_pool': [False, True, False, True, False, True],
            'filter_size': 3,
            'fc': [1024, 1024],
            'bin_acts': [True] * 6,
            'bin_weights': pattern,
            'levels': 2,
            'threshold': 0,
        },
        param_file='tmp/cifar-10_model_2_levels.ckpt'
    )
    assert False, 'Broken, need to refit batch norm weights'
    return bnn.classify(test_data, test_labels)['score'].mean()

In [None]:
# Set the 'bin_weights' flag to False for one position at a time; the last
# pass (i == 6) leaves all six flags True.
for i in range(7):
    pattern = [True] * 6
    if i < 6:
        pattern[i] = False

    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(pattern)
    scores = test_w_bin_pattern(pattern)
    print(scores)

In [None]:
# Same evaluation with all six 'bin_weights' flags set to False.
# Single-argument print(...) gives the same output on Python 2 and 3.
pattern = [False] * 6
print(pattern)
scores = test_w_bin_pattern(pattern)
print(scores)

# Permutations

In [None]:
# Control experiment: randomly permute the entries of every activation
# tensor (shape preserved) and repeat the weight-binarization comparison.
# A dedicated, seeded generator makes the figure reproducible across kernel
# restarts and leaves NumPy's global random state untouched.
PERM_SEED = 0
perm_rng = np.random.RandomState(PERM_SEED)
ab_perm = [
    perm_rng.permutation(item.ravel()).reshape(item.shape) for item in ab
]

# Computing dot products between continuous/binary weights and activations.
config = tf.ConfigProto(device_count={'GPU': 0})  # zero GPU devices for this session
tf.reset_default_graph()
with tf.Session(config=config) as sess:
    abperm_dot_wb = [sess.run(conv2d(a, w)) for a, w in zip(ab_perm, wb)]
    abperm_dot_wc = [sess.run(conv2d(a, w)) for a, w in zip(ab_perm, wc)]

# The x label follows `mode`, like the other correlation plots (it was
# hard-coded to W^b, which is wrong in ternary mode). Raw strings keep the
# LaTeX backslashes literal.
# NOTE(review): the file name does not include `mode`, so runs in different
# modes write to the same PDF -- confirm whether that is intended.
fig, ax = corr_plot(
    x_=abperm_dot_wb, x_label=r'$Aperm\cdot W^{}$'.format(mode),
    y_=abperm_dot_wc, y_label=r'$Aperm\cdot W^c$',
    save_path='output/{}_weight_random_perm_bin.pdf'.format(data_set))