# MNIST with Virtual Branching

In [1]:
import tensorflow as tf
import numpy as np
import os
from scipy.special import softmax
import matplotlib.pyplot as plt
import time
from sklearn.manifold import TSNE

In [2]:
import vbranch as vb

In [3]:
# Notebook-wide switches:
#   save         - boolean flag; no cell visible in this notebook reads it.
#   model_id     - integer suffix used in the checkpoint directory and in the model name.
#   architecture - selects the network variant in build_model ('fcn' or 'cnn').
save, model_id, architecture = False, 1, 'fcn'

## Load Data

In [4]:
# Dataset selection and the size parameters handed to vb.utils.get_data.
dataset = 'toy'
num_classes, num_features, samples_per_class = 10, 784, 200

# get_data returns a (train, test) pair, each an (X, y) tuple.
(X_train, y_train), (X_test, y_test) = vb.utils.get_data(
    dataset, architecture, num_classes, num_features, samples_per_class)

Creating dataset (hypercube)...
Training set: (2000, 784)
Testing set: (2000, 784)


In [5]:
# Placeholder shapes with an unspecified leading (batch) dimension:
# x keeps every per-sample dimension of X_train, y has one column per class.
x_shape = tuple([None] + list(X_train.shape[1:]))
y_shape = tuple([None, num_classes])

## Train

### Build Model

In [6]:
# Training-loop settings passed to model.fit (BATCH_SIZE also sizes the shuffle buffer).
BATCH_SIZE, EPOCHS, STEPS_PER_EPOCH = 32, 10, 100

# Branching settings: number of branches, and the fraction that build_model
# multiplies layer widths by to obtain the second element of each layer-size tuple.
NUM_BRANCHES, SHARED_FRAC = 3, 0

# Checkpoint directory: models/vb-<dataset>-<architecture>-B<branches>-S<shared frac>_<model id>
model_path = os.path.join(
    'models',
    'vb-{}-{}-B{:d}-S{:.2f}_{:d}'.format(
        dataset, architecture, NUM_BRANCHES, SHARED_FRAC, model_id),
)

In [7]:
# Display the resolved checkpoint directory as a sanity check before training.
model_path

'models/vb-toy-fcn-B3-S0.00_1'

In [8]:
# Start from an empty default graph so re-running this cell does not pile up ops.
tf.reset_default_graph()

# Placeholders for the full feature/label arrays and for the batch size.
x = tf.placeholder('float32', x_shape, name='x')
y = tf.placeholder('float32', y_shape, name='y')

batch_size = tf.placeholder('int64', name='batch_size')

# One independent input pipeline (iterator + next-element tensors) per branch.
iterators = [None] * NUM_BRANCHES
inputs = [None] * NUM_BRANCHES
labels_one_hot = [None] * NUM_BRANCHES

for i in range(NUM_BRANCHES):
    # The pipeline is bound to `branch_data`, not `dataset`, so the string
    # `dataset` chosen in the Load Data section (and formatted into model_path)
    # is not overwritten by a tf.data.Dataset object.
    # NOTE(review): shuffle() comes after batch(), so whole batches are shuffled
    # (with a buffer of 4*BATCH_SIZE batches), not individual samples - confirm
    # this ordering is intended.
    branch_data = tf.data.Dataset.from_tensor_slices((x, y)).\
        repeat().batch(batch_size).shuffle(buffer_size=4*BATCH_SIZE)

    iterators[i] = branch_data.make_initializable_iterator()
    inputs[i], labels_one_hot[i] = iterators[i].get_next('input')

In [9]:
def build_model(architecture,inputs,labels, num_classes,num_branches,model_id,
        shared_frac, test=False):
    """Create, compile and summarize a virtual-branching model.

    Args:
        architecture: 'fcn' or 'cnn'; selects the vbranch constructor.
        inputs: input tensor(s) forwarded unchanged to the vbranch constructor.
        labels: label tensor(s) forwarded to ``compile`` as ``labels_one_hot``.
        num_classes: size used for the output layer.
        num_branches: number of branches.
        model_id: appended to ``'model_'`` to form the model name.
        shared_frac: multiplier applied to each layer width (then truncated
            with ``int``) to get the second element of the layer-size tuple;
            presumably the shared portion of the layer - confirm in vbranch.
        test: forwarded to ``compile``.

    Returns:
        The compiled model object returned by the vbranch constructor.

    Raises:
        ValueError: if ``architecture`` is neither 'fcn' nor 'cnn'.
    """
    if architecture not in ('fcn', 'cnn'):
        raise ValueError('invalid model')

    def layer_spec(width):
        # ([width for each branch], int(width * shared_frac))
        return ([width] * num_branches, int(width * shared_frac))

    scope_name = 'model_' + str(model_id)

    if architecture == 'fcn':
        net = vb.vbranch_simple_fcn(
            inputs, layer_spec(128), layer_spec(num_classes),
            branches=num_branches, name=scope_name)
    else:
        net = vb.vbranch_simple_cnn(
            inputs, (num_classes, 0), layer_spec(16), layer_spec(32),
            branches=num_branches, name=scope_name)

    adam = tf.train.AdamOptimizer(learning_rate=0.001)
    net.compile(adam, 'softmax_cross_entropy_with_logits',
                labels_one_hot=labels, test=test)

    # The summary is printed for every build, including test-mode copies.
    net.summary()
    return net

In [10]:
# Training model: fed by the per-branch iterator tensors built above.
model = build_model(
    architecture=architecture,
    inputs=inputs,
    labels=labels_one_hot,
    num_classes=num_classes,
    num_branches=NUM_BRANCHES,
    model_id=model_id,
    shared_frac=SHARED_FRAC,
)

i  Layer name  Output shape  Num param  Inbound  
-------------------------------------------------
   Input       [None,784]                        
-------------------------------------------------
   Input       [None,784]                        
-------------------------------------------------
   Input       [None,784]                        
-------------------------------------------------
0  fc1         [None,128]    301440     input    
               [None,128]                        
               [None,128]                        
-------------------------------------------------
1  bn1         [None,128]    768        fc1      
               [None,128]                        
               [None,128]                        
-------------------------------------------------
2  relu1       [None,128]    0          bn1      
               [None,128]                        
               [None,128]                        
-------------------------------------------------


In [11]:
# Test-mode copy of the model, fed directly through placeholders instead of
# the dataset iterators. It is built with the same model_id, and therefore the
# same 'model_<id>' name, as the training model - presumably so that vbranch
# reuses the trained variables; confirm in vbranch.
x_place = tf.placeholder('float32', x_shape, name='x_test')
y_place = tf.placeholder('float32', y_shape, name='y_test')

test_model = build_model(
    architecture=architecture,
    inputs=x_place,
    labels=[y_place for _ in range(NUM_BRANCHES)],  # every branch shares one label placeholder
    num_classes=num_classes,
    num_branches=NUM_BRANCHES,
    model_id=model_id,
    shared_frac=SHARED_FRAC,
    test=True,
)

i  Layer name  Output shape  Num param  Inbound  
-------------------------------------------------
   Input       [None,784]                        
-------------------------------------------------
   Input       [None,784]                        
-------------------------------------------------
   Input       [None,784]                        
-------------------------------------------------
0  fc1         [None,128]    301440     input    
               [None,128]                        
               [None,128]                        
-------------------------------------------------
1  bn1         [None,128]    768        fc1      
               [None,128]                        
               [None,128]                        
-------------------------------------------------
2  relu1       [None,128]    0          bn1      
               [None,128]                        
               [None,128]                        
-------------------------------------------------


### Run Ops

In [12]:
# Train for EPOCHS x STEPS_PER_EPOCH steps with the per-branch iterators,
# validating on the held-out split through the placeholder-fed test model.
# NOTE(review): the `save` flag from the configuration cell is not consulted
# here - save_model_path is always passed. Confirm whether saving should be
# conditional on `save`.
history = model.fit(iterators, X_train, y_train, EPOCHS, STEPS_PER_EPOCH,
        BATCH_SIZE, validation=(X_test, y_test), test_model=test_model,
        save_model_path=model_path)

Epoch 1/10
 - 1s - train_loss_1: 1.3399 - train_loss_2: 1.2996 - train_loss_3: 1.2450 - train_acc_1: 0.6794 - train_acc_2: 0.7000 - train_acc_3: 0.7244 - train_acc_ensemble: 0.3381 - val_loss_1: 0.9201 - val_loss_2: 0.8933 - val_loss_3: 0.8708 - val_acc_1: 0.8910 - val_acc_2: 0.9015 - val_acc_3: 0.9110 - val_acc_ensemble: 0.9460
Epoch 2/10
 - 1s - train_loss_1: 0.5725 - train_loss_2: 0.5648 - train_loss_3: 0.5466 - train_acc_1: 0.9762 - train_acc_2: 0.9775 - train_acc_3: 0.9744 - train_acc_ensemble: 0.4009 - val_loss_1: 0.7010 - val_loss_2: 0.6732 - val_loss_3: 0.6655 - val_acc_1: 0.9150 - val_acc_2: 0.9275 - val_acc_3: 0.9305 - val_acc_ensemble: 0.9550
Epoch 3/10
 - 0s - train_loss_1: 0.3940 - train_loss_2: 0.3871 - train_loss_3: 0.3666 - train_acc_1: 0.9912 - train_acc_2: 0.9925 - train_acc_3: 0.9959 - train_acc_ensemble: 0.4381 - val_loss_1: 0.5880 - val_loss_2: 0.5753 - val_loss_3: 0.5661 - val_acc_1: 0.9325 - val_acc_2: 0.9340 - val_acc_3: 0.9360 - val_acc_ensemble: 0.9545
Epoch 4

## Load Model

In [None]:
# Names of ops/tensors to look up in the restored graph; branches are numbered from 1.
# NOTE(review): `inputs` and `labels_one_hot` are rebound here from tensors to
# name strings, so the model-building cells cannot be re-run after this cell
# without first re-running the input-pipeline cell.
# NOTE(review): the 'input_<k>' names assume the iterator outputs are numbered
# 1..NUM_BRANCHES in the saved graph - verify against the checkpoint.
test_init_ops = ['test_init_op_{}'.format(b) for b in range(1, NUM_BRANCHES + 1)]
losses = ['loss_{}:0'.format(b) for b in range(1, NUM_BRANCHES + 1)]
train_acc_ops = ['train_acc_{}:0'.format(b) for b in range(1, NUM_BRANCHES + 1)]

inputs = ['input_%d:0' % b for b in range(1, NUM_BRANCHES + 1)]
labels_one_hot = ['input_%d:1' % b for b in range(1, NUM_BRANCHES + 1)]
outputs = ['model_{}/output_vb{}:0'.format(model_id, b)
           for b in range(1, NUM_BRANCHES + 1)]

In [None]:
# Import the checkpoint into an empty graph. Without the reset, the meta graph
# is added to the default graph that still holds the training graph, and the
# name-based fetches below ('loss_1:0', ...) become ambiguous.
tf.reset_default_graph()

with tf.Session() as sess:
    # Reuse `model_path` from the Build Model section so the checkpoint that is
    # loaded is the one training wrote. A hard-coded 'vb-mnist-...' directory
    # here pointed at a different location and overwrote the global.
    meta_path = os.path.join(model_path, 'ckpt.meta')
    ckpt = tf.train.get_checkpoint_state(model_path)
    if ckpt is None:
        # get_checkpoint_state returns None when the directory has no checkpoint.
        raise IOError('No checkpoint found in {!r}'.format(model_path))

    imported_graph = tf.train.import_meta_graph(meta_path)
    imported_graph.restore(sess, ckpt.model_checkpoint_path)

    # Evaluate on the whole test split in a single batch.
    sess.run(test_init_ops, feed_dict={'batch_size:0': len(X_test)})
    val_losses, val_acc, indiv_accs = sess.run([losses, 'test_acc:0', train_acc_ops])

    # Re-initialize with a smaller batch and fetch per-branch inputs, labels
    # and outputs for the feature visualization below.
    sample_size = 250
    sess.run(test_init_ops, feed_dict={'batch_size:0':sample_size})
    X_test_samples, y_test_samples, features = sess.run([inputs, labels_one_hot, outputs])

In [None]:
# Report the restored model's metrics: loss averaged over the per-branch
# losses, the value fetched from 'test_acc:0', and the per-branch accuracies.
print('Loss:', np.mean(val_losses))
print('Acc:', val_acc)
print('Indiv accs:', indiv_accs)

## Feature Visualization

In [None]:
# `features` holds one output array per branch, so averaging over axis 0
# collapses the branch axis into a single branch-averaged feature matrix.
mean_features = np.asarray(features).mean(axis=0)
print(mean_features.shape)

In [None]:
# Project the branch-averaged features to 2-D with t-SNE and time the fit.
start = time.time()
# random_state is fixed because t-SNE is stochastic; without it the embedding
# (and the plot below) changes on every run.
tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300, random_state=0)
tsne_results = tsne.fit_transform(mean_features)

print('t-SNE done! Time elapsed: {} seconds'.format(time.time()-start))

In [None]:
# Class index of each sample, taken from the first branch's one-hot labels.
# NOTE(review): this assumes every branch was fed the same samples in the same
# order; each branch has its own iterator, so confirm that before interpreting
# the branch-averaged features against these labels.
labels = np.asarray(y_test_samples[0]).argmax(axis=-1)

In [13]:
# Scatter the 2-D t-SNE embedding, coloured by class index. The explicit
# figure/axes interface is used so the plot carries a title and axis labels
# and can be read on its own.
fig, ax = plt.subplots()
points = ax.scatter(tsne_results[:,0], tsne_results[:,1], c=labels, cmap=plt.cm.jet)
fig.colorbar(points, ax=ax, label='class index')
ax.set(title='t-SNE of branch-averaged output features',
       xlabel='t-SNE dimension 1', ylabel='t-SNE dimension 2')
plt.show()

NameError: name 'tsne_results' is not defined