<a href="https://colab.research.google.com/github/HayeonLee/Samsung_DS_Assignment/blob/master/Network_Sparsification_Assignment_(%ED%99%A9%EC%84%B1%EC%A3%BC_%EA%B5%90%EC%88%98%EB%8B%98_%EC%97%B0%EA%B5%AC%EC%8B%A4).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Network Sparsification Assignment (황성주 교수님 연구실)

## Import Tensorflow and other libraries

In [0]:
#from __future__ import absolute_import, division, print_function, unicode_literals
# Remove any other versions of TensorFlow / TensorBoard that are already installed.
!pip uninstall tensorboard -y
!pip uninstall tensorflow-gpu -y
!pip uninstall tensorflow -y
# Install the GPU build of TensorFlow, pinned to version 1.14.
!pip install tensorflow-gpu==1.14

In [0]:
import tensorflow as tf # import TensorFlow
tf.__version__ # show which TensorFlow version is in use

In [0]:
# Download the checkpoint files of the pretrained LeNet and unpack them into results/.
!mkdir -p results/
!wget -O lenet_dense_pretrained.zip https://github.com/HayeonLee/sparsification_samsung/blob/master/lenet_dense_pretrained.zip?raw=true
!unzip lenet_dense_pretrained.zip -d results/
!rm lenet_dense_pretrained.zip
!ls
!ls results/pretrained/

In [0]:
# 필요한 라이브러리를 임포트합니다.
from __future__ import print_function
import time
import os
from pylab import *
import numpy as np
from tensorflow.python.client import device_lib
from tensorflow.contrib.distributions import RelaxedBernoulli
from tensorflow.examples.tutorials.mnist import input_data

## Define the functions and utils

In [0]:
# Short aliases for frequently used TensorFlow functions.
# logit(x) = log(x) - log(1 - x); the 1e-20 offsets keep both logarithms
# finite when x is exactly 0 or 1.
logit = lambda x: tf.log(x + 1e-20) - tf.log(1-x + 1e-20)
softplus = tf.nn.softplus
relu = tf.nn.relu

dense = tf.layers.dense
flatten = tf.contrib.layers.flatten

def conv(x, filters, kernel_size=3, strides=1, **kwargs):
    """Apply ``tf.layers.conv2d`` to ``x`` with the channels-first data format.

    Args:
        x: input tensor handed to ``tf.layers.conv2d``.
        filters: number of output filters.
        kernel_size: convolution kernel size (default 3).
        strides: convolution stride (default 1).
        **kwargs: forwarded unchanged to ``tf.layers.conv2d``.

    Returns:
        The output tensor of the convolution layer.
    """
    conv2d = tf.layers.conv2d
    return conv2d(x, filters, kernel_size, strides,
            data_format='channels_first', **kwargs)

def pool(x, **kwargs):
    """Apply 2x2 max pooling with stride 2 in the channels-first data format.

    Args:
        x: input tensor handed to ``tf.layers.max_pooling2d``.
        **kwargs: forwarded unchanged to ``tf.layers.max_pooling2d``.

    Returns:
        The pooled tensor.
    """
    max_pool = tf.layers.max_pooling2d
    return max_pool(x, 2, 2, data_format='channels_first', **kwargs)

def global_avg_pool(x):
    """Return the mean of ``x`` taken over axes 2 and 3."""
    reduced_axes = [2, 3]
    return tf.reduce_mean(x, axis=reduced_axes)

# Short alias for the layer-normalization function in tf.contrib.layers.
layer_norm = tf.contrib.layers.layer_norm

In [0]:
# utils/train.py
# Loss and metric helpers used by the models below.
def cross_entropy(logits, labels):
    """Softmax cross-entropy between ``logits`` and one-hot ``labels``.

    Computed with ``tf.losses.softmax_cross_entropy``.
    """
    loss = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits)
    return loss

def weight_decay(decay, var_list=None):
    """Return ``decay`` times the summed ``tf.nn.l2_loss`` of the given variables.

    Args:
        decay: multiplier applied to the summed L2 losses.
        var_list: variables to penalise; when None, ``tf.trainable_variables()``
            is used.
    """
    if var_list is None:
        var_list = tf.trainable_variables()
    l2_terms = [tf.nn.l2_loss(variable) for variable in var_list]
    return decay*tf.add_n(l2_terms)

def accuracy(logits, labels):
    """Fraction of rows whose argmax along axis 1 agrees between logits and labels."""
    predicted = tf.argmax(logits, 1)
    expected = tf.argmax(labels, 1)
    hits = tf.cast(tf.equal(predicted, expected), tf.float32)
    return tf.reduce_mean(hits)
  
def digamma_approx(x):
    """Closed-form approximation of the digamma function built from TF ops.

    The approximation log(t + 0.4849...) - 1/(1.0271...*t) is evaluated at
    t = x + 1 and then shifted back with the recurrence
    digamma(x) = digamma(x + 1) - 1/x.

    Source of the approximation:
    @MISC {1446110,
    TITLE = {Approximating the Digamma function},
    AUTHOR = {njuffa (https://math.stackexchange.com/users/114200/njuffa)},
    HOWPUBLISHED = {Mathematics Stack Exchange},
    NOTE = {URL:https://math.stackexchange.com/q/1446110 (version: 2015-09-22)},
    EPRINT = {https://math.stackexchange.com/q/1446110},
    URL = {https://math.stackexchange.com/q/1446110}}
    """
    shifted = x+1
    digamma_shifted = tf.log(shifted + 0.4849142940227510) \
            - 1/(1.0271785180163817*shifted)
    return digamma_shifted - 1./x

In [0]:
# Helper class used to accumulate and print training/evaluation logs.
class Accumulator():
    """Accumulate running sums of named scalar metrics and report them.

    The metric names are given positionally to the constructor; every call to
    ``accum`` must then supply one value per name, in the same order.
    """

    def __init__(self, *args):
        """Create an accumulator for the metric names in ``args``."""
        self.args = args
        # Maps each metric name to its position in ``self.sums``.
        self.argdict = {arg: i for i, arg in enumerate(args)}
        self.sums = [0]*len(args)
        self.cnt = 0

    def accum(self, val):
        """Add one observation.

        Args:
            val: a single value, or a list/tuple with one value per metric.
                ``None`` entries are dropped first, so the result of
                ``sess.run([train_op, loss, acc])`` can be passed directly.

        Raises:
            AssertionError: if the number of non-None values does not match
                the number of metrics.
        """
        val = list(val) if isinstance(val, (list, tuple)) else [val]
        val = [v for v in val if v is not None]
        assert len(val) == len(self.args), \
                'expected %d values, got %d' % (len(self.args), len(val))
        for i, v in enumerate(val):
            self.sums[i] += v
        self.cnt += 1

    def clear(self):
        """Reset all sums and the observation count to zero."""
        self.sums = [0]*len(self.args)
        self.cnt = 0

    def get(self, arg, avg=True):
        """Return the average (or the raw sum when ``avg`` is False) of ``arg``.

        Raises:
            AssertionError: if ``arg`` is not one of the tracked metric names.
        """
        assert arg in self.argdict, 'unknown metric: %r' % (arg,)
        total = self.sums[self.argdict[arg]]
        return total/self.cnt if avg else total

    def print_(self, header=None, epoch=None, it=None, time=None,
            logfile=None, do_not_print=(), as_int=(),
            avg=True):
        """Print one summary line and optionally append it to ``logfile``.

        Args:
            header: optional prefix, emitted as ``'<header>: '``.
            epoch: optional epoch number to include.
            it: optional iteration number to include.
            time: optional elapsed seconds to include.
            logfile: optional object with ``write``; receives the line plus
                a trailing newline.
            do_not_print: metric names to leave out of the line.
            as_int: metric names to format as integers instead of floats.
            avg: report averages when True, raw sums when False.
        """
        line = '' if header is None else header + ': '
        if epoch is not None:
            line += ('epoch %d, ' % epoch)
        if it is not None:
            line += ('iter %d, ' % it)
        if time is not None:
            line += ('(%.3f secs), ' % time)

        # Every metric, including the last one, goes through the same
        # formatting path, so ``as_int`` is checked against the metric
        # actually being formatted and no trailing separator is left behind.
        fields = []
        for arg in self.args:
            if arg in do_not_print:
                continue
            val = self.sums[self.argdict[arg]]
            if avg:
                val /= self.cnt
            if arg in as_int:
                fields.append('%s %d' % (arg, int(val)))
            else:
                fields.append('%s %f' % (arg, val))
        line += ', '.join(fields)
        print(line)

        if logfile is not None:
            logfile.write(line + '\n')

## Prepare the dataset: MNIST

In [0]:
MNIST_PATH = './mnist'

def mnist_input(batch_size):
    """Load MNIST and compute how many batches make up one pass over each split.

    Args:
        batch_size: number of examples per batch.

    Returns:
        A tuple ``(mnist, n_train_batches, n_test_batches)`` where ``mnist`` is
        the dataset object returned by ``input_data.read_data_sets`` (one-hot
        labels, no validation split) and the two counts are integers.
    """
    mnist = input_data.read_data_sets(MNIST_PATH, one_hot=True, validation_size=0)
    # Floor division keeps the counts integral on Python 3 (and under
    # ``from __future__ import division``), where ``/`` would return a float
    # that ``range()`` rejects.
    n_train_batches = mnist.train.num_examples // batch_size
    n_test_batches = mnist.test.num_examples // batch_size
    return mnist, n_train_batches, n_test_batches

## Create models

### *아래 코드에서 Dropout Type을 바꿔주시면 됩니다.*
- None

- 'bbdropout'

- 'sbpdropout'

In [0]:
# Dropout variant applied when the network is built further below.
# choose type: [None, 'bbdropout', 'sbpdropout']
dropout_type = 'bbdropout'

In [0]:
# LeNet variant made of fully connected layers only.
def lenet_dense(x, y, training, name='lenet', reuse=None,
        dropout=None, **dropout_kwargs):
    """Build the three-layer dense LeNet (500 -> 300 -> 10 units).

    Args:
        x: network input tensor.
        y: one-hot label tensor used for the loss and the accuracy.
        training: Python bool forwarded to ``dropout``; also selects the
            'train_' or 'test_' collection prefix.
        name: prefix for all layer and dropout names.
        reuse: forwarded to the dense layers and to ``dropout``.
        dropout: optional callable applied to the input of every dense layer;
            when None the inputs are passed through untouched.
        **dropout_kwargs: extra keyword arguments forwarded to ``dropout``.

    Returns:
        A dict with the variable lists ('qpi_vars', 'pzx_vars', 'weights'),
        the tensors 'cent', 'wd' and 'acc', and the collection contents
        'kl', 'pi' and 'n_active'.
    """
    def maybe_dropout(inputs, subname):
        # Identity when no dropout function was supplied.
        if dropout is None:
            return inputs
        return dropout(inputs, training, name=name+subname, reuse=reuse,
                **dropout_kwargs)

    hidden = dense(maybe_dropout(x, '/dropout1'), 500, activation=relu,
            name=name+'/dense1', reuse=reuse)
    hidden = dense(maybe_dropout(hidden, '/dropout2'), 300, activation=relu,
            name=name+'/dense2', reuse=reuse)
    logits = dense(maybe_dropout(hidden, '/dropout3'), 10,
            name=name+'/dense3', reuse=reuse)

    # Split the variables under this scope by the sub-scope in their name.
    scope_vars = tf.get_collection('variables', scope=name)
    qpi_vars = [v for v in scope_vars if 'qpi_vars' in v.name]
    pzx_vars = [v for v in scope_vars if 'pzx_vars' in v.name]
    weights = [v for v in scope_vars
            if not ('qpi_vars' in v.name or 'pzx_vars' in v.name)]

    net = {}
    net['qpi_vars'] = qpi_vars
    net['pzx_vars'] = pzx_vars
    net['weights'] = weights

    net['cent'] = cross_entropy(logits, y)
    net['wd'] = weight_decay(1e-4, var_list=weights)
    net['acc'] = accuracy(logits, y)

    if training:
        prefix = 'train_'
    else:
        prefix = 'test_'
    net['kl'] = tf.get_collection('kl')
    net['pi'] = tf.get_collection(prefix+'pi')
    net['n_active'] = tf.get_collection(prefix+'n_active')

    return net

## Define the Beta-Bernoulli Dropout

In [0]:
lgamma = tf.lgamma
Euler = 0.577215664901532

def bbdropout(x, training,
        alpha=1e-4, thres=1e-2, a_init=-1., tau=1e-1, center_init=1.0,
        approx_digamma=True, scale_kl=None, dep=False,
        unit_scale=True, collect=True,
        name='bbdropout', reuse=None):
    """Beta-Bernoulli dropout: multiply ``x`` by a learned per-feature mask.

    Two variables ``a_uc`` and ``b_uc`` of shape [K] (K = size of axis 1 of
    ``x``) are created under ``name + '/qpi_vars'`` and passed through
    softplus to obtain positive parameters ``a`` and ``b``.

    Args:
        x: input tensor, rank 2 or rank 4; axis 1 is the masked axis.
        training: Python bool. When True, ``pi`` is sampled and the mask is
            drawn from a ``RelaxedBernoulli``; when False, a closed-form
            ``pi`` is used and entries not greater than ``thres`` are zeroed.
        alpha: constant used in the KL term.
        thres: threshold on ``pi`` for counting (and, at test time, keeping)
            units.
        a_init: constant initial value of ``a_uc``.
        tau: temperature passed to ``RelaxedBernoulli``.
        center_init: accepted for interface compatibility; not used here.
        approx_digamma: when True use ``digamma_approx``; when False use
            ``tf.digamma``.
        scale_kl: when None the KL is summed over units; otherwise it is
            ``scale_kl`` times the mean over units.
        dep: accepted for interface compatibility; not used here.
        unit_scale: accepted for interface compatibility; not used here.
        collect: when True, add the KL, ``pi`` and the active-unit count to
            graph collections ('kl', '<prefix>pi', '<prefix>n_active').
        name: prefix of the variable scope.
        reuse: forwarded to ``tf.variable_scope``; the KL is only collected
            when this is not True.

    Returns:
        ``x`` multiplied by the mask.
    """

    N = tf.shape(x)[0]
    K = x.shape[1].value
    is_conv = len(x.shape)==4
    log = lambda x: tf.log(x + 1e-20)

    with tf.variable_scope(name+'/qpi_vars', reuse=reuse):
        with tf.device('/cpu:0'):
            a = softplus(tf.get_variable('a_uc', shape=[K],
                initializer=tf.constant_initializer(a_init)))
            b = softplus(tf.get_variable('b_uc', shape=[K]))

    # Honour the approx_digamma flag: the default keeps the approximation,
    # while False switches to TensorFlow's own digamma op.
    _digamma = digamma_approx if approx_digamma else tf.digamma
    kl = (a-alpha)/a * (-Euler - _digamma(b) - 1/b) \
            + log(a*b) - log(alpha) - (b-1)/b
    # Training: sample pi by transforming uniform noise with a and b.
    # Evaluation: closed-form value b * Gamma(1+1/a) * Gamma(b) / Gamma(1+1/a+b).
    pi = (1 - tf.random_uniform([K])**(1/b))**(1/a) if training else \
            b*tf.exp(lgamma(1+1/a) + lgamma(b) - lgamma(1+1/a+b))

    if training:
        z = RelaxedBernoulli(tau, logits=logit(pi)).sample(N)
    else:
        # Deterministic mask: keep pi where it exceeds thres, zero elsewhere,
        # and repeat it for every example in the batch.
        pi_ = tf.where(tf.greater(pi, thres), pi, tf.zeros_like(pi))
        z = tf.tile(tf.expand_dims(pi_, 0), [N, 1])
    n_active = tf.reduce_sum(tf.cast(tf.greater(pi, thres), tf.int32))

    if scale_kl is None:
        kl = tf.reduce_sum(kl)
    else:
        kl = scale_kl * tf.reduce_mean(kl)

    if collect:
        # The KL is registered only for the first (non-reusing) build, so a
        # second network sharing the variables does not add it again.
        if reuse is not True:
            tf.add_to_collection('kl', kl)
        prefix = 'train_' if training else 'test_'
        tf.add_to_collection(prefix+'pi', pi)
        tf.add_to_collection(prefix+'n_active', n_active)

    # Reshape so the mask broadcasts over the trailing axes of a rank-4 input.
    z = tf.reshape(z, ([-1, K, 1, 1] if is_conv else [-1, K]))
    return x*z


## Define the SBP Dropout

In [0]:
# copied from https://github.com/necludov/group-sparsity-sbp
from tensorflow.python.ops.distributions import special_math

def phi(x):
    """Standard normal CDF, written as 0.5 * erfc(-x / sqrt(2))."""
    scaled = -x/tf.sqrt(2.0)
    return 0.5*tf.erfc(scaled)

def __erfinv(x):
    """Piecewise-polynomial approximation of the inverse error function (per its name).

    With w = -log((1 - x)(1 + x) - 1e-5), two polynomials are evaluated by
    Horner's scheme: one in (w - 2.5), used where w < 5, and one in
    (sqrt(w) - 3), used where w >= 5. The selected polynomial is multiplied
    by x.

    NOTE(review): the coefficients look like a published single-precision
    erfinv approximation - confirm the source. Nothing else in the visible
    code calls this function; ``erfinv`` below relies on
    ``special_math.ndtri`` instead.
    """
    w = -tf.log((1.0-x)*(1.0+x)-1e-5)
    # Polynomial for the central region (w < 5), in powers of (w - 2.5).
    p_small = 2.81022636e-08*tf.ones_like(x)
    p_small = 3.43273939e-07 + p_small*(w-2.5)
    p_small = -3.5233877e-06 + p_small*(w-2.5)
    p_small = -4.39150654e-06 + p_small*(w-2.5)
    p_small = 0.00021858087 + p_small*(w-2.5)
    p_small = -0.00125372503 + p_small*(w-2.5)
    p_small = -0.00417768164 + p_small*(w-2.5)
    p_small = 0.246640727 + p_small*(w-2.5)
    p_small = 1.50140941 + p_small*(w-2.5)

    # Polynomial for the tail region (w >= 5), in powers of (sqrt(w) - 3).
    p_big = -0.000200214257*tf.ones_like(x)
    p_big = 0.000100950558 + p_big*(tf.sqrt(w) - 3.0)
    p_big = 0.00134934322 + p_big*(tf.sqrt(w) - 3.0)
    p_big = -0.00367342844 + p_big*(tf.sqrt(w) - 3.0)
    p_big = 0.00573950773 + p_big*(tf.sqrt(w) - 3.0)
    p_big = -0.0076224613 + p_big*(tf.sqrt(w) - 3.0)
    p_big = 0.00943887047 + p_big*(tf.sqrt(w) - 3.0)
    p_big = 1.00167406 + p_big*(tf.sqrt(w) - 3.0)
    p_big = 2.83297682 + p_big*(tf.sqrt(w) - 3.0)

    # The two 0/1 masks are complementary, so exactly one polynomial
    # contributes to each element.
    small_mask = tf.cast(tf.less(w, 5.0*tf.ones_like(w)), tf.float32)
    big_mask = tf.cast(tf.greater_equal(w, 5.0*tf.ones_like(w)), tf.float32)
    p = p_small*small_mask + p_big*big_mask
    return p*x

def erfinv(x):
    """Inverse error function computed as ndtri((x + 1) / 2) / sqrt(2)."""
    probability = (x+1.)/2.0
    return special_math.ndtri(probability)/tf.sqrt(2.)

def erfcx(x):
    """Scaled complementary error function (per its name), built from TF ops.

    Reference given by the original author:
    M. M. Shepherd and J. G. Laframboise,
    MATHEMATICS OF COMPUTATION 36, 249 (1981)

    A fixed coefficient series in y = (|x| - K)/(|x| + K) with K = 3.75 is
    evaluated through the two-term recurrence d, dd = 2*y*d - dd + c, d
    (the form of a Clenshaw evaluation of a Chebyshev series) and divided by
    (1 + 2|x|). For negative x the value 2*exp(x*x) - result is used instead.
    NaN and infinite values are replaced by one in both branches.
    """
    K = 3.75
    y = (tf.abs(x)-K) / (tf.abs(x)+K)
    y2 = 2.0*y
    # Recurrence over the coefficients, from the highest-order term down.
    (d, dd) = (-0.4e-20, 0.0)
    (d, dd) = (y2 * d - dd + 0.3e-20, d)
    (d, dd) = (y2 * d - dd + 0.97e-19, d)
    (d, dd) = (y2 * d - dd + 0.27e-19, d)
    (d, dd) = (y2 * d - dd + -0.2187e-17, d)
    (d, dd) = (y2 * d - dd + -0.2237e-17, d)
    (d, dd) = (y2 * d - dd + 0.50681e-16, d)
    (d, dd) = (y2 * d - dd + 0.74182e-16, d)
    (d, dd) = (y2 * d - dd + -0.1250795e-14, d)
    (d, dd) = (y2 * d - dd + -0.1864563e-14, d)
    (d, dd) = (y2 * d - dd + 0.33478119e-13, d)
    (d, dd) = (y2 * d - dd + 0.32525481e-13, d)
    (d, dd) = (y2 * d - dd + -0.965469675e-12, d)
    (d, dd) = (y2 * d - dd + 0.194558685e-12, d)
    (d, dd) = (y2 * d - dd + 0.28687950109e-10, d)
    (d, dd) = (y2 * d - dd + -0.63180883409e-10, d)
    (d, dd) = (y2 * d - dd + -0.775440020883e-09, d)
    (d, dd) = (y2 * d - dd + 0.4521959811218e-08, d)
    (d, dd) = (y2 * d - dd + 0.10764999465671e-07, d)
    (d, dd) = (y2 * d - dd + -0.218864010492344e-06, d)
    (d, dd) = (y2 * d - dd + 0.774038306619849e-06, d)
    (d, dd) = (y2 * d - dd + 0.4139027986073010e-05, d)
    (d, dd) = (y2 * d - dd + -0.69169733025012064e-04, d)
    (d, dd) = (y2 * d - dd + 0.490775836525808632e-03, d)
    (d, dd) = (y2 * d - dd + -0.2413163540417608191e-02, d)
    (d, dd) = (y2 * d - dd + 0.9074997670705265094e-02, d)
    (d, dd) = (y2 * d - dd + -0.26658668435305752277e-01, d)
    (d, dd) = (y2 * d - dd + 0.59209939998191890498e-01, d)
    (d, dd) = (y2 * d - dd + -0.84249133366517915584e-01, d)
    (d, dd) = (y2 * d - dd + -0.4590054580646477331e-02, d)
    # The final step uses y rather than 2*y.
    d = y * d - dd + 0.1177578934567401754080e+01
    result = d/(1.0+2.0*tf.abs(x))
    # Replace non-finite values by one.
    result = tf.where(tf.is_nan(result), tf.ones_like(result), result)
    result = tf.where(tf.is_inf(result), tf.ones_like(result), result)

    # The series was evaluated at |x|; negative inputs use
    # 2*exp(x*x) - result instead, with the same non-finite clean-up.
    negative_mask = tf.cast(tf.less(x, 0.0), tf.float32)
    positive_mask = tf.cast(tf.greater_equal(x, 0.0), tf.float32)
    negative_result = 2.0*tf.exp(x*x)-result
    negative_result = tf.where(tf.is_nan(negative_result), tf.ones_like(negative_result), negative_result)
    negative_result = tf.where(tf.is_inf(negative_result), tf.ones_like(negative_result), negative_result)
    result = negative_mask * negative_result + positive_mask * result
    return result

def phi_inv(x):
    """Inverse of ``phi``: sqrt(2) * erfinv(2x - 1)."""
    root_two = tf.sqrt(2.0)
    return root_two*erfinv(2.0*x-1)

def mean_truncated_log_normal_straight(mu, sigma, a, b):
    """Mean of the truncated log-normal, written directly with ``phi`` and ``exp``.

    ``a`` and ``b`` are the truncation bounds; they are standardised with
    ``mu`` and ``sigma`` before use.
    """
    lower = (a - mu)/sigma
    upper = (b - mu)/sigma
    norm = phi(upper) - phi(lower)
    scale = tf.exp(mu+sigma*sigma/2.0)
    return scale/norm*(phi(sigma-lower) - phi(sigma-upper))

def mean_truncated_log_normal_reduced(mu, sigma, a, b):
    """Mean of the truncated log-normal, expressed through ``erfcx``.

    ``a`` and ``b`` are the truncation bounds; they are standardised with
    ``mu`` and ``sigma`` before use.
    """
    lower = (a - mu)/sigma
    upper = (b - mu)/sigma
    norm = phi(upper) - phi(lower)
    upper_term = erfcx((sigma-upper)/tf.sqrt(2.0))*tf.exp(b-upper*upper/2)
    lower_term = erfcx((sigma-lower)/tf.sqrt(2.0))*tf.exp(a-lower*lower/2)
    difference = upper_term - lower_term
    return difference/(2*norm)

def mean_truncated_log_normal(mu, sigma, a, b):
    """Mean of the truncated log-normal; delegates to the ``_reduced`` variant."""
    mean = mean_truncated_log_normal_reduced(mu, sigma, a, b)
    return mean

def median_truncated_log_normal(mu, sigma, a, b):
    """Median of the truncated log-normal.

    Takes the CDF value halfway between the two standardised bounds, maps it
    back with ``phi_inv`` and exponentiates.
    """
    lower = (a - mu)/sigma
    upper = (b - mu)/sigma
    halfway = phi(lower)+0.5*(phi(upper)-phi(lower))
    log_median = phi_inv(halfway)*sigma+mu
    return tf.exp(log_median)

def snr_truncated_log_normal(mu, sigma, a, b):
    """Signal-to-noise ratio (per the function name) of the truncated log-normal.

    Both the numerator and the quantity under the square root are built from
    ``erfcx`` terms evaluated at the standardised bounds.
    """
    lower = (a - mu)/sigma
    upper = (b - mu)/sigma
    norm = phi(upper) - phi(lower)

    numer_upper = erfcx((sigma-upper)/tf.sqrt(2.0))*tf.exp((b-mu)-upper**2/2.0)
    numer_lower = erfcx((sigma-lower)/tf.sqrt(2.0))*tf.exp((a-mu)-lower**2/2.0)
    numerator = numer_upper - numer_lower

    denom_upper = 2*norm*erfcx((2.0*sigma-upper)/tf.sqrt(2.0))*tf.exp(2.0*(b-mu)-upper**2/2.0)
    denom_lower = 2*norm*erfcx((2.0*sigma-lower)/tf.sqrt(2.0))*tf.exp(2.0*(a-mu)-lower**2/2.0)
    under_root = denom_upper - denom_lower - numerator**2

    return numerator/tf.sqrt(under_root)

def sample_truncated_normal(mu, sigma, a, b):
    """Draw one sample per element of ``mu`` from a normal truncated to [a, b].

    Uses inverse-CDF sampling: a uniform draw is mapped into the CDF range
    between the standardised bounds, clipped to [1e-5, 1 - 1e-5], inverted
    with ``phi_inv`` and finally clipped to [a, b].
    """
    lower = (a - mu)/sigma
    upper = (b - mu)/sigma
    cdf_lower = phi(lower)
    uniform = tf.random_uniform(mu.shape)
    gamma = cdf_lower+uniform*(phi(upper)-phi(lower))
    gamma = tf.clip_by_value(gamma, 1e-5, 1.0-1e-5)
    sample = phi_inv(gamma)*sigma+mu
    return tf.clip_by_value(sample, a, b)

def sbpdropout(x, training,
        thres=1.0, scale_kl=None, collect=True,
        name='sbpdropout', reuse=None):
    """SBP dropout: multiply ``x`` by multiplicative noise along axis 1.

    Variables ``mu`` and ``log_sigma`` are created under
    ``name + '/qpi_vars'`` with size 1 on every axis except axis 1.

    Args:
        x: input tensor.
        training: Python bool. When True the noise is sampled; when False
            the mean is used and units whose SNR is not above ``thres`` are
            zeroed.
        thres: SNR threshold used to decide which units are active.
        scale_kl: when None the KL is summed; otherwise it is ``scale_kl``
            times the mean.
        collect: when True, add the KL, the SNR and the active-unit count to
            graph collections ('kl', '<prefix>pi', '<prefix>n_active').
        name: prefix of the variable scope.
        reuse: forwarded to ``tf.variable_scope``; the KL is only collected
            when this is not True.

    Returns:
        ``x`` multiplied by the noise (masked at evaluation time).
    """
    log_lower, log_upper = -20.0, 0.0
    feature_axis = 1

    # Parameter shape: ones everywhere except along the feature axis.
    var_shape = np.ones(x.get_shape().ndims)
    var_shape[feature_axis] = x.get_shape()[feature_axis].value

    with tf.variable_scope(name+'/qpi_vars', reuse=reuse), \
            tf.device('/cpu:0'):
        mu = tf.get_variable('mu', shape=var_shape.tolist(),
                initializer=tf.zeros_initializer())
        log_sigma = tf.get_variable('log_sigma', shape=var_shape.tolist(),
                initializer=tf.constant_initializer(-5.0))

    mu = tf.clip_by_value(mu, -20.0, 5.0)
    log_sigma = tf.clip_by_value(log_sigma, -20.0, 5.0)
    sigma = tf.exp(log_sigma)

    # KL term, built from the standardised truncation bounds.
    lower = (log_lower-mu)/sigma
    upper = (log_upper-mu)/sigma
    norm = phi(upper) - phi(lower)

    def normal_pdf(t):
        return tf.exp(-t*t/2.0)/tf.sqrt(2.0*np.pi)

    kl = -log_sigma-tf.log(norm)-(lower*normal_pdf(lower)-upper*normal_pdf(upper))/(2.0*norm)
    kl = kl+tf.log(log_upper-log_lower)-tf.log(2.0*np.pi*np.e)/2.0
    kl = tf.reduce_sum(kl) if scale_kl is None else scale_kl*tf.reduce_mean(kl)

    if training:
        noise = tf.exp(sample_truncated_normal(mu, sigma, log_lower, log_upper))
    else:
        noise = mean_truncated_log_normal(mu, sigma, log_lower, log_upper)
    snr = snr_truncated_log_normal(mu, sigma, log_lower, log_upper)
    mask = tf.cast(tf.greater(snr, thres*tf.ones_like(snr)), tf.float32)

    n_active = tf.reduce_sum(tf.cast(mask, tf.int32))

    if collect:
        # Register the KL only for the first (non-reusing) build.
        if reuse is not True:
            tf.add_to_collection('kl', kl)
        prefix = 'train_' if training else 'test_'
        tf.add_to_collection(prefix+'pi', snr)
        tf.add_to_collection(prefix+'n_active', n_active)

    # Units below the SNR threshold are dropped only at evaluation time.
    if training:
        return x*noise
    return x*(mask*noise)


## Let's run the code!

In [0]:
tf.reset_default_graph() # Discard any TensorFlow graph built earlier.

pretraindir = './results/pretrained'
savedir = './results/bbdropout/sample_run'
if not os.path.isdir(savedir):
    os.makedirs(savedir)

batch_size = 100
n_epochs = 60
save_freq = 20
mnist, n_train_batches, n_test_batches = mnist_input(batch_size)
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])
N = mnist.train.num_examples

# Resolve the dropout function from ``dropout_type``. An unrecognised value
# is rejected explicitly; otherwise ``dropout`` would stay unbound (or keep a
# stale value from an earlier run of this cell in the same kernel).
if dropout_type is None:
    dropout = None
    print('dropout None')
elif dropout_type == 'sbpdropout':
    dropout = sbpdropout
    print('sbpdropout')
elif dropout_type == 'bbdropout':
    dropout = bbdropout
    print('bbdropout')
else:
    raise ValueError(
            "dropout_type must be None, 'bbdropout' or 'sbpdropout', got %r"
            % (dropout_type,))

# Training and evaluation networks share their variables (reuse=True).
net = lenet_dense(x, y, True, dropout=dropout)
tnet = lenet_dense(x, y, False, reuse=True, dropout=dropout)

def train():
    """Fine-tune the pretrained network and log per-epoch train/test metrics.

    Reads the module-level graph and settings (``net``, ``tnet``, ``x``,
    ``y``, ``N``, ``mnist``, ``batch_size``, ``n_epochs``, ``save_freq``,
    ``n_train_batches``, ``n_test_batches``, ``dropout_type``,
    ``pretraindir``, ``savedir``). Weights are restored from
    ``pretraindir``; the log file and checkpoints are written to ``savedir``.
    """
    use_dropout = dropout_type is not None
    if use_dropout:
        loss = net['cent'] + tf.add_n(net['kl'])/float(N) + net['wd']
    else:
        loss = net['cent'] + net['wd']
    global_step = tf.train.get_or_create_global_step()
    bdr = [int(n_train_batches*(n_epochs-1)*r) for r in [0.5, 0.75]]
    vals = [1e-2, 1e-3, 1e-4]
    lr = tf.train.piecewise_constant(tf.cast(global_step, tf.int32), bdr, vals)

    if use_dropout:
        # Only the first optimizer increments global_step, so the step count
        # advances once per training iteration.
        train_op1 = tf.train.AdamOptimizer(lr).minimize(loss,
                var_list=net['qpi_vars'], global_step=global_step)
        train_op2 = tf.train.AdamOptimizer(0.1*lr).minimize(loss,
                var_list=net['weights'])
        train_op = tf.group(train_op1, train_op2)
    else:
        # global_step must be passed here as well; without it the step never
        # advances and the piecewise learning-rate schedule stays at vals[0].
        train_op = tf.train.AdamOptimizer(0.1*lr).minimize(loss,
                var_list=net['weights'], global_step=global_step)

    pretrain_saver = tf.train.Saver(net['weights'])
    saver = tf.train.Saver(net['weights']+net['qpi_vars'])

    # range() needs integers even if the batch counts arrive as floats.
    train_batches = int(n_train_batches)
    test_batches = int(n_test_batches)

    train_logger = Accumulator('cent', 'acc')
    train_to_run = [train_op, net['cent'], net['acc']]
    test_logger = Accumulator('cent', 'acc')
    test_to_run = [tnet['cent'], tnet['acc']]

    # Line buffering (1) flushes after every written line and, unlike
    # buffering=0, is accepted for text files on Python 3. The with-block
    # closes the log file and the session even if training raises.
    with open(os.path.join(savedir, 'train.log'), 'w', 1) as logfile, \
            tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        pretrain_saver.restore(sess, os.path.join(pretraindir, 'model'))

        for i in range(n_epochs):
            line = 'Epoch %d start, learning rate %f' % (i+1, sess.run(lr))
            logfile.write(line + '\n')
            train_logger.clear()
            start = time.time()
            for j in range(train_batches):
                bx, by = mnist.train.next_batch(batch_size)
                train_logger.accum(sess.run(train_to_run, {x:bx, y:by}))
            train_logger.print_(header='train', epoch=i+1,
                    time=time.time()-start, logfile=logfile)

            test_logger.clear()
            for j in range(test_batches):
                bx, by = mnist.test.next_batch(batch_size)
                test_logger.accum(sess.run(test_to_run, {x:bx, y:by}))
            test_logger.print_(header='test', epoch=i+1,
                    time=time.time()-start, logfile=logfile)

            if use_dropout:
                # ``line`` still holds the epoch header, which is only shown
                # on the console here; the log file therefore receives the
                # header a second time together with the n_active counts.
                line += '\nn_active: ' + str(sess.run(tnet['n_active'])) + '\n'
                print(line)
                logfile.write(line+'\n')

            if (i+1)% save_freq == 0:
                saver.save(sess, os.path.join(savedir, 'model'))

        saver.save(sess, os.path.join(savedir, 'model'))

In [0]:
# Run training; train() reads the graph and settings defined at module level above.
train()

## Test

In [0]:
def test():
    """Evaluate the checkpoint saved in ``savedir`` on the MNIST test split.

    Prints the averaged cross-entropy and accuracy. When a dropout type is
    selected, it also prints the number of active units per dropout layer
    against the layer sizes 784, 500 and 300.
    """
    saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars'])
    logger = Accumulator('cent', 'acc')
    to_run = [tnet['cent'], tnet['acc']]

    # The with-block closes the session when evaluation is done or fails.
    with tf.Session() as sess:
        saver.restore(sess, os.path.join(savedir, 'model'))
        # range() needs an integer even if the batch count arrives as a float.
        for j in range(int(n_test_batches)):
            bx, by = mnist.test.next_batch(batch_size)
            logger.accum(sess.run(to_run, {x:bx, y:by}))
        logger.print_(header='test')

        if dropout_type is not None:
            n_active = sess.run(tnet['n_active'])
            print("The percentage of activated neurons per layer:")
            for na, nl in zip(n_active, [784, 500, 300]):
                print('{}/{} = {:.2f}%'.format(na, nl, float(na)/nl * 100))

test()

## Visualization

In [0]:
def visualize():
    """Plot the values in ``tnet['pi']`` of every dropout layer as a heat-map row.

    Restores the checkpoint saved in ``savedir`` and draws one subplot per
    dropout layer. Nothing is drawn when the network has no dropout layers.
    """
    n_drop = len(tnet['n_active'])
    if n_drop == 0:
        # Without dropout the collections are empty and there is nothing to plot.
        print('No dropout layers to visualize.')
        return

    saver = tf.train.Saver(tnet['weights']+tnet['qpi_vars'])
    fig = figure('pi', figsize=(8,6))
    # squeeze=False always returns a 2-D array of Axes, so indexing also works
    # when there is a single dropout layer; column 0 holds one Axes per row.
    axarr = fig.subplots(n_drop, squeeze=False)[:, 0]

    # The with-block closes the session once all values have been fetched.
    with tf.Session() as sess:
        saver.restore(sess, os.path.join(savedir, 'model'))
        for i in range(n_drop):
            np_pi = sess.run(tnet['pi'][i]).reshape((1,-1))
            im = axarr[i].imshow(np_pi, cmap='Blues', aspect='auto')
            axarr[i].yaxis.set_visible(False)
            axarr[i].xaxis.set_major_locator(MaxNLocator(integer=True))
            if i == n_drop-1:
                axarr[i].set_xlabel('The Number of Neurons\nLeNet [784, 500, 300]')
            fig.colorbar(im, ax=axarr[i])
    show()
visualize()