In [None]:
%load_ext autoreload
%autoreload 2
import numpy as np
import scipy
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
import time, os, pickle, pathlib

import model
import trainutils
import loaddata

# ---- Run-level settings ----------------------------------------------------
report_every = 10  # reporting interval for the stats printed during training
n_runs       = 1   # number of repetitions of the complete sweep over beta
savedirbase  = str(pathlib.Path.cwd()) + '/saveddata4/'  # output root under the current working directory


# Unused alternative: exponentially decaying learning rate.
# global_step = tf.Variable(0, trainable=False)
# starter_learning_rate = 0.01
# learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
#                                           100000, 0.96, staircase=True)

# Default optimizer; the dataset-specific section further down may rebind it.
optimizer = tf.train.AdamOptimizer(
    learning_rate=0.001,
    beta1=0.9,
    beta2=0.999,
)

# Dataset selector -- keep exactly one of these lines active.
# runtype = 'MNIST'
runtype = 'NoisyClassifier'
# runtype = 'Regression'
# runtype = 'NoisyClassifierWine'


# Settings shared by every dataset; dataset-specific entries are merged in later.
cfg = dict(
    runtype          = runtype,       # name of the dataset being run
    n_batch          = 128,           # mini-batch size for SGD
    train_noisevar   = 'gradient',    # how the noise variance is trained: by gradient descent ('gradient'),
                                      #  by a scipy optimizer loop ('scipy'), or kept fixed ('none')
    n_noisevar_batch = 1000,          # batch size for the noise-variance fit when train_noisevar='scipy'
    initial_fitvar   = False,         # set noisevar to its optimal value before training starts?
    squaredIB        = False,         # objective: I(Y;T)-beta*I(X;T) (False) or I(Y;T)-beta*I(X;T)^2 (True)
    err_func         = 'softmax_ce',  # 'softmax_ce' for classification, 'mse' for regression
    train_kdewidth   = True,          # adapt the kernel width during training?
)
# Quick overrides (uncomment to use):
#cfg['train_noisevar'] = 'scipy'
#cfg['train_kdewidth'] = False


# ---- Dataset-specific configuration ------------------------------------------
# Each branch picks the save sub-directory and merges its own settings into cfg.
# `betavals` stays None unless a branch defines its own sweep; in that case the
# default sweep at the bottom of this cell is not used.
# NOTE(review): the *_arch entries look like (layer size, activation) pairs; how
# they are interpreted is up to model.Net -- confirm there.
betavals = None
data     = loaddata.load_data(runtype)

if runtype == 'MNIST':
    savedir = runtype + '/v1'
    cfg.update({
        'n_epochs'    : 150,
        'squaredIB'   : True,
        'encoder_arch': [(512,tf.nn.relu),(512,tf.nn.relu),(2,None)],
        'decoder_arch': [(512,tf.nn.relu),(10,None)],
    })

elif runtype == 'NoisyClassifierWine':
    savedir = runtype + '/v1'
    cfg.update({
        'n_epochs'    : 500,
        'encoder_arch': [(10,tf.nn.relu),(10,tf.nn.relu),(10,tf.nn.relu),],
        #'encoder_arch': [(10,tf.nn.relu),(10,tf.nn.relu),(3,tf.nn.relu),],
        # final decoder layer is sized from the second dimension of the training targets
        'decoder_arch': [(10,tf.nn.relu),(data['trn_Y'].shape[1],None)],
        #'squaredIB'   : True,
    })
    betavals = 10**np.linspace(-3, 0, 30, endpoint=True)

elif runtype == 'NoisyClassifier':
    savedir = runtype + '/v1'
    cfg.update({
        'n_epochs'      : 300,
        'encoder_arch'  : [(20,tf.nn.relu),(20,tf.nn.relu),(2,None)],
        'decoder_arch'  : [(20,tf.nn.relu),(2,None)],
    })

elif runtype == 'Regression':
    # This dataset replaces the default optimizer with a lower learning rate.
    optimizer = tf.train.AdamOptimizer(learning_rate=0.0003, beta1=0.9, beta2=0.999)
    savedir = runtype + '/v2' # v5sq-eta'
    cfg.update({
        'n_epochs'         : 1500,
        #'encoder_arch'     : [(100,tf.nn.relu),(100,tf.nn.relu),(2,None)],
        'encoder_arch'     : [(100,tf.nn.relu),(100,tf.nn.relu),(10,None)],
        'decoder_arch'     : [(100,tf.nn.relu),(10,None)],
        'err_func'         : 'mse',
    })
    betavals = 10**np.linspace(-4, 1, 30, endpoint=True)
    #betavals = 10**np.linspace(0, 1, 10, endpoint=True)

else:
    # ValueError is still an Exception subclass; the message now names the bad value.
    raise ValueError('unknown runtype: %r' % (runtype,))

# Turn the relative sub-directory into the full output path.
savedir = savedirbase + savedir
# Only the repr string of the optimizer object is recorded in cfg.
cfg['optimizer'] = repr(optimizer)

# Default sweep: 30 log-spaced beta values from 1e-5 to 10**0.1.
if betavals is None:
    betavals = 10**np.linspace(-5, 0.1, 30, endpoint=True)
#betavals = 10**np.linspace(0.1, 1, 10, endpoint=True)
#betavals = np.linspace(0.1, 0.4, 10, endpoint=True)

In [None]:

def get_net(noisevar=0.01, kdewidth=-20.):
    """Construct a ``model.Net`` from the notebook-level ``cfg`` and ``data``.

    The input width is taken from the second dimension of ``data['trn_X']``;
    architectures and error function come from ``cfg``. The noise variance is
    marked trainable only when ``cfg['train_noisevar'] == 'gradient'``.

    Args:
        noisevar: value forwarded to ``model.Net`` as ``noisevar``
            (default 0.01, the value previously hard-coded here).
        kdewidth: value forwarded to ``model.Net`` as ``kdewidth``
            (default -20., the value previously hard-coded here).
            NOTE(review): the scale of this number is defined by model.Net --
            confirm there before changing it.

    Returns:
        Whatever ``model.Net(...)`` returns.
    """
    return model.Net(
        input_dims         = data['trn_X'].shape[1],
        encoder_arch       = cfg['encoder_arch'],
        decoder_arch       = cfg['decoder_arch'],
        err_func           = cfg['err_func'],
        entropyY           = data['entropyY'],
        trainable_noisevar = cfg['train_noisevar'] == 'gradient',
        noisevar           = noisevar,
        kdewidth           = kdewidth,
    )


In [None]:
# Train the shared base model once (mode 'ce', beta = 0.0). Its checkpoints go to
# <savedir>/basemodel, which is where the beta sweep restores its weights from.
tf.reset_default_graph()
basemodel_dir = savedir + '/basemodel'
with tf.Session() as sess:
    print("Making base model")
    n = get_net()
    saver = tf.train.Saver(max_to_keep=30) # keep at most the 30 most recent checkpoints

    sess.run(tf.global_variables_initializer())

    trainutils.train(sess, saver, 'ce', 0.0, cfg, data, n, optimizer, report_every, savedir=basemodel_dir)
    # Report the directory that was actually passed to train(), not its parent.
    print("Model saved in path: %s" % basemodel_dir)
    del n, saver

In [None]:
# Sweep over beta: for every (run, beta, mode) start from the base-model
# checkpoint and train with the requested objective, saving into its own
# results directory.

# The base checkpoint does not change during the sweep, so look it up once and
# fail early with a clear message (latest_checkpoint returns None if none exists).
basemodel_dir   = savedir + '/basemodel'
base_checkpoint = tf.train.latest_checkpoint(basemodel_dir)
if base_checkpoint is None:
    raise ValueError("No base-model checkpoint found in %s; train the base model first" % basemodel_dir)

sqmode = 'sq' if cfg['squaredIB'] else 'reg'

# NOTE(review): the directory name formats beta with 5 decimals, so distinct small
# betas can map to the same directory -- e.g. the first two values of the default
# sweep 10**np.linspace(-5, 0.1, 30) (1e-5 and ~1.5e-5) both become '0.00001'.
# The naming is kept because other code may parse it; reuse is reported below.
used_result_dirs = set()

for runndx in range(n_runs):
    for beta in betavals:
        if np.isclose(beta,0):
            continue
        for mode in ['VIB','nlIB',]:
            result_dir = savedir + '/results-%s-%0.5f-%s-run%d' % (mode, beta, sqmode, runndx)
            if result_dir in used_result_dirs:
                print("WARNING: results directory %s is reused for beta=%g" % (result_dir, beta))
            used_result_dirs.add(result_dir)

            tf.reset_default_graph()
            with tf.Session() as sess:
                n = get_net()
                saver = tf.train.Saver(max_to_keep=30) # keep at most the 30 most recent checkpoints
                saver.restore(sess, base_checkpoint)
                # %g keeps small betas readable (%0.4f printed them as 0.0000)
                print("Doing %s, beta=%g, %s" % (mode, beta, sqmode))
                trainutils.train(sess, saver, mode, beta, cfg, data, n, optimizer, report_every=report_every,
                                 savedir=result_dir,
                                 fit_var=cfg['initial_fitvar'])
                del saver

                print()
                print()

In [None]:
# Code to plot activations
%matplotlib inline 
import matplotlib.pyplot as plt
import seaborn as sns
if False:
    # Disabled by default: flip the guard to draw the last encoder layer's output
    # on the test inputs, one circle of radius sqrt(noisevar) per sample.
    # NOTE(review): `sess` and `n` are leftovers from earlier cells, and the
    # `with tf.Session()` blocks there close the session on exit -- a live session
    # with restored weights is probably needed before enabling this; confirm.
    plt.figure(figsize=(10,10))
    x, y = data['tst_X'], data['tst_Y']
    mx  = sess.run(n.encoder[-1], feed_dict={n.x: x})
    var = sess.run(n.noisevar)
    radius = np.sqrt(var)  # identical for every sample, so computed once
    ax = plt.axes()
    for center in mx:
        # only the first two coordinates of each activation row are plotted
        ax.add_patch(plt.Circle((center[0], center[1]), radius=radius,
                                fc='none', alpha=0.05, ec='k'))
    plt.axis('scaled');