In [None]:
%load_ext autoreload
%autoreload 2
import numpy as np
import scipy
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
import time, os, pickle, pathlib

import model
import trainutils

# Run-level settings.
report_every = 10   # print training statistics every `report_every` steps
n_runs = 1          # number of times the full scan over the beta values is repeated
# All results are written below <current working directory>/saveddata3/
savedirbase = '%s/saveddata3/' % pathlib.Path().absolute()

# Single Adam optimizer instance; the same object is handed to every
# trainutils.train call (base model and each run of the beta scan).
optimizer = tf.train.AdamOptimizer(
    learning_rate=0.001,
    beta1=0.9,
    beta2=0.999,
)

# Default options shared by all run types; a run-specific cfg overrides any of these keys.
base_cfg = dict(
    # SGD batch size
    n_batch=128,
    # how the noise variance is trained: gradient descent ('gradient'),
    # scipy optimizer loop ('scipy'), or left fixed ('none')
    train_noisevar='gradient',
    # batch size for training noise variance when train_noisevar='scipy'
    n_noisevar_batch=1000,
    # whether to set noisevar to its optimal value before training
    initial_fitvar=False,
    # optimize I(Y;T)-beta*I(X;T) (False) or I(Y;T)-beta*I(X;T)^2 (True)
    squaredIB=False,
    # 'softmax_ce' for classification, 'mse' for regression
    err_func='softmax_ce',
)


# Experiment selector: leave exactly ONE assignment active.
# The value must match one of the branches of the if/elif chain below
# ('MNIST', 'NoisyClassifier', 'Regression', 'NoisyClassifierWine').
#runtype = 'MNIST'
#runtype = 'NoisyClassifier'
#runtype = 'Regression'
runtype = 'NoisyClassifierWine'

# Per-experiment setup. Every branch defines three names used by the later cells:
#   data    - dataset dict (the visible code reads 'trn_X', 'trn_Y', 'tst_X',
#             'tst_Y' and 'entropyY' from it)
#   savedir - results sub-directory, relative to savedirbase
#   cfg     - run-specific options; keys missing here are filled from base_cfg
# Architectures are lists of (layer width, activation) pairs.
# NOTE(review): an activation of None presumably means a linear layer - confirm in model.Net.
if runtype == 'MNIST':
    data = trainutils.load_mnist()
    savedir = runtype + '/v1'
    cfg = {
        'n_epochs'    : 150,
        'squaredIB'   : True,
        'encoder_arch': [(512,tf.nn.relu),(512,tf.nn.relu),(2,None)], 
        'decoder_arch': [(512,tf.nn.relu),(10,None)],
    }

elif runtype == 'NoisyClassifierWine':
    data = trainutils.load_wine()
    savedir = runtype + '/v10dv5'
    cfg = {
        'n_epochs'    : 500,
        'encoder_arch': [(10,tf.nn.relu),(10,tf.nn.relu),(10,tf.nn.relu),], 
        #'encoder_arch': [(10,tf.nn.relu),(10,tf.nn.relu),(3,tf.nn.relu),], 
        # decoder output width = number of columns of the training labels
        'decoder_arch': [(10,tf.nn.relu),(data['trn_Y'].shape[1],None)],
    }
    
elif runtype == 'NoisyClassifier':
    savedir = runtype + '/v1'
    data = trainutils.load_szt()
    cfg = {
        'n_epochs'      : 300,
        'encoder_arch'  : [(20,tf.nn.relu),(20,tf.nn.relu),(2,None)], 
        'decoder_arch'  : [(20,tf.nn.relu),(2,None)],
    }
    
elif runtype == 'Regression':
    #savedir = runtype + '/v2' # v5sq-eta'
    savedir = runtype + '/v10d' # v5sq-eta'
    # data generated by makeregressiondata.py
    # NOTE(review): pickle.load can execute arbitrary code - only load files you generated yourself.
    with open('data/regression-100-10.pkl', 'rb') as f:
        data = pickle.load(f)
    # Label entropy estimate: 0.5 * log det(2*pi*e * Cov[Y]), i.e. the differential
    # entropy (in nats) of a Gaussian with the covariance of the training labels.
    labelcov = np.cov(data['trn_Y'].T)
    # slogdet works on the log scale, so it cannot overflow/underflow the way
    # log(det(.)) can for larger or badly scaled covariance matrices.
    cov_sign, cov_logdet = np.linalg.slogdet(2*np.pi*np.exp(1)*labelcov)
    # Match log(det(.)) for degenerate inputs: -inf for a singular matrix
    # (sign 0), nan for a negative determinant.
    data['entropyY'] = (0.5 * cov_logdet) if cov_sign >= 0 else np.nan
    
    cfg = {
        'n_epochs'         : 500,
        #'encoder_arch'     : [(100,tf.nn.relu),(100,tf.nn.relu),(2,None)], 
        'encoder_arch'     : [(100,tf.nn.relu),(100,tf.nn.relu),(10,None)], 
        'decoder_arch'     : [(100,tf.nn.relu),(10,None)],
        'err_func'         : 'mse',
    }
    
else:
    # Fail fast on a typo in the selector rather than running with stale settings.
    raise Exception('unknown runtype')
    
# Turn the run-specific sub-directory into a full path under savedirbase.
savedir = savedirbase + savedir

# Fill in every option the selected run type did not override with its default.
for option, default in base_cfg.items():
    cfg.setdefault(option, default)

# Keep a printable description of the optimizer together with the other settings.
cfg['optimizer'] = repr(optimizer)

# beta values visited by the scan. Leave exactly ONE assignment active;
# the others are alternative grids kept for reference.
#betavals = 10**np.linspace(-5, 0.1, 30, endpoint=True)
#betavals = 10**np.linspace(0.1, 1, 10, endpoint=True)
betavals = np.linspace(0.1, 0.4, 10, endpoint=True)   # 10 evenly spaced values in [0.1, 0.4]

In [None]:
# Start from an empty default graph and open a new session on it.
tf.reset_default_graph()
sess = tf.Session()

# Network construction arguments, taken from the dataset and the merged config.
net_args = dict(
    input_dims         = data['trn_X'].shape[1],              # number of input columns
    encoder_arch       = cfg['encoder_arch'],
    decoder_arch       = cfg['decoder_arch'],
    err_func           = cfg['err_func'],
    entropyY           = data['entropyY'],
    trainable_noisevar = (cfg['train_noisevar'] == 'gradient'),  # True only for gradient-trained noise variance
    noisevar           = 0.01,
)
n = model.Net(**net_args)

# Created after the network is built; used to checkpoint and restore the base model.
saver = tf.train.Saver()


In [None]:
# Train the base model and checkpoint it; the beta scan restores this checkpoint
# before every run. Flip the guard to False to reuse an existing checkpoint.
if True:
    print("Making base model")
    #cfg2['n_epochs'] = 10
    # Saver.save needs the checkpoint's parent directory to exist, and nothing
    # visible in this notebook creates it - make sure it is there up front.
    os.makedirs(savedir, exist_ok=True)
    sess.run(tf.global_variables_initializer())

    # Same call as in the scan, with mode 'ce' and beta 0.0.
    # NOTE(review): 'ce' presumably means plain cross-entropy training - confirm in trainutils.train.
    trainutils.train(sess, 'ce', 0.0, cfg, data, n, optimizer, report_every, fname=savedir+'/results-base')

    save_path = saver.save(sess, savedir+'/_tf_basemodel')
    print("Model saved in path: %s" % save_path)

In [None]:
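# NOTE: unused legacy snippet, kept for reference only. It refers to names that
# are not defined in the visible cells (basemodelpath, cfg['train_sigma']) and
# passes model.Net keyword arguments that differ from the live call above.
#sess  = tf.Session()
#n = model.Net(encoder_arch=[(512,'relu'),(512,'relu'),(2,'relu')], decoder_arch=[(512,'relu'),],
#              trainable_sigma=cfg['train_sigma'], log_sigma2=-2, log_eta2=-20, init_beta=0.0)
#loader = tf.train.import_meta_graph(basemodelpath+'.meta')
#saver.restore(sess, basemodelpath)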


In [None]:
# Scan: for every repetition and every non-zero beta, restore the base-model
# checkpoint and train once per mode, writing each result to its own file.
basemodel_ckpt = savedir + '/_tf_basemodel'
for runndx in range(n_runs):
    for beta in betavals:
        if not np.isclose(beta, 0):   # beta values (numerically) equal to 0 are skipped
            for mode in ('VIB', 'nlIB'):
                # Every run starts again from the saved base model.
                saver.restore(sess, basemodel_ckpt)

                # File-name tag for the objective variant (squared vs. regular IB).
                if cfg['squaredIB']:
                    sqmode = 'sq'
                else:
                    sqmode = 'reg'
                results_tag = '/results-%s-%0.5f-%s-run%d' % (mode, beta, sqmode, runndx)
                fname = savedir + results_tag

                print("Doing %s, beta=%0.4f, %s %s" % (mode, beta, sqmode, fname))
                trainutils.train(
                    sess, mode, beta, cfg, data, n, optimizer,
                    report_every=report_every,
                    fname=fname,
                    fit_var=cfg['initial_fitvar'],
                )

                # Two blank lines between runs in the output.
                print()
                print()

In [None]:
# Code to plot activations
%matplotlib inline 
import matplotlib.pyplot as plt
import seaborn as sns
if False:
    # Disabled by default; change the guard to True to draw the plot.
    # One circle is drawn per test point, centred on the first two coordinates
    # of the last encoder layer's output, with radius sqrt(noise variance).
    plt.figure(figsize=(10, 10))
    x, y = data['tst_X'], data['tst_Y']
    mx = sess.run(n.encoder[-1], feed_dict={n.x: x})
    var = sess.run(n.noisevar)
    noise_radius = np.sqrt(var)   # identical for every point
    ax = plt.axes()
    for r in mx:
        ax.add_patch(
            plt.Circle((r[0], r[1]), radius=noise_radius, fc='none', alpha=0.05, ec='k')
        )
    plt.axis('scaled');