In [1]:
%load_ext autoreload
%autoreload 2
import numpy as np
import scipy
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
import time, os, pickle, pathlib

import model
import trainutils

# --- run-level settings -------------------------------------------------
# Print training statistics once every `report_every` epochs.
report_every = 10
# Number of times the full scan over the beta values is repeated.
n_runs = 1
# Root output directory: '<current working directory>/saveddata3/'.
savedirbase = '%s/saveddata3/' % (pathlib.Path().absolute(),)

# Single Adam optimizer instance shared by every training call in this notebook;
# its repr() is also recorded in cfg['optimizer'].
optimizer = tf.train.AdamOptimizer(
    learning_rate=0.001,
    beta1=0.9,
    beta2=0.999,
)

# Default settings shared by all experiments. A per-experiment `cfg` may
# override any of these; keys it leaves out are filled in from here.
base_cfg = dict(
    # SGD batch size
    n_batch          = 128,
    # how the noise variance is trained: gradient descent ('gradient'),
    # scipy optimizer loop ('scipy'), or left fixed ('none')
    train_noisevar   = 'gradient',
    # batch size for training the noise variance when train_noisevar='scipy'
    n_noisevar_batch = 1000,
    # whether to set noisevar to its optimal value before training
    initial_fitvar   = False,
    # objective: I(Y;T)-beta*I(X;T) when False, I(Y;T)-beta*I(X;T)^2 when True
    squaredIB        = False,
    # 'softmax_ce' for classification, 'mse' for regression
    err_func         = 'softmax_ce',
)


# Experiment selection: leave exactly ONE `runtype` line uncommented.
# (Previously 'MNIST' was assigned and then immediately overwritten.)
#runtype = 'MNIST'
#runtype = 'NoisyClassifier'
#runtype = 'Regression'
runtype = 'NoisyClassifierWine'

# Every branch below defines the three names the later cells rely on:
#   data    - dataset dict (later cells read 'trn_X', 'tst_X', 'tst_Y', 'entropyY')
#   savedir - output sub-directory, relative to savedirbase
#   cfg     - per-experiment settings; keys not given here fall back to base_cfg
# Architectures are lists of (layer width, activation) pairs; an activation of
# None is presumably a linear layer (handled inside model.Net - confirm there).
if runtype == 'MNIST':
    data = trainutils.load_mnist()
    savedir = runtype + '/v1'
    cfg = {
        'n_epochs'    : 150,
        'squaredIB'   : True,
        'encoder_arch': [(512,tf.nn.relu),(512,tf.nn.relu),(2,None)], 
        'decoder_arch': [(512,tf.nn.relu),(10,None)],
    }

elif runtype == 'NoisyClassifierWine':
    data = trainutils.load_wine()
    savedir = runtype + '/v3d'
    cfg = {
        'n_epochs'    : 300,
        'encoder_arch': [(10,tf.nn.relu),(10,tf.nn.relu),(3,tf.nn.relu),], 
        # decoder output width follows the second axis of the training labels
        'decoder_arch': [(10,tf.nn.relu),(data['trn_Y'].shape[1],None)],
    }
    
elif runtype == 'NoisyClassifier':
    savedir = runtype + '/v1'
    data = trainutils.load_szt()
    cfg = {
        'n_epochs'      : 300,
        'encoder_arch'  : [(20,tf.nn.relu),(20,tf.nn.relu),(2,None)], 
        'decoder_arch'  : [(20,tf.nn.relu),(2,None)],
    }
    
elif runtype == 'Regression':
    savedir = runtype + '/v2' # v5sq-eta'
    # data generated by makeregressiondata.py
    # SECURITY NOTE: pickle.load can execute arbitrary code. Only load files
    # you generated yourself; never a pickle obtained from an untrusted source.
    with open('data/regression-100-10.pkl', 'rb') as f:
        data = pickle.load(f)
    # Entropy of a Gaussian with the empirical covariance of the training labels:
    #   H(Y) = 0.5 * log det(2*pi*e * Cov[Y])
    # slogdet returns the log-determinant directly, so it cannot overflow or
    # underflow the way log(det(...)) can.
    labelcov = np.cov(data['trn_Y'].T)
    sign, logdet = np.linalg.slogdet(2*np.pi*np.exp(1)*labelcov)
    if sign <= 0:
        # log(det) would silently have produced nan / -inf here.
        raise ValueError('label covariance is not positive definite; '
                         'cannot compute entropyY')
    data['entropyY'] = 0.5 * logdet
    
    cfg = {
        'n_epochs'         : 300,
        'encoder_arch'     : [(100,tf.nn.relu),(100,tf.nn.relu),(2,None)], 
        'decoder_arch'     : [(100,tf.nn.relu),(10,None)],
        'err_func'         : 'mse',
    }
    
else:
    # ValueError is a subclass of Exception, so existing handlers still match.
    raise ValueError('unknown runtype: %r' % (runtype,))
    
# Turn the experiment-relative directory into the full output path.
savedir = savedirbase + savedir

# Fill in every default that the selected experiment did not override.
for key, default in base_cfg.items():
    cfg.setdefault(key, default)

# Keep a textual description of the optimizer alongside the other settings.
cfg['optimizer'] = repr(optimizer)

# 15 beta values, logarithmically spaced from 1e-3 to 1e1 (both ends included).
betavals = np.power(10, np.linspace(-3, 1, num=15))


  from ._conv import register_converters as _register_converters


In [2]:
# Start from an empty default graph and open a session on it.
tf.reset_default_graph()
sess = tf.Session()

# Constructor arguments for the network, taken from the data and the merged cfg.
net_kwargs = dict(
    input_dims         = data['trn_X'].shape[1],   # size of the second axis of the training inputs
    encoder_arch       = cfg['encoder_arch'],
    decoder_arch       = cfg['decoder_arch'],
    err_func           = cfg['err_func'],
    entropyY           = data['entropyY'],
    # the noise variance is a trainable variable only in 'gradient' mode
    trainable_noisevar = (cfg['train_noisevar'] == 'gradient'),
    noisevar           = 0.01,                     # initial noise variance
)
n = model.Net(**net_kwargs)

# Created after the network is built; used below to save and restore the base model.
saver = tf.train.Saver()


In [3]:
# Train the base model (mode 'ce', beta = 0.0) and checkpoint it. The beta scan
# further down restores this checkpoint before every individual training run.
#
# Set to False to skip training; the scan then needs an existing checkpoint at
# savedir + '/_tf_basemodel' from an earlier run.
train_base_model = True

if train_base_model:
    print("Making base model")

    # The output directory is not created anywhere else in this notebook, and
    # tf.train.Saver.save fails when the parent directory of the checkpoint is
    # missing, so make sure it exists before anything is written.
    os.makedirs(savedir, exist_ok=True)

    sess.run(tf.global_variables_initializer())

    # report_every is passed by keyword, matching the call in the beta scan.
    trainutils.train(sess, 'ce', 0.0, cfg, data, n, optimizer,
                     report_every=report_every, fname=savedir+'/results-base')

    save_path = saver.save(sess, savedir+'/_tf_basemodel')
    print("Model saved in path: %s" % save_path)

Making base model

mode: ce epoch: 1 | beta: 0.0000 | noisevar: 0.01 | kw: 0.00671535
ce:  1.062/ 1.042 | acc:  0.612/ 0.624 | loss:  1.062/ 1.042 | 
Ixt:  6.672/ 6.678 | Ixt_lb:  5.579/ 5.585 | vIxt:  120.289/ 118.235 | Iyt: -0.404/-0.385 | 

mode: ce epoch: 11 | beta: 0.0000 | noisevar: 0.01 | kw: 0.00671535
ce:  0.640/ 0.637 | acc:  0.635/ 0.638 | loss:  0.640/ 0.637 | 
Ixt:  6.384/ 6.399 | Ixt_lb:  5.298/ 5.304 | vIxt:  82.212/ 83.189 | Iyt:  0.017/ 0.021 | 

mode: ce epoch: 21 | beta: 0.0000 | noisevar: 0.01 | kw: 0.00671535
ce:  0.624/ 0.614 | acc:  0.631/ 0.643 | loss:  0.624/ 0.614 | 
Ixt:  6.556/ 6.544 | Ixt_lb:  5.483/ 5.459 | vIxt:  99.263/ 98.757 | Iyt:  0.033/ 0.043 | 

mode: ce epoch: 31 | beta: 0.0000 | noisevar: 0.01 | kw: 0.00671535
ce:  0.586/ 0.588 | acc:  0.665/ 0.670 | loss:  0.586/ 0.588 | 
Ixt:  7.004/ 6.984 | Ixt_lb:  5.928/ 5.919 | vIxt:  109.430/ 112.946 | Iyt:  0.071/ 0.070 | 

mode: ce epoch: 41 | beta: 0.0000 | noisevar: 0.01 | kw: 0.00671535
ce:  0.568/ 0.

In [4]:
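# NOTE: stale scratch code, kept for reference only. It uses keyword names
# (trainable_sigma, log_sigma2, log_eta2, init_beta), a cfg key ('train_sigma')
# and a variable (basemodelpath) that do not appear in the cells above, so it
# would not run as-is in this notebook.
#sess  = tf.Session()
#n = model.Net(encoder_arch=[(512,'relu'),(512,'relu'),(2,'relu')], decoder_arch=[(512,'relu'),],
#              trainable_sigma=cfg['train_sigma'], log_sigma2=-2, log_eta2=-20, init_beta=0.0)
#loader = tf.train.import_meta_graph(basemodelpath+'.meta')
#saver.restore(sess, basemodelpath)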


In [None]:
# Beta scan: for every repetition, every beta value and every training mode,
# reload the base-model checkpoint and train from it, writing results to a
# file whose name encodes mode, beta, objective variant and run index.
for runndx in range(n_runs):
    for beta in betavals:
        # beta == 0 is the base model itself, so there is nothing to scan.
        if not np.isclose(beta, 0):
            for mode in ('VIB', 'nlIB'):
                # Every run starts from the same saved base model.
                base_checkpoint = savedir+'/_tf_basemodel'
                saver.restore(sess, base_checkpoint)

                # 'sq' = squared-IB objective, 'reg' = regular objective.
                if cfg['squaredIB']:
                    sqmode = 'sq'
                else:
                    sqmode = 'reg'

                run_tag = '/results-%s-%0.5f-%s-run%d' % (mode, beta, sqmode, runndx)
                fname = savedir + run_tag
                print("Doing %s, beta=%0.4f, %s %s" % (mode, beta, sqmode, fname))

                trainutils.train(
                    sess, mode, beta, cfg, data, n, optimizer,
                    report_every=report_every,
                    fname=fname,
                    fit_var=cfg['initial_fitvar'],
                )

                # two blank lines between consecutive runs in the log
                print()
                print()

Doing VIB, beta=0.0010, reg /home/artemy/nonlinearIB/code/saveddata3/NoisyClassifierWine/v3d/results-VIB-0.00100-reg-run0

mode: VIB epoch: 1 | beta: 0.0010 | noisevar: 0.01 | kw: 0.0334898
ce:  0.498/ 0.501 | acc:  0.752/ 0.751 | loss: -0.096/-0.092 | 
Ixt:  7.402/ 7.412 | Ixt_lb:  6.169/ 6.174 | vIxt:  63.251/ 64.285 | Iyt:  0.159/ 0.156 | 

mode: VIB epoch: 11 | beta: 0.0010 | noisevar: 0.0116545 | kw: 0.0383542
ce:  0.495/ 0.503 | acc:  0.752/ 0.751 | loss: -0.136/-0.128 | 
Ixt:  6.118/ 6.084 | Ixt_lb:  4.895/ 4.857 | vIxt:  26.616/ 26.425 | Iyt:  0.163/ 0.154 | 

mode: VIB epoch: 21 | beta: 0.0010 | noisevar: 0.0132769 | kw: 0.0128382
ce:  0.502/ 0.498 | acc:  0.757/ 0.750 | loss: -0.133/-0.137 | 
Ixt:  5.251/ 5.278 | Ixt_lb:  4.066/ 4.083 | vIxt:  22.285/ 22.347 | Iyt:  0.155/ 0.160 | 

mode: VIB epoch: 31 | beta: 0.0010 | noisevar: 0.0149723 | kw: 0.0128281
ce:  0.507/ 0.501 | acc:  0.742/ 0.750 | loss: -0.131/-0.137 | 
Ixt:  5.223/ 5.212 | Ixt_lb:  3.991/ 3.975 | vIxt:  18.993/


mode: nlIB epoch: 41 | beta: 0.0010 | noisevar: 0.0109969 | kw: 0.0249321
ce:  0.504/ 0.500 | acc:  0.742/ 0.749 | loss: -0.146/-0.150 | 
Ixt:  7.007/ 7.003 | Ixt_lb:  5.777/ 5.778 | vIxt:  60.082/ 60.394 | Iyt:  0.153/ 0.157 | 

mode: nlIB epoch: 51 | beta: 0.0010 | noisevar: 0.0113591 | kw: 0.0593621
ce:  0.497/ 0.495 | acc:  0.743/ 0.748 | loss: -0.153/-0.155 | 
Ixt:  7.428/ 7.421 | Ixt_lb:  6.165/ 6.160 | vIxt:  61.107/ 60.649 | Iyt:  0.160/ 0.163 | 

mode: nlIB epoch: 61 | beta: 0.0010 | noisevar: 0.0112609 | kw: 0.0463862
ce:  0.499/ 0.499 | acc:  0.754/ 0.752 | loss: -0.151/-0.151 | 
Ixt:  7.292/ 7.279 | Ixt_lb:  6.035/ 6.026 | vIxt:  59.276/ 59.255 | Iyt:  0.159/ 0.158 | 

mode: nlIB epoch: 71 | beta: 0.0010 | noisevar: 0.0111939 | kw: 0.0361239
ce:  0.498/ 0.503 | acc:  0.751/ 0.747 | loss: -0.152/-0.147 | 
Ixt:  7.217/ 7.232 | Ixt_lb:  5.967/ 5.974 | vIxt:  59.174/ 58.979 | Iyt:  0.160/ 0.155 | 

mode: nlIB epoch: 81 | beta: 0.0010 | noisevar: 0.0111239 | kw: 0.0254597
ce:  


mode: VIB epoch: 91 | beta: 0.0019 | noisevar: 0.0385861 | kw: 0.0114595
ce:  0.496/ 0.499 | acc:  0.755/ 0.751 | loss: -0.139/-0.137 | 
Ixt:  3.692/ 3.709 | Ixt_lb:  2.541/ 2.553 | vIxt:  11.245/ 11.246 | Iyt:  0.161/ 0.159 | 

mode: VIB epoch: 101 | beta: 0.0019 | noisevar: 0.0404142 | kw: 0.00800059
ce:  0.493/ 0.500 | acc:  0.752/ 0.747 | loss: -0.144/-0.136 | 
Ixt:  3.610/ 3.589 | Ixt_lb:  2.461/ 2.441 | vIxt:  10.753/ 10.647 | Iyt:  0.165/ 0.157 | 

mode: VIB epoch: 111 | beta: 0.0019 | noisevar: 0.042691 | kw: 0.0115074
ce:  0.493/ 0.508 | acc:  0.747/ 0.740 | loss: -0.144/-0.129 | 
Ixt:  3.556/ 3.542 | Ixt_lb:  2.415/ 2.405 | vIxt:  10.613/ 10.577 | Iyt:  0.165/ 0.149 | 

mode: VIB epoch: 121 | beta: 0.0019 | noisevar: 0.0448959 | kw: 0.0103443
ce:  0.498/ 0.511 | acc:  0.752/ 0.748 | loss: -0.139/-0.126 | 
Ixt:  3.431/ 3.430 | Ixt_lb:  2.311/ 2.309 | vIxt:  10.493/ 10.491 | Iyt:  0.160/ 0.146 | 

mode: VIB epoch: 131 | beta: 0.0019 | noisevar: 0.0451285 | kw: 0.00638339
ce:  

In [None]:
# Code to plot activations
%matplotlib inline 
import matplotlib.pyplot as plt
import seaborn as sns
if False:
    # Disabled by default; change the condition to True to draw the plot.
    # Each test point is drawn as a faint circle centred on the first two
    # components of n.encoder[-1], with radius sqrt(noise variance).
    fig, ax = plt.subplots(figsize=(10,10))
    x, y = data['tst_X'], data['tst_Y']
    mx = sess.run(n.encoder[-1], feed_dict={n.x: x})
    var = sess.run(n.noisevar)
    radius = np.sqrt(var)   # same radius for every circle
    for center in mx:
        circle = plt.Circle((center[0], center[1]), radius=radius,
                            fc='none', alpha=0.05, ec='k')
        ax.add_patch(circle)
    plt.axis('scaled')