In [None]:
from __future__ import print_function

import cPickle
import os
from collections import OrderedDict

import keras
import keras.backend as K
import numpy as np
import scipy.optimize
import scipy.misc

import utils
#SGD_BATCHSIZE = 128
SGD_BATCHSIZE = 128
SGD_LEARNINGRATE = 0.001

if True:
    #LAYER_DIMS = [1024, 20, 20, 20]
    LAYER_DIMS = [32, 28, 24, 20, 16, 12, 8, 8]
    #LAYER_DIMS = [100, 50, 20]
    #LAYER_DIMS = [50, 20, 20, 20] # 0.967 w. 128
    #LAYER_DIMS = [20, 20, 20, 20] # 0.967 w. 128
    #LAYER_DIMS = [128, 64, 32, 16, 16] # 0.967 w. 128
    #LAYER_DIMS = [20, 20, 20, 20, 20, 20] # 0.967 w. 128
    ARCH_NAME =  '-'.join(map(str,LAYER_DIMS))
    trn, tst = utils.get_mnist()
else:
    ARCH_NAME = 'conv1'
    trn, tst = utils.get_mnist2d()

#ACTIVATION = 'relu'
ACTIVATION = 'tanh'

#SGD_BATCHSIZE = 512
NUM_EPOCHS     = 10000
NUMBER_OF_BINS = 5



BASE_DIR = 'rawdata/' + ACTIVATION + '_' + ARCH_NAME


import os
if not os.path.exists(BASE_DIR):
    os.makedirs(BASE_DIR)

In [None]:
class Reporter(keras.callbacks.Callback):
    """Keras callback that pickles per-layer weight and gradient statistics.

    On every epoch selected by ``whentodo`` the callback evaluates the
    gradients of the total loss with respect to each layer kernel on
    mini-batches of the module-level ``trn`` data. At the end of that epoch it
    writes kernel norms, gradient statistics, test-set layer outputs and the
    train/test loss to ``BASE_DIR + "/epoch%08d" % epoch``.

    The gradient mini-batches come from the callback's own shuffle of
    ``trn``; they are not the batches Keras is training on.

    Relies on the module-level names ``trn``, ``tst``, ``SGD_BATCHSIZE``,
    ``BASE_DIR`` and ``ACTIVATION``.

    Parameters
    ----------
    whentodo : callable or None
        ``whentodo(epoch)`` returns a truthy value for epochs that should be
        reported. ``None`` (the default) reports every epoch.
    """

    def __init__(self, whentodo=None, *kargs, **kwargs):
        super(Reporter, self).__init__(*kargs, **kwargs)
        if whentodo is None:
            whentodo = lambda epoch: True
        self.whentodo = whentodo

    def on_train_begin(self, logs=None):
        """Collect the kernel-bearing layers and build the backend functions."""
        self.layerfuncs = []    # one K.function per kernel layer: inputs -> layer output
        self.layerweights = []  # kernel tensors, same order as layerixs
        self.layerixs = []      # indices into self.model.layers
        for lndx, l in enumerate(self.model.layers):
            if hasattr(l, 'kernel'):
                self.layerixs.append(lndx)
                self.layerfuncs.append(K.function(self.model.inputs, [l.output,]))
                self.layerweights.append(l.kernel)

        # Use the model Keras attached to this callback rather than a global
        # named `model`, so the callback works with whichever model it is given.
        input_tensors = [self.model.inputs[0],
                         self.model.sample_weights[0],
                         self.model.targets[0],
                         K.learning_phase(),
        ]
        self.get_gradients = K.function(
            inputs=input_tensors,
            outputs=self.model.optimizer.get_gradients(self.model.total_loss,
                                                       self.layerweights))
        # `outputs` is passed as a list so the function is accepted by backends
        # that require a list/tuple; on_epoch_end unwraps the result.
        self.get_loss = K.function(inputs=input_tensors,
                                   outputs=[self.model.total_loss])

    def on_epoch_begin(self, epoch, logs=None):
        """Decide whether this epoch is reported and reset the accumulators."""
        self._log_gradients = bool(self.whentodo(epoch))
        if not self._log_gradients:
            return

        # Never populated inside this class; kept so the attribute still exists.
        self._batch_weightnorm = []

        # One list of flattened gradients per kernel layer.
        self._batch_gradients = [[] for _ in self.layerixs]

        # Shuffled sample indices, consumed SGD_BATCHSIZE at a time.
        ixs = list(range(len(trn.X)))
        np.random.shuffle(ixs)
        self._batch_todo_ixs = ixs

    def on_batch_begin(self, batch, logs=None):
        """Record the kernel gradients on the next slice of shuffled indices."""
        if not self._log_gradients:
            return

        cur_ixs = self._batch_todo_ixs[:SGD_BATCHSIZE]
        if not cur_ixs:
            # Every index has been consumed; an empty batch has no gradient.
            return

        # Uniform sample weights, learning phase 1 (training mode).
        inputs = [trn.X[cur_ixs,:], [1,]*len(cur_ixs), trn.Y[cur_ixs,:], 1]
        for lndx, g in enumerate(self.get_gradients(inputs)):
            # g is the gradient for the kernel of the lndx-th kernel layer.
            # Flatten it to 1-D; the former np.reshape(g, -1, 1) passed 1 as
            # the `order` argument, which is not a valid order specifier.
            self._batch_gradients[lndx].append(np.reshape(g, -1))

        # Advance the indexing
        self._batch_todo_ixs = self._batch_todo_ixs[SGD_BATCHSIZE:]

    def on_epoch_end(self, epoch, logs=None):
        """Compute the epoch statistics and pickle them to BASE_DIR."""
        if not self.whentodo(epoch):
            return

        # Overall loss on the full train set (learning phase 1) and test set
        # (learning phase 0), with uniform sample weights.
        loss = {}
        for cdata, cdataname, istrain in ((trn, 'trn', 1), (tst, 'tst', 0)):
            result = self.get_loss([cdata.X, [1,]*len(cdata.X), cdata.Y, istrain])
            loss[cdataname] = np.asarray(result).flat[0]

        # Based on https://github.com/ravidziv/IDNNs/blob/1c4926f641d4306af7ae37325358be19e8f4d276/idnns/plots/plot_gradients.py

        data = {
            'weights_norm': [],   # norm of each kernel
            'gradmean': [],       # norm of the per-weight gradient mean over batches
            'gradstd': [],        # norm of the per-weight gradient std over batches
            'activity_tst': []    # layer output on all of tst.X
        }

        for lndx, layerix in enumerate(self.layerixs):
            clayer = self.model.layers[layerix]
            weights_norm = np.linalg.norm(K.get_value(clayer.kernel))

            # Columns are batches, rows are individual weights.
            stackedgrads = np.stack(self._batch_gradients[lndx], axis=1)
            gradmean = np.linalg.norm(stackedgrads.mean(axis=1))
            gradstd  = np.linalg.norm(stackedgrads.std(axis=1))

            data['weights_norm'].append(weights_norm)
            data['gradmean'].append(gradmean)
            data['gradstd'].append(gradstd)
            data['activity_tst'].append(self.layerfuncs[lndx]([tst.X,])[0])

        fname = BASE_DIR + "/epoch%08d"% epoch
        print("Saving", fname)
        with open(fname, 'wb') as f:
            cPickle.dump({'ACTIVATION':ACTIVATION, 'epoch':epoch, 'data':data, 'loss':loss},
                         f, cPickle.HIGHEST_PROTOCOL)
        
            
# Assemble the hidden layers for the selected architecture. The layer objects
# are created in the same order as before and then applied one after another.
if ARCH_NAME == 'conv1':
    input_layer = keras.layers.Input((trn.X.shape[1], trn.X.shape[2], 1))
    hidden_stack = [
        keras.layers.Conv2D(32, kernel_size=(3, 3), activation=ACTIVATION),
        keras.layers.Conv2D(32, kernel_size=(3, 3), activation=ACTIVATION),
        keras.layers.MaxPooling2D(pool_size=(2,2)),
        keras.layers.Flatten(),
        keras.layers.Dense(20, activation=ACTIVATION),
        keras.layers.Dense(20, activation=ACTIVATION),
    ]
else:
    input_layer = keras.layers.Input((trn.X.shape[1],))
    hidden_stack = []
    for n in LAYER_DIMS:
        hidden_stack.append(keras.layers.Dense(n, activation=ACTIVATION))

# Thread the input through every hidden layer.
clayer = input_layer
for hidden in hidden_stack:
    clayer = hidden(clayer)

# Softmax classifier head with one unit per class, trained with plain SGD.
outputs = keras.layers.Dense(trn.nb_classes, activation='softmax')(clayer)
model = keras.models.Model(inputs=input_layer, outputs=outputs)
optimizer = keras.optimizers.SGD(lr=SGD_LEARNINGRATE)
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

def do_report(epoch):
    """Return True when statistics should be saved for ``epoch``.

    Schedule: every epoch below 20, every 5th epoch below 100, every 10th
    epoch below 200, and every 100th epoch from 200 onwards.
    """
    if epoch < 20:
        return True

    # (exclusive upper bound, reporting stride) pairs, checked in order.
    for upper, stride in ((100, 5), (200, 10)):
        if epoch < upper:
            return epoch % stride == 0

    return epoch % 100 == 0
    
# Callback instance passed to model.fit below; do_report selects which epochs get saved.
reporter   = Reporter(whentodo=do_report)

In [None]:
# Train the model; `reporter` saves statistics on the epochs it selects.
# Earlier variant that also passed the test set as validation data:
#r = model.fit(x=trn.X, y=trn.Y, verbose=2, batch_size=SGD_BATCHSIZE, epochs=NUM_EPOCHS,
#              validation_data=(tst.X, tst.Y), callbacks=[reporter,])
r = model.fit(
    x=trn.X,
    y=trn.Y,
    verbose=2,
    batch_size=SGD_BATCHSIZE,
    epochs=NUM_EPOCHS,
    callbacks=[reporter],
)


In [None]:

# NOTE(review): `asdadsf` is a bare, undefined name. Evaluating it raises
# NameError, so nothing below it in this cell runs -- presumably a deliberate
# stop to keep "Run All" out of this stale cell; confirm and replace with an
# explicit guard or delete the cell.
asdadsf
# NOTE(review): the code below reads `reporter.saved_logs`, `plt` and
# `PLOT_LAYERS`. None of them is defined or imported in the visible notebook,
# and the Reporter class above never sets `saved_logs`.
epochs = sorted(reporter.saved_logs.keys())
cm = plt.cm.get_cmap('inferno')

# One figure per data split: scatter the '_h_upper' / '_h_lower' values of each
# layer against the loss, coloured by epoch.
for colndx, t in enumerate(['trn','tst']):
    loss = [reporter.saved_logs[epoch][t+'_loss'] for epoch in epochs]
    plt.figure()
    for lndx, layerid in enumerate(PLOT_LAYERS):
        upperh = [reporter.saved_logs[epoch][t+'_layer_%d_h_upper' % lndx] for epoch in epochs]
        lowerh = [reporter.saved_logs[epoch][t+'_layer_%d_h_lower' % lndx] for epoch in epochs]
        sc=plt.scatter(loss, upperh, c=epochs, cmap=cm, edgecolor='none', label="$H_{KL}$")
        plt.scatter(loss, lowerh, c=epochs, cmap=cm, edgecolor='none', label="$H_{BD}$")
    # The colorbar uses the last scatter handle created in the inner loop.
    plt.colorbar(sc, label='Epoch')
    plt.xlabel('Cross-entropy loss')
    # NOTE(review): the label text has an unbalanced closing parenthesis.
    plt.ylabel('H(hidden layer))')

In [None]:
# NOTE(review): stale cell. `reporter.saved_batch_logs` is never set by the
# Reporter class above and `plt` is not imported in the visible notebook.
# Plots element [0] (labelled 'm') and element [1] (labelled 'std') of each
# saved batch-log entry against the sorted keys.
sortedk = sorted(reporter.saved_batch_logs.keys())
meangrads = np.array([reporter.saved_batch_logs[k][0] for k in sortedk])
stdgrads  = np.array([reporter.saved_batch_logs[k][1] for k in sortedk])

plt.plot(sortedk, meangrads, label='m')
# NOTE(review): pyplot.hold is deprecated/removed in newer matplotlib releases -- confirm against the installed version.
plt.hold('on')
plt.plot(sortedk, stdgrads, label='std')


# NOTE(review): bare undefined name; evaluating it raises NameError at the end of this cell.
asdfassdfsfd
