In [1]:
import os
import sys
import timeit
import numpy
from keras.models import *
from keras.layers.core import *
from keras.layers.embeddings import *
from keras.optimizers import SGD,Adadelta,Adam
from keras.regularizers import l2, l1l2
from keras.constraints import unitnorm,nonneg
from keras.layers.advanced_activations import ThresholdedReLU
from keras import backend as K
from scipy.io import loadmat
from scipy.io import savemat
from keras.models import model_from_json
from IPython.display import SVG
from keras.utils.visualize_util import to_graph
from keras.callbacks import ModelCheckpoint,RemoteMonitor
import theano
import theano.tensor as T
import h5py
from theano.tensor.shared_randomstreams import RandomStreams
to_path = "./"

Using Theano backend.


Using gpu device 0: GeForce GTX 980 (CNMeM is enabled)


In [2]:
term_matrix = loadmat(to_path + "t1_termatrix.mat", variable_names = "target").get("target").astype("float32")
term_matrix.shape

(28956, 300)

In [3]:
class SymmetricAutoencoder(Layer):
    '''AutoEncoder where reconstruction = reconstruction_activation(activation(x * W) * W')
    # Input shape
        2D tensor with shape: `(nb_samples, input_dim)`.
    # Output shape
        2D tensor with shape: `(nb_samples, input_dim)` if output_reconstruction = True,
        shape: `(nb_samples,output_dim)` if output_reconstruction = False
    # Arguments
        output_dim: int > 0.
        init: name of initialization function for the weights of the layer
            (see [initializations](../initializations.md)),
            or alternatively, Theano function to use for weights
            initialization. This parameter is only relevant
            if you don't pass a `weights` argument.
        activation: name of activation function to use
            (see [activations](../activations.md)),
            or alternatively, elementwise Theano function.
            If you don't specify anything, no activation is applied
            (ie. "linear" activation: a(x) = x).
        weights: list of numpy arrays to set as initial weights.
            The list should have 1 element, of shape `(input_dim, output_dim)`.
        output_reconstruction: Whether, when not being trained, the output of the 
            layer should be the reconstructed input, or the hidden layer activations.
        W_regularizer: instance of [WeightRegularizer](../regularizers.md)
            (eg. L1 or L2 regularization), applied to the main weights matrix.
        activity_regularizer: instance of [ActivityRegularizer](../regularizers.md),
            applied to the network output.
        W_constraint: instance of the [constraints](../constraints.md) module
            (eg. maxnorm, nonneg), applied to the main weights matrix.
        input_dim: dimensionality of the input (integer).
            This argument (or alternatively, the keyword argument `input_shape`)
            is required when using this layer as the first layer in a model.
    '''
    input_ndim = 2

    def __init__(self, output_dim, init='glorot_uniform', activation='linear',
                 reconstruction_activation='linear', weights=None,
                 W_regularizer=None, b_regularizer=None, activity_regularizer=None,
                 output_reconstruction=False,
                 W_constraint=None, b_constraint=None, input_dim=None, **kwargs):
        self.init = initializations.get(init)
        self.activation = activations.get(activation)
        self.reconstruction_activation = activations.get(reconstruction_activation)
        self.output_reconstruction = output_reconstruction
        self.output_dim = output_dim
        self.pretrain = True

        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)
        self.constraints = [self.W_constraint, self.b_constraint]

        self.initial_weights = weights

        self.input_dim = input_dim
        if self.input_dim:
            kwargs['input_shape'] = (self.input_dim,)
        self.input = K.placeholder(ndim=2)
        super(SymmetricAutoencoder, self).__init__(**kwargs)

    def build(self):
        input_dim = self.input_shape[1]

        self.W = self.init((input_dim, self.output_dim))

        self.params = [self.W]

        self.regularizers = []
        if self.W_regularizer:
            self.W_regularizer.set_param(self.W)
            self.regularizers.append(self.W_regularizer)

        if self.activity_regularizer:
            self.activity_regularizer.set_layer(self)
            self.regularizers.append(self.activity_regularizer)

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights

    @property
    def output_shape(self):
        if self.pretrain or self.output_reconstruction: 
            return self.input_shape
        else:
            return (self.input_shape[0], self.output_dim)

    def get_output(self, train=False):
        X = self.get_input(train)
        if self.pretrain or self.output_reconstruction: 
            output = self.reconstruction_activation(K.dot(self.activation(K.dot(X, self.W)), K.transpose(self.W)))
            return output            
        else:
            output = self.activation(K.dot(X, self.W))
            return output

    def get_config(self):
        config = {'name': self.__class__.__name__,
                  'output_dim': self.output_dim,
                  'init': self.init.__name__,
                  'activation': self.activation.__name__,
                  'reconstruction_activation': self.reconstruction_activation.__name__,
                  'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
                  'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None,
                  'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
                  'input_dim': self.input_dim}
        base_config = super(SymmetricAutoencoder, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [4]:
encoder = Sequential()
encoder.add(Embedding(
        input_dim = term_matrix.shape[0], 
                          output_dim = 300,
                          weights = [term_matrix], 
                           trainable = False,
                           input_length = 1)
    )
encoder.add(Flatten())
encoder.add(SymmetricAutoencoder(
        activation = 'sigmoid',
        reconstruction_activation = 'linear',
        output_dim=40
    ))
inputs = numpy.reshape(numpy.arange(term_matrix.shape[0]), (term_matrix.shape[0], 1))
outputs = term_matrix

In [None]:
encoder.compile(loss = 'mse', optimizer = 'Adadelta')

history = encoder.fit(inputs, outputs, nb_epoch = 1000, batch_size = 1)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 90/1000
Epoch 93/1000
Epoch 98/1000
Epoch 101/1000
Epoch 106/1000
Epoch 109/1000
Epoch 112/1000
Epoch 117/1000
Epoch 122/1000
Epoch 127/1000
Epoch 130/1000
Epoch 135/1000
Epoch 138/1000
Epoch 141/1000
Epoch 149/1000
Epoch 152/1000
Epoch 157/1000
Epoch 160/1000
Epoch 163/1000
Epoch 170/1000
Epoch 174/1000
Epoch 179/1000
Epoch 182/1000
Epoch 187/1000
Epoch 194/1000
Epoch 199/1000
Epoch 202/1000
Epoch 207/1000
Epoc

In [6]:
encoder.save_weights("W1_pretrain_40.hdf5")
#encoder.load_weights("W1_pretrain_Adam_1000_loss_0037.hdf5")

In [31]:
encoder.output_reconstruction = False
encoder.pretrain = False
activations = encoder.predict(inputs, batch_size = 15000)
#savemat("./t1_ntm_pretrain.mat", { 'activations' : activations,
#                                 'W2' : encoder.get_weights()[1]})
activations.shape
import h5py
h5f = h5py.File("activations.hdf5")
h5f.create_dataset('activations', data = activations)
h5f.close()

(28956, 300)

In [23]:
#get initial weights for W2 from the autoencoder
#pretrained_W2 = encoder.get_weights()[1]
#pretrained_W2 = loadmat(to_path + "t1_ntm_pretrain.mat", variable_names = "W2").get("W2").astype("float32")
h5w2 = h5py.File('W1_pretrain_40.hdf5', 'r')
h5w2['/layer_2'].items()
pretrained_W2 = h5w2['layer_2/param_0'][:]
h5w2.close()
pretrained_W2.shape

(300, 40)

In [4]:
#get initial weights for W1 that were pretrained in R based on the autoencoder activations
#pretrained_W1 = loadmat(to_path + "t1_ntm_pret.mat", variable_names = "w1").get("w1").astype("float32") 

examples = loadmat(to_path + "t1_ntm_pret.mat", variable_names = "examples").get("examples")
# Take the multiple sets and combine them into one big super-epoch
examples = numpy.vstack(tuple([examples[:,(0,1,x)] for x in range(2, examples.shape[1])]))

In [5]:
#pretrained_W1 = loadmat(to_path + "t1_ntm_w1.mat", variable_names = "w1").get("w1").astype("float32") 
h5w1 = h5py.File('w1_pretrain.hdf5', 'r')
pretrained_W1 = numpy.transpose(h5w1['w1'][:])
h5w1.close()
pretrained_W1.shape

(954905, 40)

In [6]:
(n_docs, n_topics, n_terms, n_epochs) = (pretrained_W1.shape[0], 
                               pretrained_W1.shape[1], 
                               term_matrix.shape[0], 
                                        examples.shape[1] - 2)
(n_docs, n_topics, n_terms, n_epochs)

(954905, 40, 28956, 1)

In [24]:
class DenseNoBias(Layer):
    '''Fully connected NN layer with no bias term.
    # Input shape
        2D tensor with shape: `(nb_samples, input_dim)`.
    # Output shape
        2D tensor with shape: `(nb_samples, output_dim)`.
    # Arguments
        output_dim: int > 0.
        init: name of initialization function for the weights of the layer
            (see [initializations](../initializations.md)),
            or alternatively, Theano function to use for weights
            initialization. This parameter is only relevant
            if you don't pass a `weights` argument.
        activation: name of activation function to use
            (see [activations](../activations.md)),
            or alternatively, elementwise Theano function.
            If you don't specify anything, no activation is applied
            (ie. "linear" activation: a(x) = x).
        weights: list of numpy arrays to set as initial weights.
            The list should have 1 element, of shape `(input_dim, output_dim)`.
        W_regularizer: instance of [WeightRegularizer](../regularizers.md)
            (eg. L1 or L2 regularization), applied to the main weights matrix.
        activity_regularizer: instance of [ActivityRegularizer](../regularizers.md),
            applied to the network output.
        W_constraint: instance of the [constraints](../constraints.md) module
            (eg. maxnorm, nonneg), applied to the main weights matrix.
        input_dim: dimensionality of the input (integer).
            This argument (or alternatively, the keyword argument `input_shape`)
            is required when using this layer as the first layer in a model.
    '''
    input_ndim = 2

    def __init__(self, output_dim, init='glorot_uniform', activation='linear', weights=None,
                 W_regularizer=None, activity_regularizer=None,
                 W_constraint=None, input_dim=None, **kwargs):
        self.init = initializations.get(init)
        self.activation = activations.get(activation)
        self.output_dim = output_dim

        self.W_regularizer = regularizers.get(W_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.constraints = [self.W_constraint]

        self.initial_weights = weights

        self.input_dim = input_dim
        if self.input_dim:
            kwargs['input_shape'] = (self.input_dim,)
        self.input = K.placeholder(ndim=2)
        super(DenseNoBias, self).__init__(**kwargs)

    def build(self):
        input_dim = self.input_shape[1]

        self.W = self.init((input_dim, self.output_dim))

        self.params = [self.W]

        self.regularizers = []
        if self.W_regularizer:
            self.W_regularizer.set_param(self.W)
            self.regularizers.append(self.W_regularizer)

        if self.activity_regularizer:
            self.activity_regularizer.set_layer(self)
            self.regularizers.append(self.activity_regularizer)

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights

    @property
    def output_shape(self):
        return (self.input_shape[0], self.output_dim)

    def get_output(self, train=False):
        X = self.get_input(train)
        output = self.activation(K.dot(X, self.W))
        return output

    def get_config(self):
        config = {'name': self.__class__.__name__,
                  'output_dim': self.output_dim,
                  'init': self.init.__name__,
                  'activation': self.activation.__name__,
                  'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
                  'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None,
                  'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
                  'input_dim': self.input_dim}
        base_config = super(DenseNoBias, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [25]:
# Build the actual training model

def build_ntm(term_matrix = term_matrix, 
              pre_W1 = pretrained_W1, 
              pre_W2 = pretrained_W2,  
              W2_l2 = 0.001
             ):
    
    n_docs = pretrained_W1.shape[0]
    n_topics = pretrained_W1.shape[1]
    n_terms = term_matrix.shape[0]
    
    ntm = Graph()
    
    ntm.add_input(name = "g", input_shape = (1,), dtype = "int")
    ntm.add_node(Embedding(input_dim = n_terms, 
                          output_dim = 300,
                          weights = [term_matrix], 
                           trainable = False,
                           input_length = 1), 
                 name = "le", input = "g")
    ntm.add_node(Flatten(), input = "le", name = "le_")
    ntm.add_node(DenseNoBias(n_topics, activation = "sigmoid", 
                       weights = [pre_W2], 
                       W_regularizer = l2(W2_l2)
                      ),
                 name = "lt", input = "le_")
    
    ntm.add_input(name = "d_pos", input_shape = (1,), dtype = "int")
    ntm.add_input(name = "d_neg", input_shape = (1,), dtype = "int")
    ntm.add_shared_node(Embedding(input_dim = n_docs, 
                                  output_dim = n_topics, 
                                  weights = [pre_W1], 
                                  input_length = 1),
                        name = "topicmatrix",
                        inputs =  ["d_pos", "d_neg"], 
                        outputs = ["wd_pos", "wd_neg"],
                        merge_mode = None)
    ntm.add_node(Flatten(), name = "wd_pos_", input = "wd_pos")
    ntm.add_node(Flatten(), name = "wd_neg_", input = "wd_neg")
    ntm.add_node(Activation("softmax"), name = "ld_pos", input = "wd_pos_")
    ntm.add_node(Activation("softmax"), name = "ld_neg", input = "wd_neg_")
    
    ntm.add_node(Layer(),
                       name = "ls_pos", 
                       inputs = ["lt", "ld_pos"], 
                       merge_mode = 'dot', dot_axes = -1)# , create_output = True)
    ntm.add_node(Layer(), 
                       name = "ls_neg", 
                       inputs = ["lt", "ld_neg"], 
                        merge_mode = 'dot', dot_axes = -1)#, create_output = True)
    return ntm

def add_fine_tuning(ntm = None):
    import theano.tensor as T
    def output_shape(input_shape):
        return (None, 1)
    
    def sub_merge(layers):
        import theano.tensor as T
#        ls_pos = T.dot(layers[0], layers[1].T)
#        ls_neg = T.dot(layers[0], layers[2].T)
        ls_pos = layers[0]
        ls_neg = layers[1]
        #less = #T.mul(40000000,T.add(ls_neg, ls_pos))
        less = T.sub(ls_neg, ls_pos)
        return T.add(0.5, less)

    #def sumLam(x):
    #    return (0.5 + (x[1] - x[0]))

    summer = LambdaMerge(layers = [ntm.nodes["ls_pos"], 
                                   ntm.nodes["ls_neg"]], 
                     function = sub_merge,
                    output_shape = output_shape)
    ntm.add_node(summer, inputs = ["ls_pos", "ls_neg"], name = "summed", create_output = True)

    return ntm


#SVG(to_graph(ntm).create(prog='dot', format='svg'))

In [None]:
# Fine-tuning
ntm = build_ntm(W2_l2 = 0.001)
ntm = add_fine_tuning(ntm)

#def rawloss(x_train, x_test):
#    return x_train * x_test
def maxloss(y_true, y_predict):
    return K.maximum(y_true,y_predict)
#    return T.maximum(0., T.mul(y_true,y_predict ))

#ntm.load_weights("cpw4_starte0_batch10000_sgd001_e_01_0.499998.hdf5")

ntm.compile(loss = {'summed' : maxloss#, 
                #   'ls_pos' : 'binary_crossentropy', 
              #     'ls_neg' : 'binary_crossentropy'
                   },
           optimizer = SGD(lr = 0.01))

checkpointer = ModelCheckpoint(filepath="./cpw5_starte0_batch10000_sgd001_e_{epoch:02d}_{val_loss:.6f}.hdf5", 
                               monitor = 'val_loss', verbose = 1, save_best_only=False)

train_shape = (examples.shape[0], 1)
trainer = examples 
        
historylog = ntm.fit(data = {
            "g" : numpy.reshape(trainer[:,1], train_shape), 
            "d_pos" : numpy.reshape(trainer[:,0], train_shape), 
            "d_neg" : numpy.reshape(trainer[:,2], train_shape),
            "summed" : numpy.reshape(numpy.zeros(trainer.shape[0], dtype = theano.config.floatX),
                                     train_shape)#, 
#            "ls_pos" : numpy.reshape(numpy.ones(trainer.shape[0], dtype = theano.config.floatX),
#                                     train_shape),
#            "ls_neg" : numpy.reshape(numpy.zeros(trainer.shape[0], dtype = theano.config.floatX),
#                                     train_shape)
        }, callbacks = [checkpointer],
        validation_split = 0.02,
            nb_epoch = 20, 
            batch_size = 10000)

Train on 45794004 samples, validate on 934572 samples
Epoch 1/20
  330000/45794004 [..............................] - ETA: 10931s - loss: 0.6916

In [None]:
ntm.load_weights("cpw_new_startepoch0_00_0.5000.hdf5")
idxs = numpy.random.choice(trainer.shape[0], 200000, replace = False)
tester = trainer[idxs,:]
tester_shape = (tester.shape[0], 1)
ntm.evaluate(data = {
            "g" : numpy.reshape(tester[:,1], tester_shape), 
            "d_pos" : numpy.reshape(tester[:,0], tester_shape), 
            "d_neg" : numpy.reshape(tester[:,2], tester_shape),
            "loss_out" : numpy.reshape(numpy.ones(tester.shape[0], 
                                                  dtype = theano.config.floatX), tester_shape)
        }, batch_size = 20000)

In [None]:
0.49998338818550109,0.49998418092727659,0.49998493790626525,0.49998548328876496,0.4999860256910324,0.49998660981655119,0.49998704195022581,0.49998756051063536

In [None]:
[(x,  type(ntm.nodes[x]), ntm.nodes[x].output_shape) for x in ntm.nodes]

In [None]:
json_string = ntm.to_json()
open('ntm_final.json', 'w').write(json_string)
ntm.save_weights(to_path + 'ntm_finalweights_.h5', overwrite=True)

In [None]:
weights = ntm.get_weights()
(weights[0].shape, weights[1].shape, weights[2].shape, 
 weights[3].shape)

In [None]:
w = ntm.nodes["lt"].get_weights()
(w[0].shape, w[1].shape)

In [None]:
softies = weights[0][100000,:]
numpy.exp(softies)/numpy.sum(numpy.exp(softies))

In [None]:
1 / (1 + numpy.exp( - weights[2][100,:]))

In [None]:
# sNTM
def rawloss(x_train, x_test):
    return x_train * x_test
n_categories = 3
ntm.add_node(Dense(n_categories, activation = "sigmoid"), input = "ld_pos", name = "ll")
ntm.add_output(name = "label", input = "ll")
ntm.compile(loss = {'loss_out' : threshold,
                   'label' : 'categorical_crossentropy'}, 
           optimizer = "Adadelta")

checkpointer = ModelCheckpoint(filepath="./cpw_new_smallbatch_sgd001_epoch_{epoch:02d}_{val_loss:.5f}.hdf5", 
                               monitor = 'val_loss', verbose = 1, save_best_only=False)

train_shape = (examples.shape[0], 1)
trainer = examples 
        
historylog = ntm.fit(data = {
            "g" : numpy.reshape(trainer[:,1], train_shape), 
            "d_pos" : numpy.reshape(trainer[:,0], train_shape), 
            "d_neg" : numpy.reshape(trainer[:,2], train_shape),
            "loss_out" : numpy.reshape(numpy.ones(trainer.shape[0], 
                                                  dtype = theano.config.floatX), train_shape)
        # Need to add something here for the labels
        }, callbacks = [checkpointer],
        validation_split = 0.02,
            nb_epoch = 20, 
            batch_size = 10)