In [1]:
import keras
from keras.models import Sequential, Model
from keras.callbacks import EarlyStopping
from keras.layers import Input, Dense, Activation

Using TensorFlow backend.


In [2]:
import numpy as np
from multigraph import MultiGraph

In [None]:
#import csv
import unicodecsv as csv
""" EXAMPLE
add,IS,operator
subtract,IS,operator
multiply,IS,operator
divide,IS,operator
open_closure,IS,operator
close_closure,IS,operator
"""
op_graph = MultiGraph()
#with open('operator_graph.csv', 'r') as csvfile:
n = 45
for i in range(n):
    print '.',
    with open('aifb_csv/aifb_relation_'+str(i)+'.csv', 'r') as csvfile:
        graphreader = csv.reader(csvfile, delimiter=",")
        for row in graphreader:
            op_graph.add_connection(row)
print('\n loaded '+str(op_graph.n_rels)+' relations.')

import pickle
def save_object(obj, filename):
    with open(filename, 'wb') as output:  # Overwrites any existing file.
        pickle.dump(obj, output, pickle.HIGHEST_PROTOCOL)

#save the adjacency matrix as well
save_object(op_graph, 'adj_graph.pkl')
print("Saved graph object to disk")

In [3]:
import pickle
with open('adj_graph.pkl', 'rb') as inobj:
    op_graph = pickle.load(inobj)

In [4]:
a_graph = op_graph.get_adjacency_matrix()
print("number of nodes",op_graph.n_nodes)
print("number of relations",op_graph.n_rels)
print("relations:")
for k,v in op_graph.rel_counter.iteritems():
    print(k.rsplit('/',1)[-1], v)

('number of nodes', 8284)
('number of relations', 45)
relations:
(u'ontology#publishes', 1217)
(u'ontology#number', 145)
(u'ontology#isWorkedOnBy', 571)
(u'ontology#worksAtProject', 200)
(u'ontology#dealtWithIn', 357)
(u'owl#onProperty', 152)
(u'ontology#type', 50)
(u'ontology#author', 3986)
(u'ontology#abstract', 534)
(u'ontology#carriedOutBy', 79)
(u'ontology#month', 759)
(u'ontology#phone', 227)
(u'22-rdf-syntax-ns#type', 4124)
(u'ontology#address', 202)
(u'ontology#note', 114)
(u'ontology#publication', 4163)
(u'ontology#financedBy', 65)
(u'ontology#editor', 190)
(u'ontology#pages', 548)
(u'owl#inverseOf', 10)
(u'ontology#projectInfo', 952)
(u'rdf-schema#range', 1)
(u'ontology#booktitle', 765)
(u'ontology#isAbout', 2477)
(u'ontology#finances', 68)
(u'ontology#howpublished', 49)
(u'ontology#member', 339)
(u'owl#allValuesFrom', 152)
(u'ontology#edition', 12)
(u'ontology#isbn', 16)
(u'ontology#hasProject', 952)
(u'ontology#journal', 161)
(u'ontology#year', 1227)
(u'ontology#title', 122

In [5]:
x_train = a_graph
print('input dims:',x_train.shape)
print(type(x_train))
sum(list(x_train[5]))

('input dims:', (8284, 372780))
<class 'scipy.sparse.csr.csr_matrix'>


<1x372780 sparse matrix of type '<type 'numpy.int8'>'
	with 18 stored elements in Compressed Sparse Row format>

In [6]:
# Need to make a custom Dense layer to tie weights between the encoder and decoder
from keras import backend as K

class DenseTied(Dense):
    def __init__(self, master_layer, **kwargs):
        #output_dim needs to be equal to the input dimensions of the master_layer
        self.output_dim = master_layer.input_shape[-1]
        super(DenseTied, self).__init__(self.output_dim, **kwargs)
        self.master_layer = master_layer
    
    def build(self,input_shape):
        assert len(input_shape) >= 2
        input_dim = input_shape[-1]

        self.kernel = K.transpose(self.master_layer.kernel)
        if self.use_bias:
            self.bias = self.add_weight(shape=(self.output_dim,),
                            initializer=self.bias_initializer,
                            name='bias',
                            regularizer=self.bias_regularizer,
                            constraint=self.bias_constraint)
        else:
            self.bias = None
        self.built = True
        
    def call(self, inputs):
        output = K.dot(inputs, K.transpose(self.master_layer.kernel))
        if self.use_bias:
            output = K.bias_add(output, self.bias, data_format='channels_last')
        if self.activation is not None:
            output = self.activation(output)
        return output


In [9]:
encoding_dim = 16
input_dim = x_train.shape[1]

inputs = Input(shape=(input_dim,), sparse=True, name="inputs")
# Encoder Layers
encode_layer1 = Dense(4 * encoding_dim, activation='relu', name="encode_layer1")
encode_layer2 = Dense(2 * encoding_dim, activation='relu', name="encode_layer2")
coding_layer = Dense(encoding_dim, activation='relu', name="coding_layer")

encoding_1 = encode_layer1(inputs)
encoding_2 = encode_layer2(encoding_1)
the_code = coding_layer(encoding_2)

# Decoder Layers
#decode_layer1 = Dense(2 * encoding_dim, activation='tanh',name="decode_layer1")
#decode_layer2 = Dense(4 * encoding_dim, activation='tanh',name="decode_layer2")
#reconstruction_layer = Dense(input_dim, activation='tanh',name="reconstruction_layer")
decode_layer1 = DenseTied(coding_layer, activation='relu',name="decode_layer1")
decode_layer2 = DenseTied(encode_layer2, activation='relu',name="decode_layer2")
recon_layer = DenseTied(encode_layer1, activation='relu',name="reconstruction_layer")

decoding_1 = decode_layer1(the_code)
decoding_2 = decode_layer2(decoding_1)
reconstruction = recon_layer(decoding_2)

ae = Model(inputs=inputs, outputs=reconstruction)
#monitor = EarlyStopping(monitor='loss', min_delta=0.0001, patience=5, verbose=1, mode='auto')
ae.compile(optimizer='adam', loss='mse')
ae.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inputs (InputLayer)          (None, 372780)            0         
_________________________________________________________________
encode_layer1 (Dense)        (None, 64)                23857984  
_________________________________________________________________
encode_layer2 (Dense)        (None, 32)                2080      
_________________________________________________________________
coding_layer (Dense)         (None, 16)                528       
_________________________________________________________________
decode_layer1 (DenseTied)    (None, 32)                32        
_________________________________________________________________
decode_layer2 (DenseTied)    (None, 64)                64        
_________________________________________________________________
reconstruction_layer (DenseT (None, 372780)            372780    
Total para

In [11]:
callbacks = []
DEBUG = False

class WeightHistory(keras.callbacks.Callback):
    def on_batch_end(self, batch, logs={}):
        print(batch)
        print('coding layer weights')
        print(self.model.get_layer('coding_layer').get_weights())
        print(self.model.get_layer('coding_layer').get_weights()[0].shape)
        print('decoding layer weights')
        print(np.transpose(self.model.get_layer('decode_layer1').get_weights()))
        print(np.transpose(self.model.get_layer('decode_layer1').get_weights()[0]).shape)


history = WeightHistory()

if DEBUG:
    callbacks.append(history)


In [12]:
ae.fit(x_train, x_train, epochs=2, verbose=1,callbacks=callbacks)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0xb3281ecd0>

In [13]:
#coding_model = Model(inputs=ae.inputs, outputs=ae.get_layer("the_code").output)
coding_model = Model(inputs=inputs, outputs=the_code)

In [14]:
#save the embeddings in order to plot them later
# serialize model to JSON
model_json = coding_model.to_json()
with open("coding_model.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
coding_model.save_weights("coding_model.h5")
print("Saved model to disk")

Saved model to disk
