In [1]:
import os, sys, keras
import numpy as np
import pandas as pd

Using TensorFlow backend.


In [2]:
from graphTools import *
from layerTools import *

In [3]:

# Read in graphs
# glob.glob returns paths in arbitrary (filesystem-dependent) order, so the listing is
# sorted before slicing; otherwise "the first 150 files" is a different subset on
# different machines and the run is not reproducible.
graph_files = sorted(glob.glob('qm9graph/*.csv'))[0:150]
mygraphs = [readgraph(fname) for fname in graph_files]

# standardize_graphs also reports the one-hot maps, the graph size and the number of
# dropped graphs (see the captured output of this cell).
headers_onehot_dict, matrices_onehot_dict, mygraphs_standardized = standardize_graphs(mygraphs, max_size=29)

# Stack each per-graph field into one array with the graph index as the leading axis.
headers_padded = np.array([i['header'] for i in mygraphs_standardized])
matrices_padded = np.array([i['matrix'] for i in mygraphs_standardized])
connectivities_padded = np.array([i['connectivity'] for i in mygraphs_standardized])
origHeaders_padded = np.array([i['origHeader'] for i in mygraphs_standardized])
origMatrices_padded = np.array([i['origMatrix'] for i in mygraphs_standardized])

# Single-argument print(...) produces the same text on Python 2 and Python 3.
print(headers_padded.shape)
print(matrices_padded.shape)
print(connectivities_padded.shape)

Header One-Hot Map: {8.0: array([0., 0., 0., 1., 0.]), 1.0: array([1., 0., 0., 0., 0.]), 9.0: array([0., 0., 0., 0., 1.]), 6.0: array([0., 1., 0., 0., 0.]), 7.0: array([0., 0., 1., 0., 0.])}
Bond One-Hot Map: {1.0: array([1., 0., 0.]), 2.0: array([0., 1., 0.]), 3.0: array([0., 0., 1.])}
Graph Size: 29
Number of dropped graphs: 0
(150, 29, 5)
(150, 29, 29, 3)
(150, 29, 29)


In [None]:
#Generator

def image_categorical_crossentropy(y_true, y_pred):
    """Mean element-wise cross-entropy between ``y_true`` and ``y_pred``.

    Despite the name, every entry is scored as an independent two-class target,
    ``t * log(p) + (1 - t) * log(1 - p)``, and the negated mean over all entries
    is returned. Predictions are first clipped to ``[1e-5, 1 - 1e-5]`` so that
    both logarithms stay finite.

    Original author's note: intended for assigning one-hot atom labels to atoms.
    """
    tiny = 1e-5
    clipped = K.clip(y_pred, tiny, 1 - tiny)
    positive_term = y_true * K.log(clipped)
    negative_term = (1 - y_true) * K.log(1 - clipped)
    return -K.mean(positive_term + negative_term)
    
def gen_generator_model(num_nodes=29, input_shape=(1,), BO_dense_resize=200, BO_softmax_resize=4, labelator=None, labelatorTrainable=False):
    """Build the generator model: a noise vector in, a graph description out.

    The architecture was chosen arbitrarily by the original author.

    Bond-order branch:
        noise -> Dense(num_nodes * num_nodes, relu) -> reshape to (num_nodes, num_nodes)
              -> add its own transpose and append a trailing channel axis
              -> Dense(BO_dense_resize) -> Dense(BO_softmax_resize, softmax)
              -> average with its transpose so the result is symmetric.

    Annotation branch:
        noise -> Dense(num_nodes) -> reshape to (num_nodes, 1) -> Dense(6, softmax).

    Args:
        num_nodes: number of graph nodes (rows/columns of the bond-order matrix).
        input_shape: shape of one noise sample; must be 1-D so the batched input has rank 2.
        BO_dense_resize: width of the intermediate per-pair Dense layer.
        BO_softmax_resize: number of softmax channels per node pair.
        labelator: controls the annotation output.
            ``'default'`` keeps the annotation branch as built;
            ``None`` multiplies the annotations by zero;
            anything else is called as a model on
            ``[BO_mat_softmax, annotations, connectivity]`` and its second output
            is used as the annotations (legacy pretrained-labelator path).
        labelatorTrainable: value assigned to ``labelator.trainable`` on that last path.

    Returns:
        A ``keras.models.Model`` mapping the noise input to
        ``[BO_mat_softmax, annotations, connectivity]``. Per sample, ``BO_mat_softmax``
        is (num_nodes, num_nodes, BO_softmax_resize) and ``connectivity`` is
        (num_nodes, num_nodes), the sum of softmax channels 1 and up. ``annotations``
        is (num_nodes, 6) unless a labelator model replaces it.

    Raises:
        AssertionError: if the batched input tensor is not rank 2.
    """
    inputLayer = keras.layers.Input(shape=input_shape)
    # Validate the input rank before stacking any layers on top of it.
    assert len(inputLayer.shape) == 2, "Unhandled Input Layer size."

    # --- Bond-order branch ---------------------------------------------------
    bo_flat = keras.layers.Dense(num_nodes * num_nodes, activation='relu')(inputLayer)
    bo_square = keras.layers.Reshape((num_nodes, num_nodes))(bo_flat)
    # x + x^T is symmetric; the extra axis lets the following Dense layers act per node pair.
    bo_pairs = keras.layers.Lambda(
        lambda x: K.expand_dims(x + K.permute_dimensions(x, (0, 2, 1)), axis=3))(bo_square)
    bo_features = keras.layers.Dense(BO_dense_resize)(bo_pairs)
    bo_softmax_raw = keras.layers.Dense(BO_softmax_resize, activation='softmax')(bo_features)

    # Symmetrize the BO matrix
    BO_mat_softmax = keras.layers.Lambda(
        lambda x: (x + K.permute_dimensions(x, (0, 2, 1, 3))) / 2.0)(bo_softmax_raw)

    # Channel 0 is excluded from the sum (presumably the "no bond" class -- TODO confirm).
    connectivity = keras.layers.Lambda(lambda x: K.sum(x[:, :, :, 1:], axis=3))(BO_mat_softmax)

    # --- Annotation branch ---------------------------------------------------
    annotations = keras.layers.Dense(num_nodes)(inputLayer)
    annotations = keras.layers.Reshape((num_nodes, 1))(annotations)
    annotations = keras.layers.Dense(6, activation='softmax')(annotations)

    if labelator is None:
        # Blank out the annotations. The Lambda has no weights, so there is nothing to
        # freeze here (setting `.trainable` on the output tensor had no effect).
        annotations = keras.layers.Lambda(lambda x: x * 0)(annotations)
    elif labelator == 'default':
        # Keep the learned annotation branch unchanged.
        pass
    else:
        # Labelator is a pretrained network (legacy code): take its second output.
        labelator.trainable = labelatorTrainable
        annotations = labelator([BO_mat_softmax, annotations, connectivity])[1]

    model = keras.models.Model(inputs=inputLayer, outputs=[BO_mat_softmax, annotations, connectivity])
    return model

# Noise vector fed to the generator. The length 15 matches input_shape=(15,) passed to
# gen_generator_model below and the noise sampled in the training loop.
generator_input = keras.layers.Input(shape=(15,))
#bond_hiddens_input = keras.layers.Input(shape=(num_nodes,num_nodes,bond_hidden_length))
#atom_hiddens_input = keras.layers.Input(shape=(num_nodes,atom_hidden_length))
#connectivity_input = keras.layers.Input(shape=(num_nodes,num_nodes))


# Construct generator portion of GAN
# Stage 1: gen_generator_model (defined above) maps noise to
# [bond-order softmax, annotations, connectivity]; with labelator=None the
# annotations are multiplied by zero.
myGen_intermediate = gen_generator_model(num_nodes=29, input_shape=(15,), BO_dense_resize=30, BO_softmax_resize=4,\
                            labelator=None, labelatorTrainable=False)
# Stage 2: generate_gc_model is not defined in this notebook (presumably it comes from
# the star imports above -- TODO confirm). It is applied to the stage-1 outputs with
# do_readout=False.
myGen_conv_1 = generate_gc_model(num_nodes=29, atom_hidden_length=6, bond_hidden_length=4, hide_atoms=False,\
                      message_dense_resize=30, atom_dense_resize=30, bond_dense_resize=1000, do_readout = False)

# Chain the two stages into a single generator model: noise -> stage 1 -> stage 2.
myGen_intermediate_output = myGen_intermediate(generator_input)
myGen_output_1 = myGen_conv_1(myGen_intermediate_output)
#myGen_output_2 = myGen_conv_1(myGen_output_1)
myGen = keras.models.Model(inputs=generator_input, outputs=myGen_output_1)

# Construct discriminator portion of GAN
# Same builder as generator stage 2, but with do_readout=True and no bond_dense_resize.
myDisc = generate_gc_model(num_nodes=29, atom_hidden_length=6, bond_hidden_length=4, hide_atoms=False,\
                      message_dense_resize=30, atom_dense_resize=30, bond_dense_resize=None, do_readout = True)

# The discriminator is compiled on its own here, while it is still trainable; this is
# the model that train_discriminator fits directly.
myDisc.compile(optimizer=keras.optimizers.Adam(lr=0.001),
              loss='binary_crossentropy',
              metrics=['mean_squared_error', 'mean_absolute_error','binary_crossentropy'])

# Hook everything up together
# Combined model: noise -> generator -> discriminator score.
myGenOutput = myGen(generator_input)
myDiscOutput = myDisc(myGenOutput)
myBigModel = keras.models.Model(inputs=generator_input, outputs=myDiscOutput)

# Freeze the discriminator BEFORE compiling the combined model, so that fitting
# myBigModel is meant to update only the generator. The order of these statements matters.
# NOTE(review): this relies on Keras taking `trainable` into account at compile time --
# confirm for the Keras version in use.
myDisc.trainable = False

# Compile the GAN
myBigModel.compile(optimizer=keras.optimizers.Adam(lr=0.001),
              loss='binary_crossentropy',
              metrics=['mean_squared_error', 'mean_absolute_error','binary_crossentropy'])
#myDisc.trainable = True
#inputs = np.random.random((len(connectivities_padded),))


# Input the training data
# Real samples, in the same order as the generator's three outputs; train_discriminator
# pairs them up positionally with the generated arrays.
# NOTE(review): the shapes printed by the data-loading cell end in 3 (bond channels) and
# 5 (atom channels), while the models above are configured with BO_softmax_resize=4 /
# bond_hidden_length=4 and atom_hidden_length=6 -- confirm these agree before real and
# generated samples are stacked together.
inputs = [matrices_padded, headers_padded, connectivities_padded]

def train_discriminator(D, G, noise_in, real_inputs=None, test_size=0.33):
    """Run one discriminator update on a shuffled mix of real and generated samples.

    Generated samples come from ``G.predict(noise_in)`` and are labelled 0; real samples
    are labelled 1. The combined set is split at random: the training part is passed to
    ``D.fit`` for one epoch and the held-out part is passed as ``validation_data`` so the
    split is reported instead of being thrown away.

    Args:
        D: discriminator; must provide ``fit(x, y, ...)``.
        G: generator; must provide ``predict(noise)`` returning one array per model input.
        noise_in: noise batch handed to ``G.predict``.
        real_inputs: list of real-sample arrays, one per discriminator input, in the same
            order as the generator outputs. Defaults to the notebook-level ``inputs`` so
            existing three-argument calls keep working.
        test_size: fraction of the combined samples that is held out (rounded up).

    Returns:
        None. ``D`` is updated in place by ``D.fit``.
    """
    if real_inputs is None:
        # Backward-compatible default: the notebook-level training arrays.
        real_inputs = inputs

    fake_inputs = G.predict(noise_in)

    # One stacked array per model input: real samples first, generated samples after.
    all_inputs = [np.vstack((real, fake)) for real, fake in zip(real_inputs, fake_inputs)]
    n_real = len(real_inputs[0])
    n_fake = len(fake_inputs[0])
    # Keras expects targets as a NumPy array; a plain Python list would be interpreted
    # as "one array per model output".
    labels = np.concatenate((np.ones(n_real), np.zeros(n_fake)))

    # Random train / held-out split over sample indices. The same indices are applied to
    # every input array and to the labels, so samples and labels stay aligned.
    n_total = n_real + n_fake
    n_test = int(np.ceil(test_size * n_total))
    order = np.random.permutation(n_total)
    test_idx = order[:n_test]
    train_idx = order[n_test:]

    X_train = [arr[train_idx] for arr in all_inputs]
    y_train = labels[train_idx]

    validation_data = None
    if n_test > 0:
        validation_data = ([arr[test_idx] for arr in all_inputs], labels[test_idx])

    D.fit(X_train, y_train, epochs=1, verbose=True, validation_data=validation_data)

# Train the GAN
# Each iteration alternates one discriminator update with one generator update,
# both driven by the same freshly sampled noise batch.
for j in range(10):
    # Single-argument print(...) produces the same text on Python 2 and Python 3.
    print("Iteration: %d" % j)
    # One noise vector (length 15, matching generator_input) per real training sample.
    noise_input = np.random.normal(0, 1, (len(inputs[0]), 15))

    # Discriminator step: real samples vs. samples generated from this noise.
    train_discriminator(myDisc, myGen, noise_input)

    # Generator step: fit the combined model towards the "real" label (1).
    # Targets are a NumPy array: Keras interprets a plain Python list as
    # "one array per model output", which is not what is meant here.
    real_targets = np.ones(len(noise_input))
    myBigModel.fit(noise_input, real_targets, epochs=1, verbose=True)
print("done")

In [None]:
# Call gen_onehot_dict_default on the padded header array; as the last expression of the
# cell, its return value (if any) is shown as the cell output.
# gen_onehot_dict_default is not defined in this notebook -- presumably it comes from the
# star imports (graphTools / layerTools); TODO confirm.
gen_onehot_dict_default(headers_padded)