In [2]:
import model as model_arch
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import io
import h5py
# Apply seaborn's default theme to every matplotlib figure drawn below.
sns.set()

In [3]:
# Dimensions of the toy problem consumed by generate_dataset.
N = 10000               # number of generated samples
T = 15                  # timesteps per sample
m = (2, 4, 6, 8, 10)    # per feature: trailing timesteps that influence the label

In [None]:
# Build the RETAIN_LSTM wrapper from the project-local `model` module, with
# one input feature per entry of `m`.
# NOTE(review): lstm_sizes / fc_sizes are passed as string literals rather
# than lists — presumably parsed inside model_arch; confirm.
model = model_arch.RETAIN_LSTM(n_feat=len(m),
                              Wemb_size=30,
                              lstm_sizes="[15, 15]",
                              fc_sizes="[30]",
                              l1=1e-3)

In [None]:
# Show the TensorFlow version and whether at least one GPU is visible.
# tf.test.is_gpu_available() is deprecated; its deprecation notice points to
# tf.config.list_physical_devices('GPU') as the replacement.
tf.__version__, bool(tf.config.list_physical_devices('GPU'))

In [4]:
def generate_dataset(N, T, m, seed=None):
    """Generate a synthetic binary-classification set of random sequences.

    Every sample is a ``(T, len(m))`` array of standard-normal draws. The
    label depends only on the trailing ``m[i]`` timesteps of feature ``i``:
    those values are summed over time and features, a small noise term
    (standard normal / 100) is added, and the label is 1 when the total is
    positive and 0 otherwise. The returned inputs are the *unmasked* draws,
    so the timesteps that do not influence the label are still present.

    Parameters
    ----------
    N : int
        Number of samples.
    T : int
        Number of timesteps per sample.
    m : sequence of int
        One entry per feature: how many trailing timesteps of that feature
        influence the label (0 means the feature is ignored entirely).
    seed : int, optional
        Seed for a private ``np.random.RandomState``. With the default
        ``None`` the global ``np.random`` state is used.

    Returns
    -------
    x : np.ndarray
        Inputs, shape ``(N, T, len(m))``, dtype float32.
    y : np.ndarray
        Labels, shape ``(N, 1)``, dtype float64, every value is 0. or 1.
    """
    # Both the np.random module and RandomState expose .normal with the same
    # signature, so the draw order is identical in either mode.
    rng = np.random if seed is None else np.random.RandomState(seed)
    n_feat = len(m)
    mu, sigma = 0, 1

    x = rng.normal(mu, sigma, N * T * n_feat).astype(np.float32).reshape((N, T, n_feat))
    z = rng.normal(mu, sigma, N) / 100

    # Work on a copy so the caller receives the full, unmasked sequences.
    masked = np.copy(x)
    for i, mi in enumerate(m):
        # Zero everything except the last `mi` steps. An explicit stop index
        # is used because the slice `:-mi` selects nothing when mi == 0 and
        # would leave the whole series in place.
        masked[:, :max(T - mi, 0), i] = 0

    score = masked.sum(axis=1).sum(axis=1) + z
    # sigmoid(score) > 0.5 is equivalent to score > 0. Thresholding the raw
    # score avoids labels stuck at 0.5 when the sigmoid rounds to exactly 0.5
    # and avoids overflow inside exp() for large magnitudes.
    y = (score > 0).astype(np.float64)

    return x, y.reshape((-1, 1))

In [5]:
# Draw two independent sets with the same dimensions: (x, y) is used for
# fitting and (xt, yt) is passed as validation data to fit() further down.
x, y = generate_dataset(N=N, T=T,m=m)
xt, yt = generate_dataset(N=N, T=T, m=m)
# Display the shapes of the training inputs and labels.
x.shape, y.shape

((10000, 15, 5), (10000, 1))

In [6]:
# Create (or overwrite) the HDF5 toy-set file and populate it.
# NOTE(review): no visible cell fills this file or defines `t`, yet the cells
# below read `t` and fd['Names']; opening with 'w' truncates the file, so a
# fresh Restart & Run All fails. The two datasets are reconstructed from the
# recorded outputs ('Y' with shape (N, 1) float64, 'Names' holding
# b'datapoint: <i>') — confirm against the original intent.
fd = h5py.File('./data/toyset.hdf5', 'w')
fd.create_dataset('Names', data=np.array([b'datapoint: %d' % i for i in range(N)]))
t = fd.create_dataset('Y', data=y)

In [7]:
# Display the dataset handle bound to `t`; the recorded output shows it is the file's "Y" dataset.
t

<HDF5 dataset "Y": shape (10000, 1), type "<f8">

In [8]:
# Read a single entry of the 'Names' dataset by index (a bytes string, per the recorded output).
fd['Names'][10]

b'datapoint: 10'

In [9]:
# Copy the whole 'Names' dataset into a preallocated NumPy buffer.
Names = fd['Names']
dtype = Names.dtype
# Size the buffer from the dataset itself rather than from the global N, so
# the read still works if the file was written with a different sample count.
names_copy = np.empty(Names.shape, dtype=dtype)

# read_direct fills the destination array in place.
Names.read_direct(names_copy)

names_copy[3]

b'datapoint: 3'

In [10]:
# Close the HDF5 file handle opened above.
fd.close()

In [None]:
# Configure training: Adam with learning rate 1e-3, binary cross-entropy
# loss, and binary accuracy as the reported metric.
model.model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-3), 
                    loss=keras.losses.binary_crossentropy,
                    metrics=[keras.metrics.binary_accuracy]
                   )

In [None]:
# Print the summary of the underlying model object that was compiled above.
model.model.summary()

In [None]:
# Every run logs into its own timestamped directory.
log_dir="logs/fit/test_dataset/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# Save the weights (not the full model) under a per-epoch, zero-padded file
# name; the weight-loading cell further down relies on this naming pattern.
cp_callback = tf.keras.callbacks.ModelCheckpoint(log_dir + "/checkpoints/cp-{epoch:04d}.ckpt",
                                                 save_weights_only=True,
                                                 verbose=0)

# TensorBoard logging with weight histograms every epoch; profile_batch=0
# turns the profiler off.
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, 
                                                  histogram_freq=1, 
                                                  write_images=True, 
                                                  profile_batch=0)
# Summary writer on the same log directory; passed to save_images later on.
file_writer = tf.summary.create_file_writer(log_dir)

# Train silently (verbose=0) for 10 epochs, validating on (xt, yt).
history = model.model.fit(x,y,
               validation_data=(xt, yt),
               verbose=0,
               batch_size=256,
               epochs=10,
               callbacks=[tensorboard_callback, cp_callback])

In [None]:
# Show the epoch indices recorded on the object returned by fit().
history.epoch

In [None]:
# Fresh sample used for inspecting and evaluating the trained model.
xp, yp = generate_dataset(N=1000, T=T, m=m)

contribution = model.get_contribution_coefficients(xp)
# The loop variables must not be called `x`: at notebook level that name holds
# the training inputs, and rebinding it to a single sample would break any
# later re-run of the fit cell.
for coeff_i, sample_i in zip(contribution[:3], xp[:3]):
    # plt.matshow opens its own figure, so no plt.figure() call is needed
    # (one placed before it would only leave an empty figure in the output).
    plt.matshow(np.abs(coeff_i * sample_i).T, cmap='hot')
    plt.colorbar()

In [None]:
# Average the contribution coefficients over axis 0 and transpose, so that
# iterating over `c` pairs one row with each entry of `m` below.
# NOTE(review): assumes axis 0 of `contribution` is the sample axis, as the
# per-sample zip with xp in the previous cell suggests — confirm.
c = contribution.mean(axis=0).T
plt.matshow(np.abs(c), cmap='hot')
plt.colorbar()
# Second figure: one line per row of `c`, labelled with the matching m value.
plt.figure()
for mi,ci in zip(m,c):
    plt.plot(ci, label='m=%d' % mi)
# Place the legend outside the axes, to the right.
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)

In [None]:
# Evaluate the compiled loss/metric on the (xp, yp) sample with the current weights.
model.model.evaluate(xp, yp)

In [None]:
# Restore the weights saved by the checkpoint callback for the chosen epoch;
# 10 equals the `epochs` value passed to fit(), and the file name follows the
# same "cp-{epoch:04d}.ckpt" pattern given to ModelCheckpoint.
epoch = 10
model.model.load_weights(log_dir + f"/checkpoints/cp-{epoch:04d}.ckpt")

In [None]:
# Evaluate again on (xp, yp), now with the weights loaded from the checkpoint.
model.model.evaluate(xp, yp)

In [None]:
def plot_to_image(figure):
    """Convert a matplotlib figure into a batched PNG image tensor and close it.

    Parameters
    ----------
    figure : matplotlib.figure.Figure
        The figure to render. It is closed afterwards and cannot be reused.

    Returns
    -------
    tf.Tensor
        The PNG-decoded image with 4 channels and a leading batch axis of
        size 1, i.e. shape ``(1, height, width, 4)``.
    """
    buf = io.BytesIO()
    # Save the figure that was passed in. plt.savefig() would save whichever
    # figure happens to be *current*, which is not necessarily `figure`.
    figure.savefig(buf, format='png')
    plt.close(figure)
    buf.seek(0)
    image = tf.image.decode_png(buf.getvalue(), channels=4)
    # Add the leading batch axis.
    image = tf.expand_dims(image, 0)
    return image

def _draw_heatmap_panel(position, title, values, n_labels):
    """Draw `values` transposed as a 'hot' heat map in slot `position` of a 1x2 grid."""
    ax = plt.subplot(1, 2, position, title=title)
    plt.xlabel('T, time')
    plt.yticks([])
    im = plt.imshow(values.T, cmap='hot')
    ax.set_aspect(4)
    # One text label per heat-map row, placed to the left of the axes.
    for j in range(n_labels):
        plt.annotate('label %d' % j, xy=(0, j), xytext=(-9, j), fontsize=10)
    plt.colorbar(im, fraction=0.026, pad=0.04)
    return ax


def image(c, x):
    """Plot contribution coefficients and total contributions side by side.

    The left panel shows ``c`` and the right panel shows the element-wise
    product ``c * x``. Both are drawn transposed, so axis 0 of the inputs
    runs along the horizontal axis (labelled 'T, time') and each index of
    axis 1 becomes one labelled row.

    Parameters
    ----------
    c : array-like, 2-D
        Contribution coefficients.
    x : array-like
        Input values; must be multipliable element-wise with ``c``.

    Returns
    -------
    matplotlib.figure.Figure
        The newly created figure, which is left as the current figure.
    """
    figure = plt.figure(figsize=(30, 10))
    # Both panels are labelled by the second axis of `c`.
    n_labels = c.shape[1]
    _draw_heatmap_panel(1, 'Contribution coefficient', c, n_labels)
    _draw_heatmap_panel(2, 'Total contribution', c * x, n_labels)
    return figure

def save_images(C, xp, writer):
    """Write one image summary per (coefficients, sample) pair.

    Each pair taken from ``zip(C, xp)`` is plotted with ``image``, turned
    into an image tensor with ``plot_to_image`` and logged through ``writer``
    under the tag 'datapoint : <index>' at step 0.
    """
    with writer.as_default():
        for idx, pair in enumerate(zip(C, xp)):
            coeffs, sample = pair
            fig = image(coeffs, sample)
            encoded = plot_to_image(fig)
            tf.summary.image('datapoint : %d' % idx, encoded, step=0)

In [None]:
# Log figures for the first 30 samples into the run's TensorBoard directory.
save_images(contribution[:30], xp[:30], file_writer)

In [None]:
# Try the plotting helper on random (90, 13) inputs; assigning to `_` keeps
# the returned figure object from being echoed as the cell's output value.
_ = image(np.random.random((90,13)), np.random.random((90,13)))

In [None]:
# True when the TensorFlow build's git version string does not contain 'v1'.
# `not in` is the idiomatic form of the membership test (same result as
# `not 'v1' in ...`).
'v1' not in tf.version.GIT_VERSION