In [0]:
import os

# Reference

* https://github.com/Kajiyu/dnc-py3.git
* https://github.com/Mostafa-Samir/DNC-tensorflow.git

# Source Code

In [0]:
!git clone https://github.com/Kajiyu/dnc-py3.git

Cloning into 'dnc-py3'...
remote: Enumerating objects: 317, done.[K
remote: Total 317 (delta 0), reused 0 (delta 0), pack-reused 317[K
Receiving objects: 100% (317/317), 43.27 MiB | 22.85 MiB/s, done.
Resolving deltas: 100% (197/197), done.
Checking out files: 100% (115/115), done.


In [0]:
!git clone https://github.com/Mostafa-Samir/DNC-tensorflow.git

Cloning into 'DNC-tensorflow'...
remote: Enumerating objects: 641, done.[K
remote: Total 641 (delta 0), reused 0 (delta 0), pack-reused 641[K
Receiving objects: 100% (641/641), 89.78 MiB | 28.35 MiB/s, done.
Resolving deltas: 100% (317/317), done.
Checking out files: 100% (56/56), done.


In [0]:
!mv ./DNC-tensorflow/tasks/copy/checkpoints ./dnc-py3/tasks/copy/

In [0]:
print(next(os.walk("./dnc-py3/tasks/copy/checkpoints"))[1])

['model-series-2', 'model-series-4', 'model-single-10']


# Copy Train Series

In [0]:
import os
import sys
import itertools
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.gridspec as gridspec
from mpl_toolkits.axes_grid1 import make_axes_locatable

sys.path.append(os.path.join(".","dnc-py3"))
from dnc.dnc import DNC

sys.path.append(os.path.join(".","dnc-py3","tasks","copy"))
from feedforward_controller import FeedforwardController

%matplotlib inline
plt.rcParams['figure.figsize'] = (10, 15)

In [0]:
def llprint(message):
    sys.stdout.write(message)
    sys.stdout.flush()

In [0]:
def generate_data(batch_size, length, size):
    """
    batch_size
    """
    input_data = np.zeros((batch_size, 2 * length + 1, size), dtype=np.float32)
    target_output = np.zeros((batch_size, 2 * length + 1, size), dtype=np.float32)

    sequence = np.random.binomial(1, 0.5, (batch_size, length, size - 1))

    input_data[:, :length, :size - 1] = sequence
    input_data[:, length, -1] = 1  # the end symbol
    target_output[:, length + 1:, :size - 1] = sequence

    return input_data, target_output

In [0]:
inputs, targets = generate_data(2, 1, 4)
print(inputs, inputs.shape)

[[[1. 0. 0. 0.]
  [0. 0. 0. 1.]
  [0. 0. 0. 0.]]

 [[1. 1. 0. 0.]
  [0. 0. 0. 1.]
  [0. 0. 0. 0.]]] (2, 3, 4)


In [0]:
def binary_cross_entropy(predictions, targets):

    return tf.reduce_mean(
        -1 * targets * tf.log(predictions) - (1 - targets) * tf.log(1 - predictions)
    )

## Starting Training

In [0]:
print(next(os.walk(os.path.join(".","dnc-py3","tasks","copy","checkpoints")))[1])

['model-series-2', 'model-series-4', 'model-single-10']


In [0]:
!mv "./dnc-py3/tasks/copy/checkpoints" "./dnc-py3/tasks/copy/checkpoints_repo" 

In [0]:
!mkdir "./dnc-py3/tasks/copy/checkpoints"

In [0]:
dirname = os.path.join(".","dnc-py3","tasks","copy")
ckpts_dir = os.path.join(dirname , 'checkpoints')
tb_logs_dir = os.path.join(dirname, 'logs')

batch_size = 10
input_size = output_size = 6
series_length = 4
sequence_max_length = 44
words_count = 10
word_size = 10
read_heads = 1

learning_rate = 1e-4
momentum = 0.9

from_checkpoint = None
iterations = 10000
start_step = 0

graph = tf.Graph()

with graph.as_default():
    with tf.Session(graph=graph) as session:

        llprint("Building Computational Graph ... ")

        optimizer = tf.train.RMSPropOptimizer(learning_rate, momentum=momentum)
        summerizer = tf.summary.FileWriter(tb_logs_dir, session.graph)

        ncomputer = DNC(
            FeedforwardController,
            input_size,
            output_size,
            sequence_max_length,
            words_count,
            word_size,
            read_heads,
            batch_size
        )

        output, _ = ncomputer.get_outputs()
        squashed_output = tf.clip_by_value(tf.sigmoid(output), 1e-6, 1. - 1e-6)

        loss = binary_cross_entropy(squashed_output, ncomputer.target_output)

        summeries = []

        gradients = optimizer.compute_gradients(loss)
        for i, (grad, var) in enumerate(gradients):
            if grad is not None:
                summeries.append(tf.summary.histogram(var.name + '/grad', grad))
                gradients[i] = (tf.clip_by_value(grad, -10, 10), var)

        apply_gradients = optimizer.apply_gradients(gradients)

        summeries.append(tf.summary.scalar("Loss", loss))

        summerize_op = tf.summary.merge(summeries)
        no_summerize = tf.no_op()

        llprint("Done!\n")

        llprint("Initializing Variables ... ")
        session.run(tf.initialize_all_variables())
        llprint("Done!\n")

        if from_checkpoint is not None:
            llprint("Restoring Checkpoint %s ... " % (from_checkpoint))
            ncomputer.restore(session, ckpts_dir, from_checkpoint)
            llprint("Done!\n")


        last_100_losses = []

        start = 0 if start_step == 0 else start_step + 1
        end = start_step + iterations + 1

        for i in range(start, end):
            llprint("\rIteration %d/%d" % (i, end - 1))

            input_series = []
            output_series = []

            for k in range(series_length):
                input_data, target_output = generate_data(batch_size, 5, input_size)
                input_series.append(input_data)
                output_series.append(target_output)

            one_big_input = np.concatenate(input_series, axis=1)
            one_big_output = np.concatenate(output_series, axis=1)

            summerize = (i % 100 == 0)
            take_checkpoint = (i != 0) and (i % iterations == 0)

            loss_value, _, summary = session.run([
                loss,
                apply_gradients,
                summerize_op
            ], feed_dict={
                ncomputer.input_data: one_big_input,
                ncomputer.target_output: one_big_output,
                ncomputer.sequence_length: sequence_max_length,
                ncomputer.input_mode: np.zeros((batch_size, sequence_max_length, output_size))
            })

            last_100_losses.append(loss_value)
            
            #if summary is not None:
            #  summerizer.add_summary(summary, i)

            if summerize:
                llprint("\n\tAvg. Logistic Loss: %.4f\n" % (np.mean(last_100_losses)))
                last_100_losses = []

            if take_checkpoint:
                llprint("\nSaving Checkpoint ... "),
                ncomputer.save(session, ckpts_dir, 'step-%d' % (i))
                llprint("Done!\n")

Building Computational Graph ... INFO:tensorflow:Summary name controller/layer1_W:0/grad is illegal; using controller/layer1_W_0/grad instead.
INFO:tensorflow:Summary name controller/layer2_W:0/grad is illegal; using controller/layer2_W_0/grad instead.
INFO:tensorflow:Summary name controller/layer1_b:0/grad is illegal; using controller/layer1_b_0/grad instead.
INFO:tensorflow:Summary name controller/layer2_b:0/grad is illegal; using controller/layer2_b_0/grad instead.
INFO:tensorflow:Summary name controller/interface_weights:0/grad is illegal; using controller/interface_weights_0/grad instead.
INFO:tensorflow:Summary name controller/nn_output_weights:0/grad is illegal; using controller/nn_output_weights_0/grad instead.
INFO:tensorflow:Summary name controller/mem_output_weights:0/grad is illegal; using controller/mem_output_weights_0/grad instead.
Done!
Initializing Variables ... Done!
Iteration 0/10000
	Avg. Logistic Loss: 0.7090
Iteration 100/10000
	Avg. Logistic Loss: 0.6401
Iteratio

In [0]:
print(next(os.walk(ckpts_dir))[1])

['step-10000', 'step-1000']


In [0]:
print(next(os.walk(os.path.join(ckpts_dir, "step-10000")))[2])

['model.ckpt.meta', 'model.ckpt.index', 'checkpoint', 'model.ckpt.data-00000-of-00001']


In [0]:
os.path.join(ckpts_dir, "step-10000")

'./dnc-py3/tasks/copy/checkpoints/step-10000'

In [0]:
!zip step-10000 ./dnc-py3/tasks/copy/checkpoints/step-10000/*

  adding: dnc-py3/tasks/copy/checkpoints/step-10000/checkpoint (deflated 42%)
  adding: dnc-py3/tasks/copy/checkpoints/step-10000/model.ckpt.data-00000-of-00001 (deflated 7%)
  adding: dnc-py3/tasks/copy/checkpoints/step-10000/model.ckpt.index (deflated 32%)
  adding: dnc-py3/tasks/copy/checkpoints/step-10000/model.ckpt.meta (deflated 91%)
