In [9]:
# math
import numpy as np

# ml
import tensorflow as tf
from keras import layers, models, optimizers
from keras import backend as K
from keras.utils import to_categorical
from capsulelayers import CapsuleLayer, PrimaryCap, Length, Mask
from keras.datasets import mnist
from keras.layers import Input, Dense, Reshape, Flatten, Dropout, Lambda
from keras.layers import BatchNormalization, Activation, ZeroPadding2D
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import UpSampling2D, Conv2D
from keras.models import Sequential, Model
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras import callbacks
from keras.utils.vis_utils import plot_model

# visualization
import matplotlib.pyplot as plt

# aux
import sys
import os
import tqdm

# device check
from tensorflow.python.client import device_lib
print('Devices:', device_lib.list_local_devices())

%matplotlib inline

# GPU check
if not tf.test.gpu_device_name():
    print('No GPU found.')
else:
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))

print('Modules imported.')

Devices: [name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 1846541734458348979
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 231825408
locality {
  bus_id: 1
}
incarnation: 3428406662856406563
physical_device_desc: "device: 0, name: GeForce GTX 660, pci bus id: 0000:01:00.0, compute capability: 3.0"
]
Default GPU Device: /device:GPU:0
Modules imported.


# Defining the graph

In [10]:
# inputs dims since we are working with MNIST dataset
width = 28
height = 28
channels = 1

In [11]:
# Load MNIST data
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Rescale -1 to 1
X_train = (X_train.astype(np.float32) - 127.5) / 127.5
X_train = np.expand_dims(X_train, axis=3)

In [12]:
def squash(vectors, axis=-1):
    """
    The non-linear activation used in Capsule. It drives the length of a large vector to near 1 and small vector to 0
    :param vectors: some vectors to be squashed, N-dim tensor
    :param axis: the axis to squash
    :return: a Tensor with same shape as input vectors
    """
    s_squared_norm = K.sum(K.square(vectors), axis, keepdims=True)
    scale = s_squared_norm / (1 + s_squared_norm) / K.sqrt(s_squared_norm + K.epsilon())
    return scale * vectors

In [37]:
# discriminator structure
def build_discriminator():

        input_shape = [28, 28, 1]
        img = layers.Input(shape=input_shape)
        
        primary_cap = Conv2D(filters=24, kernel_size=5, strides=2, padding='valid', name='primarycap_conv2d')(img)
        primary_cap = Reshape(target_shape=[-1, 8], name='primarycap_reshape')(primary_cap)
        primary_cap = Lambda(squash, name='primarycap_squash')(primary_cap)
        
        # Layer 2: Conv2D layer with `squash` activation, then reshape to [None, num_capsule, dim_vector]
        #primarycaps = PrimaryCap(conv1, dim_vector=8, n_channels=24, kernel_size=9, strides=2, padding='valid')
        # Layer 3: Capsule layer. Routing algorithm works here.
        #digitcaps = CapsuleLayer(num_capsule=8, dim_vector=16, num_routing=3, name='digitcaps')(primary_cap)
        x = layers.Flatten()(primary_cap)
        x = layers.Dense(512, activation='relu')(x)
        x = layers.LeakyReLU(alpha=0.2)(x)
        x = layers.Dense(256)(x)
        x = layers.LeakyReLU(alpha=0.2)(x)
        prediction = layers.Dense(1, activation='sigmoid')(x)
    

        return models.Model(img, prediction)

In [38]:
# generator structure
def build_generator():

        noise_shape = (100,)
        noise = layers.Input(shape=noise_shape)

        x = layers.Dense(256, input_shape=noise_shape)(noise)
        x = layers.LeakyReLU(alpha=0.2)(x)
        x = layers.BatchNormalization(momentum=0.8)(x)
        x = layers.Dense(512)(x)
        x = layers.LeakyReLU(alpha=0.2)(x)
        x = layers.BatchNormalization(momentum=0.8)(x)
        x = layers.Dense(1024)(x)
        x = layers.LeakyReLU(alpha=0.2)(x)
        x = layers.BatchNormalization(momentum=0.8)(x)
        x = layers.Dense(np.prod((28, 28, 1)), activation='tanh')(x)
        gen_out = layers.Reshape((28, 28, 1))(x)

        return Model(noise, gen_out)

In [39]:
# defining an optimizer
optimizer = Adam(0.0002, 0.5)

In [40]:
# build and compile the discriminator
discriminator = build_discriminator()
print('DISCRIMINATOR:')
discriminator.summary()
discriminator.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

DISCRIMINATOR:
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_8 (InputLayer)         (None, 28, 28, 1)         0         
_________________________________________________________________
primarycap_conv2d (Conv2D)   (None, 12, 12, 24)        624       
_________________________________________________________________
primarycap_reshape (Reshape) (None, 432, 8)            0         
_________________________________________________________________
primarycap_squash (Lambda)   (None, 432, 8)            0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 3456)              0         
_________________________________________________________________
dense_15 (Dense)             (None, 512)               1769984   
_________________________________________________________________
leaky_re_lu_11 (LeakyReLU)   (None, 512)               0     

In [41]:
# build and compile the generator
generator = build_generator()
print('GENERATOR:')
generator.summary()
generator.compile(loss='binary_crossentropy', optimizer=optimizer)

GENERATOR:
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_9 (InputLayer)         (None, 100)               0         
_________________________________________________________________
dense_18 (Dense)             (None, 256)               25856     
_________________________________________________________________
leaky_re_lu_13 (LeakyReLU)   (None, 256)               0         
_________________________________________________________________
batch_normalization_7 (Batch (None, 256)               1024      
_________________________________________________________________
dense_19 (Dense)             (None, 512)               131584    
_________________________________________________________________
leaky_re_lu_14 (LeakyReLU)   (None, 512)               0         
_________________________________________________________________
batch_normalization_8 (Batch (None, 512)               2048      

In [42]:
# feeding noise to generator
z = Input(shape=(100,))
img = generator(z)
print(img)

Tensor("model_8/reshape_3/Reshape:0", shape=(?, 28, 28, 1), dtype=float32)


In [43]:
# for the combined model we will only train the generator
discriminator.trainable = False

In [44]:
# try to discriminate generated images
valid = discriminator(img)

In [45]:
# The combined model  (stacked generator and discriminator) takes
# noise as input => generates images => determines validity 
combined = Model(z, valid)
print('COMBINED:')
combined.summary()
combined.compile(loss='binary_crossentropy', optimizer=optimizer)

COMBINED:
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_10 (InputLayer)        (None, 100)               0         
_________________________________________________________________
model_8 (Model)              (None, 28, 28, 1)         1493520   
_________________________________________________________________
model_7 (Model)              (None, 1)                 1902193   
Total params: 3,395,713
Trainable params: 1,489,936
Non-trainable params: 1,905,777
_________________________________________________________________


In [46]:
def train(epochs, batch_size=32, save_interval=50):

        # Load the dataset
        (X_train, _), (_, _) = mnist.load_data()

        # Rescale -1 to 1
        X_train = (X_train.astype(np.float32) - 127.5) / 127.5
        X_train = np.expand_dims(X_train, axis=3)

        half_batch = int(batch_size / 2)

        for epoch in range(epochs):

            # ---------------------
            #  Train Discriminator
            # ---------------------

            # Select a random half batch of images
            idx = np.random.randint(0, X_train.shape[0], 32)
            imgs = X_train[idx]

            noise = np.random.normal(0, 1, (32, 100))

            # Generate a half batch of new images
            gen_imgs = generator.predict(noise)

            # Train the discriminator
            d_loss_real = discriminator.train_on_batch(imgs, np.ones((32, 1)))
            d_loss_fake = discriminator.train_on_batch(gen_imgs, np.zeros((32, 1)))
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)


            # ---------------------
            #  Train Generator
            # ---------------------

            noise = np.random.normal(0, 1, (32, 100))

            # The generator wants the discriminator to label the generated samples
            # as valid (ones)
            valid_y = np.array([1] * 32)

            # Train the generator
            g_loss = combined.train_on_batch(noise, valid_y)

            # Plot the progress
            print ("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100*d_loss[1], g_loss))

            # If at save interval => save generated image samples
            if epoch % save_interval == 0:
                save_imgs(epoch)

In [47]:
def save_imgs(epoch):
        directory = "images"
        
        r, c = 5, 5
        noise = np.random.normal(0, 1, (r * c, 100))
        gen_imgs = generator.predict(noise)

        # Rescale images 0 - 1
        gen_imgs = 0.5 * gen_imgs + 0.5

        fig, axs = plt.subplots(r, c)
        cnt = 0
        for i in range(r):
            for j in range(c):
                axs[i,j].imshow(gen_imgs[cnt, :,:,0], cmap='gray')
                axs[i,j].axis('off')
                cnt += 1
        
        if not os.path.exists(directory):
            os.makedirs(directory)
            
        fig.savefig("images/mnist_%d.png" % epoch)
        plt.close()

In [48]:
history = train(epochs=30000, batch_size=32, save_interval=1000)

  'Discrepancy between trainable weights and collected trainable'


0 [D loss: 0.697092, acc.: 31.25%] [G loss: 0.648412]
1 [D loss: 0.526538, acc.: 67.19%] [G loss: 0.636460]
2 [D loss: 0.432609, acc.: 65.62%] [G loss: 0.625972]
3 [D loss: 0.385533, acc.: 60.94%] [G loss: 0.623277]
4 [D loss: 0.363151, acc.: 68.75%] [G loss: 0.633295]
5 [D loss: 0.354919, acc.: 70.31%] [G loss: 0.660983]
6 [D loss: 0.343038, acc.: 82.81%] [G loss: 0.689124]
7 [D loss: 0.321535, acc.: 93.75%] [G loss: 0.733654]
8 [D loss: 0.312640, acc.: 95.31%] [G loss: 0.770008]
9 [D loss: 0.302951, acc.: 98.44%] [G loss: 0.831546]
10 [D loss: 0.293037, acc.: 96.88%] [G loss: 0.875533]
11 [D loss: 0.266429, acc.: 100.00%] [G loss: 0.951981]
12 [D loss: 0.238264, acc.: 98.44%] [G loss: 1.040798]
13 [D loss: 0.207030, acc.: 100.00%] [G loss: 1.100060]
14 [D loss: 0.200964, acc.: 100.00%] [G loss: 1.165431]
15 [D loss: 0.197183, acc.: 100.00%] [G loss: 1.234227]
16 [D loss: 0.173607, acc.: 100.00%] [G loss: 1.303516]
17 [D loss: 0.160376, acc.: 100.00%] [G loss: 1.390347]
18 [D loss: 0.

147 [D loss: 0.238847, acc.: 93.75%] [G loss: 4.216486]
148 [D loss: 0.207093, acc.: 90.62%] [G loss: 4.540550]
149 [D loss: 0.174738, acc.: 98.44%] [G loss: 3.969253]
150 [D loss: 0.102796, acc.: 98.44%] [G loss: 4.076587]
151 [D loss: 0.122443, acc.: 95.31%] [G loss: 4.674746]
152 [D loss: 0.207153, acc.: 92.19%] [G loss: 5.229479]
153 [D loss: 0.427265, acc.: 81.25%] [G loss: 5.239397]
154 [D loss: 0.173034, acc.: 95.31%] [G loss: 4.539791]
155 [D loss: 0.299379, acc.: 89.06%] [G loss: 4.714831]
156 [D loss: 0.318048, acc.: 85.94%] [G loss: 4.824844]
157 [D loss: 0.282144, acc.: 89.06%] [G loss: 4.768515]
158 [D loss: 0.264083, acc.: 89.06%] [G loss: 4.734094]
159 [D loss: 0.286536, acc.: 87.50%] [G loss: 4.618750]
160 [D loss: 0.215803, acc.: 93.75%] [G loss: 4.338694]
161 [D loss: 0.202287, acc.: 87.50%] [G loss: 4.443261]
162 [D loss: 0.258348, acc.: 90.62%] [G loss: 4.005817]
163 [D loss: 0.172844, acc.: 95.31%] [G loss: 4.116364]
164 [D loss: 0.180020, acc.: 92.19%] [G loss: 4.

KeyboardInterrupt: 

In [26]:
# Select a random half batch of images
idx = np.random.randint(0, X_train.shape[0], 32)
imgs = X_train[idx]

noise = np.random.normal(0, 1, (32, 100))

# Generate a half batch of new images
gen_imgs = generator.predict(noise)

In [27]:
print(gen_imgs.shape)
d_loss_fake = discriminator.train_on_batch(gen_imgs, np.zeros((32, 1)))

(32, 28, 28, 1)


  'Discrepancy between trainable weights and collected trainable'


In [28]:
print(imgs.shape)
d_loss_real = discriminator.train_on_batch(imgs, np.ones((32, 1)))

(32, 28, 28, 1)


  'Discrepancy between trainable weights and collected trainable'


In [29]:
#noise = np.random.normal(0, 1, (32, 100))
#print(noise.shape)

valid_y = np.array([1] * 32)
g_loss = combined.train_on_batch(noise, valid_y)
print(g_loss)

InvalidArgumentError: You must feed a value for placeholder tensor 'input_3' with dtype float and shape [?,28,28,1]
	 [[Node: input_3 = Placeholder[dtype=DT_FLOAT, shape=[?,28,28,1], _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
	 [[Node: model_2/digitcaps/Reshape/_413 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_625_model_2/digitcaps/Reshape", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

Caused by op 'input_3', defined at:
  File "C:\Users\husey_000\Miniconda3\envs\capsule-gans\lib\runpy.py", line 170, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Users\husey_000\Miniconda3\envs\capsule-gans\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Users\husey_000\Miniconda3\envs\capsule-gans\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\Users\husey_000\Miniconda3\envs\capsule-gans\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\Users\husey_000\Miniconda3\envs\capsule-gans\lib\site-packages\ipykernel\kernelapp.py", line 478, in start
    self.io_loop.start()
  File "C:\Users\husey_000\Miniconda3\envs\capsule-gans\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "C:\Users\husey_000\Miniconda3\envs\capsule-gans\lib\site-packages\tornado\ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "C:\Users\husey_000\Miniconda3\envs\capsule-gans\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\husey_000\Miniconda3\envs\capsule-gans\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "C:\Users\husey_000\Miniconda3\envs\capsule-gans\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "C:\Users\husey_000\Miniconda3\envs\capsule-gans\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "C:\Users\husey_000\Miniconda3\envs\capsule-gans\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\husey_000\Miniconda3\envs\capsule-gans\lib\site-packages\ipykernel\kernelbase.py", line 281, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "C:\Users\husey_000\Miniconda3\envs\capsule-gans\lib\site-packages\ipykernel\kernelbase.py", line 232, in dispatch_shell
    handler(stream, idents, msg)
  File "C:\Users\husey_000\Miniconda3\envs\capsule-gans\lib\site-packages\ipykernel\kernelbase.py", line 397, in execute_request
    user_expressions, allow_stdin)
  File "C:\Users\husey_000\Miniconda3\envs\capsule-gans\lib\site-packages\ipykernel\ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Users\husey_000\Miniconda3\envs\capsule-gans\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Users\husey_000\Miniconda3\envs\capsule-gans\lib\site-packages\IPython\core\interactiveshell.py", line 2728, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Users\husey_000\Miniconda3\envs\capsule-gans\lib\site-packages\IPython\core\interactiveshell.py", line 2850, in run_ast_nodes
    if self.run_code(code, result):
  File "C:\Users\husey_000\Miniconda3\envs\capsule-gans\lib\site-packages\IPython\core\interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-13-fc416455af9f>", line 2, in <module>
    discriminator = build_discriminator()
  File "<ipython-input-10-739cbac8895b>", line 5, in build_discriminator
    img = layers.Input(shape=input_shape)
  File "C:\Users\husey_000\Miniconda3\envs\capsule-gans\lib\site-packages\keras\engine\topology.py", line 1439, in Input
    input_tensor=tensor)
  File "C:\Users\husey_000\Miniconda3\envs\capsule-gans\lib\site-packages\keras\legacy\interfaces.py", line 87, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\husey_000\Miniconda3\envs\capsule-gans\lib\site-packages\keras\engine\topology.py", line 1348, in __init__
    name=self.name)
  File "C:\Users\husey_000\Miniconda3\envs\capsule-gans\lib\site-packages\keras\backend\tensorflow_backend.py", line 497, in placeholder
    x = tf.placeholder(dtype, shape=shape, name=name)
  File "C:\Users\husey_000\Miniconda3\envs\capsule-gans\lib\site-packages\tensorflow\python\ops\array_ops.py", line 1599, in placeholder
    return gen_array_ops._placeholder(dtype=dtype, shape=shape, name=name)
  File "C:\Users\husey_000\Miniconda3\envs\capsule-gans\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 3090, in _placeholder
    "Placeholder", dtype=dtype, shape=shape, name=name)
  File "C:\Users\husey_000\Miniconda3\envs\capsule-gans\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "C:\Users\husey_000\Miniconda3\envs\capsule-gans\lib\site-packages\tensorflow\python\framework\ops.py", line 2956, in create_op
    op_def=op_def)
  File "C:\Users\husey_000\Miniconda3\envs\capsule-gans\lib\site-packages\tensorflow\python\framework\ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'input_3' with dtype float and shape [?,28,28,1]
	 [[Node: input_3 = Placeholder[dtype=DT_FLOAT, shape=[?,28,28,1], _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
	 [[Node: model_2/digitcaps/Reshape/_413 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_625_model_2/digitcaps/Reshape", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
