In [1]:
from numpy import load
from numpy import zeros
from numpy import ones
from numpy.random import randint
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.initializers import RandomNormal
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Conv2DTranspose
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Concatenate
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import Flatten
from matplotlib import pyplot
from PIL import Image
import os, sys
from sklearn.model_selection import train_test_split
import numpy

In [2]:
# Define the encoder block based on the original paper
def define_encoder_block(layer, filtersNo, batchnorm=True):
    
    # init weights from a Gaussian distribution with mean 0 and standard deviation 0.02
    init = RandomNormal(stddev=0.02)
    
    # in the original paper, all convolution kernels are (4,4), with stride 2. Stride for decoder means downsampling.
    x = Conv2D(filtersNo, (4,4), strides=(2,2), padding='same', kernel_initializer=init)(layer)
    
    # Conditional batch normalization (important for the first layer)
    if batchnorm:
        x = BatchNormalization()(x, training=True)
        
    # All ReLUs in the encoder are leaky!
    x = LeakyReLU(alpha=0.2)(x)
    
    return x

In [3]:
# Define the decoder block based on the original paper
def decoder_block(layer, skip, filtersNo, dropout=True, batch=True):
    
    # init weights from a Gaussian distribution with mean 0 and standard deviation 0.02
    init = RandomNormal(stddev=0.02)
    
    # in the original paper, all convolution kernels are (4,4), with stride 2. Stride for decoder means upsampling.
    x = Conv2DTranspose(filtersNo, (4,4), strides=(2,2), padding='same', kernel_initializer=init)(layer)

    # All layers in the original paper have batch normalization, although we set an if statement just to play with the model
    if batch:
        x = BatchNormalization()(x, training=True)
        
    # Some decoder layers don't have dropout
    if dropout:
        x = Dropout(0.5)(x, training=True)
        
    # Merge with skip connection
    x = Concatenate()([x, skip])
    
    # All ReLUs in the decoder are not leaky!
    x = Activation('relu')(x)
    
    return x

In [16]:
# Define the generator based on encoder/decoder
def define_generator():
    
    # init weights from a Gaussian distribution with mean 0 and standard deviation 0.02
    init = RandomNormal(stddev=0.02)
    
    # image input
    inputImage = Input(shape=(128,128,1))
    
    ###### Encoder
    
    # C64, input (128,128,1), output (64,64,64)
    encoderLayer1 = define_encoder_block(inputImage, 64, batchnorm=False)
    
    #C128, input (64,64,64), output (32,32,128)
    encoderLayer2 = define_encoder_block(encoderLayer1, 128)
    
    #C256, input (32,32,128), output (16,16,256)
    encoderLayer3 = define_encoder_block(encoderLayer2, 256)
    
    #C512, input (16,16,256), output (8,8,512)
    encoderLayer4 = define_encoder_block(encoderLayer3, 512)
    
    #C512, input (8,8,512), output (4,4,512)
    encoderLayer5 = define_encoder_block(encoderLayer4, 512)
    
    #C512, input (4,4,512), output (2,2,512)
    encoderLayer6 = define_encoder_block(encoderLayer5, 512)
    
    ###### Bottleneck layer, will have an input of (2,2,512) and an output of (1,1,512)
    bottleneck = Conv2D(512, (4,4), strides=(2,2), padding='same', kernel_initializer=init)(encoderLayer6)
    bottleneck = Activation('relu')(bottleneck)
    
    ###### Decoder, with skip connection
    
    #CD512
    decoderLayer1 = decoder_block(bottleneck, encoderLayer6, 512)
    
    #CD512
    decoderLayer2 = decoder_block(decoderLayer1, encoderLayer5, 512)
    
    #C512
    decoderLayer3 = decoder_block(decoderLayer2, encoderLayer4, 512, dropout=False)
    
    #C256
    decoderLayer4 = decoder_block(decoderLayer3, encoderLayer3, 256, dropout=False)
    
    #C128
    decoderLayer5 = decoder_block(decoderLayer4, encoderLayer2, 128, dropout=False)
    
    #C64
    decoderLayer6 = decoder_block(decoderLayer5, encoderLayer1, 64, dropout=False)
    
    # Output with tanh function, as mentioned in the original paper. Output will be (128x128x3)
    g = Conv2DTranspose(3, (4,4), strides=(2,2), padding='same', kernel_initializer=init)(decoderLayer6)
    outputImage = Activation('tanh')(g)
    
    # Define model
    model = Model(inputImage, outputImage)
    
    return model

In [5]:
# Define the 70x70 discriminator as in the original paper
def define_discriminator():
    
    # init weights from a Gaussian distribution with mean 0 and standard deviation 0.02
    init = RandomNormal(stddev=0.02)
    
    # source image input
    source = Input(shape=(128,128,1))
    
    # target image input
    target = Input(shape=(128,128,3))
    
    # concatenate images channel-wise
    merged = Concatenate()([source, target])
    
    # C64
    d = Conv2D(64, (4,4), strides=(2,2), padding='same', kernel_initializer=init)(merged)
    d = LeakyReLU(alpha=0.2)(d)
    
    # C128
    d = Conv2D(128, (4,4), strides=(2,2), padding='same', kernel_initializer=init)(d)
    d = BatchNormalization()(d)
    d = LeakyReLU(alpha=0.2)(d)
    
    # C256
    d = Conv2D(256, (4,4), strides=(2,2), padding='same', kernel_initializer=init)(d)
    d = BatchNormalization()(d)
    d = LeakyReLU(alpha=0.2)(d)
    
    # C512
    d = Conv2D(512, (4,4), strides=(2,2), padding='same', kernel_initializer=init)(d)
    d = BatchNormalization()(d)
    d = LeakyReLU(alpha=0.2)(d)
    
    # patch output
    d = Conv2D(1, (8,8), strides=(8,8), padding='same', kernel_initializer=init)(d)
    patch_out = Activation('sigmoid')(d)
    patch_out = Flatten()(patch_out)
    
    # define model
    model = Model([source, target], patch_out)
    
    # compile model
    opt = Adam(lr=0.0002, beta_1=0.5)
    model.compile(loss='binary_crossentropy', optimizer=opt, loss_weights=[0.5])
    return model

In [6]:
# Define Pix2Pix GAN
def pix2pix(generator, discriminator):
    
    # make weights in the discriminator not trainable
    for layer in discriminator.layers:
        if not isinstance(layer, BatchNormalization):
            layer.trainable = False
            
    # define the source image
    source = Input(shape=(128,128,1))
    
    # connect the source image to the generator input
    genOut = generator(source)
    
    # connect the source input and generator output to the discriminator input
    disOut = discriminator([source, genOut])
    
    # src image as input, generated image and classification output
    model = Model(source, [disOut, genOut])
    
    # compile model
    opt = Adam(lr=0.0002, beta_1=0.5)
    model.compile(loss=['binary_crossentropy', 'mae'], optimizer=opt, loss_weights=[1,100])
    
    return model

In [7]:
def generateFromDataset(trainX, trainY, samples):
    
    # Choose random images from both input and output
    no = randint(0, trainX.shape[0], samples)
    gx, gy = trainX[no], trainY[no]
    
    # Set y-labels to 1, as these images are from dataset
    y = ones((samples, 1))
    return [gx, gy], y

In [8]:
def generateFromGenerator(generator, samples):
    # Generate fake instance
    x = generator.predict(samples)
    
    # Labels will be zero because they come from the generator
    y = zeros((len(x), 1))
    
    return x, y

In [9]:
def summarize_performance(step, g_model, trainX, trainY, n_samples=3):
    # select a sample of input images
    [X_realA, X_realB], _ = generateFromDataset(trainX, trainY, n_samples)
    
    # generate a batch of fake samples
    X_fakeB, _ = generateFromGenerator(g_model, X_realA)
    
    # scale all pixels from [-1,1] to [0,1]
    #X_realA = (X_realA + 1) / 2.0
    #X_realB = (X_realB + 1) / 2.0
    #X_fakeB = (X_fakeB + 1) / 2.0
    
    # plot real source images
    for i in range(n_samples):
        pyplot.subplot(3, n_samples, 1 + i)
        pyplot.axis('off')
        pyplot.imshow(X_realA[i])
    # plot generated target image
    for i in range(n_samples):
        pyplot.subplot(3, n_samples, 1 + n_samples + i)
        pyplot.axis('off')
        pyplot.imshow(X_fakeB[i])
    # plot real target image
    for i in range(n_samples):
        pyplot.subplot(3, n_samples, 1 + n_samples*2 + i)
        pyplot.axis('off')
        pyplot.imshow(X_realB[i])
    # save plot to file
    filename1 = 'plot_%06d.png' % (step+1)
    pyplot.savefig(filename1)
    pyplot.close()
    # save the generator model
    filename2 = 'model_%06d.h5' % (step+1)
    g_model.save(filename2)
    print('>Saved: %s and %s' % (filename1, filename2))

In [23]:
def train(discriminator, generator, gan, epochs=100000, samplesPerEpoch=250):
    
    # Load the data
    bwData = load("Flickr8kblackandwhite1dim.npy")
    colorData = load("flickr8k_shuffled.npy")
    y = numpy.ones((8091,1))
    
    BWSplit = numpy.array_split(bwData, 2)
    colorSplit = numpy.array_split(colorData, 2)
    
    # manually enumerate epochs
    for i in range(epochs):
        
        # Generate real samples
        [realX, realY], realLabel = generateFromDataset(BWSplit[0], colorSplit[0], samplesPerEpoch)
        
        # Generate fake samples
        fakeY, fakeLabel = generateFromGenerator(generator, realX)
        
        # Update discriminator on real samples
        realLoss = discriminator.train_on_batch([realX, realY], realLabel)
        
        # Update discriminator on fake samples
        fakeLoss = discriminator.train_on_batch([realX, fakeY], fakeLabel)
        
        # Update generator
        generatorLoss, _, _ = gan.train_on_batch(realX, [realLabel, realY])
        
        # summarize performance
        print('>%d, d1[%.3f] d2[%.3f] g[%.3f]' % (i+1, realLoss, fakeLoss, generatorLoss))
        if i*samplesPerEpoch % 2500 == 0:
            summarize_performance(i, generator, BWSplit[0], colorSplit[0])

In [21]:
d = define_discriminator()
g = define_generator()
p2p = pix2pix(g,d)

In [None]:
train(d,g,p2p)

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


>1, d1[0.006] d2[0.001] g[14.429]
>Saved: plot_000001.png and model_000001.h5
>2, d1[0.005] d2[0.008] g[13.714]
>3, d1[0.004] d2[0.007] g[10.899]
>4, d1[0.003] d2[0.011] g[10.149]
>5, d1[0.003] d2[0.010] g[10.146]
>6, d1[0.003] d2[0.030] g[9.923]
>7, d1[0.005] d2[0.022] g[9.728]
>8, d1[0.003] d2[0.008] g[9.675]
>9, d1[0.006] d2[0.001] g[9.487]
>10, d1[0.005] d2[0.015] g[9.695]


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


>11, d1[0.002] d2[0.088] g[9.441]
>Saved: plot_000011.png and model_000011.h5
>12, d1[0.006] d2[0.001] g[9.503]
>13, d1[0.006] d2[0.011] g[9.354]
>14, d1[0.011] d2[0.000] g[9.436]
>15, d1[0.007] d2[0.001] g[9.441]
>16, d1[0.006] d2[0.001] g[9.008]
>17, d1[0.004] d2[0.081] g[9.307]
>18, d1[0.008] d2[0.000] g[8.824]
>19, d1[0.015] d2[0.000] g[9.347]
>20, d1[0.005] d2[0.001] g[9.460]


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


>21, d1[0.005] d2[0.001] g[13.215]
>Saved: plot_000021.png and model_000021.h5
>22, d1[0.002] d2[0.000] g[17.613]
>23, d1[0.003] d2[0.000] g[16.984]
>24, d1[0.002] d2[0.000] g[12.467]
>25, d1[0.002] d2[0.001] g[13.487]
>26, d1[0.002] d2[0.023] g[9.874]
>27, d1[0.002] d2[0.002] g[9.391]
>28, d1[0.003] d2[0.035] g[9.592]
>29, d1[0.004] d2[0.222] g[9.496]
>30, d1[0.069] d2[0.008] g[9.996]


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


>31, d1[0.826] d2[0.000] g[19.382]


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


>Saved: plot_000031.png and model_000031.h5
>32, d1[5.263] d2[0.095] g[10.240]
>33, d1[1.001] d2[2.065] g[9.222]
>34, d1[0.244] d2[0.988] g[11.169]
>35, d1[1.724] d2[0.586] g[10.411]
>36, d1[1.202] d2[1.093] g[10.289]
>37, d1[0.836] d2[0.853] g[9.970]
>38, d1[0.785] d2[0.470] g[9.556]
>39, d1[0.336] d2[0.464] g[9.543]
>40, d1[0.303] d2[0.494] g[9.478]


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


>41, d1[0.354] d2[0.337] g[9.466]
>Saved: plot_000041.png and model_000041.h5
>42, d1[0.324] d2[0.257] g[9.408]
>43, d1[0.209] d2[0.249] g[9.238]
>44, d1[0.201] d2[0.104] g[9.716]
>45, d1[0.604] d2[0.330] g[8.715]
>46, d1[0.052] d2[0.741] g[9.172]
>47, d1[0.376] d2[0.284] g[8.896]
>48, d1[0.117] d2[0.086] g[8.732]
>49, d1[0.034] d2[0.222] g[9.017]
>50, d1[0.076] d2[0.068] g[9.239]


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


>51, d1[0.060] d2[0.025] g[8.532]
>Saved: plot_000051.png and model_000051.h5
>52, d1[0.031] d2[0.048] g[8.817]
>53, d1[0.010] d2[0.062] g[8.711]
>54, d1[0.016] d2[0.082] g[8.652]
>55, d1[0.027] d2[0.021] g[8.640]
>56, d1[0.017] d2[0.023] g[8.606]
>57, d1[0.016] d2[0.012] g[8.649]
>58, d1[0.009] d2[0.010] g[8.519]
>59, d1[0.005] d2[0.003] g[8.409]
>60, d1[0.008] d2[0.050] g[8.973]


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


>61, d1[0.007] d2[0.021] g[8.656]
>Saved: plot_000061.png and model_000061.h5
>62, d1[0.008] d2[0.020] g[8.617]
>63, d1[0.012] d2[0.053] g[8.829]
>64, d1[0.009] d2[0.009] g[8.456]
>65, d1[0.008] d2[0.008] g[8.626]
>66, d1[0.010] d2[0.002] g[8.185]
>67, d1[0.006] d2[0.028] g[8.641]
>68, d1[0.007] d2[0.005] g[8.672]
>69, d1[0.005] d2[0.001] g[10.371]
>70, d1[0.003] d2[0.003] g[13.125]


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


>71, d1[0.004] d2[0.003] g[11.060]
>Saved: plot_000071.png and model_000071.h5
>72, d1[0.003] d2[0.004] g[12.221]
>73, d1[0.002] d2[0.044] g[10.038]
>74, d1[0.005] d2[0.009] g[11.716]
>75, d1[0.005] d2[0.030] g[9.367]
>76, d1[0.005] d2[0.014] g[9.088]
>77, d1[0.007] d2[0.003] g[8.621]
>78, d1[0.003] d2[0.011] g[8.576]
>79, d1[0.004] d2[0.005] g[8.584]
>80, d1[0.004] d2[0.003] g[8.454]


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


>81, d1[0.004] d2[0.002] g[8.602]
>Saved: plot_000081.png and model_000081.h5
>82, d1[0.002] d2[0.021] g[8.485]
>83, d1[0.004] d2[0.084] g[8.307]
>84, d1[0.013] d2[0.001] g[8.260]
>85, d1[0.010] d2[0.008] g[8.485]
>86, d1[0.009] d2[0.002] g[9.326]
>87, d1[0.005] d2[0.000] g[13.670]
>88, d1[0.004] d2[0.001] g[10.173]
>89, d1[0.003] d2[0.002] g[13.736]
>90, d1[0.003] d2[0.003] g[12.899]


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


>91, d1[0.004] d2[0.004] g[9.480]
>Saved: plot_000091.png and model_000091.h5
>92, d1[0.002] d2[0.012] g[9.029]
>93, d1[0.002] d2[0.001] g[8.694]
>94, d1[0.002] d2[0.010] g[8.672]
>95, d1[0.002] d2[0.001] g[8.331]
>96, d1[0.002] d2[0.001] g[8.352]
>97, d1[0.002] d2[0.013] g[8.461]
>98, d1[0.002] d2[0.001] g[8.090]
>99, d1[0.002] d2[0.002] g[8.218]
>100, d1[0.003] d2[0.004] g[8.364]


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


>101, d1[0.001] d2[0.005] g[8.151]
>Saved: plot_000101.png and model_000101.h5
>102, d1[0.002] d2[0.003] g[8.360]
>103, d1[0.003] d2[0.006] g[8.181]
>104, d1[0.002] d2[0.001] g[8.018]
>105, d1[0.002] d2[0.001] g[8.406]
>106, d1[0.002] d2[0.001] g[7.948]
>107, d1[0.002] d2[0.003] g[8.218]
>108, d1[0.002] d2[0.001] g[7.974]
>109, d1[0.002] d2[0.001] g[8.115]
>110, d1[0.001] d2[0.009] g[7.965]


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


>111, d1[0.001] d2[0.001] g[7.920]
>Saved: plot_000111.png and model_000111.h5
>112, d1[0.002] d2[0.003] g[8.306]
>113, d1[0.001] d2[0.001] g[8.178]
>114, d1[0.001] d2[0.001] g[7.881]
>115, d1[0.002] d2[0.000] g[8.348]
>116, d1[0.001] d2[0.001] g[8.172]
>117, d1[0.001] d2[0.001] g[8.084]
>118, d1[0.001] d2[0.004] g[7.694]
>119, d1[0.001] d2[0.001] g[8.215]
>120, d1[0.001] d2[0.001] g[9.635]


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


>121, d1[0.001] d2[0.002] g[9.812]
>Saved: plot_000121.png and model_000121.h5
>122, d1[0.001] d2[0.001] g[10.066]
>123, d1[0.001] d2[0.003] g[9.178]
>124, d1[0.001] d2[0.002] g[10.739]
>125, d1[0.001] d2[0.000] g[9.668]
>126, d1[0.001] d2[0.005] g[9.000]
>127, d1[0.001] d2[0.006] g[8.424]
>128, d1[0.001] d2[0.001] g[8.375]
>129, d1[0.001] d2[0.002] g[8.039]
>130, d1[0.001] d2[0.008] g[8.014]


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


>131, d1[0.001] d2[0.001] g[8.037]
>Saved: plot_000131.png and model_000131.h5
>132, d1[0.001] d2[0.000] g[8.074]
>133, d1[0.001] d2[0.003] g[8.247]
>134, d1[0.001] d2[0.002] g[7.801]
>135, d1[0.001] d2[0.009] g[7.881]
>136, d1[0.001] d2[0.002] g[7.944]
>137, d1[0.001] d2[0.013] g[7.743]
>138, d1[0.002] d2[0.000] g[7.881]
>139, d1[0.001] d2[0.002] g[8.212]
>140, d1[0.002] d2[0.001] g[8.074]


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


>141, d1[0.001] d2[0.000] g[7.916]
