In [11]:
import os, time
import pickle
import sys

import numpy
import numpy as np
import skimage
import skimage.io
import skimage.transform
import tensorflow as tf
import torchfile
from matplotlib import pyplot as plt


In [4]:
class Settings(object):
    """
    Central, easily accessible container for all settings of the training process.

    Attributes:
        restore:        Whether weights should be restored from a previous run
        epochs:         Number of training epochs
        optimizer:      Optimizer instance (a default-constructed tf.train.AdamOptimizer)
        batch_size:     Number of samples in a batch
        run:            Identifier of the run; used to name the output folders
        momentum:       Momentum used for the Adam optimizer (passed as beta1)
        lr:             Learning rate
        data_path:      Path to the data folder
        data_set:       File name of the data set that should be loaded
        keep_prop:      Keep probability for the dropout layers
        show_captions:  Whether generated images are stored together with their text descriptions


    """
    restore    = False
    epochs     = 1000
    # NOTE(review): the GAN class in this file builds its own Adam optimizers from
    # `lr` and `momentum`; this attribute does not appear to be read there — confirm
    # whether it is still needed.
    optimizer  = tf.train.AdamOptimizer()
    batch_size = 64
    run        = 3000
    momentum   = 0.5
    lr         = 0.0002
    data_path  = "/data"
    data_set   = "flower_embeddings_custom.npy"
    keep_prop  = 0.5
    show_captions = True

In [5]:
# Print iterations progress from https://stackoverflow.com/questions/3173320/text-progress-bar-in-the-console
def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 1, length = 100, fill = '█'):
    """
    Call in a loop to create terminal progress bar
    @params:
        iteration   - Required  : current iteration (Int)
        total       - Required  : total iterations (Int)
        prefix      - Optional  : prefix string (Str)
        suffix      - Optional  : suffix string (Str)
        decimals    - Optional  : positive number of decimals in percent complete (Int)
        length      - Optional  : character length of bar (Int)
        fill        - Optional  : bar fill character (Str)

    A `total` of 0 is rendered as a completed bar instead of raising
    ZeroDivisionError (there is nothing to iterate over, so the work is done).
    """
    if total:
        percent_value = 100 * (iteration / float(total))
        filledLength = int(length * iteration // total)
    else:
        # Nothing to do: show a full bar at 100 %.
        percent_value = 100.0
        filledLength = length

    percent = ("{0:." + str(decimals) + "f}").format(percent_value)
    bar = fill * filledLength + '-' * (length - filledLength)
    # '\r' at both ends keeps the cursor on the same line so the next call overwrites the bar.
    print('\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix), end = '\r')
    # Print New Line on Complete
    if iteration == total:
        print()

In [6]:
class DataLoader:
    '''
    Loads the data set which is specified in the Settings.

    This class indexes every row of the loaded data as follows:
        row[0] : embeddings of the captions (one entry per caption)
        row[1] : image file path, relative to `directory`
        row[2] : image matrix (filled by load_binaries or get_batches)
        row[3] : text captions

    Attributes:
        directory        : Directory of the folder where the data is stored
        image_size       : Edge length to which all images of the data set are resized
        load_into_memory : Boolean that determines whether all images of the data set are loaded
                           into working memory at construction (speeds up get_batches, but is
                           not feasible for very large data sets)
        data             : The rows returned by load_binaries()
    '''

    def __init__(self, directory, image_size=64, load_into_memory=False):
        '''
        Constructor. Stores the configuration and loads the data set.

        @param:
            directory        : folder that contains the data set file and the images
            image_size       : desired edge length of all images in a batch
            load_into_memory : whether the resized images should be loaded into memory up front
        '''
        print("## BUILD DataLoader ##")
        self.load_into_memory = load_into_memory
        self.directory = directory
        self.image_size = image_size
        self.data = self.load_binaries()
        print(np.shape(self.data))
        print("Nr. Trainingsamples: " + str(np.shape(self.data)[0]))

    def load_binaries(self):
        '''
        Loads the file `Settings.data_set` from `self.directory`.

        If `load_into_memory` is set, every image referenced in column 1 is read,
        resized and stored in column 2 of its row.

        @returns:
            data : the loaded array

        Prints an error and exits (SystemExit) when a FileNotFoundError occurs.
        '''
        data_file = self.directory + "/" + Settings.data_set
        try:
            print("[Info] loading " + data_file)
            # NOTE(review): the rows hold arrays and strings, so this is presumably an
            # object array; recent NumPy versions require allow_pickle=True to load
            # those — confirm against the NumPy version in use.
            data = np.load(data_file)
            if self.load_into_memory:
                nr_images = np.shape(data)[0]
                print("[Attention] Loading "+str(nr_images)+" images with dimensions "+str(self.image_size)+"x"+str(self.image_size)+"x3 into memory!!")
                printProgressBar(0, nr_images, prefix = 'Progress:', suffix = 'Complete', length = 50)
                for i, row in enumerate(data):
                    row[2] = self.load_image(str(row[1]))
                    printProgressBar(i + 1, nr_images, prefix = 'Progress:', suffix = 'Complete', length = 50)
            return data
        except FileNotFoundError:
            print("[Error] "+ data_file + " not found!")
            sys.exit()

    def _load_from_torchfiles(self):
        '''
        Alternative loader that builds the data from the raw `flowers_icml` torch files.

        Currently not called anywhere in this class.
        NOTE(review): the rows built here have three entries
        [caption, img_file, img_matrix], whereas get_batches also reads row[3] —
        confirm the layout before wiring this in.

        @returns:
            data : list of [caption, img_file, img_matrix] rows
        '''
        data = []
        with open(self.directory + '/flowers_icml/trainvalclasses.txt') as class_file:
            train_classes = [line.strip() for line in class_file.readlines()]

        # go through all training classes to extract the captions
        for c in train_classes:
            # all files within the folder corresponding to the class/category
            for file_name in os.listdir(self.directory + '/flowers_icml/' + c):
                # only the t7 files contain the information about the images
                if file_name.endswith(".t7"):
                    _tmp = torchfile.load(self.directory + '/flowers_icml/' + c + "/" + file_name)
                    caption = _tmp.txt
                    img_file = _tmp.img
                    img_matrix = self.load_image(str(img_file))
                    data.append([caption, img_file, img_matrix])

        print(np.shape(data))
        print("return binaries")
        return data

    def load_image(self, image_file):
        '''
        Reads an image and resizes it to image_size x image_size.

        @param:
            image_file : path of the image, relative to self.directory
        @returns:
            img : the resized image as returned by skimage.transform.resize
        '''
        img = skimage.io.imread(self.directory + "/" + image_file)
        # GRAYSCALE: replicate the single channel three times to get an RGB-shaped array
        if len(img.shape) == 2:
            print("GrayScale Image -- " + image_file + "\n")
            img_new = np.ndarray( (img.shape[0], img.shape[1], 3), dtype = 'uint8')
            img_new[:,:,0] = img
            img_new[:,:,1] = img
            img_new[:,:,2] = img
            img = img_new

        img = skimage.transform.resize(img, (self.image_size, self.image_size),mode='reflect')
        return img

    def get_batches(self, batch_size):
        '''
        Generator that yields randomly composed batches with embeddings, captions and two
        types of images. Only full batches are produced; self.data itself is not modified.

        @param:
            batch_size     : number of samples per batch (values <= 0 mean "all samples")
        @returns (per yielded batch, in this order):
            real_embeddings: the text descriptions in embedded form
            real_images    : images that go along with the text captions
            wrong_images   : images drawn from an independent shuffle of the data set
            text_captions  : the text descriptions belonging to real_embeddings
        '''
        # total number of samples
        n = np.shape(self.data)[0]
        if n == 0:
            # nothing to batch (also avoids dividing by a batch size of 0 below)
            return
        if batch_size <= 0:
            batch_size = n

        all_rows = np.asarray(self.data)

        # shuffle the data to get random batches; fancy indexing returns copies,
        # so the assignments below never touch self.data
        random_indeces = np.random.choice(n, n, replace = False)
        data_pool = all_rows[random_indeces, :]

        wrong_random_indices = np.random.choice(n, n, replace = False)
        wrong_data_pool = all_rows[wrong_random_indices, :]

        # create the batches
        for i in range(n // batch_size):
            on = i * batch_size
            off = on + batch_size

            wrong_batch = wrong_data_pool[on:off]
            batch = data_pool[on:off]

            for w_entry, entry in zip(wrong_batch, batch):

                if not self.load_into_memory:
                    # Images were not pre-loaded: read them now and store them in the
                    # image column (2), which is the column the batch is assembled from.
                    w_entry[2] = self.load_image(str(w_entry[1]))
                    entry[2] = self.load_image(str(entry[1]))

                # pick one random caption (among at most the first five) and keep its
                # embedding and its text
                indecies_random_captions = np.random.choice(min(len(entry[0]), 5), 1, replace = False)
                entry[0] = entry[0][indecies_random_captions]
                entry[3] = entry[3][indecies_random_captions[0]]

            real_embeddings = np.array([row_embeddings[0] for row_embeddings in batch[:,0]])
            text_captions = np.array([caption for caption in batch[:,3]])

            real_images = np.array([image for image in batch[:,2]])
            wrong_images = np.array([image for image in wrong_batch[:,2]])

            yield real_embeddings, real_images, wrong_images, text_captions


In [7]:
# Start time for the (currently commented-out) data loader timing code below.
t_test = time.time()


# data_loader = DataLoader("data", load_into_memory=True)
# gen = data_loader.get_batches(10)
# real_embeddings, real_images, wrong_images, captions = next(gen)
# print(np.shape(real_embeddings))
# print(np.shape(real_images))
# print(np.shape(wrong_images))
# print(np.shape(captions))

# print(np.shape(real_embeddings[0]))



# t_test = time.time() - t_test
# minutes_t, seconds_t = divmod(t_test, 60)
# print(("-- Loaded Binaries in {0: .0f}m{1: .2f}s.").format( minutes_t,seconds_t))
# t_test = time.time()
# idx = 1

# t_total = time.time()

# t_test = time.time()
# for real_embeddings, real_images, wrong_images, captions in data_loader.get_batches(64):
#     t_test = time.time() - t_test
#     minutes_t, seconds_t = divmod(t_test, 60)
#     print(("-- Finished Batch #{0} in {1: .0f}m{2: .2f}s.").format(idx +1, minutes_t,seconds_t))
#     t_test = time.time()
#     idx += 1

    
# t_total = time.time() - t_total
# minutes_t, seconds_t = divmod(t_total, 60)
   
# print(("-- Finished Epoch in {0: .0f}m{1: .2f}s.").format(minutes_t,seconds_t))

# t_test = time.time()
# for real_embeddings, real_images, wrong_images, captions in data_loader.get_batches(64):
#     t_test = time.time() - t_test
#     minutes_t, seconds_t = divmod(t_test, 60)
#     print(("-- Finished Batch #{0} in {1: .0f}m{2: .2f}s.").format(idx +1, minutes_t,seconds_t))
#     t_test = time.time()
#     idx += 1

    
# t_total = time.time() - t_total
# minutes_t, seconds_t = divmod(t_total, 60)
   
# print(("-- Finished Epoch in {0: .0f}m{1: .2f}s.").format(minutes_t,seconds_t))
  

# Generative Adversarial Network

In [19]:
################################################################
##### Generative Adversarial Network  ##########################
################################################################

class GAN(object):
    """
    This class creates a tensorflow graph for a Generative Adversarial Network conditioned on text embeddings.

    In order to run all nodes in the tensorflow graph the following tensors must be fed:
    self.ist_training           : Boolean which indicates whether the network is in training mode or not; batch normalization
                                  uses it to choose between batch statistics and stored population statistics

    self.z_vector               : Noise vector that is provided as input to the generator
    self.text_embeddings        : Text embeddings that are provided as input to the Generator

    self.text_embeddings_real   : Text embeddings provided to the Discriminator together with correct images
    self.real_images            : Correct images presented with the real text embeddings

    self.text_embeddings_wrong  : Embeddings from the training data that are coupled up with images that do not belong to the embeddings
    self.wrong_images           : Images from the training data, that are presented to the Discriminator together with wrong embeddings

    """
    def __init__(self):
        """
            Constructor. Creates a dedicated tf.Graph (self.graph) and builds the network inside it.
        """
        print("\n###################################")
        print("#           Build Network          #")
        print("###################################\n")
        
        
        self.graph = tf.Graph()
        with self.graph.as_default(): 
            self.build_graph()
    

    def build_graph(self):
        '''
        Calls the build Generator function. Concatenates the Generator and Discriminator by feeding the images
        generated by the Generator into the Discriminator. Calls the build Discriminator function. Defines two
        separate loss functions, one for the Discriminator and one for the Generator, and two separate optimizer objects.
    
        '''
        self.ist_training = tf.placeholder(tf.bool, None)
        
        ######### Embedding placeholders ########
        self.text_embeddings = tf.placeholder(tf.float32,shape=[None,1024], name="text_embeddings")
        self.text_embeddings_real = tf.placeholder(tf.float32,shape=[None,1024], name="real_text_embeddings")
        self.text_embeddings_wrong = tf.placeholder(tf.float32,shape=[None,1024], name="wrong_text_embeddings")
        
        ###### Build Generator (needs self.text_embeddings, so it must come after the placeholders) ####
        self.build_generator()
        
        ####### Placeholders for images ####
        # NOTE(review): self.real_images / self.wrong_images hold the batch_norm OUTPUT, not the
        # placeholder itself. train() feeds these attributes through feed_dict, which presumably
        # replaces the normalized tensor with the raw images — confirm that this is intended.
        with tf.variable_scope("real_images"):
            self.real_images = batch_norm(tf.placeholder(tf.float32, shape=[Settings.batch_size,64, 64, 3], name="real_images"),[0,1,2], self.ist_training)
            
        with tf.variable_scope("wrong_images"):
            self.wrong_images = batch_norm(tf.placeholder(tf.float32,shape=[Settings.batch_size,64, 64, 3], name="wrong_images"), [0,1,2], self.ist_training)
        
        # The same discriminator (shared variables via reuse=True) scores three kinds of pairs:
        # real image + matching text, wrong image + text, generated image + text.
        dis_real_logits, dis_real_sigmoid   = self.discriminator(self.real_images, self.text_embeddings_real)
        dis_wrong_logits, dis_wrong_sigmoid = self.discriminator(self.wrong_images, self.text_embeddings_wrong, reuse = True)
        dis_fake_logits, dis_fake_image     = self.discriminator(self.generated_images, self.text_embeddings, reuse = True)
               
        ###### Build losses ######
        # Alternative formulation based on the logits (currently disabled):
#         gen_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=dis_fake_logits, labels=tf.ones_like(dis_fake_logits)))
        
#         dis_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=dis_real_logits, labels=tf.ones_like(dis_real_logits)))
#         dis_loss_wrong = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=dis_wrong_logits, labels=tf.zeros_like(dis_wrong_logits)))
#         dis_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=dis_fake_logits, labels=tf.zeros_like(dis_fake_logits)))
        
#         dis_loss = dis_loss_real + ((dis_loss_wrong + dis_loss_fake) * 0.5)
        
        #### Active formulation ####
        # The losses are computed on the batch mean of the sigmoid outputs
        # (log of the mean, not mean of the log).
        real_score  = tf.reduce_mean(dis_real_sigmoid)
        wrong_score = tf.reduce_mean(dis_wrong_sigmoid)
        fake_score  = tf.reduce_mean(dis_fake_image )
        dis_loss = -((tf.log(real_score) + (tf.log(1 - wrong_score) + tf.log(1 - fake_score)) / 2))
        gen_loss = -tf.log(fake_score)
  
        self.dis_summary = tf.summary.scalar('Discriminator_Loss',dis_loss)
        self.gen_summary = tf.summary.scalar('Generator_Loss',gen_loss)
        
        ########################## variable lists #########################################
        # Variables are assigned to a network by substring match on their name
        # (all discriminator scopes contain "dis", all generator scopes contain "gen").
        trainable_variables = tf.trainable_variables()
        dis_variables = [var for var in trainable_variables if "dis" in var.name]
        gen_variables = [var for var in trainable_variables if "gen" in var.name]

        
        ############################# Optimizer #############
        with tf.variable_scope("optimizer"):
            self.dis_optimizer = tf.train.AdamOptimizer(Settings.lr, beta1=Settings.momentum).minimize(dis_loss, var_list=dis_variables)
            self.gen_optimizer = tf.train.AdamOptimizer(Settings.lr, beta1=Settings.momentum).minimize(gen_loss, var_list=gen_variables)

        
        
    ###################################################
    ######## BUILD GENERATOR ##########################
    def build_generator(self):
        '''Builds the Generator
    
        First it reduces the given text embedding using a feed forward layer with 128 nodes. Then it concatenates the reduced
        embeddings with the noise vector z and reshapes the result to 1024 feature maps of size 4x4. Then it upsamples the size of
        the feature maps to 8x8, then 16x16, then 32x32 and then to 64x64. Each layer uses batch normalization and
        stride 2 deconvolution. Dropout is applied to layer two and four.

        The result is stored in self.generated_images.
    
        '''
        
       ######## Inputs ##########
        self.z_vector = tf.placeholder(tf.float32, shape=[None,100], name="z")
        with tf.variable_scope("gen_reduce_embeddings"):
            # 1024 -> 128; concatenated with the 100-dimensional z this gives 228 inputs
            reduced_embeddings=feedforward_layer(self.text_embeddings,[1024,128],activation=lrelu, is_training=self.ist_training)
            concat_input=tf.concat([self.z_vector,reduced_embeddings],1)
        
        
        ##### layer 1: 228 -> 4x4x1024 ######
        with tf.variable_scope("gen_first_layer"):
            drive_first=feedforward_layer(concat_input,[228,4*4*1024], is_training=self.ist_training)
            reshaped_first=tf.reshape(drive_first,[Settings.batch_size,4,4,1024])
            output_first=tf.nn.relu(batch_norm(reshaped_first,[0,1,2], self.ist_training))
       
        #### layer 2: 4x4x1024 -> 8x8x512 ####
        with tf.variable_scope("gen_second_layer"):
            # open question from the authors: which kernel size??
            # signature: deconv_layer(input, target_shape, filter, strides, padding, bias_init, norm_axes=[0,1,2], normalize=False, activation=None)
            # deconv_layer only reads the first two entries (kernel height/width) of the filter
            # list; the channel counts are taken from target_shape and the input.
            d_second_layer = deconv_layer(output_first,[Settings.batch_size,8,8,512],[5,5,512,1024],[1,2,2,1],"SAME",normalize=True,activation=tf.nn.relu, is_training=self.ist_training)
            # dropout is applied unconditionally (it is not gated by self.ist_training)
            second_layer=tf.nn.dropout(d_second_layer,Settings.keep_prop)
        
        ##### layer 3: 8x8x512 -> 16x16x256 #####
        with tf.variable_scope("gen_third_layer"):
            third_layer = deconv_layer(second_layer,[Settings.batch_size,16,16,256],[5,5,256,512], [1,2,2,1],"SAME", normalize=True, activation=tf.nn.relu, is_training=self.ist_training)
        
        ###### layer 4: 16x16x256 -> 32x32x128 ######
        with tf.variable_scope("gen_fourth_layer"):
            d_fourth_layer=deconv_layer(third_layer,[Settings.batch_size,32,32,128],[5,5,128,256], [1,2,2,1],"SAME", normalize=True, activation=tf.nn.relu, is_training=self.ist_training)
            fourth_layer=tf.nn.dropout(d_fourth_layer,Settings.keep_prop)
        
        ##### layer 5: 32x32x128 -> 64x64x3 #####
        with tf.variable_scope("gen_fifth_layer"):
            fifth_layer=deconv_layer(fourth_layer,[Settings.batch_size,64,64,3],[5,5,3,128], [1,2,2,1],"SAME",normalize=True, activation=tf.nn.tanh, is_training=self.ist_training)
            # CAUTION: the output activation was changed to tanh
        # map the tanh output from [-1, 1] to [0, 1]
        self.generated_images = (fifth_layer + 1) * 0.5
        #return self.generated_images
                                

    def discriminator(self, discriminator_images, text_embeddings, reuse=False):
        '''
        Discriminator receives a batch of pictures of the same type as input, e.g. a batch of generated images, a batch of real images OR
        a batch of wrong images. After the fourth layer a batch of text embeddings is provided as information to the discriminator. The
        discriminator outputs the probability of the given images being real and fitting to the provided text embeddings.
    
        @param:
            discriminator_images: batch of images
            text_embeddings     : batch of text_embeddings
            reuse               : Boolean which determines if the variable_scope will be reused or not
                            (needed because we are calling the same discriminator three times with the three types of data)
        @return:
            state_6             : logits of the last layer
            tf.sigmoid(state_6) : sigmoidal activation of the last layer
        '''
        
        ############ inputs ############
        with tf.variable_scope("dis_batch_norm", reuse=reuse):
            input_images = batch_norm(discriminator_images, [0,1,2], self.ist_training)
        # CAUTION: batch norm on the input images was added later
        
        
        with tf.variable_scope("dis_first_layer", reuse=reuse):
            # in: 64x64x3 -> out: 32x32x64
            state_1 = conv2d_layer(input_images, filter=[5,5,3,64], strides=[1,2,2,1], padding="SAME", normalize=True, activation=lrelu, is_training=self.ist_training)
        with tf.variable_scope("dis_second_layer", reuse=reuse):
            # out: 16x16x32
            state_2 = conv2d_layer(state_1, filter=[5,5,64,32], strides=[1,2,2,1], padding="SAME", normalize=True, activation=lrelu, is_training=self.ist_training)
        with tf.variable_scope("dis_third_layer", reuse=reuse):
            # out: 8x8x16
            state_3 = conv2d_layer(state_2, filter=[5,5,32,16], strides=[1,2,2,1], padding="SAME", normalize=True, activation=lrelu,  is_training=self.ist_training)
        with tf.variable_scope("dis_fourth_layer", reuse=reuse):
            # out: 4x4x4
            state_4 = conv2d_layer(state_3, filter=[5,5,16,4], strides=[1,2,2,1], padding="SAME", normalize=True, activation=lrelu,  is_training=self.ist_training)

        ####  ADD TEXT EMBEDDINGS TO NETWORK  ####
        with tf.variable_scope("dis_reduce_embeddings", reuse=reuse):
            # 1024 -> 128, then expand to [batch, 1, 1, 128] and tile to [batch, 4, 4, 128]
            reduced_embeddings = feedforward_layer(text_embeddings, [1024,128], activation=tf.nn.relu,  is_training=self.ist_training)
            reduced_embeddings = tf.expand_dims(reduced_embeddings,1)
            reduced_embeddings = tf.expand_dims(reduced_embeddings,2)
            tiled_embeddings = tf.tile(reduced_embeddings, [1,4,4,1], name='tile_embeddings')
        
        ### CONCAT TEXT EMBEDDINGS AND STATE_4  ###
        
        with tf.variable_scope("dis_concat_layer", reuse=reuse):
            # 4 image channels + 128 embedding channels = 132 channels
            state_4_concat = tf.concat([state_4, tiled_embeddings], 3)
            
        with tf.variable_scope("dis_fifth_layer", reuse=reuse):
            # 1x1 convolution: 4x4x132 -> 4x4x4
            state_5 = conv2d_layer(state_4_concat, filter=[1,1,132,4], strides=[1,1,1,1], padding="SAME", normalize=True, activation=tf.nn.relu, is_training=self.ist_training)
            # open question from the authors: normalize here?? or afterwards
            # flatten 4x4x4 to 64 values per sample
            state5_flat = tf.reshape(state_5, [Settings.batch_size, -1])
            # open question from the authors: or here??
        with tf.variable_scope("dis_sixth_layer", reuse=reuse):
            #TODO PUT ACTIVATION FUNCTION BACK
            state_6 = feedforward_layer(state5_flat, [64, 1], norm_axes=[0], normalize=False, activation=None, is_training=self.ist_training)
        
        # overwritten on every call; after build_graph it refers to the last call (generated images)
        self.dis_out = state_6
        
        return state_6, tf.nn.sigmoid(state_6)
    
        
    

def conv2d_layer(input, filter, strides, padding, bias_init=0.0, norm_axes=[0,1,2], normalize=False, activation=None, is_training=None):
    """
    2-D convolution with optional batch normalization and activation.

    Creates the variables 'weights' (shape `filter`) and 'biases' (shape `filter[-1]`)
    in the current variable scope.

    @param:
        input       : input tensor of the convolution
        filter      : shape of the convolution kernel
        strides     : strides passed to tf.nn.conv2d
        padding     : padding passed to tf.nn.conv2d
        bias_init   : constant the biases are initialized with
        norm_axes   : axes the batch normalization moments are computed over
        normalize   : whether batch normalization is applied
        activation  : activation function, or None for a linear output
        is_training : boolean tensor handed to batch_norm
    @return:
        the (optionally normalized and activated) convolution output
    """
    # The weight standard deviation is derived from input.shape[1] * input.shape[2].
    spatial_size = int(input.shape[1] * input.shape[2])

    # relu and lrelu get a different standard deviation than every other activation
    if activation == tf.nn.relu or activation == lrelu:
        weight_stddev = 2/spatial_size
    else:
        weight_stddev = spatial_size**(-1/2)
    weight_init = tf.random_normal_initializer(stddev = weight_stddev)

    kernel = tf.get_variable('weights', filter, initializer=weight_init)
    bias = tf.get_variable('biases', filter[-1], initializer=tf.constant_initializer(bias_init))

    pre_activation = tf.nn.conv2d(input, kernel, strides, padding) + bias
    if normalize:
        pre_activation = batch_norm(pre_activation, norm_axes, is_training)

    if activation is None:
        return pre_activation
    return activation(pre_activation)

def deconv_layer(input, target_shape, filter, strides, padding, bias_init=0.0, norm_axes=[0,1,2], normalize=False, activation=None, is_training=None):
    """
    Transposed 2-D convolution with optional batch normalization and activation.

    Creates the variables 'weights' and 'biases' in the current variable scope. Only the
    first two entries of `filter` (kernel height and width) are used; the channel
    dimensions of the kernel come from `target_shape[-1]` and the last input dimension.

    @param:
        input        : input tensor
        target_shape : output shape passed to tf.nn.conv2d_transpose
        filter       : kernel shape; only filter[0] and filter[1] are read
        strides      : strides passed to tf.nn.conv2d_transpose
        padding      : padding passed to tf.nn.conv2d_transpose
        bias_init    : constant the biases are initialized with
        norm_axes    : axes the batch normalization moments are computed over
        normalize    : whether batch normalization is applied
        activation   : activation function, or None for a linear output
        is_training  : boolean tensor handed to batch_norm
    @return:
        the (optionally normalized and activated) layer output
    """
    in_channels = input.shape[-1]
    out_channels = target_shape[-1]
    # The weight standard deviation is derived from input.shape[1] * input.shape[2].
    spatial_size = int(input.shape[1] * input.shape[2])

    # relu and lrelu get a different standard deviation than every other activation
    if activation == tf.nn.relu or activation == lrelu:
        weight_stddev = 2/spatial_size
    else:
        weight_stddev = spatial_size**(-1/2)
    weight_init = tf.random_normal_initializer(stddev = weight_stddev)

    kernel_shape = [filter[0], filter[1], out_channels, in_channels]
    kernel = tf.get_variable('weights', kernel_shape, initializer=weight_init)
    bias = tf.get_variable('biases', out_channels, initializer=tf.constant_initializer(bias_init))

    print(str(np.shape(input)) + "input shape")
    pre_activation = tf.nn.conv2d_transpose(input, kernel, target_shape, strides, padding) + bias
    if normalize:
        pre_activation = batch_norm(pre_activation, norm_axes, is_training)

    if activation is None:
        return pre_activation
    return activation(pre_activation)

def feedforward_layer(input, weights, bias_init=0.0, norm_axes=[0], normalize=False, activation=None, is_training=None):
    """
    Fully connected layer with optional batch normalization and activation.

    Creates the variables 'weights' (shape `weights`) and 'biases' (shape `weights[-1]`)
    in the current variable scope.

    @param:
        input       : input tensor; multiplied with the weight matrix via tf.matmul
        weights     : shape of the weight matrix
        bias_init   : constant the biases are initialized with
        norm_axes   : axes the batch normalization moments are computed over
        normalize   : whether batch normalization is applied
        activation  : activation function, or None for a linear output
        is_training : boolean tensor handed to batch_norm
    @return:
        the (optionally normalized and activated) layer output
    """
    n_inputs = int(input.shape[-1])

    # relu and lrelu get a different standard deviation than every other activation
    if activation == tf.nn.relu or activation == lrelu:
        weight_stddev = 2/n_inputs
    else:
        weight_stddev = n_inputs**(-1/2)
    weight_init = tf.random_normal_initializer(stddev = weight_stddev)

    weight_matrix = tf.get_variable('weights', weights, tf.float32, weight_init)
    bias = tf.get_variable('biases', weights[-1], initializer=tf.constant_initializer(bias_init))

    pre_activation = tf.matmul(input, weight_matrix) + bias
    if normalize:
        pre_activation = batch_norm(pre_activation, norm_axes, is_training)

    if activation is None:
        return pre_activation
    return activation(pre_activation)

def flatten(x):
    """
    Reshapes x to two dimensions, keeping the first dimension and merging all others.

    @param:
        x : tensor to flatten
    @return:
        x reshaped to [-1, product of x.shape[1:]]
    """
    trailing_dims = x.shape[1:]
    flat_width = int(np.prod(trailing_dims))
    target_shape = [-1, flat_width]
    return tf.reshape(x, target_shape)

# def batch_norm(inp,norm_axes):
#     depth = inp.shape[-1]
#     epsilon = 1e-6
#     mean, var = tf.nn.moments(inp, norm_axes)
#     offset = tf.get_variable('offset1', depth, dtype=tf.float32, initializer=tf.constant_initializer(0.0))
#     scale = tf.get_variable('scale1', depth, dtype=tf.float32, initializer=tf.constant_initializer(1.0))
#     state = tf.nn.batch_normalization(inp, mean, var, offset, scale, epsilon)
#     return state
def _pop_batch_norm(x, pop_mean, pop_var, offset, scale):
    """
    Normalizes x with the stored population statistics (the non-training branch of batch_norm).

    @return:
        x normalized with pop_mean / pop_var, then scaled and shifted by scale / offset
    """
    epsilon = 1e-6
    normalized = tf.nn.batch_normalization(x, pop_mean, pop_var, offset, scale, epsilon)
    return normalized

def _batch_norm(x, pop_mean, pop_var, mean, var, offset, scale):
    """
    Normalizes x with the statistics of the current batch and updates the population statistics.

    The population mean and variance are updated as exponential moving averages with a
    decay of 0.99. The updates are attached as control dependencies, so they run whenever
    the normalized output is evaluated.

    @param:
        x        : tensor to normalize
        pop_mean : variable holding the population mean (updated here)
        pop_var  : variable holding the population variance (updated here)
        mean     : mean of the current batch
        var      : variance of the current batch
        offset   : offset applied after normalization
        scale    : scale applied after normalization
    @return:
        x normalized with the batch statistics
    """
    decay = 0.99
    # Build each moving-average expression exactly once and reuse it in the assignment.
    new_pop_mean = pop_mean * decay + mean * (1 - decay)
    new_pop_var = pop_var * decay + var * (1 - decay)
    update_pop_mean = tf.assign(pop_mean, new_pop_mean)
    update_pop_var = tf.assign(pop_var, new_pop_var)

    with tf.control_dependencies([update_pop_mean, update_pop_var]):
        return tf.nn.batch_normalization(x, mean, var, offset, scale, 1e-6)

def batch_norm(x, axes, is_training):
    """
    Batch normalization that switches between batch and population statistics.

    Creates the variables 'offset', 'scale', 'pop_mean' and 'pop_var' in the current
    variable scope; 'pop_mean' and 'pop_var' are not trainable.

    @param:
        x           : tensor to normalize
        axes        : axes the batch moments are computed over
        is_training : boolean tensor; True normalizes with the batch statistics and updates
                      the population statistics, False normalizes with the population statistics
    @return:
        the normalized tensor
    """
    channels = x.shape[-1]
    batch_mean, batch_var = tf.nn.moments(x, axes = axes)

    offset = tf.get_variable("offset", channels, tf.float32, tf.constant_initializer(0.0))
    scale = tf.get_variable("scale", channels, tf.float32, tf.constant_initializer(1.0))

    pop_mean = tf.get_variable("pop_mean", channels, initializer = tf.zeros_initializer(), trainable = False)
    pop_var = tf.get_variable("pop_var", channels, initializer = tf.ones_initializer(), trainable = False)

    def normalize_with_batch_statistics():
        return _batch_norm(x, pop_mean, pop_var, batch_mean, batch_var, offset, scale)

    def normalize_with_population_statistics():
        return _pop_batch_norm(x, pop_mean, pop_var, offset, scale)

    return tf.cond(is_training, normalize_with_batch_statistics, normalize_with_population_statistics)

def lrelu(x, alpha=0.2):
    """
    Leaky ReLU: x for positive inputs, alpha * x for negative inputs.

    @param:
        x     : input tensor
        alpha : slope applied to negative inputs
    """
    positive_part = tf.nn.relu(x)
    negative_magnitude = tf.nn.relu(-x)
    return positive_part - alpha * negative_magnitude

In [18]:
def train(net,gen):
    '''
    Runs the tensorflow graph and evaluates the summary nodes, the optimizer nodes and the generated pictures node. Stores
    the generated pictures and the weights of the session after each epoch.
    @param:
        net: the network object; its graph (net.graph) is run in the session
        gen: the data generator; must provide get_batches(batch_size)

        Settings.epochs       : number of epochs
        Settings.restore      : Boolean which determines if weights are restored from the folder of run (Settings.run - 1)
        Settings.show_captions: Boolean which determines if generated images are stored with or without captions
    @raises:
        ValueError: if gen.get_batches yields no batch in an epoch

    '''
    run_id = str(Settings.run)
    picture_dir = "./generated_pictures/" + run_id
    weights_dir = "./store_weights/" + run_id

    ###### create folders for generated images and checkpoints #######
    os.makedirs(picture_dir, exist_ok=True)
    os.makedirs(weights_dir, exist_ok=True)

    ######### create writers for Tensorboard ################
    # Log the graph of the network itself; tf.get_default_graph() is only the same graph
    # if the caller explicitly made net.graph the default.
    discrimiator_writer = tf.summary.FileWriter('./summary_test/' + run_id + '/discriminator', net.graph)
    generator_writer = tf.summary.FileWriter('./summary_test/' + run_id + '/generator')

    print("\n###################################")
    print("#          Start Training         #")
    print("###################################\n")

    try:
        ######################## SESSION ############################
        with tf.Session(graph=net.graph) as session:
            saver = tf.train.Saver()
            last_real_images = np.zeros([Settings.batch_size,64,64,3])
            to_visualize = np.ndarray(shape=(42,64,64,3))

            ############ RESTORE PARAMETERS IF WANTED #########################
            if Settings.restore:
                saver.restore(session, tf.train.latest_checkpoint('./store_weights/'+str(Settings.run-1)))
            else:
                session.run(tf.global_variables_initializer())

            ############# Training #############
            step = 0
            for epoch in range(Settings.epochs):
                t = time.time()
                generated_images = None

                for real_embeddings, real_images, wrong_images, real_captions in gen.get_batches(Settings.batch_size):

                    z = np.random.normal(0 , 1 ,[ Settings.batch_size , 100 ] )
                    feed_dict = {net.z_vector: z,
                                 net.real_images: real_images,
                                 net.text_embeddings_real: real_embeddings,
                                 net.wrong_images: wrong_images,
                                 net.text_embeddings_wrong: real_embeddings,
                                 net.text_embeddings: real_embeddings,
                                 net.ist_training: True}

                    # one combined step: update discriminator and generator, fetch images and summaries
                    generated_images, _, _, dis_sum, gen_sum = session.run(
                        [net.generated_images, net.dis_optimizer, net.gen_optimizer, net.dis_summary, net.gen_summary],
                        feed_dict=feed_dict)

                    ############## Tensorboard summaries ###############
                    discrimiator_writer.add_summary(dis_sum, step)
                    generator_writer.add_summary(gen_sum, step)
                    step += 1
                    last_real_images = real_images

                if generated_images is None:
                    # Without a single batch there is nothing to save or to visualize.
                    raise ValueError("gen.get_batches() yielded no batches for batch size "
                                     + str(Settings.batch_size))

                ##################### Save Session ###########################
                saver.save(session, weights_dir + "/stored", step)

                ########################## Store Visualizations ###############################
                if Settings.show_captions:
                    # generated pictures of the last batch together with their captions
                    fig = visual_with_captions(generated_images[0:8], real_captions[0:8])
                else:
                    # alternating rows of generated and real images of the last batch in one plot
                    to_visualize[0:7] = generated_images[0:7]
                    to_visualize[7:14] = last_real_images[0:7]
                    to_visualize[14:21] = generated_images[7:14]
                    to_visualize[21:28] = last_real_images[7:14]
                    to_visualize[28:35] = generated_images[14:21]
                    to_visualize[35:42] = last_real_images[14:21]

                    fig = visual(to_visualize)
                plt.savefig(picture_dir + "/" + str(epoch) + ".png")
                plt.close(fig)

                t = time.time() - t
                minutes, seconds = divmod(t, 60)
                print(("-- Finished Epoch #{0} in {1: .0f}m{2: .2f}s.").format(epoch +1, minutes,seconds))
    finally:
        # flush pending summaries and release the event files, even if training fails
        discrimiator_writer.close()
        generator_writer.close()

#################################################################################
############################### Visualization##################################
##############################################################################
#### call with images as batches e.g fig=visual(batch[0:39])
#### saving the figure with:
### plt.savefig('first_try.svg')

#%matplotlib inline
def visual(images, colums=7):
    '''
    Creates a figure showing the given images in a grid.

    Only complete rows are drawn: trailing images that do not fill a whole
    row of `colums` images are left out. An empty `images` sequence yields
    an empty figure.

    @param:
        images : Images that should be shown in one figure. If the first
                 entry has shape (64,64,3) every entry is plotted directly,
                 otherwise entry[1] of every entry is plotted.
        colums : Number of images in one row
    @return:
        fig: figure of plot of the provided images
    '''

    fig = plt.figure(figsize=(10,10))
    if len(images) == 0:
        # Nothing to draw; avoids indexing images[0] on empty input.
        return fig

    # Entries of shape (64,64,3) are the images themselves; anything else is
    # treated as a record whose element 1 is the image.
    generated = np.shape(images[0]) == (64,64,3)

    nr_images = len(images) - len(images) % colums
    # Integer division: add_subplot requires an integer number of rows.
    rows = nr_images // colums

    for i in range(nr_images):
        ax = fig.add_subplot(rows, colums, i+1)
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)
        if generated:
            ax.imshow(images[i])
        else:
            ax.imshow(images[i][1])

    return fig

#### Variant with captions; it has not yet been tried out with our own captions.
def visual_with_captions(images, captions):
    '''
    Create a figure of images and captions

    The figure has 4 columns; every image occupies one cell and its caption
    the cell to its right, so two image/caption pairs are shown per row.
    Only the largest multiple of 4 pairs that is available is drawn.

    @param:
        images   : a array of images
        captions : a list of captions related to the images (not modified)
    @return:
        fig      : figure containing the images and the captions
    '''

    # Work on a copy so wrapping the captions never alters the caller's data.
    captions = insert_breaks(list(captions), 23)

    # Never index past the shorter of the two inputs.
    usable = min(len(images), len(captions))
    length = usable - usable % 4
    # Integer division: add_subplot requires an integer number of rows.
    rows = length // 2

    fig = plt.figure(figsize=(15,15))
    # The window title lives on the canvas manager; the canvas-level setter
    # is not available in current matplotlib versions.
    manager = getattr(fig.canvas, "manager", None)
    if manager is not None:
        manager.set_window_title("")

    for count in range(length):
        cell = 2 * count

        image_ax = fig.add_subplot(rows, 4, cell + 1)
        image_ax.xaxis.set_visible(False)
        image_ax.yaxis.set_visible(False)
        image_ax.imshow(images[count])

        # Frameless axes right of the image that only carry the caption text.
        caption_ax = fig.add_subplot(rows, 4, cell + 2, frameon=False)
        caption_ax.text(-0.15, 0.6, captions[count])
        caption_ax.xaxis.set_visible(False)
        caption_ax.yaxis.set_visible(False)

    return fig

## Inserts line breaks into each caption: a break is placed after every `number` characters
#### (fixed width, so a break can fall in the middle of a word).
def insert_breaks(captions, number):
    ''' 
    Inserts line breaks into the captions

    Every caption is cut into consecutive pieces of `number` characters which
    are joined with a newline. The cut is fixed-width, so words may be split.
    The given `captions` are left untouched; a new list is returned, so
    calling this repeatedly on the same input does not stack up breaks.

    @param:
        captions : list of text descriptions
        number   : number of characters after which a line break is inserted
    @return:
        list of the captions with line breaks, in the same order
    '''
    return [
        '\n'.join(caption[start:start+number]
                  for start in range(0, len(caption), number))
        for caption in captions
    ]

    

In [9]:
# Entry point: creates a DataLoader and a GAN and trains the network
# according to the parameters held in the Settings class.
# NOTE(review): the recorded output of this cell ends with
# "NameError: name 'GAN' is not defined", so the cell defining GAN has to be
# run before this one.
if __name__ == "__main__":
    dl = DataLoader("data", load_into_memory=True)
    net = GAN()
    train(net, gen=dl)

## BUILD DataLoader ##
[Info] loading data/flower_embeddings_custom.npy
[Attention] Loading 4355 images with dimensions 64x64x3 into memory!!
Progress: |██████████████████████████████████████████████████| 100.0% Complete
(4355, 4)
Nr. Trainingsamples: 4355


NameError: name 'GAN' is not defined

In [None]:
# Scratch cell: starts training on the `net` and `dl` objects left in the kernel.
# NOTE(review): the signature of train() is not visible in this part of the
# notebook; the main cell calls it as train(net, gen = dl), so the meaning of
# the extra positional arguments 10 and True needs to be confirmed.
train(net,dl,10,True)

In [None]:
# Scratch cell: pull one batch from the data loader and look at column 0.
# Relies on `dl` created by an earlier cell.
# dl=DataLoader('.')
gen_test = dl.get_batches(100)
# NOTE(review): this unpacks two values per batch, while a later cell unpacks
# five values from get_batches(); one of the two is stale - confirm against
# the DataLoader definition.
batch,wrong_batch=next(gen_test)
print(np.shape(batch[:,0]))

In [None]:
# Scratch cell: relies on `batch` from the previous cell.
# Shape of the first entry in column 0 of the batch.
print(np.shape(batch[:,0][0]))
# Keep column 0 in `a` and print its shape.
a=batch[:,0]
print(np.shape(a))

In [None]:
# Scratch cell: relies on `batch` from an earlier cell; rebinds `a` to the
# first 32 entries of the batch.
a=batch[0:32]
print(np.shape(a))
# Remove index 0 along axis 1; according to the original note this is the
# step that gets rid of the embeddings.
a=np.delete(a, 0, axis=1)
print(np.shape(a))
print(np.shape(a[1][0]))
# NOTE(review): `generated_images` is not assigned by any cell visible here
# (it is only used inside train()) - confirm it exists at notebook scope.
print(np.shape(generated_images))

In [None]:
# Scratch cell: fetch one batch of 10 from a fresh DataLoader and print the
# shape of every part of the batch.
data_loader = DataLoader('data')
generator = data_loader.get_batches(10)
# NOTE(review): expects get_batches() to yield five values per batch; an
# earlier scratch cell unpacks only two - confirm which one is current.
real_embeddings, real_images, wrong_embeddings, wrong_images, random_embeddings = next(generator)


print("real_embeddings: " + str(np.shape(real_embeddings)))
print("real_images: " + str(np.shape(real_images)))
print("wrong_embeddings: " + str(np.shape(wrong_embeddings)))
print("wrong_images: " + str(np.shape(wrong_images)))
print("random_embeddings: " + str(np.shape(random_embeddings)))

In [10]:
# Scratch check: split a list of 30 numbers into three consecutive chunks of 10.
ja = list(range(30))
print(ja[:10])
print(ja[10:20])
print(ja[20:30])

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
[20, 21, 22, 23, 24, 25, 26, 27, 28, 29]


In [58]:
# Scratch check of the "{1: .0f}m{2: .2f}s" number formatting with sample values.
print("Finished Epoch #{0} in {1: .0f}m{2: .2f}s.".format(1, 0.2, 0.3))


Finished Epoch #1 in  0m 0.30s.


In [66]:
# Scratch cell: builds one batch of real images, randomly chosen "wrong"
# images, caption vectors and noise.
# NOTE(review): this cell cannot run as it stands:
#   * the final `return` sits at module level, which is a syntax error
#     outside of a function - the snippet looks like a function body;
#   * loaded_data, data_dir, image_processing, image_size,
#     caption_vector_length and z_dim are not defined in any visible cell
#     (the recorded output is "NameError: name 'loaded_data' is not defined");
#   * `random` and `join` are not imported in the notebook's import cell.
batch_no = 1
batch_size = 64
# Pre-allocated buffers, one row per sample of the batch.
real_images = np.zeros((batch_size, 64, 64, 3))
wrong_images = np.zeros((batch_size, 64, 64, 3))
captions = np.zeros((batch_size, 1024))

cnt = 0
image_files = []
# Walk over the indices belonging to batch number `batch_no`.
for i in range(batch_no * batch_size, batch_no * batch_size + batch_size):
    # Wrap around when the index runs past the end of the image list.
    idx = i % len(loaded_data['image_list'])
    image_file =  join(data_dir, 'flowers/jpg/'+loaded_data['image_list'][idx])
    image_array = image_processing.load_image_array(image_file, image_size)
    real_images[cnt,:,:,:] = image_array

    # Improve this selection of wrong image
    # The index is drawn over the whole image list, so the "wrong" image can
    # coincide with the real one.
    wrong_image_id = random.randint(0,len(loaded_data['image_list'])-1)
    wrong_image_file =  join(data_dir, 'flowers/jpg/'+loaded_data['image_list'][wrong_image_id])
    wrong_image_array = image_processing.load_image_array(wrong_image_file, image_size)
    wrong_images[cnt, :,:,:] = wrong_image_array

    # Pick one of the captions stored for this image
    # (presumably five per image, indices 0..4 - TODO confirm).
    random_caption = random.randint(0,4)
    captions[cnt,:] = loaded_data['captions'][ loaded_data['image_list'][idx] ][ random_caption ][0:caption_vector_length]
    image_files.append( image_file )
    cnt += 1

# Uniform noise in [-1, 1) with one row of z_dim values per sample.
z_noise = np.random.uniform(-1, 1, [batch_size, z_dim])
return real_images, wrong_images, captions, z_noise, image_files

NameError: name 'loaded_data' is not defined

In [10]:
# Scratch check: a slice with stop index 3 returns the first three elements.
hello = [1, 2, 3, 4]
print(hello[:3])

[1, 2, 3]
