In [1]:
import numpy as np
from keras.layers import Input, Dense
from keras.models import Model
from keras.datasets import mnist

Using TensorFlow backend.


In [2]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()
print('There are %d train and %d test data'%(len(x_train),len(x_test)))
n_train, n_x, n_y = x_train.shape
print('Each image has shape %d by %d' %(n_x, n_y))

There are 60000 train and 10000 test data
Each image has shape 28 by 28


In [4]:
x_train[0]

array([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   3,
         18,  18,  18, 126, 136, 175,  26, 166, 255, 247, 127,   0,   0,
          0,   0],
       [  

In [3]:
### functions for superposing images
def coord_to_idx(x, y, length, width):
    ## turn x, y coordinate into an index of array
    ## we assume the origin to be the upper left corner of image
    ## we assume x corresponds to index along width
    return y*width+x
def idx_to_coord(idx, length, width):
    # return tuple (x, y)
    n_r = 0
    while (n_r+1) * width - 1 < idx:
        n_r += 1
    #n_r -= 1
    return idx-n_r*width, n_r
            
def embed_image(origin_x, origin_y, frame, image_to_embed):
    """
    Embed 2D image into a 2D frame
    starting from the location (origin_x, origin_y)
    return: frame embedded with image
    """
    image_length, image_width = image_to_embed.shape
    frame_length, frame_width = frame.shape
    #print('embedded image at location %d by %d'%(origin_x,origin_y))
    for i_x_idx, f_x_idx in enumerate(range(origin_x, origin_x+image_width)):
        for i_y_idx, f_y_idx in enumerate(range(origin_y, origin_y+image_length)):
            #print('index of image', i_y_idx, i_x_idx)
            #print('index of frame', f_y_idx, f_x_idx)
            frame[f_y_idx, f_x_idx] += image_to_embed[i_y_idx, i_x_idx]
    return frame

def calc_feasible_regions(origin_x, origin_y, image_length, 
                          image_width,frame_length, frame_width,
                          max_x_overlap, max_y_overlap):
    x_range = np.array(range(frame_width))
    y_range = np.array(range(frame_length))
    x_feasible = np.append(x_range[x_range>=origin_x+image_width-max_x_overlap],
                   x_range[x_range<=origin_x-image_width+max_x_overlap])
    #print('feasible x coordinates', list(x_feasible))
    y_feasible = np.append(y_range[y_range>=origin_y+image_length-max_y_overlap],
                   y_range[y_range<=origin_y-image_length+max_y_overlap])
    feasible_origins_idx = list()
    for x in x_feasible:
        for y in y_feasible:
            feasible_origins_idx.append(coord_to_idx(x, y, frame_length, frame_width))
    return feasible_origins_idx

def calc_init_feasible_set(f_length, f_width, image_length, image_width):
    feasible_li = list()
    for y in range(f_length-image_length):
        for x in range(f_width-image_width):
            feasible_li.append(coord_to_idx(x, y, f_length, f_width))
    return set(feasible_li)


def create_frame(f_length, f_width, image_source, max_n_images, max_overlap_x, max_overlap_y):
    ## create empty frame
    frame = np.zeros((f_length, f_width))
    n_images = np.random.randint(1, max_n_images+1)
    #print('number of images supposed to be',n_images)
    image_length, image_width = image_source[0].shape
    #feasible_origins = set(range((f_length-image_length+1)*(f_width-image_width+1)))
    feasible_origins = calc_init_feasible_set(f_length, f_width, image_length, image_width)
    count = 0
    for i in range(n_images):
        #print(feasible_origins)
        if not bool(feasible_origins):
            #print('number of images generated', count)   
            return frame, count
        count += 1
        ## randomly sample index from feasible_origins
        o_idx = np.random.choice(list(feasible_origins))
        o_x, o_y = idx_to_coord(o_idx, f_length, f_width)
        ## randomly draw an image from image_source and embed it
        image_to_embed = image_source[np.random.randint(len(image_source)),:,:]
        frame = embed_image(o_x, o_y, frame, image_to_embed)
        new_feasible_origins = calc_feasible_regions(o_x, o_y, image_length, image_width, 
                                                     f_length, f_width,
                                                     max_overlap_x, max_overlap_y)
        #print('new feasible', new_feasible_origins)
        feasible_origins = feasible_origins & set(new_feasible_origins)
    #print('number of images generated', count)    
    return (frame, count)

def create_dataset(f_length, f_width, image_source, max_n_images, max_overlap_x, max_overlap_y, n_samples):
    #print(create_frame(f_length, f_width, image_source, max_n_images, max_overlap_x, max_overlap_y))
    return [create_frame(f_length, f_width, image_source, max_n_images, max_overlap_x, max_overlap_y) \
                        for i in range(n_samples)]

In [7]:
# feasible_ = calc_init_feasible_set(50, 50, 28, 28)
# for idx in feasible_:
#     print(idx)
#     print(idx_to_coord(idx, 50, 50)[1],idx_to_coord(idx, 50, 50)[0])

In [8]:
f_length = 80
f_width = 80
image_source = x_train[:100]
max_n_images = 5
max_overlap_x = 20
max_overlap_y = 20
n_samples = 2
frame_label = create_dataset(f_length, f_width, image_source, max_n_images, max_overlap_x, max_overlap_y, n_samples)

In [9]:
frame_label

[(array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
         [ 0.,  0.,  0., ...,  0.,  0.,  0.],
         [ 0.,  0.,  0., ...,  0.,  0.,  0.],
         ..., 
         [ 0.,  0.,  0., ...,  0.,  0.,  0.],
         [ 0.,  0.,  0., ...,  0.,  0.,  0.],
         [ 0.,  0.,  0., ...,  0.,  0.,  0.]]), 2),
 (array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
         [ 0.,  0.,  0., ...,  0.,  0.,  0.],
         [ 0.,  0.,  0., ...,  0.,  0.,  0.],
         ..., 
         [ 0.,  0.,  0., ...,  0.,  0.,  0.],
         [ 0.,  0.,  0., ...,  0.,  0.,  0.],
         [ 0.,  0.,  0., ...,  0.,  0.,  0.]]), 3)]

In [None]:
# %matplotlib inline
# import matplotlib.pyplot as plt
# frame, label = frame_label
# print(label)
# print(frame)
# ##
# fig = plt.figure()
# plt.imshow(frame)
# plt.gray()
# plt.show()
# plt.close('all')

In [5]:
#### create data generator for reconstruction
def mnist_clustering_generator(f_length, f_width, 
                               image_source, max_n_images, 
                               max_overlap_x, max_overlap_y, 
                               batch_size, total_n_samples, model_type='autoencoder'):
    
    plain_label = np.zeros(max_n_images)
    n_epochs = 0
    data_collections = list()
    ind = 0
    ## process data to algorithm-friendly format
    #if model_type=='autoencoder':
    while n_epochs < total_n_samples/batch_size:
        ind = ind+1
        if ind < total_n_samples:
            # create new data-label pairs
            # get list of data-label tuples (list length equals batch_size)
            frame_label_list = create_dataset(f_length, f_width, image_source, 
                                          max_n_images, max_overlap_x, 
                                          max_overlap_y, batch_size)
            #print(frame_label_list)
            #frames, labels = zip(*frame_label_list)
            #print(frames)
            frames = list()
            labels = list()
            for i in range(len(frame_label_list)):
                frame, label = frame_label_list[i]
                frames.append(np.array(frame).flatten())
                labels.append(labels)
            frames = np.array(frames)
            labels = np.array(labels)
            data_collections.append((frames, labels))
        else:
            if ind == total_n_samples:
                n_epochs += 1
            ind = ind % total_n_samples
            frames, labels = data_collections[ind]
            #x.append(frame.flatten())
            #y_ = plain_label.copy()
            #y_[label-1] = 1
            #y.append(y_)
            #print(np.array(x).shape)
        #print(frames)
        yield (frames, frames)
        
####### create generator for predicting number of clusters
def mnist_clustering_generator_for_pred(f_length, f_width, 
                               image_source, max_n_images, 
                               max_overlap_x, max_overlap_y, 
                               batch_size, total_n_samples, model_type='autoencoder'):
    
    plain_label = np.zeros(max_n_images)
    n_epochs = 0
    data_collections = list()
    ind = 0
    ## process data to algorithm-friendly format
    #if model_type=='autoencoder':
    while True:
        ind = ind+1
        label_template = np.zeros(max_n_images)
        if ind < total_n_samples:
            # create new data-label pairs
            # get list of data-label tuples (list length equals batch_size)
            frame_label_list = create_dataset(f_length, f_width, image_source, 
                                          max_n_images, max_overlap_x, 
                                          max_overlap_y, batch_size)
            #print(frame_label_list)
            #frames, labels = zip(*frame_label_list)
            #print(frames)
            frames = list()
            labels = list()
            for i in range(len(frame_label_list)):
                frame, label_ = frame_label_list[i]
                frames.append(np.array(frame).flatten())
                label = label_template.copy()
                label[label_-1] = 1
                labels.append(label)
            frames = np.array(frames)
            labels = np.array(labels)
            data_collections.append((frames, labels))
        else:
            if ind == total_n_samples:
                n_epochs += 1
            ind = ind % total_n_samples
            frames, labels = data_collections[ind]
            #x.append(frame.flatten())
            #y_ = plain_label.copy()
            #y_[label-1] = 1
            #y.append(y_)
            #print(np.array(x).shape)
        #print(frames)
        yield (frames, labels)

## Autoencoder model

In [11]:
# this is the size of our encoded representations
encoding_dim = 32  # 32 floats -> compression of factor 24.5, assuming the input is 784 floats

# this is our input placeholder
input_img = Input(shape=(f_length*f_width,))
# "encoded" is the encoded representation of the input
encoded = Dense(encoding_dim, activation='relu')(input_img)
# "decoded" is the lossy reconstruction of the input
decoded = Dense(f_length*f_width, activation='sigmoid')(encoded)

# this model maps an input to its reconstruction
autoencoder = Model(input_img, decoded)

# this model maps an input to its encoded representation
# encoder = Model(input_img, encoded)

# # create a placeholder for an encoded (32-dimensional) input
# encoded_input = Input(shape=(encoding_dim,))
# # retrieve the last layer of the autoencoder model
# decoder_layer = autoencoder.layers[-1]
# # create the decoder model
# decoder = Model(encoded_input, decoder_layer(encoded_input))

autoencoder.compile(optimizer='adadelta', loss='mse')

In [None]:
f_length = 80
f_width = 80
image_source = x_train[:1000]
max_n_images = 5
max_overlap_x = 20
max_overlap_y = 20
batch_size = 1
total_n_samples = 1000

data_gen = mnist_clustering_generator(f_length, f_width, 
                               image_source, max_n_images, 
                               max_overlap_x, max_overlap_y, 
                               batch_size, total_n_samples, model_type='autoencoder')

autoencoder.fit_generator(data_gen,
        steps_per_epoch=total_n_samples/batch_size, epochs=20, verbose=1)

Epoch 1/20
Epoch 2/20

## Prediction model

In [6]:
#### model params
f_length = 80
f_width = 80

### feature map using encoder
# this is the size of our encoded representations
encoding_dim = 32  # 32 floats -> compression of factor 24.5, assuming the input is 784 floats
output_dim = 5
# this is our input placeholder
input_img = Input(shape=(f_length*f_width,))
## hidden layers
hidden1 = Dense(encoding_dim, activation='relu')(input_img)
# 
#decoded = Dense(f_length*f_width, activation='sigmoid')(encoded)
## output layers
output = Dense(output_dim, activation='softmax')(hidden1)

###
predictor = Model(input_img, output)
predictor.compile(optimizer='adadelta', loss='categorical_crossentropy')

In [8]:
### algorithm params
image_source = x_train[:1000]
max_n_images = 5
max_overlap_x = 20
max_overlap_y = 20
batch_size = 1
total_n_samples = 1000

data_gen = mnist_clustering_generator_for_pred(f_length, f_width, 
                               image_source, max_n_images, 
                               max_overlap_x, max_overlap_y, 
                               batch_size, total_n_samples)

predictor.fit_generator(data_gen,
        steps_per_epoch=total_n_samples/batch_size, epochs=20, verbose=1)

Epoch 1/20
   1/1000 [..............................] - ETA: 438s - loss: 16.1181

Exception in thread Thread-11:
Traceback (most recent call last):
  File "/Users/tangch/anaconda2/lib/python2.7/threading.py", line 801, in __bootstrap_inner
    self.run()
  File "/Users/tangch/anaconda2/lib/python2.7/threading.py", line 754, in run
    self.__target(*self.__args, **self.__kwargs)
  File "/Users/tangch/anaconda2/lib/python2.7/site-packages/keras/utils/data_utils.py", line 560, in data_generator_task
    generator_output = next(self._generator)
  File "<ipython-input-5-930dc4bd76a9>", line 76, in mnist_clustering_generator_for_pred
    label[label_] = 1
IndexError: index 5 is out of bounds for axis 0 with size 5



StopIteration: 