In [5]:
import metadata
import os

# Root directory of the RWTH-BOSTON-104 dataset.
# Set the BOSTON104_BASEPATH environment variable to point at a different
# location (e.g. '/media/data/datasets/sign/rwth-boston-104') instead of
# editing this cell; the default below is used when the variable is unset.
basepath = os.environ.get('BOSTON104_BASEPATH', '/data/datasets/rwth-boston-104')






In [6]:
import numpy as np
import skimage.io as io
import skimage
import tensorflow as tf
from keras import backend as K



class Boston104LocalizationIterator(tf.keras.preprocessing.image.Iterator):
    """Batch iterator over RWTH-BOSTON-104 frames for body-part localization.

    Every batch is a pair ``(x, y)``:

    * ``x`` has shape ``(batch, h, w, 1)`` and holds the cropped frames
      converted to grey scale (channels-last).
    * ``y`` has shape ``(batch, prod(localization_grid_shape))``: a flattened
      grid with a 1 in the cell of every requested body part.

    # Arguments
        base_path: Dataset root directory. It must contain
            `handpositions/boston104.handpositions.rybach-forster-dreuw-2009-09-25.full.xml`
            and the `png-segments` directory.
        localization_grid_shape: Sequence/array `(rows, cols)` giving the
            shape of the target grid.
        body_parts: Keys looked up in each frame's `positions` mapping.
        batch_size: Integer, size of a batch.
        shuffle: Boolean, whether to shuffle the data between epochs.
        seed: Random seed for data shuffling.
        data_format: String, one of `channels_first`, `channels_last`.
            Only stored on the instance; the batches produced here are
            always channels-last.
        save_to_dir: Optional directory where to save the pictures
            being yielded, in a viewable format (for debugging purposes).
        save_prefix: String prefix to use for saving sample
            images (if `save_to_dir` is set).
        save_format: Format to use for saving sample images
            (if `save_to_dir` is set).
    """

    def __init__(self, base_path,localization_grid_shape,body_parts=['head'],
                 batch_size=32,shuffle=False, seed=None,
                 data_format=None,
                 save_to_dir=None, save_prefix='', save_format='png'):
        # Use the constructor argument; the notebook-level `basepath` global
        # must not leak into the class.
        self.basepath = base_path

        self.localization_grid_shape = localization_grid_shape
        # Copy so the shared default list (or the caller's list) is never aliased.
        self.body_parts = list(body_parts)

        # Crop window as [row_start, row_stop, col_start, col_stop].
        self.sub_image = np.array([0, 240, 10, 326])
        self.h = self.sub_image[1] - self.sub_image[0]
        self.w = self.sub_image[3] - self.sub_image[2]

        self.video_positions_filepath = os.path.join(
            base_path,
            'handpositions/boston104.handpositions.rybach-forster-dreuw-2009-09-25.full.xml')
        self.images_path = os.path.join(base_path, 'png-segments')
        self.frames = metadata.parse_videos_to_images(self.video_positions_filepath,
                                                      self.images_path)

        #self.image_data_generator = image_data_generator

        if data_format is None:
            data_format = tf.keras.backend.image_data_format()
        self.data_format = data_format

        self.save_to_dir = save_to_dir
        self.save_prefix = save_prefix
        self.save_format = save_format

        super(Boston104LocalizationIterator, self).__init__(len(self.frames), batch_size, shuffle, seed)

    def _get_batches_of_transformed_samples(self, index_array):
        """Load the frames selected by `index_array` and return `(batch_x, batch_y)`.

        # Arguments
            index_array: Indices into `self.frames`. A tuple whose first
                element is the index array is accepted as well, for index
                generators that yield extra bookkeeping values.
        """
        if isinstance(index_array, tuple):
            index_array = index_array[0]
        # A single index becomes a batch of one instead of failing on len().
        index_array = np.atleast_1d(index_array)

        batch_x, batch_y = self.read_boston104_frames(index_array)

        if self.save_to_dir:
            for i, j in enumerate(index_array):
                # read_boston104_frames always produces channels-last images.
                img = tf.keras.preprocessing.image.array_to_img(
                    batch_x[i], data_format='channels_last', scale=True)
                fname = '{prefix}_{index}_{hash}.{format}'.format(prefix=self.save_prefix,
                                                                  index=j,
                                                                  hash=np.random.randint(10000),
                                                                  format=self.save_format)
                img.save(os.path.join(self.save_to_dir, fname))

        return batch_x, batch_y

    def next(self):
        """For python 2.x.
        # Returns
            The next batch.
        """
        # Keeps under lock only the mechanism which advances
        # the indexing of each batch.
        with self.lock:
            index_array = next(self.index_generator)
        # Reading the images is not under thread lock
        # so it can be done in parallel
        return self._get_batches_of_transformed_samples(index_array)

    def image_position_to_grid_position(self,grid_shape,image_shape,image_position):
        """Map a pixel position to the index of the grid cell it falls in.

        # Arguments
            grid_shape: `(rows, cols)` of the grid.
            image_shape: `(height, width)` of the image.
            image_position: `(y, x)` position in pixels.

        # Returns
            Integer array `(row, col)`, clipped to the valid index range
            `[0, grid_shape - 1]`.
        """
        grid_shape = np.asarray(grid_shape)
        ratio = grid_shape / np.asarray(image_shape, dtype=float)
        grid_position = np.round(np.asarray(image_position) * ratio).astype(int)
        # Rounding can land exactly on `grid_shape` for positions close to the
        # far edge (e.g. 239 * 14 / 240 rounds to 14), which is not a valid
        # index and would make np.ravel_multi_index raise.
        return np.clip(grid_position, 0, grid_shape - 1)

    def read_boston104_frames(self,frame_indices):
        """Read, crop and grey-scale the given frames and build their targets.

        # Arguments
            frame_indices: Sequence of indices into `self.frames`.

        # Returns
            Tuple `(x, y)` with `x` of shape `(n, h, w, 1)` and `y` of shape
            `(n, prod(localization_grid_shape))`.
        """
        n = len(frame_indices)
        grid_dims = tuple(int(d) for d in self.localization_grid_shape)
        image_shape = np.array([self.h, self.w])

        x = np.zeros((n, self.h, self.w, 1))
        y = np.zeros((n, int(np.prod(grid_dims))))

        row_start, row_stop, col_start, col_stop = self.sub_image
        for i, j in enumerate(frame_indices):
            frame = self.frames[j]
            image = io.imread(frame.path)
            image = image[row_start:row_stop, col_start:col_stop, :]
            # rgb2gray is the supported name; the rgb2grey alias was removed
            # from newer scikit-image releases.
            image = skimage.color.rgb2gray(image)
#             image = self.image_data_generator.random_transform(image.astype(K.floatx()))
#             image = self.image_data_generator.standardize(image)
            x[i, :, :, 0] = image
            for body_part in self.body_parts:
                point = frame.positions[body_part]
                # NOTE(review): the position is used as-is although the image
                # was cropped starting at (row_start, col_start). If the
                # annotations are in full-frame coordinates the crop origin
                # should be subtracted here -- confirm against `metadata`.
                image_position = np.array([point.y, point.x])
                grid_position = self.image_position_to_grid_position(
                    self.localization_grid_shape, image_shape, image_position)
                flat_position = np.ravel_multi_index(tuple(grid_position), grid_dims)
                y[i, flat_position] = 1

        return x, y


# Target grid (rows, cols) onto which the annotated positions are quantised.
localization_grid_shape = np.array([14, 20])
# implement iterator like https://github.com/fizyr/keras-retinanet/blob/master/keras_retinanet/preprocessing/pascal_voc.py
iterator = Boston104LocalizationIterator(
    base_path=basepath,
    localization_grid_shape=localization_grid_shape,
    body_parts=['head'],
    shuffle=True,
)



In [None]:
import matplotlib.pyplot as plt

#batch_x,batch_y=iterator.next()
#print(batch_x.shape)
#print(batch_y[0,:])
#plt.imshow(batch_x[0,0,:,:])
#print(np.where(batch_y[0,:]>0))
#plt.show()

# ResNeXt model

Adapted from the [original ResNeXt implementation (gist)](https://gist.github.com/mjdietzx/0cb95922aac14d446a6530f87b3a04ce).

In [9]:
from keras import layers
from keras import models

cardinality = 32


def residual_network(x,classes):
    """
    Build a ResNeXt-style network on top of the input tensor `x`.

    ResNeXt by default. For ResNet set `cardinality` = 1 above.

    # Arguments
        x: 4D channels-last input tensor (the grouped convolution slices
            the last axis).
        classes: Number of units of the final `Dense` layer.

    # Returns
        Output tensor of the final `Dense(classes)` layer. No activation is
        applied, so the values are raw scores.
    """
    def add_common_layers(y):
        """Batch normalization followed by LeakyReLU."""
        y = layers.BatchNormalization()(y)
        y = layers.LeakyReLU()(y)

        return y

    def slice_channels(z, start, stop):
        """Return channels `start:stop` of a 4D channels-last tensor."""
        return z[:, :, :, start:stop]

    def grouped_convolution(y, nb_channels, _strides):
        """3x3 convolution split into `cardinality` independent channel groups."""
        # when `cardinality` == 1 this is just a standard convolution
        if cardinality == 1:
            return layers.Conv2D(nb_channels, kernel_size=(3, 3), strides=_strides, padding='same')(y)

        assert not nb_channels % cardinality
        _d = nb_channels // cardinality

        # in a grouped convolution layer, input and output channels are divided into `cardinality` groups,
        # and convolutions are separately performed within each group
        groups = []
        for j in range(cardinality):
            # The slice bounds are bound per layer through `arguments`. A
            # lambda closing over `j` would see the loop's final value whenever
            # the layer is invoked again (or serialized) after the loop has
            # finished, making every group read the same channels.
            group = layers.Lambda(slice_channels,
                                  arguments={'start': j * _d, 'stop': (j + 1) * _d})(y)
            groups.append(layers.Conv2D(_d, kernel_size=(3, 3), strides=_strides, padding='same')(group))

        # the grouped convolutional layer concatenates them as the outputs of the layer
        y = layers.concatenate(groups)

        return y

    def residual_block(y, nb_channels_in, nb_channels_out, _strides=(1, 1), _project_shortcut=False):
        """
        Our network consists of a stack of residual blocks. These blocks have the same topology,
        and are subject to two simple rules:
        - If producing spatial maps of the same size, the blocks share the same hyper-parameters (width and filter sizes).
        - Each time the spatial map is down-sampled by a factor of 2, the width of the blocks is multiplied by a factor of 2.
        """
        shortcut = y

        # we modify the residual building block as a bottleneck design to make the network more economical
        y = layers.Conv2D(nb_channels_in, kernel_size=(1, 1), strides=(1, 1), padding='same')(y)
        y = add_common_layers(y)

        # ResNeXt (identical to ResNet when `cardinality` == 1)
        y = grouped_convolution(y, nb_channels_in, _strides=_strides)
        y = add_common_layers(y)

        y = layers.Conv2D(nb_channels_out, kernel_size=(1, 1), strides=(1, 1), padding='same')(y)
        # batch normalization is employed after aggregating the transformations and before adding to the shortcut
        y = layers.BatchNormalization()(y)

        # identity shortcuts used directly when the input and output are of the same dimensions
        if _project_shortcut or _strides != (1, 1):
            # when the dimensions increase projection shortcut is used to match dimensions (done by 1×1 convolutions)
            # when the shortcuts go across feature maps of two sizes, they are performed with a stride of 2
            shortcut = layers.Conv2D(nb_channels_out, kernel_size=(1, 1), strides=_strides, padding='same')(shortcut)
            shortcut = layers.BatchNormalization()(shortcut)

        y = layers.add([shortcut, y])

        # the activation (LeakyReLU here) is applied right after each batch normalization,
        # except for the output of the block where it is applied after adding the shortcut
        y = layers.LeakyReLU()(y)

        return y

    # conv1
    x = layers.Conv2D(64, kernel_size=(7, 7), strides=(2, 2), padding='same')(x)
    x = add_common_layers(x)

    # conv2
    x = layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)
    for i in range(3):
        # only the first block changes the channel count, so only it projects the shortcut
        x = residual_block(x, 128, 256, _project_shortcut=(i == 0))

    # conv3
    for i in range(4):
        # down-sampling is performed by conv3_1, conv4_1, and conv5_1 with a stride of 2
        strides = (2, 2) if i == 0 else (1, 1)
        x = residual_block(x, 256, 512, _strides=strides)

    # conv4
    for i in range(6):
        strides = (2, 2) if i == 0 else (1, 1)
        x = residual_block(x, 512, 1024, _strides=strides)

#     # conv5
#     for i in range(3):
#         strides = (2, 2) if i == 0 else (1, 1)
#         x = residual_block(x, 1024, 2048, _strides=strides)

    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(classes)(x)

    return x

In [10]:

# from keras.models import Sequential
# from keras.layers.convolutional import Conv2D
# from keras.layers.convolutional import MaxPooling2D
# from keras.layers import Dense
# from keras.layers import Dropout
# from keras.layers import Flatten
# from keras.constraints import maxnorm

#model = Sequential()
# model.add(Conv2D(32, (3, 3), input_shape=(240,316, 1), padding='same', activation='relu', kernel_constraint=maxnorm(3)))
# for i in range(4):
#     model.add(Conv2D(32, (3, 3),strides=(2,2), padding='same', activation='relu', kernel_constraint=maxnorm(3)))
#     model.add(Conv2D(32, (3, 3), padding='same', activation='relu', kernel_constraint=maxnorm(3)))

# model.add(Flatten())
# model.add(Dense(300, kernel_initializer='normal', activation='relu'))
# model.add(Dense(classes, kernel_initializer='normal', activation='sigmoid'))
# model.summary()
# Input: one cropped grey-scale frame, channels-last (height, width, channels).
# Must match the crop size produced by Boston104LocalizationIterator.
x_shape = (240, 316, 1)
# One output unit per cell of the flattened localization grid.
classes = int(localization_grid_shape.prod())
image_tensor = layers.Input(shape=x_shape)
network_output = residual_network(image_tensor, classes)

model = models.Model(inputs=[image_tensor], outputs=[network_output])
# summary() prints the table itself and returns None; wrapping it in print()
# only appends a stray "None" line to the output.
model.summary()



__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 240, 316, 1)  0                                            
__________________________________________________________________________________________________
conv2d_550 (Conv2D)             (None, 120, 158, 64) 3200        input_2[0][0]                    
__________________________________________________________________________________________________
batch_normalization_54 (BatchNo (None, 120, 158, 64) 256         conv2d_550[0][0]                 
__________________________________________________________________________________________________
leaky_re_lu_50 (LeakyReLU)      (None, 120, 158, 64) 0           batch_normalization_54[0][0]     
__________________________________________________________________________________________________
max_poolin

lambda_787 (Lambda)             (None, 15, 20, 16)   0           leaky_re_lu_75[0][0]             
__________________________________________________________________________________________________
lambda_788 (Lambda)             (None, 15, 20, 16)   0           leaky_re_lu_75[0][0]             
__________________________________________________________________________________________________
lambda_789 (Lambda)             (None, 15, 20, 16)   0           leaky_re_lu_75[0][0]             
__________________________________________________________________________________________________
lambda_790 (Lambda)             (None, 15, 20, 16)   0           leaky_re_lu_75[0][0]             
__________________________________________________________________________________________________
lambda_791 (Lambda)             (None, 15, 20, 16)   0           leaky_re_lu_75[0][0]             
__________________________________________________________________________________________________
lambda_792

In [None]:
def mean_pred(y_true, y_pred):
    """Keras metric: mean over all predicted values (`y_true` is not used)."""
    average_prediction = K.mean(y_pred)
    return average_prediction

def _categorical_crossentropy_from_logits(y_true, y_pred):
    """Categorical cross-entropy for a model whose output layer has no activation.

    The network ends in a plain `Dense(classes)` layer, i.e. it emits raw
    scores. The string loss 'categorical_crossentropy' expects probabilities,
    so the softmax is applied inside the loss via `from_logits=True`.
    """
    return K.categorical_crossentropy(y_true, y_pred, from_logits=True)


model.compile(optimizer='rmsprop',
              loss=_categorical_crossentropy_from_logits,
              metrics=['accuracy', mean_pred])

model.fit_generator(iterator, steps_per_epoch=10, epochs=10)