## Реализация алгоритма обработки изображений DeepDream с помощью VGG16

In [1]:
import os

import numpy as np
import scipy
import scipy.ndimage
from keras.preprocessing.image import load_img, save_img, img_to_array

Using TensorFlow backend.


In [2]:
from keras.applications import VGG16
from keras import backend as K

# VGG16 with ImageNet weights; include_top=False leaves out the classifier
# head, so only the convolutional blocks are built.
model = VGG16(include_top=False, weights='imagenet')

In [3]:
# Input picture, given relative to the notebook's working directory.
base_image_path = "image.jpg"
# Text prepended to the input name to form the output file name.
result_prefix = "1_"

In [42]:
# Layers whose activations are amplified, mapped to each layer's weight in
# the loss. Insertion order is the order in which the terms are summed.
settings = dict(
    features={
        'block2_conv2': 4e-5,
        'block3_conv2': 2e-5,
        'block4_conv3': 5e-4,
        'block5_conv3': 1e-3,
    },
)

# Hyperparameters for tuning the visual effect.
step = 0.1        # gradient-ascent step size
num_octave = 3    # number of octaves (image scales) processed
octave_scale = 2  # size ratio between consecutive octaves
iterations = 30   # gradient-ascent iterations per octave
# Gradient ascent on an octave stops once the loss exceeds this value.
# NOTE(review): the recorded cell output further down keeps printing losses
# well above 100, so it was presumably produced with a different max_loss;
# re-run the notebook to confirm.
max_loss = 100.

In [43]:
# `model` is already built in an earlier cell with the same arguments
# (weights='imagenet', include_top=False). Constructing it a second time here
# would only add another copy of the network to the backend graph, so this
# cell just prints the architecture.
model.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_11 (InputLayer)        (None, None, None, 3)     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0     

In [44]:
def preprocess_image(image_path):
    """Load an image file as an array with a leading batch axis.

    The file is opened with ``load_img``, converted with ``img_to_array`` and
    given an extra axis 0 so that it forms a batch of one image. No resizing
    and no value scaling is performed here.

    NOTE(review): no model-specific preprocessing (for example
    ``keras.applications.vgg16.preprocess_input``) is applied before the
    array is fed to VGG16 -- confirm that this is intended.

    Args:
        image_path: path of the image file to read.

    Returns:
        The image array with one extra leading dimension of length 1.
    """
    pixels = img_to_array(load_img(image_path))
    return np.expand_dims(pixels, axis=0)

def deprocess_image(x):
    """Turn a one-image batch into a displayable uint8 image.

    The values are standardised (zero mean, unit standard deviation),
    rescaled to a standard deviation of 0.2, shifted so they are centred on
    0.5, clipped to [0, 1] and finally mapped to the 0..255 range.

    Args:
        x: array holding exactly one 3-channel image, laid out as
            (1, 3, H, W) when the backend uses 'channels_first' and as
            (1, H, W, 3) otherwise. The argument is not modified.

    Returns:
        A uint8 array of shape (H, W, 3).
    """
    # Work on a private copy: the arithmetic below is in-place and reshape
    # returns a view, so without the copy the caller's array would be
    # overwritten.
    x = np.array(x, copy=True)
    # In-place subtraction of a float mean fails on integer arrays.
    if not np.issubdtype(x.dtype, np.floating):
        x = x.astype('float32')

    # Drop the batch axis and move the channels last.
    if K.image_data_format() == 'channels_first':
        x = x.reshape((3, x.shape[2], x.shape[3]))
        x = x.transpose((1, 2, 0))
    else:
        x = x.reshape((x.shape[1], x.shape[2], 3))

    # Normalise: centre on 0 and bring the standard deviation to 0.2.
    x -= x.mean()
    x /= (x.std() + 1e-5)  # the epsilon guards against a constant image
    x *= 0.2

    # Re-centre on 0.5 and clip to [0, 1].
    x += 0.5
    x = np.clip(x, 0, 1)

    # Scale to 0..255; the values are already bounded, so no second clip is
    # needed before the integer conversion.
    x *= 255
    return x.astype('uint8')

In [45]:
# Fix the backend learning phase to 0 (test/inference mode).
K.set_learning_phase(0)
# The model's input tensor is the image that gets modified ("dreamed" on).
dream = model.input
# Name -> layer lookup for the layers selected in settings['features'].
layer_dict = {layer.name: layer for layer in model.layers}

# Objective: weighted sum, over the selected layers, of the sum of squared
# activations divided by the number of elements in the activation tensor.
loss = K.variable(0.)
for layer_name, coeff in settings['features'].items():
    if layer_name not in layer_dict:
        raise ValueError('Layer ' + layer_name + ' not found in model.')
    x = layer_dict[layer_name].output
    # Total element count of the activation tensor, used for normalisation.
    scaling = K.prod(K.cast(K.shape(x), 'float32'))
    # To avoid border artefacts only interior positions contribute: two
    # rows/columns are cut from every edge of both spatial axes.
    if K.image_data_format() == 'channels_first':
        interior = x[:, :, 2: -2, 2: -2]
    else:
        interior = x[:, 2: -2, 2: -2, :]
    loss = loss + coeff * K.sum(K.square(interior)) / scaling

# Gradient of the objective with respect to the input image.
grads = K.gradients(loss, dream)[0]
# Normalise the gradient by its mean absolute value; K.epsilon() keeps the
# divisor away from zero.
grads = grads / K.maximum(K.mean(K.abs(grads)), K.epsilon())

# Backend function mapping an image batch to [loss, normalised gradient].
outputs = [loss, grads]
fetch_loss_and_grads = K.function([dream], outputs)
def eval_loss_and_grads(x):
    """Evaluate the objective and its normalised gradient for one image batch.

    Args:
        x: image batch passed as the single input of ``fetch_loss_and_grads``.

    Returns:
        A ``(loss_value, grad_values)`` tuple: the first and second output of
        ``fetch_loss_and_grads``.
    """
    loss_value, grad_values = fetch_loss_and_grads([x])[:2]
    return loss_value, grad_values

def resize_img(img, size):
    """Resize the two spatial axes of a 4-D image batch.

    Uses ``scipy.ndimage.zoom`` with ``order=1`` (linear interpolation). The
    batch and channel axes keep a zoom factor of 1.

    Args:
        img: 4-D array. The spatial axes are 2 and 3 when the backend uses
            'channels_first', and 1 and 2 otherwise. The argument is not
            modified.
        size: sequence whose first two items are the target lengths of the
            first and second spatial axis.

    Returns:
        A new array with the spatial axes zoomed towards ``size``.
    """
    # zoom() always writes into a freshly allocated output, so no defensive
    # copy of the input is needed; asarray only normalises the type.
    img = np.asarray(img)
    if K.image_data_format() == 'channels_first':
        row_axis, col_axis = 2, 3
    else:
        row_axis, col_axis = 1, 2

    factors = [1, 1, 1, 1]
    factors[row_axis] = float(size[0]) / img.shape[row_axis]
    factors[col_axis] = float(size[1]) / img.shape[col_axis]
    return scipy.ndimage.zoom(img, factors, order=1)


def gradient_ascent(x, iterations, step, max_loss=None):
    """Run gradient ascent on the image batch ``x``.

    Each iteration evaluates the loss and gradient with
    ``eval_loss_and_grads``, prints the loss and adds ``step * gradient`` to
    ``x``. The update is applied in place, so the array passed in is
    modified.

    Args:
        x: image batch to optimise (updated in place).
        iterations: maximum number of ascent steps.
        step: multiplier applied to the gradient at every step.
        max_loss: optional threshold; the loop stops, before printing or
            updating, as soon as the loss is greater than this value.

    Returns:
        The same array object ``x`` after the updates.
    """
    has_limit = max_loss is not None
    for iteration in range(iterations):
        current_loss, gradient = eval_loss_and_grads(x)
        if has_limit and current_loss > max_loss:
            break
        print('..Loss value at', iteration, ':', current_loss)
        x += step * gradient
    return x

# Load the base image as a one-image batch and read off its spatial size.
img = preprocess_image(base_image_path)
if K.image_data_format() == 'channels_first':
    original_shape = img.shape[2:]
else:
    original_shape = img.shape[1:3]

# Spatial sizes of all octaves: the original size plus num_octave - 1
# versions, each shrunk by a further factor of octave_scale. The list is then
# reversed so that processing goes from the smallest size to the original.
successive_shapes = [original_shape] + [
    tuple(int(dim / (octave_scale ** i)) for dim in original_shape)
    for i in range(1, num_octave)
]
successive_shapes.reverse()

original_img = np.copy(img)
shrunk_original_img = resize_img(img, successive_shapes[0])

for shape in successive_shapes:
    print('Processing image shape', shape)
    img = resize_img(img, shape)
    img = gradient_ascent(img,
                          iterations=iterations,
                          step=step,
                          max_loss=max_loss)
    # Detail lost by upscaling: the difference between the original resized
    # directly to this octave and the previous octave's original upscaled to
    # this octave.
    upscaled_shrunk_original_img = resize_img(shrunk_original_img, shape)
    same_size_original = resize_img(original_img, shape)
    lost_detail = same_size_original - upscaled_shrunk_original_img
    # Re-inject details
    img += lost_detail
    shrunk_original_img = resize_img(original_img, shape)

# Put the prefix in front of the file name only. Plain string concatenation
# would prefix the directory part whenever base_image_path contains one.
output_dir, output_name = os.path.split(base_image_path)
output_path = os.path.join(output_dir, result_prefix + output_name)
save_img(output_path, deprocess_image(np.copy(img)))

Processing image shape (162, 200)
..Loss value at 0 : 97.820786
..Loss value at 1 : 100.78035
..Loss value at 2 : 103.805595
..Loss value at 3 : 106.91514
..Loss value at 4 : 110.120445
..Loss value at 5 : 113.42265
..Loss value at 6 : 116.79276
..Loss value at 7 : 120.23212
..Loss value at 8 : 123.75211
..Loss value at 9 : 127.404274
..Loss value at 10 : 131.15454
..Loss value at 11 : 134.95734
..Loss value at 12 : 138.84492
..Loss value at 13 : 142.82175
..Loss value at 14 : 146.85234
..Loss value at 15 : 150.96034
..Loss value at 16 : 155.1086
..Loss value at 17 : 159.36621
..Loss value at 18 : 163.68747
..Loss value at 19 : 168.0646
..Loss value at 20 : 172.51965
..Loss value at 21 : 177.06673
..Loss value at 22 : 181.7297
..Loss value at 23 : 186.46399
..Loss value at 24 : 191.27129
..Loss value at 25 : 196.15152
..Loss value at 26 : 201.09499
..Loss value at 27 : 206.09158
..Loss value at 28 : 211.13937
..Loss value at 29 : 216.23997
Processing image shape (325, 400)
..Loss value