In [1]:
import tensorflow as tf
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input, decode_predictions
# Switch TensorFlow to eager mode so the cells below can display tensor values
# and call model layers directly on concrete tensors.
tf.enable_eager_execution()

In [3]:
# Use the Keras that ships with TensorFlow so image loading comes from the same
# package as VGG16 / preprocess_input rather than a separately installed `keras`.
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Load the picture, resized to the 224x224 resolution requested via target_size.
image_pil = load_img("images/content/mug.jpg", target_size=(224, 224))
# Convert the loaded image to an array of shape (height, width, channels).
image_array = img_to_array(image_pil)
# Add a leading batch axis: (H, W, C) -> (1, H, W, C).
image_batch = image_array.reshape((1,) + image_array.shape)
# Apply the VGG16-specific preprocessing and hand the result to TensorFlow.
# Each stage keeps its own name so re-running the cell never feeds an already
# processed array back into an earlier step; later cells only read `image`.
image = tf.convert_to_tensor(preprocess_input(image_batch))

In [4]:
# Inspect the preprocessed input; the output below shows a float32 tensor of
# shape (1, 224, 224, 3).
image

<tf.Tensor: id=1, shape=(1, 224, 224, 3), dtype=float32, numpy=
array([[[[-100.939    ,  -62.779    ,  -59.68     ],
         [-100.939    ,  -59.779    ,  -59.68     ],
         [-103.939    ,  -63.779    ,  -45.68     ],
         ...,
         [ -39.939003 ,  -25.779    ,   -6.6800003],
         [ -59.939003 ,  -28.779    ,  -12.68     ],
         [ -39.939003 ,  -35.779    ,  -16.68     ]],

        [[ -98.939    ,  -67.779    ,  -50.68     ],
         [-103.939    ,  -59.779    ,  -57.68     ],
         [ -95.939    ,  -62.779    ,  -61.68     ],
         ...,
         [ -50.939003 ,  -26.779    ,   -5.6800003],
         [ -47.939003 ,  -24.779    ,   -1.6800003],
         [ -68.939    ,  -52.779    ,  -28.68     ]],

        [[-101.939    ,  -60.779    ,  -56.68     ],
         [ -93.939    ,  -64.779    ,  -52.68     ],
         [-103.939    ,  -61.779    ,  -52.68     ],
         ...,
         [ -37.939003 ,  -26.779    ,   -3.6800003],
         [ -41.939003 ,  -17.779    ,    3

In [5]:
# VGG16 pretrained on ImageNet with its classification head attached.
# Both keyword arguments restate the constructor defaults explicitly.
model = VGG16(weights="imagenet", include_top=True)

In [6]:
# Run the classifier on the single-image batch; `pred` is displayed below as a
# 2-D array holding one row of per-class scores.
pred = model.predict(image)
pred

array([[7.46645945e-09, 1.18957331e-07, 1.61559455e-09, 6.69610856e-09,
        1.50035575e-08, 3.88458155e-08, 5.14524023e-09, 1.12834556e-07,
        1.06853868e-07, 1.31564164e-07, 7.16783859e-08, 3.03530726e-07,
        2.20922502e-07, 3.90760349e-08, 1.46355731e-07, 4.66080436e-08,
        6.21918161e-08, 5.46117462e-08, 7.39889714e-08, 1.38313993e-07,
        3.85590093e-09, 1.47633434e-08, 1.31194744e-08, 4.30811440e-08,
        5.31819850e-08, 1.23997044e-08, 2.83293371e-08, 1.87693615e-07,
        2.62537299e-08, 1.61781725e-06, 2.02956691e-08, 2.15941952e-07,
        7.99497499e-08, 1.68464442e-08, 8.43119086e-09, 5.95372240e-09,
        6.66525040e-08, 1.39984646e-08, 3.98995930e-08, 3.00059817e-08,
        3.77384382e-08, 2.02664641e-08, 2.04160671e-08, 2.23889458e-08,
        4.62807179e-08, 2.21660088e-07, 1.15475110e-07, 2.83632922e-08,
        3.23092131e-08, 1.77800601e-08, 2.14646469e-08, 3.26552261e-08,
        1.48767157e-07, 6.09307733e-08, 6.03349619e-08, 8.617022

In [7]:
# Map the score rows to ranked label entries (one list per image in the batch).
ranked_labels = decode_predictions(pred)
# First image in the batch, best-scoring entry.
label = ranked_labels[0][0]
# label[1] is the class name and label[2] its score; report the score as a percentage.
print('%s (%.2f%%)' % (label[1], label[2] * 100))

coffee_mug (75.27%)


In [8]:
# Print the layer-by-layer architecture (layer types, output shapes, parameter counts).
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [9]:
# The first 19 layers are the input layer plus the five convolutional blocks
# (input_1 ... block5_pool, as listed in the output below).
feature_extractor_layers = model.layers[:19]
for vgg_layer in feature_extractor_layers:
    print(vgg_layer.name)

input_1
block1_conv1
block1_conv2
block1_pool
block2_conv1
block2_conv2
block2_pool
block3_conv1
block3_conv2
block3_conv3
block3_pool
block4_conv1
block4_conv2
block4_conv3
block4_pool
block5_conv1
block5_conv2
block5_conv3
block5_pool


In [10]:
def style_features(model, image, style_layers=None):
    """Run an image forward through a model and collect style-layer activations.

    The layers of ``model`` are applied one after another, starting from
    ``image``. The output of every layer whose name appears in
    ``style_layers`` is recorded, and the forward pass stops as soon as all
    requested layers have been collected, so deeper layers are never evaluated.

    Args:
        model: Object exposing ``layers``, an ordered sequence of callables
            that each carry a ``name`` attribute (e.g. a Keras VGG16 model).
        image: Input fed to the first layer.
        style_layers: Iterable of layer names to record. Defaults to
            ``('block1_conv2', 'block2_conv2', 'block3_conv3', 'block4_conv3')``.

    Returns:
        dict mapping layer name -> activation, in the order the layers are
        encountered. Names that do not occur in ``model.layers`` are absent.
    """
    if style_layers is None:
        style_layers = ('block1_conv2', 'block2_conv2', 'block3_conv3', 'block4_conv3')
    # A set gives O(1) membership tests and ignores duplicate names.
    wanted = set(style_layers)

    features = {}
    x = image
    for layer in model.layers:
        # Everything requested is already collected: skip the rest of the network.
        if len(features) == len(wanted):
            break
        x = layer(x)
        if layer.name in wanted:
            features[layer.name] = x

    return features

def content_feature(model, image, content_layer='block3_conv3'):
    """Run an image forward through a model and return the content-layer activation.

    The layers of ``model`` are applied one after another, starting from
    ``image``; the forward pass stops at the layer named ``content_layer``
    and that layer's output is returned.

    Args:
        model: Object exposing ``layers``, an ordered sequence of callables
            that each carry a ``name`` attribute (e.g. a Keras VGG16 model).
        image: Input fed to the first layer.
        content_layer: Name of the layer whose activation is returned.
            Defaults to ``'block3_conv3'``.

    Returns:
        The output of the layer named ``content_layer``.

    Raises:
        ValueError: If no layer in ``model.layers`` is named ``content_layer``.
    """
    x = image
    for layer in model.layers:
        x = layer(x)
        if layer.name == content_layer:
            # Deeper layers are not needed, so stop the forward pass here.
            return x

    # Fail with a clear message instead of returning an unbound local.
    raise ValueError("model has no layer named %r" % (content_layer,))

In [11]:
def gram_matrix(x):
    """Compute the normalized Gram matrix of a batch of feature maps.

    Args:
        x: 4-D tensor laid out as (batch, height, width, channels).

    Returns:
        Tensor of shape (batch, channels, channels). Entry ``[n, i, j]`` is the
        inner product of channels ``i`` and ``j`` of sample ``n`` taken over all
        spatial positions, divided by ``channels * height * width``.
    """
    # Index the dynamic shape instead of unpacking it so this also works when
    # the tensor is symbolic (iteration over a tensor requires eager mode).
    shape = tf.shape(x)
    b, h, w, c = shape[0], shape[1], shape[2], shape[3]

    # Channels are the LAST axis, so flatten only the spatial axes. Reshaping
    # directly to [b, c, h*w] would mix channel and spatial values together.
    flat = tf.reshape(x, [b, h * w, c])

    # (b, c, h*w) @ (b, h*w, c) -> (b, c, c)
    gram = tf.matmul(flat, flat, transpose_a=True)

    # The shape entries are integers; cast before dividing the float Gram matrix.
    return gram / tf.cast(c * h * w, gram.dtype)

In [17]:
def loss(y, content_image, style_image, content_weight, style_weight, model=None):
    """Compute the style-transfer loss of an output image.

    Args:
        y: Output (generated) image being scored.
        content_image: Image whose content-layer activation ``y`` should match.
        style_image: Image whose style-layer Gram matrices ``y`` should match.
        content_weight: Scalar multiplier for the content term.
        style_weight: Scalar multiplier for the style term.
        model: Feature-extraction network passed on to ``style_features`` and
            ``content_feature``. When omitted, a pretrained ``VGG16()`` is
            built on every call; pass an existing model to avoid that cost.

    Returns:
        Scalar tensor ``content_weight * content_loss + style_weight * style_loss``.
    """
    if model is None:
        # Pretrained VGG16 on imagenet
        model = VGG16()

    # Features of the output image. Local names deliberately differ from the
    # helper functions: assigning to `style_features` / `content_feature` here
    # would shadow them and make the calls below fail.
    output_style = style_features(model, y)
    output_content = content_feature(model, y)

    # Target features from the style and content images
    target_style = style_features(model, style_image)
    target_content = content_feature(model, content_image)

    # Content loss: mean squared difference of the content activations.
    # The weight is applied once, when the terms are combined below.
    content_loss = tf.reduce_mean(tf.square(output_content - target_content))

    # Style loss: squared Frobenius distance between Gram matrices, summed over
    # the style layers. The Gram matrices differ in size from layer to layer,
    # so they are compared per layer rather than stacked into one tensor.
    style_loss = 0.0
    for layer_name, output_activation in output_style.items():
        gram_difference = gram_matrix(output_activation) - gram_matrix(target_style[layer_name])
        style_loss = style_loss + tf.reduce_sum(tf.square(gram_difference))

    # TODO: Add total variation regularization

    total_loss = content_weight * content_loss + style_weight * style_loss
    return total_loss
    

In [18]:
# Smoke test: collect the style-layer activations for the loaded image; the
# output below is a dict keyed by layer name.
style_features(model, image)

{'block1_conv2': <tf.Tensor: id=657, shape=(1, 224, 224, 64), dtype=float32, numpy=
 array([[[[  0.       ,   0.       ,   0.       , ...,   0.       ,
             0.       , 134.49661  ],
          [ 24.753698 ,   0.       ,   0.       , ...,  18.237741 ,
             0.       ,  91.70744  ],
          [  0.       ,   0.       ,   0.       , ...,  22.323454 ,
            70.15368  ,  16.531952 ],
          ...,
          [  0.       ,   0.       ,   0.       , ...,  69.38382  ,
            54.69859  ,  52.436184 ],
          [  0.       ,   0.       ,   0.       , ...,  58.71202  ,
             0.       ,  93.16857  ],
          [147.3129   ,   6.3522286,   0.       , ...,  42.578434 ,
           111.755554 ,  48.949512 ]],
 
         [[ 72.596855 ,   0.       ,   0.       , ...,  59.340622 ,
             0.       ,  21.079945 ],
          [454.39282  , 475.9996   ,   0.       , ..., 249.94353  ,
             0.       ,   0.       ],
          [250.22014  , 405.507    ,   0.       , 

In [19]:
# Smoke test: content-layer activation for the loaded image; the output below
# is a tensor of shape (1, 56, 56, 256).
content_feature(model, image)

<tf.Tensor: id=741, shape=(1, 56, 56, 256), dtype=float32, numpy=
array([[[[   0.       ,    0.       ,    0.       , ...,  191.88591  ,
             0.       ,    0.       ],
         [   0.       ,    0.       ,    0.       , ...,    0.       ,
             0.       ,    0.       ],
         [   0.       ,    0.       ,    0.       , ...,    0.       ,
             0.       ,    0.       ],
         ...,
         [   0.       ,    0.       ,    0.       , ...,    0.       ,
           110.82649  ,    0.       ],
         [   0.       ,    0.       ,    0.       , ...,    0.       ,
             9.566113 ,    0.       ],
         [ 192.26158  ,  178.38237  ,    0.       , ...,  189.65485  ,
             0.       ,    0.       ]],

        [[   0.       ,  224.8784   ,    0.       , ...,    0.       ,
             0.       ,    0.       ],
         [   0.       ,    0.       ,    0.       , ...,    0.       ,
             0.       ,    0.       ],
         [   0.       ,  345.13245  , 