Fix tensorflow#224 Add tests for architecture modules.

* Add test coverage for all model architectures. * resnet18.build_resnet is now consistent with the other architectures. The function now connects the x input_layer to the model and returns the output layer of the model. * Remove the keras.applications.imagenet_utils.preprocess_input() call from resnet18.build_resnet(). * Make the application of pooling and include_top consistent for all architectures. Pooling is now applied first, before checking include_top. * Add min_pixel_value param for visualization.visualize_views to ensure that we properly scale the images when plotting. * Update unsupervised_hello_world to include the preprocess scaling on all datasets. Previously it was not included in the callback and this would break the binary_accuracy in the EvalCallback().
abeltheo · Apr 2, 2022 · 412ab0b · 412ab0b
1 parent df0929a
commit 412ab0b
Show file tree

Hide file tree

Showing 8 changed files with 381 additions and 209 deletions.
diff --git a/examples/unsupervised_hello_world.ipynb b/examples/unsupervised_hello_world.ipynb
diff --git a/tensorflow_similarity/architectures/efficientnet.py b/tensorflow_similarity/architectures/efficientnet.py
@@ -54,7 +54,7 @@ def EfficientNetSim(
     l2_norm: bool = True,
     include_top: bool = True,
     pooling: str = "gem",
-    gem_p=1.0,
+    gem_p=3.0,
 ) -> SimilarityModel:
     """Build an EffecientNet Model backbone for similarity learning
 
@@ -125,19 +125,19 @@ def EfficientNetSim(
 
     x = build_effnet(x, variant, weights, trainable)
 
-    if include_top:
+    if pooling == "gem":
         x = GeneralizedMeanPooling2D(p=gem_p, name="gem_pool")(x)
+    elif pooling == "avg":
+        x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
+    elif pooling == "max":
+        x = layers.GlobalMaxPooling2D(name="max_pool")(x)
+
+    if include_top and pooling is not None:
         if l2_norm:
             outputs = MetricEmbedding(embedding_size)(x)
         else:
             outputs = layers.Dense(embedding_size)(x)
     else:
-        if pooling == "gem":
-            x = GeneralizedMeanPooling2D(p=gem_p, name="gem_pool")(x)
-        elif pooling == "avg":
-            x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
-        elif pooling == "max":
-            x = layers.GlobalMaxPooling2D(name="max_pool")(x)
         outputs = x
 
     return SimilarityModel(inputs, outputs)

diff --git a/tensorflow_similarity/architectures/resnet18.py b/tensorflow_similarity/architectures/resnet18.py
@@ -16,7 +16,6 @@
 from typing import Tuple
 import tensorflow as tf
 from tensorflow.keras import layers
-from tensorflow.keras.applications import imagenet_utils
 from tensorflow_similarity.layers import MetricEmbedding
 from tensorflow_similarity.layers import GeneralizedMeanPooling2D
 from tensorflow_similarity.models import SimilarityModel
@@ -29,17 +28,15 @@ def ResNet18Sim(
     l2_norm: bool = True,
     include_top: bool = True,
     pooling: str = "gem",
-    gem_p=1.0,
-    preproc_mode: str = "torch",
-    similarity_model: bool = True,
+    gem_p=3.0,
 ) -> SimilarityModel:
     """Build an ResNet18 Model backbone for similarity learning
 
     Architecture from [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385)
 
     Args:
         input_shape: Expected to be betweeen 32 and 224 and in the (H, W, C)
-        data_format augmentation function.
+        data_format.
 
         embedding_size: Size of the output embedding. Usually between 64
         and 512. Defaults to 128.
@@ -67,22 +64,13 @@ def ResNet18Sim(
         of 1.0 is equivelent to GlobalMeanPooling2D, while larger values
         will increase the contrast between activations within each feature
         map, and a value of math.inf will be equivelent to MaxPool2d.
-
-        preproc_mode: One of "caffe", "tf" or "torch".
-        - caffe: will convert the images from RGB to BGR, then will zero-center
-          each color channel with respect to the ImageNet dataset, without
-          scaling.
-        - tf: will scale pixels between -1 and 1, sample-wise.
-        - torch: will scale pixels between 0 and 1 and then will normalize each
-          channel with respect to the ImageNet dataset.
     """
 
     # input
     inputs = layers.Input(shape=input_shape)
     x = inputs
 
-    resnet = build_resnet(x, "channels_last", preproc_mode)
-    x = resnet(x)
+    x = build_resnet(x, input_shape)
 
     if pooling == "gem":
         x = GeneralizedMeanPooling2D(p=gem_p, name="gem_pool")(x)
@@ -102,46 +90,44 @@ def ResNet18Sim(
     return SimilarityModel(inputs, outputs, name="resnet18sim")
 
 
-def build_resnet(x: layers.Layer, data_format, preproc_mode) -> layers.Layer:
+def build_resnet(
+    x: layers.Layer, input_shape: Tuple[int, int, int]
+) -> layers.Layer:
     """Build the requested ResNet.
 
     Args:
         x: The input layer to the ResNet.
 
-        data_format: Data format of the image tensor.
-
-        preproc_mode: One of "caffe", "tf" or "torch".
+        input_shape: Expected to be between 32 and 224 and in the (H, W, C)
+        data_format.
 
     Returns:
         The ouptut layer of the ResNet model
     """
-    # Handle the case where x.shape includes the placeholder for the batch dim.
-    if x.shape[0] is None:
-        inputs = layers.Input(shape=x.shape[1:])
-    else:
-        inputs = layers.Input(shape=x.shape)
+    inputs = layers.Input(shape=input_shape)
 
-    x = imagenet_utils.preprocess_input(
-        x, data_format=data_format, mode=preproc_mode
-    )
-    x = tf.keras.layers.ZeroPadding2D(
+    layer = tf.keras.layers.ZeroPadding2D(
         padding=((1, 1), (1, 1)), name="conv1_pad"
     )(inputs)
-    x = tf.keras.layers.Conv2D(
+    layer = tf.keras.layers.Conv2D(
         64,
         kernel_size=3,
         strides=1,
         use_bias=False,
         kernel_initializer=tf.keras.initializers.LecunUniform(),
         name="conv1_conv",
-    )(x)
-    x = tf.keras.layers.BatchNormalization(epsilon=1.001e-5, name="conv1_bn")(x)
-    x = tf.keras.layers.Activation("relu", name="conv1_relu")(x)
+    )(layer)
+    layer = tf.keras.layers.BatchNormalization(
+        epsilon=1.001e-5, name="conv1_bn"
+    )(layer)
+    layer = tf.keras.layers.Activation("relu", name="conv1_relu")(layer)
 
-    outputs = stack_fn(x)
+    outputs = stack_fn(layer)
 
-    model = tf.keras.Model(inputs, outputs, name="resnet18")
-    return model
+    # Wrap the layers in a Keras model and apply it to the caller's input x.
+    x = tf.keras.Model(inputs, outputs, name="resnet18")(x)
+
+    return x
 
 
 def block0(

diff --git a/tensorflow_similarity/architectures/resnet50.py b/tensorflow_similarity/architectures/resnet50.py
@@ -31,7 +31,7 @@ def ResNet50Sim(
     l2_norm: bool = True,
     include_top: bool = True,
     pooling: str = "gem",
-    gem_p=1.0,
+    gem_p=3.0,
 ) -> SimilarityModel:
     """Build an ResNet50 Model backbone for similarity learning
 
@@ -85,19 +85,19 @@ def ResNet50Sim(
 
     x = build_resnet(x, weights, trainable)
 
-    if include_top:
+    if pooling == "gem":
         x = GeneralizedMeanPooling2D(p=gem_p, name="gem_pool")(x)
+    elif pooling == "avg":
+        x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
+    elif pooling == "max":
+        x = layers.GlobalMaxPooling2D(name="max_pool")(x)
+
+    if include_top and pooling is not None:
         if l2_norm:
             outputs = MetricEmbedding(embedding_size)(x)
         else:
             outputs = layers.Dense(embedding_size)(x)
     else:
-        if pooling == "gem":
-            x = GeneralizedMeanPooling2D(p=gem_p, name="gem_pool")(x)
-        elif pooling == "avg":
-            x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
-        elif pooling == "max":
-            x = layers.GlobalMaxPooling2D(name="max_pool")(x)
         outputs = x
 
     return SimilarityModel(inputs, outputs)

diff --git a/tensorflow_similarity/visualization/vizualize_views.py b/tensorflow_similarity/visualization/vizualize_views.py
@@ -10,7 +10,8 @@ def visualize_views(views: Tensor,
                     num_imgs: int = None,
                     views_per_col: int = 4,
                     fig_size: Tuple[int, int] = (24, 4),
-                    max_pixel_value: float = 1.0):
+                    max_pixel_value: float = 1.0,
+                    min_pixel_value: float = 0.0):
     """Display side by side different image views with labels, and predictions
 
     Args:
@@ -21,6 +22,8 @@ def visualize_views(views: Tensor,
         views_per_col: Int, number of images in one row. Defaults to 3.
         max_pixel_value: Max expected value for a pixel. Used to scale the image
           between [0,1].
+        min_pixel_value: Min expected value for a pixel. Used to scale the image
+          between [0,1].
 
     Returns:
         None.
@@ -41,7 +44,8 @@ def visualize_views(views: Tensor,
         else:
             ax = axes[i // num_col, i % num_col]
 
-        pair = [views[j][i] / max_pixel_value for j in range(num_views)]
+        scale = abs(max_pixel_value - min_pixel_value)
+        pair = [(views[j][i] - min_pixel_value) / scale for j in range(num_views)]
         ax.imshow(tf.concat(pair, axis=1))
         ax.set_axis_off()
 

diff --git a/tests/architectures/test_efficientnet.py b/tests/architectures/test_efficientnet.py
@@ -78,3 +78,52 @@ def test_build_effnet_unsupported_trainable():
     msg = "foo is not a supported option for 'trainable'."
     with pytest.raises(ValueError, match=msg):
         _ = efficientnet.build_effnet(input_layer, "b0", "imagenet", "foo")
+
+
+def test_unsuported_varient():
+    input_shape = (224, 224, 3)
+    msg = "Unknown efficientnet variant. Valid B0...B7"
+    with pytest.raises(ValueError, match=msg):
+        _ = efficientnet.EfficientNetSim(input_shape, 128, "bad_varient")
+
+
+def test_include_top():
+    input_shape = (224, 224, 3)
+    effnet = efficientnet.EfficientNetSim(input_shape, include_top=True)
+
+    # The second to last layer should use gem pooling when include_top is True
+    assert effnet.layers[-2].name == 'gem_pool'
+    assert effnet.layers[-2].p == 3.0
+    # The default is l2_norm True, so we expect the last layer to be
+    # MetricEmbedding.
+    assert re.match('metric_embedding', effnet.layers[-1].name) is not None
+
+
+def test_l2_norm_false():
+    input_shape = (224, 224, 3)
+    effnet = efficientnet.EfficientNetSim(
+            input_shape,
+            include_top=True,
+            l2_norm=False)
+
+    # The second to last layer should use gem pooling when include_top is True
+    assert effnet.layers[-2].name == 'gem_pool'
+    assert effnet.layers[-2].p == 3.0
+    # If l2_norm is False, we should return a dense layer as the last layer.
+    assert re.match('dense', effnet.layers[-1].name) is not None
+
+
+@pytest.mark.parametrize(
+    "pooling, name",
+    zip(['gem', 'avg', 'max'], ['gem_pool', 'avg_pool', 'max_pool']),
+    ids=['gem', 'avg', 'max']
+)
+def test_include_top_false(pooling, name):
+    input_shape = (224, 224, 3)
+    effnet = efficientnet.EfficientNetSim(
+            input_shape,
+            include_top=False,
+            pooling=pooling)
+
+    # With include_top=False, the model should end with the pooling layer.
+    assert effnet.layers[-1].name == name
diff --git a/tests/architectures/test_resnet18.py b/tests/architectures/test_resnet18.py
@@ -0,0 +1,47 @@
+import re
+
+import pytest
+
+from tensorflow_similarity.architectures import resnet18
+
+
+def test_include_top():
+    input_shape = (32, 32, 3)
+    resnet = resnet18.ResNet18Sim(input_shape, include_top=True)
+
+    # The second to last layer should use gem pooling when include_top is True
+    assert resnet.layers[-2].name == 'gem_pool'
+    assert resnet.layers[-2].p == 3.0
+    # The default is l2_norm True, so we expect the last layer to be
+    # MetricEmbedding.
+    assert re.match('metric_embedding', resnet.layers[-1].name) is not None
+
+
+def test_l2_norm_false():
+    input_shape = (32, 32, 3)
+    resnet = resnet18.ResNet18Sim(
+            input_shape,
+            include_top=True,
+            l2_norm=False)
+
+    # The second to last layer should use gem pooling when include_top is True
+    assert resnet.layers[-2].name == 'gem_pool'
+    assert resnet.layers[-2].p == 3.0
+    # If l2_norm is False, we should return a dense layer as the last layer.
+    assert re.match('dense', resnet.layers[-1].name) is not None
+
+
+@pytest.mark.parametrize(
+    "pooling, name",
+    zip(['gem', 'avg', 'max'], ['gem_pool', 'avg_pool', 'max_pool']),
+    ids=['gem', 'avg', 'max']
+)
+def test_include_top_false(pooling, name):
+    input_shape = (32, 32, 3)
+    resnet = resnet18.ResNet18Sim(
+            input_shape,
+            include_top=False,
+            pooling=pooling)
+
+    # With include_top=False, the model should end with the pooling layer.
+    assert resnet.layers[-1].name == name