In [2]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="-1"

import tensorflow as tf
import tensorflow.keras as keras
import numpy as np

from tensorflow.python.keras.utils import tf_utils

import sys
sys.path.append('../')

from layers.noisy_bn import NoisyBatchNormalization, BetterNoisyBatchNormalization
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [21]:
class TestLayer(keras.layers.Layer):
    
    def __init__(self):
        super(TestLayer, self).__init__()
        self.weight = self.add_weight(shape=(),
                                     initializer='zeros',
                                     trainable=False)
        
    def call(self, inputs, training=None):
        if training is None:
            training = keras.backend.learning_phase()
        
#         self.weight.assign(self.weight+1)
        def train_fn_(self):
            self.weight.assign(self.weight+1)
            return tf.zeros(tf.shape(inputs))
        
        train_fn = lambda: train_fn_(self)
        eval_fn = lambda: tf.ones(tf.shape(inputs))
            
        return tf_utils.smart_cond(training, train_fn, eval_fn)

In [22]:
keras.backend.learning_phase()

0

In [23]:
class MyModel(keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.layer = TestLayer()
        
    def call(self, inputs, training=None):
        return self.layer(inputs, training)

In [24]:
model = MyModel()

In [25]:
X = np.random.rand(97,100)
y = np.ones((97,100))

In [26]:
model(X, False)

<tf.Tensor: id=722, shape=(97, 100), dtype=float32, numpy=
array([[1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.],
       ...,
       [1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.]], dtype=float32)>

In [27]:
model.compile(optimizer='adam',loss='mse')

In [28]:
model.fit(x=X, y=y, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x129dbecf8>

In [29]:
model.layer.weight

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.0>

In [97]:
model.predict(X)

array([[1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.],
       ...,
       [1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.]], dtype=float32)

In [10]:
a = np.array([[1.,2.,3.,4.,5.], [5.,4.,3.,2.,1.]], dtype=np.float32)
b = np.random.rand(2,2,2,2).astype(np.float32)
norm1 = keras.layers.BatchNormalization()
norm2 = keras.layers.BatchNormalization()

In [4]:
for x in range(1000):
    _ = norm1(a, training=True)
    _ = norm2(a, training=True)

In [5]:
norm1(a, training=False), norm2(a, training=False)

(<tf.Tensor: id=54085, shape=(2, 5), dtype=float32, numpy=
 array([[-0.9998265 , -0.99937105,  0.00400543,  0.99962974,  0.9999559 ],
        [ 0.9999559 ,  0.99962974,  0.00400543, -0.99937105, -0.9998265 ]],
       dtype=float32)>,
 <tf.Tensor: id=54098, shape=(2, 5), dtype=float32, numpy=
 array([[-0.9998265 , -0.99937105,  0.00400543,  0.99962974,  0.9999559 ],
        [ 0.9999559 ,  0.99962974,  0.00400543, -0.99937105, -0.9998265 ]],
       dtype=float32)>)

In [11]:
noisy_batch = BetterNoisyBatchNormalization(alpha=0.0)

In [7]:
for _ in range(1000):
    noisy_batch(a, training=True)

In [8]:
noisy_batch(a, training=False)

<tf.Tensor: id=103149, shape=(2, 5), dtype=float32, numpy=
array([[-0.9998265 , -0.99937105,  0.00400543,  0.99962974,  0.9999559 ],
       [ 0.9999559 ,  0.99962974,  0.00400543, -0.99937105, -0.9998265 ]],
      dtype=float32)>

In [12]:
norm1(b, training=True), norm2(b, training=True)

(<tf.Tensor: id=103195, shape=(2, 2, 2, 2), dtype=float32, numpy=
 array([[[[ 0.69621027, -0.13438885],
          [-0.3277291 ,  1.0909348 ]],
 
         [[ 1.4646038 ,  1.0638347 ],
          [-1.2575142 , -0.6498192 ]]],
 
 
        [[[-0.9798999 , -1.1927565 ],
          [ 0.8688836 ,  0.3792724 ]],
 
         [[-1.1709852 ,  1.0345124 ],
          [ 0.7064312 , -1.5915896 ]]]], dtype=float32)>,
 <tf.Tensor: id=103252, shape=(2, 2, 2, 2), dtype=float32, numpy=
 array([[[[ 0.69621027, -0.13438885],
          [-0.3277291 ,  1.0909348 ]],
 
         [[ 1.4646038 ,  1.0638347 ],
          [-1.2575142 , -0.6498192 ]]],
 
 
        [[[-0.9798999 , -1.1927565 ],
          [ 0.8688836 ,  0.3792724 ]],
 
         [[-1.1709852 ,  1.0345124 ],
          [ 0.7064312 , -1.5915896 ]]]], dtype=float32)>)

In [13]:
noisy_batch(b, training=True)

<tf.Tensor: id=103348, shape=(2, 2, 2, 2), dtype=float32, numpy=
array([[[[ 0.69621027, -0.13438885],
         [-0.3277291 ,  1.0909348 ]],

        [[ 1.4646038 ,  1.0638347 ],
         [-1.2575142 , -0.6498192 ]]],


       [[[-0.9798999 , -1.1927565 ],
         [ 0.8688836 ,  0.3792724 ]],

        [[-1.1709852 ,  1.0345124 ],
         [ 0.7064312 , -1.5915896 ]]]], dtype=float32)>

In [35]:
def batchnorm_forward(X):
    mu = np.mean(X, axis=(0,1,2))
    var = np.var(X, axis=(0,1,2))

    X_norm = (X - mu) / np.sqrt(var + 0.001)
    out = 1 * X_norm 

    return out

In [37]:
batchnorm_forward(b.astype(np.float64))

array([[[[-0.73704042, -0.18121228],
         [ 1.24844679,  0.9654738 ]],

        [[-0.80001376,  1.33775863],
         [ 1.05068317,  0.61308071]]],


       [[[-0.9519694 , -1.62714515],
         [-1.12262878,  0.39589516]],

        [[-0.06086089, -1.39443851],
         [ 1.3733833 , -0.10941236]]]])

In [14]:
norm1(b, training=False), norm2(b, training=False)

(<tf.Tensor: id=103355, shape=(2, 2, 2, 2), dtype=float32, numpy=
 array([[[[0.64110553, 0.55212283],
          [0.3444212 , 0.974324  ]],
 
         [[0.863746  , 0.96498626],
          [0.07501787, 0.3745246 ]]],
 
 
        [[[0.15545607, 0.18744847],
          [0.69113725, 0.7291115 ]],
 
         [[0.10008946, 0.9548829 ],
          [0.64406705, 0.05002537]]]], dtype=float32)>,
 <tf.Tensor: id=103366, shape=(2, 2, 2, 2), dtype=float32, numpy=
 array([[[[0.64110553, 0.55212283],
          [0.3444212 , 0.974324  ]],
 
         [[0.863746  , 0.96498626],
          [0.07501787, 0.3745246 ]]],
 
 
        [[[0.15545607, 0.18744847],
          [0.69113725, 0.7291115 ]],
 
         [[0.10008946, 0.9548829 ],
          [0.64406705, 0.05002537]]]], dtype=float32)>)

In [15]:
norm1.moving_mean, norm1.moving_variance

(<tf.Variable 'batch_normalization_v2_2/moving_mean:0' shape=(2,) dtype=float32, numpy=array([0.00442026, 0.00602151], dtype=float32)>,
 <tf.Variable 'batch_normalization_v2_2/moving_variance:0' shape=(2,) dtype=float32, numpy=array([0.99094033, 0.99133503], dtype=float32)>)

In [16]:
noisy_batch.moving_mean, noisy_batch.moving_variance

(<tf.Variable 'better_noisy_batch_normalization_1/moving_mean:0' shape=(2,) dtype=float32, numpy=array([0.00442026, 0.00602151], dtype=float32)>,
 <tf.Variable 'better_noisy_batch_normalization_1/moving_variance:0' shape=(2,) dtype=float32, numpy=array([0.99094033, 0.99133503], dtype=float32)>)

In [18]:
noisy_batch.gamma, noisy_batch.beta

(<tf.Variable 'better_noisy_batch_normalization_1/gamma:0' shape=(2,) dtype=float32, numpy=array([1., 1.], dtype=float32)>,
 <tf.Variable 'better_noisy_batch_normalization_1/beta:0' shape=(2,) dtype=float32, numpy=array([0., 0.], dtype=float32)>)

In [None]:
for x in range(1000):
    _ = norm1(b, training=True)
    _ = norm2(b, training=True)

In [None]:
for _ in range(1000):
    noisy_batch(b, training=True)

In [None]:
noisy_batch(b, b)