In [2]:
import tensorflow as tf
import keras
import numpy as np

In [3]:
class MyLayerNorm(tf.keras.layers.Layer):
    """Layer normalization over the last axis with a learned scale and offset.

    For every feature vector along the last axis the layer computes
    ``alpha * (x - mean) / sqrt(var + epsilon) + beta``, where ``alpha`` and
    ``beta`` are trainable vectors with one entry per feature.

    Args:
        epsilon: Small constant added to the variance before taking the
            square root, so constant feature vectors do not divide by zero.
            Defaults to 1e-4.
        **kwargs: Forwarded unchanged to ``tf.keras.layers.Layer``.
    """

    def __init__(self, epsilon=1e-4, **kwargs):
        super().__init__(**kwargs)
        self.epsilon = epsilon

    def build(self, input_shape):
        """Create the trainable ``alpha`` (scale) and ``beta`` (offset) weights.

        Both are float32 vectors of shape ``input_shape[-1:]``; ``alpha``
        starts at ones and ``beta`` at zeros, so the layer initially returns
        the plain normalized input.
        """
        feature_shape = input_shape[-1:]
        self.alpha = self.add_weight(
            name="alpha", shape=feature_shape,
            dtype=tf.float32, initializer="ones", trainable=True
        )
        self.beta = self.add_weight(
            name="beta", shape=feature_shape,
            dtype=tf.float32, initializer="zeros", trainable=True
        )

    def call(self, inputs):
        """Normalize ``inputs`` along the last axis, then scale and shift.

        Returns:
            A tensor with the same shape as ``inputs``.
        """
        # keepdims=True keeps a trailing axis of size 1 so the statistics
        # broadcast against `inputs`.
        mean, var = tf.nn.moments(inputs, axes=-1, keepdims=True)
        std = tf.sqrt(var + self.epsilon)
        eq = (inputs - mean) / std
        return (self.alpha * eq) + self.beta

    def get_config(self):
        """Return the constructor arguments needed to re-create this layer.

        Only constructor arguments belong here. The `alpha`/`beta` weights are
        not constructor arguments, are not JSON-serializable, and do not exist
        until `build` has run, so they are deliberately left out of the config.
        """
        base_config = super().get_config()
        return {**base_config, "epsilon": self.epsilon}

In [4]:
# Download CIFAR-10 and hold out the last 5000 training examples for validation.
(X_train_all, y_train_all), (X_test, y_test) = keras.datasets.cifar10.load_data()

X_train, X_valid = X_train_all[:-5000], X_train_all[-5000:]
y_train, y_valid = y_train_all[:-5000], y_train_all[-5000:]

In [5]:
def scale_pixels(images):
    """Divide an integer-typed image array by 255; return any other array as is.

    The dtype guard makes this cell idempotent: once an array has been
    converted to floats, re-running the cell no longer divides it again.
    """
    if np.issubdtype(images.dtype, np.integer):
        return images / 255.
    return images


X_train = scale_pixels(X_train)
X_valid = scale_pixels(X_valid)
X_test = scale_pixels(X_test)

In [None]:
# Display the dimensions of the training array.
X_train.shape

In [6]:
# Run Keras' built-in layer normalization on two single-image batches.
layernorm = tf.keras.layers.LayerNormalization()

sample1 = X_train[:1]
norm_values1 = layernorm(sample1)

sample2 = X_train[1:2]
norm_values2 = layernorm(sample2)

# Shapes first (one per line), then the full normalized tensor of the first image.
print(norm_values1.shape, norm_values2.shape, sep="\n")

print(norm_values1)

(1, 32, 32, 3)
(1, 32, 32, 3)
tf.Tensor(
[[[[-0.28313756  0.08089644  0.20224111]
   [-0.2042561   0.16340488  0.04085122]
   [ 0.34947175  0.11649058 -0.46596232]
   ...
   [ 1.1540048  -0.03036855 -1.1236362 ]
   [ 1.1683812  -0.06069513 -1.107686  ]
   [ 1.145676   -0.049812   -1.095864  ]]

  [[-0.32200918  0.16100459  0.16100459]
   [ 0.          0.          0.        ]
   [ 0.8546979  -0.06104985 -0.79364806]
   ...
   [ 1.1875043  -0.02305834 -1.164446  ]
   [ 1.1941673  -0.03411906 -1.1600481 ]
   [ 1.2007973  -0.06003987 -1.1407574 ]]

  [[ 0.20224111  0.08089644 -0.28313756]
   [ 0.80219525 -0.06417562 -0.73801965]
   [ 1.1294703  -0.0537843  -1.0756861 ]
   ...
   [ 1.1761315   0.         -1.1761315 ]
   [ 1.1897933  -0.02244893 -1.1673443 ]
   [ 1.2029538  -0.05839581 -1.144558  ]]

  ...

  [[ 1.0593573   0.25424576 -1.313603  ]
   [ 1.0142049   0.33492345 -1.3491282 ]
   [ 0.9368974   0.43931073 -1.3762081 ]
   ...
   [ 1.0113593   0.31118748 -1.3225467 ]
   [ 1.1435853  

In [10]:
# Flatten the first normalized image into a 1-D tensor.
new1 = tf.reshape(norm_values1, shape=(-1,))

In [11]:
# Flatten the second normalized image into a 1-D tensor.
new2 = tf.reshape(norm_values2, shape=(-1,))

In [15]:
# Element-wise difference between the two flattened tensors.
diff = new1 - new2

In [16]:
# Average of the signed element-wise differences.
# NOTE(review): with its default settings LayerNormalization presumably centers
# every feature vector, so both tensors already average to ~0 and this signed
# mean will be near zero however different the images are. Confirm this is the
# intended comparison; a mean absolute difference may be more informative.
mean = tf.math.reduce_mean(diff)

In [17]:
# Print the scalar mean of the element-wise differences.
print(mean)

tf.Tensor(5.8983765e-09, shape=(), dtype=float32)


In [7]:
# Pass the first training image (as a batch of one) through the custom layer.
sample = X_train[:1]

layernorm = MyLayerNorm()
norm_values = layernorm(sample)

print(norm_values)

tf.Tensor(
[[[[-0.7613982   0.21754235  0.54385585]
   [-0.5871291   0.4697033   0.11742582]
   [ 0.7702654   0.2567551  -1.0270205 ]
   ...
   [ 1.2311792  -0.03239945 -1.1987802 ]
   [ 1.2463983  -0.06474794 -1.18165   ]
   [ 1.239172   -0.05387745 -1.1852953 ]]

  [[-0.84083676  0.42041838  0.42041838]
   [ 0.          0.          0.        ]
   [ 1.1977165  -0.08555117 -1.1121653 ]
   ...
   [ 1.2313913  -0.02391051 -1.2074809 ]
   [ 1.2370484  -0.03534424 -1.2017043 ]
   [ 1.2491685  -0.06245843 -1.1867102 ]]

  [[ 0.54385585  0.21754235 -0.7613982 ]
   [ 1.1857501  -0.09485999 -1.0908899 ]
   [ 1.2392256  -0.05901074 -1.1802148 ]
   ...
   [ 1.2196105   0.         -1.2196106 ]
   [ 1.2313497  -0.02323302 -1.2081169 ]
   [ 1.2486433  -0.06061375 -1.1880296 ]]

  ...

  [[ 1.0735524   0.25765258 -1.3312051 ]
   [ 1.0202008   0.33690354 -1.3571047 ]
   [ 0.9418953   0.44165438 -1.3835492 ]
   ...
   [ 1.0318679   0.31749782 -1.3493658 ]
   [ 1.2230949  -0.01652831 -1.2065667 ]
   [ 

In [None]:
# Overall mean and standard deviation of the normalized first image, one per line.
print(np.mean(norm_values[0]), np.std(norm_values[0]), sep="\n")

In [None]:
# Built-in Keras layer normalization, kept in a variable so the same instance
# can be referenced again after it has been added to a model.
layer = tf.keras.layers.LayerNormalization()

In [None]:
# Sequential classifier: flatten -> Dense(100, relu) -> layer norm -> Dense(10, softmax).
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=[32, 32, 3]),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(100, activation="relu"),
    layer,
    tf.keras.layers.Dense(10, activation="softmax"),
])

In [None]:
# Layer instances for the functional-API version of the classifier.
flatten = tf.keras.layers.Flatten()
hidden_layer1 = tf.keras.layers.Dense(units=100, activation="relu")
layernorm = tf.keras.layers.LayerNormalization()
output = tf.keras.layers.Dense(units=10, activation="softmax")

In [None]:
# Wire the layers together: image -> flatten -> dense -> layer norm -> softmax.
input_ = tf.keras.layers.Input(shape=(32, 32, 3))
flattened = flatten(input_)
hidden1 = hidden_layer1(flattened)
normed = layernorm(hidden1)
outputs = output(normed)

In [None]:
# Wrap the input and output tensors into a functional model.
# NOTE: this rebinds `model`, replacing the Sequential model built earlier.
model = tf.keras.Model(inputs=[input_], outputs=[outputs])

In [None]:
# Adam with an explicit learning rate of 1e-4.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)

In [None]:
# Integer class labels -> sparse categorical cross-entropy; track accuracy.
model.compile(
    optimizer=optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train for 10 epochs in mini-batches of 128, evaluating on the validation split each epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=128,
    epochs=10,
    validation_data=(X_valid, y_valid),
)

In [None]:
# Print the symbolic output tensor of the functional graph (not model predictions).
print(outputs)

In [None]:
# Inspect `layernorm`, the normalization layer inside the model that was
# compiled and fitted. `layer` belongs to the earlier Sequential model, which
# was replaced before training, so its parameters are still at their initial values.
print(layernorm.beta.shape)
print(layernorm.gamma.shape)

In [None]:
# Offset learned by the trained model's normalization layer. `layer` is not part
# of the fitted model, so it would only show the untrained initial value.
print(layernorm.beta)

In [None]:
# Scale learned by the trained model's normalization layer. `layer` is not part
# of the fitted model, so it would only show the untrained initial value.
print(layernorm.gamma)