In [1]:
import tensorflow as tf
import keras
import numpy as np

In [2]:
class MyLayerNorm(tf.keras.layers.Layer):
    """Layer normalization over the last axis with a learned scale and shift.

    Every feature vector along the last axis is standardized to zero mean and
    (approximately) unit variance, then multiplied element-wise by ``alpha``
    and offset by ``beta``::

        alpha * (x - mean) / sqrt(var + epsilon) + beta

    Args:
        epsilon: Small constant added to the variance before the square root
            so that constant inputs do not cause a division by zero.
        **kwargs: Forwarded unchanged to ``tf.keras.layers.Layer``.
    """

    def __init__(self, epsilon=1e-4, **kwargs):
        super().__init__(**kwargs)
        self.epsilon = epsilon

    def build(self, input_shape):
        """Create the trainable ``alpha`` (scale) and ``beta`` (shift) vectors.

        Both weights have shape ``input_shape[-1:]`` and dtype ``tf.float32``.
        ``alpha`` starts at ones and ``beta`` at zeros, so a freshly built
        layer only standardizes its input.
        """
        self.alpha = self.add_weight(
            name="alpha",
            shape=input_shape[-1:],
            dtype=tf.float32,
            initializer="ones",
            trainable=True,
        )
        self.beta = self.add_weight(
            name="beta",
            shape=input_shape[-1:],
            dtype=tf.float32,
            initializer="zeros",
            trainable=True,
        )

    def call(self, inputs):
        """Standardize ``inputs`` over the last axis, then scale and shift.

        The mean and variance are computed per instance over the last axis
        (``keepdims=True`` so they broadcast against ``inputs``).
        """
        mean, var = tf.nn.moments(inputs, axes=-1, keepdims=True)
        std = tf.sqrt(var + self.epsilon)
        eq = (inputs - mean) / std
        return (self.alpha * eq) + self.beta

    def get_config(self):
        """Return the constructor arguments needed to re-create this layer.

        Only hyperparameters belong here. The ``alpha``/``beta`` weights are
        deliberately left out: they are variables (not JSON-serializable),
        they do not exist before ``build`` runs, and ``from_config`` would
        hand them to ``__init__`` as unexpected keyword arguments.
        """
        base_config = super().get_config()
        return {**base_config, "epsilon": self.epsilon}

In [3]:
# Download/load CIFAR-10 as (train images, train labels), (test images, test labels).
(X_train_all, y_train_all), (X_test, y_test) = keras.datasets.cifar10.load_data()

# Hold out the last 5000 training examples for validation; train on the rest.
X_train = X_train_all[:-5000]
y_train = y_train_all[:-5000]
X_valid = X_train_all[-5000:]
y_valid = y_train_all[-5000:]

In [4]:
def scale_pixels(images):
    """Return ``images`` rescaled by 1/255 as a float32 array.

    Integer-typed arrays are converted to float32 and divided by 255.
    Arrays that are already floating point are returned unchanged, so
    re-running this cell does not divide the data by 255 a second time.
    Staying in float32 (instead of NumPy's default float64 result) halves
    the memory needed for the image arrays.

    Assumes integer input holds raw intensities in [0, 255] -- TODO confirm
    if this is reused for data other than the CIFAR-10 arrays loaded above.
    """
    if np.issubdtype(images.dtype, np.integer):
        return images.astype(np.float32) / 255.
    return images


X_train = scale_pixels(X_train)
X_valid = scale_pixels(X_valid)
X_test = scale_pixels(X_test)

In [None]:
# Sanity check: display the dimensions of the training array after the split and scaling.
X_train.shape

In [5]:
# Push two different single-image batches through Keras' built-in layer
# normalization so their outputs can be compared in the following cells.
layernorm = tf.keras.layers.LayerNormalization()

sample1, sample2 = X_train[:1], X_train[1:2]
norm_values1 = layernorm(sample1)
norm_values2 = layernorm(sample2)

# Normalization must not change the shape of its input.
print(norm_values1.shape, norm_values2.shape, sep="\n")

(1, 32, 32, 3)
(1, 32, 32, 3)


In [10]:
# Flatten the first normalized sample into a 1-D tensor.
new1 = tf.reshape(norm_values1, shape=(-1,))

In [11]:
# Flatten the second normalized sample into a 1-D tensor.
new2 = tf.reshape(norm_values2, shape=(-1,))

In [15]:
# Element-wise difference between the two flattened, normalized samples.
diff = new1 - new2

In [16]:
# Average of the element-wise differences.
# NOTE(review): with the layer's default settings each normalized sample is
# already ~zero-mean over its last axis, so this value is expected to be ~0
# for any pair of images; it does not measure how different the samples are.
mean = tf.math.reduce_mean(diff)

In [17]:
# Show the scalar mean of the differences computed in the previous cell.
print(mean)

tf.Tensor(5.8983765e-09, shape=(), dtype=float32)


In [None]:
# Try the custom layer defined above. Note that this rebinds `layernorm`,
# which previously referred to the built-in Keras layer.
layernorm = MyLayerNorm()

# Slice (rather than index) so the single image keeps its leading batch axis.
sample = X_train[:1]
norm_values = layernorm(sample)

print(norm_values)

In [None]:
# Mean and standard deviation taken over every element of the first
# normalized image (all pixels and channels together).
print(np.mean(norm_values[0]), np.std(norm_values[0]), sep="\n")

In [None]:
# Built-in layer normalization instance; it is added to the Sequential model in the next cell.
layer = tf.keras.layers.LayerNormalization()

In [None]:
# Sequential-API classifier: flatten -> dense(ReLU) -> layer norm -> softmax
# over 10 classes. The layers are passed in one list instead of repeated add().
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=[32, 32, 3]),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(100, activation="relu"),
    layer,
    tf.keras.layers.Dense(10, activation="softmax"),
])

In [None]:
# Layer instances for the functional-API version of the same architecture.
# They are only created here; the next cell connects them to an input.
flatten = tf.keras.layers.Flatten()
hidden_layer1 = tf.keras.layers.Dense(100, activation="relu")
# Rebinds `layernorm` again, this time to a fresh built-in LayerNormalization.
layernorm = tf.keras.layers.LayerNormalization()
output = tf.keras.layers.Dense(10, activation="softmax")

In [None]:
# Wire the layers together symbolically: each call takes the previous
# symbolic tensor and returns the next one, ending with the softmax outputs.
input_ = tf.keras.layers.Input(shape=[32, 32, 3])
flattened = flatten(input_)
hidden1 = hidden_layer1(flattened)
normed = layernorm(hidden1)
outputs = output(normed)

In [None]:
# Build the functional model. This rebinds `model`, so the Sequential model
# created earlier is no longer reachable under that name.
model = tf.keras.Model(inputs=[input_], outputs=[outputs])

In [None]:
# Adam optimizer with an explicit learning rate of 1e-4.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

In [None]:
# Integer class labels + softmax outputs -> sparse categorical cross-entropy.
model.compile(
    optimizer=optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train for 10 epochs in mini-batches of 128, evaluating on the held-out
# validation split after every epoch; `history` keeps the per-epoch metrics.
history = model.fit(
    X_train,
    y_train,
    batch_size=128,
    epochs=10,
    validation_data=(X_valid, y_valid),
)

In [None]:
# Print the symbolic output tensor of the functional graph (not predictions).
print(outputs)

In [None]:
# Inspect the normalization layer that is actually part of the trained model.
# `layer` belongs to the earlier Sequential model, which was replaced by the
# functional model before training, so it never received any updates.
print(layernorm.beta.shape)
print(layernorm.gamma.shape)

In [None]:
# Learned shift of the trained model's normalization layer. (`layer` is the
# untrained instance from the discarded Sequential model and would only show
# its initial values.)
print(layernorm.beta)

In [None]:
# Learned scale of the trained model's normalization layer. (`layer` is the
# untrained instance from the discarded Sequential model and would only show
# its initial values.)
print(layernorm.gamma)