In [None]:
import torch
import torch.nn as nn

In [None]:
import random
# One seed shared by every RNG so a fresh "Restart & Run All" reproduces the
# same random tensors and the same initial model weights.
SEED = 24
random.seed(SEED)  # Python's built-in RNG
# torch.manual_seed seeds PyTorch's generators on all devices (not only the CPU)
# and returns a Generator; the trailing ';' keeps that repr out of the cell output.
torch.manual_seed(SEED);

In [None]:
# Set print options: no scientific notation, 4 decimal places
torch.set_printoptions(sci_mode=False, precision=4)

# Later cells read `layer_norm` and `x`; define them here so the notebook
# survives Restart Kernel -> Run All instead of failing with NameError.
layer_norm = nn.LayerNorm(10)  # normalizes over a trailing dimension of size 10
x = torch.randn(3, 10)  # random input: batch_size=3, features=10

# Initialize LayerNorm with 10 features

# Create a random input tensor with shape (batch_size=3, features=10)

![Layer normalization illustration](layer_norm_image.png)

# Compute mean and std of x before normalization

In [None]:
# Per-sample statistics of the raw input, reduced over the last (feature) axis.
# keepdim=True keeps one column per row so each value lines up with its sample.
# unbiased=False requests the population std, matching the biased variance
# estimator that nn.LayerNorm documents using internally.
print("Before LayerNorm:")
print("Input:", x)
print("Mean:", torch.mean(x, dim=-1, keepdim=True))
print("Std:", torch.std(x, dim=-1, unbiased=False, keepdim=True))

# Apply Layer Normalization

In [None]:
# Run the input through LayerNorm. `output` was printed here (and is reused by
# the "after normalization" cell) without ever being computed, so compute it first.
output = layer_norm(x)
print("Output:", output)

# Print the gain (γ) and bias (β)

In [None]:
# Show LayerNorm's two affine parameters.
gamma = layer_norm.weight  # Scaling parameter (γ)
beta = layer_norm.bias  # Shifting parameter (β)
print("Gain (γ):", gamma)
print("Bias (β):", beta)

# Compute mean and std of x after normalization

In [None]:
# Same per-sample statistics as before, now measured on the normalized output.
print("\nAfter LayerNorm:")
mean_after = output.mean(dim=-1, keepdim=True)
std_after = output.std(dim=-1, unbiased=False, keepdim=True)
print("Mean:", mean_after)  # Should be close to 0
print("Std:", std_after)  # Should be close to 1

# Simple Model to demonstrate gamma and beta parameters

In [None]:
import torch.optim as optim

# Dummy regression data: 10 samples with 5 features each, and targets of the
# same shape. NOTE: this rebinds `x`, which the earlier LayerNorm cells also use.
sample_shape = (10, 5)
x = torch.randn(sample_shape)  # model inputs
y = torch.randn(sample_shape)  # Regression target

# Define a simple model with LayerNorm
class SimpleModel(nn.Module):
    """LayerNorm followed by a square linear layer.

    Both sub-modules operate on a trailing dimension of size ``feature_dim``,
    so the output has the same shape as the input.
    """

    def __init__(self, feature_dim):
        """Create the LayerNorm and the ``feature_dim -> feature_dim`` linear layer."""
        super().__init__()
        self.layernorm = nn.LayerNorm(feature_dim)
        self.fc = nn.Linear(feature_dim, feature_dim)  # Fully connected layer

    def forward(self, x):
        """Normalize ``x`` with LayerNorm, then apply the linear layer."""
        normalized = self.layernorm(x)
        return self.fc(normalized)


# Initialize model
model = SimpleModel(feature_dim=5)

# Print initial gamma and beta

In [None]:
# LayerNorm parameters before any optimizer step has been taken.
# .detach() gives a gradient-free view of each parameter, so only values print.
print("Before Training:")
print("Initial Gain (γ):", model.layernorm.weight.detach())
print("Initial Bias (β):", model.layernorm.bias.detach())

# Train for one iteration

In [None]:
# Plain SGD over every model parameter (the LayerNorm γ/β are included because
# the layer is a registered sub-module), with mean-squared-error as the loss.
optimizer = optim.SGD(params=model.parameters(), lr=0.1)
criterion = nn.MSELoss()

# A single full-batch update
for step in range(1):
    optimizer.zero_grad()  # clear gradients left over from any previous step
    outputs = model(x)  # forward pass
    loss = criterion(outputs, y)
    loss.backward()  # populate .grad on every parameter
    optimizer.step()  # apply the SGD update in place

# Print gamma and beta after one training step

In [None]:
# LayerNorm parameters after the single optimizer step above.
layernorm = model.layernorm
print("\nAfter Training One Iteration:")
print("Updated Gain (γ):", layernorm.weight.detach())
print("Updated Bias (β):", layernorm.bias.detach())