In [4]:
import torch
from adaptive_dropout import AdaptiveInformationDropout, OptimizerConfig

#### Define Simple MSE-based Information Loss Function

In [2]:
import torch

def calculate_information_loss(pre_dropout: torch.Tensor, post_dropout: torch.Tensor, properties: dict = None) -> torch.Tensor:
    """
    Estimate how much information dropout removed, as a normalized MSE.

    The score is ``mean((pre - post) ** 2) / (mean(pre ** 2) + 1e-8)``,
    clamped to the closed interval [0, 1]:
       - 0 means no information loss (the tensors are identical)
       - 1 means maximum information loss (the error is at least as large
         as the energy of the pre-dropout tensor)

    Both inputs are cast to float32 before any arithmetic, so integer or
    half-precision tensors are accepted and the result is always float32.
    The two tensors are combined with normal broadcasting rules; shapes
    that cannot be broadcast raise the usual PyTorch error.

    Args:
        pre_dropout: Original tensor before dropout
        post_dropout: Tensor after dropout application
        properties: Optional additional properties (accepted for interface
            compatibility; not used in this simple version)

    Returns:
        torch.Tensor: 0-dimensional float32 tensor in [0, 1]. For inputs
        with zero elements the result is 0 (nothing could be lost) rather
        than NaN.
    """
    # Ensure inputs are float tensors
    pre = pre_dropout.float()
    post = post_dropout.float()

    # Compute the difference first so incompatible shapes still raise.
    diff = pre - post

    # torch.mean over zero elements yields NaN, and NaN passes straight
    # through torch.clamp, which would break the documented [0, 1] range.
    # An empty tensor carries no information, so report zero loss.
    if diff.numel() == 0:
        return diff.new_zeros(())

    # Calculate mean squared error
    mse = torch.mean(diff ** 2)

    # Normalize by the mean energy of the pre-dropout tensor.
    # The small epsilon prevents division by zero for an all-zero input.
    magnitude = torch.mean(pre ** 2) + 1e-8

    # Clip so the value stays between 0 and 1 even when the error exceeds
    # the input energy (e.g. post_dropout has been rescaled or sign-flipped).
    return torch.clamp(mse / magnitude, 0, 1)


#### Example

In [3]:
# Seed the global RNG first so the random input below is reproducible
torch.manual_seed(123)

# Random input batch: one row per sample, one column per feature
batch_size = 32
feature_dim = 10
x = torch.randn(batch_size, feature_dim)

# Starting dropout probability and the information-loss threshold
# handed to the adaptive layer
initial_p = 0.2
information_loss_threshold = 0.10

# Optimizer settings passed to the layer via `optimizer_config`
optimizer_config = OptimizerConfig(
    max_iterations=30,
    learning_rate=0.1,
    decay_rate=0.9,
    stopping_error=0.001,
)

# Build the adaptive dropout layer, using the MSE-based loss defined above
# as its information-loss callback
dropout = AdaptiveInformationDropout(
    initial_p=initial_p,
    calc_information_loss=calculate_information_loss,
    information_loss_threshold=information_loss_threshold,
    optimizer_config=optimizer_config,
    name='Example Layer',
    verbose=2,
)

# Run the layer on the sample batch
output = dropout(x)

# Report tensor shapes before and after the layer
print("Input shape:", x.shape)
print("Output shape:", output.shape)

# Compare the configured starting rate with the rate held by the layer
# after the forward pass
print()
print("Initial dropout rate:", initial_p)
print("Final dropout rate:", dropout.p.item())

Example Layer: Current Dropout Rate: 20.0% | Loss: 0.253
Example Layer: Current Dropout Rate: 18.8% | Loss: 0.253
Example Layer: Current Dropout Rate: 17.5% | Loss: 0.206
Example Layer: Current Dropout Rate: 16.5% | Loss: 0.177
Example Layer: Current Dropout Rate: 15.8% | Loss: 0.232
Example Layer: Current Dropout Rate: 14.6% | Loss: 0.127
Example Layer: Current Dropout Rate: 14.4% | Loss: 0.172
Example Layer: Current Dropout Rate: 13.7% | Loss: 0.183
Example Layer: Current Dropout Rate: 12.9% | Loss: 0.176
Example Layer: Current Dropout Rate: 12.2% | Loss: 0.094
Example Layer: Current Dropout Rate: 12.3% | Loss: 0.149
Example Layer: Current Dropout Rate: 11.9% | Loss: 0.115
Example Layer: Current Dropout Rate: 11.7% | Loss: 0.100
Example Layer: Final Dropout Rate: 11.7% | Loss: 0.100

Input shape: torch.Size([32, 10])
Output shape: torch.Size([32, 10])

Initial dropout rate: 0.2
Final dropout rate: 0.11726853707942174
