# Temporal U-Net with Squeezeformer Blocks:

In [2]:
import torch  # Import the PyTorch library
import torch.nn as nn  # Import the nn module from PyTorch

# Define the SqueezeformerBlock class
class SqueezeformerBlock(nn.Module):
    """Self-attention + feed-forward block with post-norm and a learnable scale.

    Each sub-layer is applied residually (``x + sublayer(x)``), then passed
    through its own ``LayerNorm`` and multiplied by a single scalar ``scale``
    that is shared by both sub-layers.

    Args:
        dim: Size of the feature (last) dimension of the input.
        num_heads: Number of attention heads; ``nn.MultiheadAttention``
            requires ``dim`` to be divisible by ``num_heads``.

    Shape:
        Input and output are both ``(batch, seq_len, dim)``.
    """

    def __init__(self, dim: int, num_heads: int) -> None:
        super().__init__()
        # batch_first=True is required: the surrounding code feeds tensors laid
        # out as (batch, seq_len, dim). With the default (batch_first=False) the
        # first axis is interpreted as the sequence, so attention would run
        # across the batch instead of across time steps.
        self.mha = nn.MultiheadAttention(dim, num_heads, batch_first=True)
        # Position-wise feed-forward network with a 4x hidden expansion.
        self.ff = nn.Sequential(
            nn.Linear(dim, dim * 4),
            nn.SiLU(),
            nn.Linear(dim * 4, dim),
        )
        self.norm1 = nn.LayerNorm(dim)  # applied after the attention residual
        self.norm2 = nn.LayerNorm(dim)  # applied after the feed-forward residual
        # One learnable scalar, initialised to 1, reused after both norms.
        self.scale = nn.Parameter(torch.ones(1))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply attention and feed-forward sub-layers to ``x``.

        Args:
            x: Tensor of shape ``(batch, seq_len, dim)``.

        Returns:
            Tensor with the same shape as ``x``.
        """
        # Self-attention (query = key = value = x); index [0] drops the
        # attention-weights element of the returned tuple.
        x = x + self.mha(x, x, x)[0]
        x = self.norm1(x) * self.scale

        # Feed-forward sub-layer, same residual -> norm -> scale pattern.
        x = x + self.ff(x)
        x = self.norm2(x) * self.scale

        return x

# Define the Squeezeformer class
class Squeezeformer(nn.Module):
    """Stack of ``SqueezeformerBlock`` layers with a temporal U-Net shape.

    The first half of the blocks runs at the input time resolution, the
    sequence is then downsampled by a stride-2 depthwise convolution, the
    second half of the blocks runs at the reduced resolution, and a stride-2
    transposed convolution restores the resolution before the input is added
    back as a skip connection.

    Args:
        num_blocks: Total number of ``SqueezeformerBlock`` layers. The first
            ``num_blocks // 2`` run before downsampling, the rest after.
        dim: Feature dimension of the input and of every block.
        num_heads: Number of attention heads in every block.

    Shape:
        Input and output are both ``(batch, seq_len, dim)``.
    """

    def __init__(self, num_blocks: int, dim: int, num_heads: int) -> None:
        super().__init__()
        self.blocks = nn.ModuleList(
            [SqueezeformerBlock(dim, num_heads) for _ in range(num_blocks)]
        )
        # Depthwise (groups=dim) stride-2 convolution along the time axis.
        self.downsample = nn.Conv1d(
            dim, dim, kernel_size=3, stride=2, padding=1, groups=dim
        )
        # Stride-2 transposed convolution; with kernel_size=4 and padding=1 it
        # exactly doubles the number of time steps.
        self.upsample = nn.ConvTranspose1d(
            dim, dim, kernel_size=4, stride=2, padding=1
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the U-Net style stack on ``x``.

        Args:
            x: Tensor of shape ``(batch, seq_len, dim)``.

        Returns:
            Tensor with the same shape as ``x``.
        """
        skip = x  # kept for the residual connection around the whole stack
        half = len(self.blocks) // 2

        for block in self.blocks[:half]:
            x = block(x)

        # Conv1d expects (batch, channels, time), hence the transposes.
        x = self.downsample(x.transpose(1, 2)).transpose(1, 2)

        for block in self.blocks[half:]:
            x = block(x)

        x = self.upsample(x.transpose(1, 2)).transpose(1, 2)

        # For an odd seq_len the downsample rounds up, so the upsampled
        # sequence is one step longer than the input. Crop it so the skip
        # addition works for any length; this is a no-op for even lengths.
        x = x[:, : skip.size(1)]

        return x + skip

# Example usage
if __name__ == "__main__":
    # Smoke test: build a small model, run one random batch through it, and
    # print the output shape.
    input_dim = 128  # Feature size per time step; passed to the model as `dim`
    sequence_length = 100  # Number of time steps in the example input
    num_blocks = 4  # Total blocks; half run before downsampling, half after
    num_heads = 4  # Attention heads per block

    # Random input laid out as (batch=1, sequence_length, input_dim)
    input_tensor = torch.randn(1, sequence_length, input_dim)

    # Build the model with the hyperparameters above
    model = Squeezeformer(num_blocks, input_dim, num_heads)

    # Forward pass
    output_tensor = model(input_tensor)

    # The model adds its input back as a skip connection, so the output shape
    # equals the input shape
    print("Output shape:", output_tensor.shape)

Output shape: torch.Size([1, 100, 128])


# Depthwise Separable Convolution Subsampling:


In [3]:
import torch  # Import the PyTorch library
import torch.nn as nn  # Import the nn module from PyTorch

# Define the DepthwiseSeparableConv class
class DepthwiseSeparableConv(nn.Module):
    """1-D depthwise separable convolution.

    A per-channel (depthwise) convolution is followed by a 1x1 (pointwise)
    convolution that mixes channels and maps ``in_channels`` to
    ``out_channels``.

    Args:
        in_channels: Number of channels in the input.
        out_channels: Number of channels produced by the pointwise stage.
        kernel_size: Kernel width of the depthwise stage; the padding is
            ``kernel_size // 2``.
        stride: Stride of the depthwise stage.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride):
        super().__init__()
        # groups=in_channels gives every input channel its own filter.
        self.depthwise = nn.Conv1d(
            in_channels=in_channels,
            out_channels=in_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=kernel_size // 2,
            groups=in_channels,
        )
        # Kernel-size-1 convolution: mixes channels, leaves the length as is.
        self.pointwise = nn.Conv1d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        """Return the pointwise convolution of the depthwise convolution of ``x``."""
        per_channel = self.depthwise(x)
        return self.pointwise(per_channel)

# Define the SubsamplingBlock class
class SubsamplingBlock(nn.Module):
    """Two-stage temporal subsampling front end.

    A regular stride-2 convolution changes the channel count from
    ``in_channels`` to ``out_channels``; a stride-2 depthwise separable
    convolution then subsamples again while keeping ``out_channels``.

    Args:
        in_channels: Number of channels in the input.
        out_channels: Number of channels in the output.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # Stage 1: standard convolution, stride 2.
        self.conv1 = nn.Conv1d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=2,
            padding=1,
        )
        # Stage 2: depthwise separable convolution, stride 2.
        self.conv2 = DepthwiseSeparableConv(
            out_channels, out_channels, kernel_size=3, stride=2
        )

    def forward(self, x):
        """Apply both subsampling stages to ``x`` and return the result."""
        return self.conv2(self.conv1(x))

# Example usage
if __name__ == "__main__":
    # Smoke test: run one random batch through SubsamplingBlock and print the
    # resulting shape.
    input_dim = 128  # Number of input channels
    sequence_length = 100  # Number of time steps in the example input
    input_tensor = torch.randn(1, input_dim, sequence_length)  # Channels-first layout: (batch=1, input_dim, sequence_length)
    model = SubsamplingBlock(input_dim, 256)  # input_dim input channels -> 256 output channels
    output_tensor = model(input_tensor)  # Forward pass through both stride-2 stages
    print("Output shape:", output_tensor.shape)  # Report the subsampled shape

Output shape: torch.Size([1, 256, 25])


# Unified Activations with Squeezeformer Block:

In [None]:
import torch  # Import the PyTorch library
import torch.nn as nn  # Import the nn module from PyTorch

# Define the SqueezeformerBlock class
class SqueezeformerBlock(nn.Module):
    """Attention + convolution + feed-forward block using SiLU throughout.

    Each of the three sub-layers is applied residually (``x + sublayer(x)``),
    then passed through its own ``LayerNorm`` and multiplied by a single
    scalar ``scale`` that is shared by all three sub-layers.

    Args:
        dim: Size of the feature (last) dimension of the input.
        num_heads: Number of attention heads; ``nn.MultiheadAttention``
            requires ``dim`` to be divisible by ``num_heads``.

    Shape:
        Input and output are both ``(batch, seq_len, dim)``.
    """

    def __init__(self, dim: int, num_heads: int) -> None:
        super().__init__()
        # batch_first=True is required: forward() treats x as
        # (batch, seq_len, dim) (see the transpose before the conv module).
        # With the default (batch_first=False) the first axis is interpreted
        # as the sequence, so attention would run across the batch instead of
        # across time steps.
        self.mha = nn.MultiheadAttention(dim, num_heads, batch_first=True)
        # Position-wise feed-forward network with a 4x hidden expansion.
        self.ff = nn.Sequential(
            nn.Linear(dim, dim * 4),
            nn.SiLU(),
            nn.Linear(dim * 4, dim),
        )
        # Convolution module operating on (batch, dim, seq_len).
        self.conv = nn.Sequential(
            nn.Conv1d(dim, dim, kernel_size=3, padding=1, groups=dim),  # depthwise
            nn.BatchNorm1d(dim),
            nn.SiLU(),
            nn.Conv1d(dim, dim, kernel_size=1),  # pointwise
            nn.BatchNorm1d(dim),
            nn.SiLU(),
        )
        self.norm1 = nn.LayerNorm(dim)  # applied after the attention residual
        self.norm2 = nn.LayerNorm(dim)  # applied after the convolution residual
        self.norm3 = nn.LayerNorm(dim)  # applied after the feed-forward residual
        # One learnable scalar, initialised to 1, reused after every norm.
        self.scale = nn.Parameter(torch.ones(1))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply attention, convolution and feed-forward sub-layers to ``x``.

        Args:
            x: Tensor of shape ``(batch, seq_len, dim)``.

        Returns:
            Tensor with the same shape as ``x``.
        """
        # Self-attention (query = key = value = x); index [0] drops the
        # attention-weights element of the returned tuple.
        x = x + self.mha(x, x, x)[0]
        x = self.norm1(x) * self.scale

        # Conv1d/BatchNorm1d expect channels on axis 1, so swap to
        # (batch, dim, seq_len) for the conv module and back afterwards.
        x = x + self.conv(x.transpose(1, 2)).transpose(1, 2)
        x = self.norm2(x) * self.scale

        # Feed-forward sub-layer, same residual -> norm -> scale pattern.
        x = x + self.ff(x)
        x = self.norm3(x) * self.scale

        return x

# Example usage
if __name__ == "__main__":
    # Smoke test: run one random batch through a single SqueezeformerBlock and
    # print the output shape.
    input_dim = 128  # Feature size per time step; passed to the block as `dim`
    sequence_length = 100  # Number of time steps in the example input
    num_heads = 4  # Attention heads in the block
    input_tensor = torch.randn(1, sequence_length, input_dim)  # Layout: (batch=1, sequence_length, input_dim)
    model = SqueezeformerBlock(input_dim, num_heads)  # Build the block
    output_tensor = model(input_tensor)  # Forward pass
    print("Output shape:", output_tensor.shape)  # Report the resulting shape