In [4]:
import torch
import torch.nn as nn
from torchviz import make_dot

class ConvEmbeddingStem(nn.Module):
    """Convolutional stem that turns a 1-D signal into a sequence of feature maps.

    Three stages run back to back, each one being
    ``Conv1d -> GELU -> BatchNorm1d -> Dropout``:

    1. kernel 10, stride 2, padding 4, ``out_channels // 2`` filters
    2. kernel 3,  stride 1, padding 1, ``out_channels`` filters
    3. kernel 3,  stride 2, padding 1, ``out_channels`` filters

    None of the convolutions has a bias term. The two strided stages shrink
    the temporal axis by roughly a factor of four (a length of 500 becomes 125).

    Args:
        in_channels: Number of channels of the incoming signal.
        out_channels: Number of channels produced by stages two and three.
        dropout: Drop probability applied at the end of every stage.
    """

    def __init__(self, in_channels=1, out_channels=16, dropout=0.3):
        super().__init__()
        half_channels = out_channels // 2

        # Stage 1: wide, strided convolution into half the target width.
        self.conv1 = nn.Conv1d(in_channels, half_channels, kernel_size=10, stride=2, padding=4, bias=False)
        self.act1 = nn.GELU()
        self.bn1 = nn.BatchNorm1d(half_channels)
        self.dropout1 = nn.Dropout(dropout)

        # Stage 2: length-preserving convolution that widens to out_channels.
        self.conv2 = nn.Conv1d(half_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.act2 = nn.GELU()
        self.bn2 = nn.BatchNorm1d(out_channels)
        self.dropout2 = nn.Dropout(dropout)

        # Stage 3: second strided convolution, channel count unchanged.
        self.conv3 = nn.Conv1d(out_channels, out_channels, kernel_size=3, stride=2, padding=1, bias=False)
        self.act3 = nn.GELU()
        self.bn3 = nn.BatchNorm1d(out_channels)
        self.dropout3 = nn.Dropout(dropout)

        self.init_weight()

    def init_weight(self):
        """Re-initialise conv weights (Kaiming normal) and reset BatchNorm to identity."""
        for module in self.modules():
            if isinstance(module, nn.Conv1d):
                nn.init.kaiming_normal_(module.weight)
                if module.bias is not None:
                    nn.init.zeros_(module.bias)
            elif isinstance(module, nn.BatchNorm1d):
                nn.init.ones_(module.weight)
                nn.init.zeros_(module.bias)

    def forward(self, x):
        """Apply the three stages to ``x`` of shape ``(batch, in_channels, length)``."""
        stages = (
            (self.conv1, self.act1, self.bn1, self.dropout1),
            (self.conv2, self.act2, self.bn2, self.dropout2),
            (self.conv3, self.act3, self.bn3, self.dropout3),
        )
        for conv, activation, norm, drop in stages:
            # The activation comes before the normalisation in every stage.
            x = drop(norm(activation(conv(x))))
        return x

class MultiheadSelfAttentionBlock(nn.Module):
    """Pre-norm Transformer encoder block: self-attention, then an MLP, each on a residual branch.

    ``nn.MultiheadAttention`` is built without ``batch_first``, so inputs are
    expected as ``(seq_len, batch, dim)``.

    Args:
        dim: Embedding width of the tokens; must be divisible by ``num_heads``.
        num_heads: Number of attention heads.
        mlp_ratio: The MLP hidden width is ``int(dim * mlp_ratio)``.
        qkv_bias: Forwarded to ``nn.MultiheadAttention`` as its ``bias`` argument.
        dropout: Drop probability used inside the attention module, at the end
            of the MLP and on both residual branches.
    """

    def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, dropout=0.):
        super().__init__()
        self.norm1 = nn.LayerNorm(dim)
        self.attn = nn.MultiheadAttention(dim, num_heads, dropout=dropout, bias=qkv_bias)
        self.norm2 = nn.LayerNorm(dim)
        self.mlp = nn.Sequential(
            nn.Linear(dim, int(dim * mlp_ratio)),
            nn.GELU(),
            nn.Linear(int(dim * mlp_ratio), dim),
            nn.Dropout(dropout)
        )
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return ``x`` after the attention and MLP residual updates (shape unchanged)."""
        # Normalise once and reuse the result as query, key and value. LayerNorm
        # is deterministic, so calling it three times only repeated the same work;
        # passing one tensor also makes it explicit that this is self-attention.
        normed = self.norm1(x)
        attn_out = self.attn(normed, normed, normed)[0]  # [1] would be the attention weights
        x = x + self.dropout(attn_out)
        # NOTE: self.mlp already ends in its own Dropout, so the MLP branch passes
        # through two dropout layers in a row while training.
        x = x + self.dropout(self.mlp(self.norm2(x)))
        return x

class TransformerBlock(nn.Module):
    """A stack of ``transformer_depth`` identical ``MultiheadSelfAttentionBlock`` layers.

    Args:
        transformer_dim: Embedding width handed to every layer as ``dim``.
        num_heads: Number of attention heads per layer.
        transformer_depth: How many layers to stack.
        dropout: Drop probability handed to every layer.
    """

    def __init__(self, transformer_dim, num_heads, transformer_depth, dropout):
        super().__init__()
        self.blocks = nn.ModuleList()
        for _ in range(transformer_depth):
            layer = MultiheadSelfAttentionBlock(
                dim=transformer_dim,
                num_heads=num_heads,
                dropout=dropout,
            )
            self.blocks.append(layer)

    def forward(self, x):
        """Feed ``x`` through every layer in order and return the final result."""
        hidden = x
        for layer in self.blocks:
            hidden = layer(hidden)
        return hidden

class FullyConnectedLayers(nn.Module):
    """MLP head: ``Linear -> activation -> Dropout`` per hidden layer, then a final ``Linear``.

    Args:
        input_size: Number of input features.
        fc_neurons: Widths of the hidden layers, in order. May be empty, in
            which case the head is a single ``Linear(input_size, num_classes)``.
        num_classes: Number of output features (no activation is applied to them).
        dropout: Drop probability used after every hidden activation.
        activation: Activation module placed after every hidden ``Linear``.
            The same instance is reused at every position, so an activation
            with learnable parameters would share them across layers.
    """

    def __init__(self, input_size, fc_neurons, num_classes, dropout, activation):
        super().__init__()
        # Consecutive pairs of this list are the (in, out) sizes of the hidden
        # layers; with no hidden layers it is just [input_size].
        widths = [input_size, *fc_neurons]

        layers = []
        for in_features, out_features in zip(widths, widths[1:]):
            layers.append(nn.Linear(in_features, out_features))
            layers.append(activation)
            layers.append(nn.Dropout(dropout))
        layers.append(nn.Linear(widths[-1], num_classes))
        self.fc_layers = nn.Sequential(*layers)

    def forward(self, x):
        """Map ``x`` of shape ``(..., input_size)`` to ``(..., num_classes)``."""
        return self.fc_layers(x)

class ConvTransformerModel(nn.Module):
    """1-D signal classifier: convolutional stem -> Transformer stack -> MLP head.

    Args:
        input_size: Length of the input signal. Not used by the layers, which
            work for any length the stem accepts; kept for API compatibility.
        num_classes: Number of output values per sample.
        transformer_dim: Channel count produced by the stem and embedding
            width of the Transformer.
        num_heads: Number of attention heads per Transformer layer.
        transformer_depth: Number of stacked Transformer layers.
        fc_neurons: Hidden layer widths of the MLP head. ``None`` (the
            default) means ``[512, 128]``.
        fc_transformer: Output width of the linear projection applied to the
            Transformer output before the MLP head.
        dropout: Drop probability shared by the stem, the Transformer and the head.
        activation: Activation module for the MLP head. ``None`` (the default)
            means a fresh ``nn.ReLU()``.
    """

    def __init__(self, input_size: int, num_classes: int,
                 transformer_dim: int = 128, num_heads: int = 4,
                 transformer_depth: int = 2,
                 fc_neurons: list = None,
                 fc_transformer: int = 128,
                 dropout: float = 0.3, activation: nn.Module = None):
        super().__init__()

        # Default arguments are evaluated once, when the function is defined.
        # Building them here gives every model its own list and its own
        # activation module instead of objects shared by all instances.
        if fc_neurons is None:
            fc_neurons = [512, 128]
        if activation is None:
            activation = nn.ReLU()

        # Convolutional Embedding Stem
        self.conv_embedding_stem = ConvEmbeddingStem(in_channels=1, out_channels=transformer_dim, dropout=dropout)

        # Transformer Blocks
        self.transformer_blocks = TransformerBlock(transformer_dim, num_heads, transformer_depth, dropout)
        self.fc_transformer = nn.Linear(transformer_dim, fc_transformer)

        # Fully connected layers for output
        self.fc_layers = FullyConnectedLayers(fc_transformer, fc_neurons, num_classes, dropout, activation)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Classify a batch of single-channel signals.

        Args:
            x: Tensor of shape ``(batch, 1, length)``.

        Returns:
            Tensor of shape ``(batch, num_classes)``; no activation is applied
            to the outputs.
        """
        # Transformer path
        x_transformer = self.conv_embedding_stem(x)  # (batch, transformer_dim, reduced_length)
        # Move the length axis to the front: (reduced_length, batch, transformer_dim).
        x_transformer = x_transformer.permute(2, 0, 1)
        x_transformer = self.transformer_blocks(x_transformer)
        # Index -1 on the leading (sequence) axis keeps only the last position.
        x_transformer = self.fc_transformer(x_transformer[-1])

        # Fully connected layers
        x = self.fc_layers(x_transformer)
        return x

# Create a model instance. transformer_dim, num_heads and transformer_depth
# are spelled out here but equal the constructor defaults.
model = ConvTransformerModel(input_size=500, num_classes=4, transformer_dim=128, num_heads=4, transformer_depth=2)

# Create a dummy input tensor with the appropriate size
dummy_input = torch.randn(1, 1, 500)  # Batch size of 1, 1 channel, input size of 500

# Generate the graph: one forward pass records the autograd graph that
# make_dot then walks back from the output `y`.
# NOTE(review): model.eval() is never called, so this pass presumably runs
# with dropout active and BatchNorm in training mode - confirm that is intended.
y = model(dummy_input)
# `params` maps parameter names to tensors, presumably so graph leaves are labelled by name.
graph = make_dot(y, params=dict(model.named_parameters()), show_attrs=False, show_saved=True)

# Save the graph to a file ('png' is set before rendering so the output is an image)
graph.format = 'png'
graph.render('model_architecture_blocks')

print("Model architecture saved as 'model_architecture_blocks.png'")


Model architecture saved as 'model_architecture_blocks.png'


In [15]:
from graphviz import Digraph

def _add_cluster_group(parent, name, label, block_fill, node_color, blocks, node_size, link_attrs=None):
    """Draw one top-level group: a grey cluster holding a chain of sub-clusters.

    Args:
        parent: Graph (or subgraph) the group cluster is added to.
        name: Cluster name of the group; Graphviz only draws a box for
            subgraphs whose name starts with ``cluster``.
        label: Caption of the group cluster.
        block_fill: Fill colour of every sub-cluster.
        node_color: Fill colour of every node.
        blocks: Sequence of ``(cluster_name, label, nodes)`` where ``nodes`` is
            a non-empty sequence of ``(node_id, node_label)`` pairs drawn as a chain.
        node_size: Dict with the ``width`` / ``height`` node attributes.
        link_attrs: Extra attributes for the edges between consecutive sub-clusters.

    Returns:
        ``(entry_node_id, exit_node_id)``: the first node of the first block
        and the last node of the last block, for wiring the group to its neighbours.
    """
    link_attrs = link_attrs or {}
    with parent.subgraph(name=name) as group:
        group.attr(label=label, style='filled', color='lightgrey')
        previous = None  # (cluster name, last node id) of the preceding block
        for block_name, block_label, nodes in blocks:
            with group.subgraph(name=block_name) as block:
                block.attr(label=block_label, style='filled', color=block_fill)
                for node_id, node_label in nodes:
                    block.node(node_id, node_label, shape='rect', style='filled',
                               color=node_color, **node_size)
                for (tail, _), (head, _) in zip(nodes, nodes[1:]):
                    block.edge(tail, head)
            if previous is not None:
                prev_name, prev_exit = previous
                # Edges can only join nodes. ltail/lhead clip the edge at the
                # cluster borders so it reads as "block -> block".
                group.edge(prev_exit, nodes[0][0], ltail=prev_name, lhead=block_name, **link_attrs)
            previous = (block_name, nodes[-1][0])
    return blocks[0][2][0][0], blocks[-1][2][-1][0]


def create_detailed_model_diagram(block_width=2, block_height=1):
    """Draw a hand-written block diagram of ``ConvTransformerModel`` and render it as PNG.

    The diagram is static: it describes the model configuration used in this
    notebook (transformer_dim=128, 4 heads, 2 Transformer layers, hidden FC
    widths 512 and 128, 4 classes) and does not inspect a model object.
    Residual connections, the selection of the last sequence position, the
    ``fc_transformer`` projection and the final ``Linear`` to the classes are
    not drawn.

    Args:
        block_width: Minimum node width passed to Graphviz (inches).
        block_height: Minimum node height passed to Graphviz (inches).

    Returns:
        The ``Digraph`` object. As a side effect the diagram is rendered to
        ``detailed_model_diagram.png`` in the working directory.
    """
    dot = Digraph(comment='ConvTransformerModel', format='png')
    # rankdir='LR' gives a landscape layout. compound='true' is required for
    # the lhead/ltail attributes used below to connect clusters; an edge whose
    # endpoint is a cluster *name* would make Graphviz create a stray node of
    # that name instead.
    dot.attr(rankdir='LR', compound='true')

    node_size = {'width': str(block_width), 'height': str(block_height)}

    # Define main block nodes
    dot.node('A', 'Input\nShape: (1, 500)', shape='rect', style='filled', color='lightblue', **node_size)

    # ConvEmbeddingStem: three Conv1d -> GELU -> BatchNorm -> Dropout stages.
    conv_labels = ('Conv1d: 10, Stride: 2', 'Conv1d: 3, Stride: 1', 'Conv1d: 3, Stride: 2')
    conv_blocks = [
        (f'cluster_Conv{i}', f'Conv1D Block {i}',
         [(f'B{i}', conv_label), (f'B{i}a', 'GELU'), (f'B{i}b', 'BatchNorm'), (f'B{i}c', 'Dropout')])
        for i, conv_label in enumerate(conv_labels, start=1)
    ]

    # Transformer layers in the order MultiheadSelfAttentionBlock.forward
    # applies them: LayerNorm -> attention -> Dropout, LayerNorm -> MLP -> Dropout.
    transformer_blocks = [
        (f'cluster_Transformer{i}', f'Transformer Block {i}',
         [(f'C{i}', 'LayerNorm'),
          (f'C{i}a', 'Multihead Self-Attention\n4 heads'),
          (f'C{i}b', 'Dropout'),
          (f'C{i}c', 'LayerNorm'),
          (f'C{i}d', 'MLP\n(Linear, GELU, Linear, Dropout)'),
          (f'C{i}e', 'Dropout')])
        for i in (1, 2)
    ]

    # Hidden layers of the MLP head: 128 -> 512 -> 128, each followed by ReLU and Dropout.
    fc_labels = ('FC: 128 -> 512', 'FC: 512 -> 128')
    fc_blocks = [
        (f'cluster_FC{i}', f'Fully Connected Layer {i}',
         [(f'D{i}', fc_label), (f'D{i}a', 'ReLU'), (f'D{i}b', 'Dropout')])
        for i, fc_label in enumerate(fc_labels, start=1)
    ]

    conv_entry, conv_exit = _add_cluster_group(
        dot, 'cluster_ConvEmbeddingStem', 'ConvEmbeddingStem',
        'lightyellow', 'yellow', conv_blocks, node_size)
    transformer_entry, transformer_exit = _add_cluster_group(
        dot, 'cluster_TransformerBlock', 'Transformer Block (x2)',
        'lightgreen', 'green', transformer_blocks, node_size,
        link_attrs={'label': 'Repeat x2', 'style': 'dashed'})
    fc_entry, fc_exit = _add_cluster_group(
        dot, 'cluster_FullyConnected', 'FullyConnected',
        'lightcoral', 'coral', fc_blocks, node_size)

    # Output Layer
    dot.node('E', 'Output\nClasses: 4\nShape: (Batch, 4)', shape='rect', style='filled', color='lightblue', **node_size)

    # Add edges to show the flow. Each edge joins two real nodes and is
    # clipped to the enclosing group cluster via ltail/lhead.
    dot.edge('A', conv_entry, lhead='cluster_ConvEmbeddingStem')
    dot.edge(conv_exit, transformer_entry, ltail='cluster_ConvEmbeddingStem', lhead='cluster_TransformerBlock')
    dot.edge(transformer_exit, fc_entry, ltail='cluster_TransformerBlock', lhead='cluster_FullyConnected')
    dot.edge(fc_exit, 'E', ltail='cluster_FullyConnected')

    # Save and render the diagram
    dot.render('detailed_model_diagram', format='png')

    return dot

# Create the detailed model diagram with specified block width and height.
# The function itself already renders 'detailed_model_diagram' as PNG; the
# returned graph object is kept in `dot`.
dot = create_detailed_model_diagram(block_width=2, block_height=1)
# NOTE(review): view() presumably renders again and opens the result in an
# external viewer, which needs a desktop session - confirm this is wanted
# when the notebook runs headless.
dot.view()

print("Detailed model block diagram saved as 'detailed_model_diagram.png'")



Detailed model block diagram saved as 'detailed_model_diagram.png'
