In [1]:
# Import necessary libraries
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.autograd import Variable
from collections import OrderedDict
import numpy as np

# Define a neural network class
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.pool = nn.MaxPool2d(2, 2) 
        self.batchNorm1 = nn.BatchNorm2d(8)       
        self.batchNorm2 = nn.BatchNorm2d(16)       
        self.batchNorm3 = nn.BatchNorm2d(32)       
        self.batchNorm4 = nn.BatchNorm2d(64)       
        self.batchNorm5 = nn.BatchNorm2d(128)       
        self.batchNorm6 = nn.BatchNorm2d(128)       
        self.conv1 = nn.Conv2d(3, 8, 3, padding=1)
        self.conv2 = nn.Conv2d(8, 16, 3, padding=1)
        self.conv3 = nn.Conv2d(16, 32, 3, padding=1)
        self.conv4 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv5 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv6 = nn.Conv2d(128, 128, 3, padding=1)
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, 32)
        self.fc4 = nn.Linear(32, 10)
        self.dropout = nn.Dropout(p=0)

    def forward(self, x):
        x = F.relu(self.batchNorm1(self.conv1(x))) # 32 -> 32
        x = self.pool(F.relu(self.batchNorm2(self.conv2(x)))) # 32 -> 32 -> 16
        x = F.relu(self.batchNorm3(self.conv3(x))) # 16 -> 16
        x = self.pool(F.relu(self.batchNorm4(self.conv4(x)))) # 16 -> 16 -> 8
        x = F.relu(self.batchNorm5(self.conv5(x))) # 8 -> 8
        x = self.pool(F.relu(self.batchNorm6(self.conv6(x)))) # 8 -> 8 -> 4
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x

# Set the device to CUDA if available, otherwise use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Create an instance of the neural network
net = Net()

# Move the neural network to the specified device (GPU if available)
net.to(device)

# Define a function to summarize the model
def summary(model, input_size, batch_size=-1, device="cuda") -> tuple:
    # Define a nested function to register hooks for each module
    def register_hook(module):
        def hook(module, input, output):
            # Get the class name of the module
            class_name = str(module.__class__).split(".")[-1].split("'")[0]
            module_idx = len(summary)

            # Create a key for the module in the summary dictionary
            m_key = "%s-%i" % (class_name, module_idx + 1)
            summary[m_key] = OrderedDict()
            summary[m_key]["index"] = module_idx + 1
            summary[m_key]["layer_type"] = class_name

            # Store the input shape in the summary dictionary
            summary[m_key]["input_shape"] = list(input[0].size())
            summary[m_key]["input_shape"][0] = batch_size

            # Store the output shape in the summary dictionary
            if isinstance(output, (list, tuple)):
                summary[m_key]["output_shape"] = [
                    [-1] + list(o.size())[1:] for o in output
                ]
            else:
                summary[m_key]["output_shape"] = list(output.size())
                summary[m_key]["output_shape"][0] = batch_size

            # Calculate the number of parameters in the module
            params = 0
            if hasattr(module, "weight") and hasattr(module.weight, "size"):
                params += torch.prod(torch.LongTensor(list(module.weight.size())))
                summary[m_key]["trainable"] = module.weight.requires_grad
            if hasattr(module, "bias") and hasattr(module.bias, "size"):
                params += torch.prod(torch.LongTensor(list(module.bias.size())))
            summary[m_key]["nb_params"] = params

        # Register the forward hook for the module
        if (
            not isinstance(module, nn.Sequential)
            and not isinstance(module, nn.ModuleList)
            and not (module == model)
        ):
            hooks.append(module.register_forward_hook(hook))

    # Convert the device name to lowercase
    device = device.lower()

    # Check if the device is valid
    assert device in [
        "cuda",
        "cpu",
    ], "Input device is not valid, please specify 'cuda' or 'cpu'"

    # Set the data type based on the device
    if device == "cuda" and torch.cuda.is_available():
        dtype = torch.cuda.FloatTensor
    else:
        dtype = torch.FloatTensor

    # Convert input_size to a list if it is a tuple
    if isinstance(input_size, tuple):
        input_size = [input_size]

    # Create random input tensors for each input size
    x = [torch.rand(2, *in_size).type(dtype) for in_size in input_size]

    # Create an ordered dictionary to store the summary
    summary = OrderedDict()

    # Create a list to store the hooks
    hooks = []

    # Register hooks for each module in the model
    model.apply(register_hook)

    # Make a forward pass through the model
    model(*x)

    # Remove the hooks
    for h in hooks:
        h.remove()

    # Print the summary
    print("------------------------------------------------------------------------")
    line_new = "{:<5}  {:>20}  {:>25} {:>15}".format("Index", "Layer (type)", "Output Shape", "Param #")
    print(line_new)
    print("========================================================================")
    total_params = 0
    total_output = 0
    trainable_params = 0
    for layer in summary:
        line_new = "{:^5}  {:>20}  {:>25} {:>15}".format(
            layer.split("-")[1],
            layer.split("-")[0],
            str(summary[layer]["output_shape"]),
            "{0:,}".format(summary[layer]["nb_params"]),
        )
        total_params += summary[layer]["nb_params"]
        total_output += np.prod(summary[layer]["output_shape"])
        if "trainable" in summary[layer]:
            if summary[layer]["trainable"] == True:
                trainable_params += summary[layer]["nb_params"]
        print(line_new)

    total_input_size = abs(np.prod(input_size) * batch_size * 4. / (1024 ** 2.))
    total_output_size = abs(2. * total_output * 4. / (1024 ** 2.))
    total_params_size = abs(total_params.numpy() * 4. / (1024 ** 2.))
    total_size = total_params_size + total_output_size + total_input_size

    print("========================================================================")
    print("Total params: {0:,}".format(total_params))
    print("Trainable params: {0:,}".format(trainable_params))
    print("Non-trainable params: {0:,}".format(total_params - trainable_params))
    print("------------------------------------------------------------------------")
    print("Input size (MB): %0.2f" % total_input_size)
    print("Forward/backward pass size (MB): %0.2f" % total_output_size)
    print("Params size (MB): %0.2f" % total_params_size)
    print("Estimated Total Size (MB): %0.2f" % total_size)
    print("-------------------------------------------------------------------------")

    return summary

# Call the summary function to summarize the neural network
summary_nn = summary(net, (3, 32, 32))

------------------------------------------------------------------------
Index          Layer (type)               Output Shape         Param #
  1                  Conv2d            [-1, 8, 32, 32]             224
  2             BatchNorm2d            [-1, 8, 32, 32]              16
  3                  Conv2d           [-1, 16, 32, 32]           1,168
  4             BatchNorm2d           [-1, 16, 32, 32]              32
  5               MaxPool2d           [-1, 16, 16, 16]               0
  6                  Conv2d           [-1, 32, 16, 16]           4,640
  7             BatchNorm2d           [-1, 32, 16, 16]              64
  8                  Conv2d           [-1, 64, 16, 16]          18,496
  9             BatchNorm2d           [-1, 64, 16, 16]             128
 10               MaxPool2d             [-1, 64, 8, 8]               0
 11                  Conv2d            [-1, 128, 8, 8]          73,856
 12             BatchNorm2d            [-1, 128, 8, 8]             256
 13 

In [2]:
import pandas as pd
import pandas as pd

for i, layer in enumerate(summary_nn):
    print(i+1, layer)

for layer, properties in summary_nn.items():
    for key, value in properties.items():
        print(key, value)

# df_summary = pd.DataFrame.from_dict(summary_nn, columns=['index', 'layer_type', 'input_shape', 'output_shape', 'nb_params'], orient='index')
# df_summary

print('summary_nn \n', summary_nn, '\n')


# Extract the inner dictionary values from summary_nn
inner_dict = [values for values in summary_nn.values()]
print(inner_dict)

# Create the DataFrame without the first level of the dictionary
df_summary = pd.DataFrame(inner_dict)
df_summary
# print(df_summary)


1 Conv2d-1
2 BatchNorm2d-2
3 Conv2d-3
4 BatchNorm2d-4
5 MaxPool2d-5
6 Conv2d-6
7 BatchNorm2d-7
8 Conv2d-8
9 BatchNorm2d-9
10 MaxPool2d-10
11 Conv2d-11
12 BatchNorm2d-12
13 Conv2d-13
14 BatchNorm2d-14
15 MaxPool2d-15
16 Linear-16
17 Dropout-17
18 Linear-18
19 Dropout-19
20 Linear-20
21 Linear-21
index 1
layer_type Conv2d
input_shape [-1, 3, 32, 32]
output_shape [-1, 8, 32, 32]
trainable True
nb_params tensor(224)
index 2
layer_type BatchNorm2d
input_shape [-1, 8, 32, 32]
output_shape [-1, 8, 32, 32]
trainable True
nb_params tensor(16)
index 3
layer_type Conv2d
input_shape [-1, 8, 32, 32]
output_shape [-1, 16, 32, 32]
trainable True
nb_params tensor(1168)
index 4
layer_type BatchNorm2d
input_shape [-1, 16, 32, 32]
output_shape [-1, 16, 32, 32]
trainable True
nb_params tensor(32)
index 5
layer_type MaxPool2d
input_shape [-1, 16, 32, 32]
output_shape [-1, 16, 16, 16]
nb_params 0
index 6
layer_type Conv2d
input_shape [-1, 16, 16, 16]
output_shape [-1, 32, 16, 16]
trainable True
nb_params te

Unnamed: 0,index,layer_type,input_shape,output_shape,trainable,nb_params
0,1,Conv2d,"[-1, 3, 32, 32]","[-1, 8, 32, 32]",True,tensor(224)
1,2,BatchNorm2d,"[-1, 8, 32, 32]","[-1, 8, 32, 32]",True,tensor(16)
2,3,Conv2d,"[-1, 8, 32, 32]","[-1, 16, 32, 32]",True,tensor(1168)
3,4,BatchNorm2d,"[-1, 16, 32, 32]","[-1, 16, 32, 32]",True,tensor(32)
4,5,MaxPool2d,"[-1, 16, 32, 32]","[-1, 16, 16, 16]",,0
5,6,Conv2d,"[-1, 16, 16, 16]","[-1, 32, 16, 16]",True,tensor(4640)
6,7,BatchNorm2d,"[-1, 32, 16, 16]","[-1, 32, 16, 16]",True,tensor(64)
7,8,Conv2d,"[-1, 32, 16, 16]","[-1, 64, 16, 16]",True,tensor(18496)
8,9,BatchNorm2d,"[-1, 64, 16, 16]","[-1, 64, 16, 16]",True,tensor(128)
9,10,MaxPool2d,"[-1, 64, 16, 16]","[-1, 64, 8, 8]",,0


In [3]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from collections import defaultdict

color_map = defaultdict(dict)

color_map = {
    'Linear': 
        {   'fill': 'blue',
            'outline': 'black'},
    'Conv1d': 
        {   'fill': 'purple',
            'outline': 'black'},
    'Conv2d': 
        {   'fill': 'orange',
            'outline': 'black'},
    'Conv3d': 
        {   'fill': 'red',
            'outline': 'black'},
    'ConvTranspose1d': 
        {   'fill': 'green',
            'outline': 'black'},
    'ConvTranspose2d': 
        {   'fill': 'teal',
            'outline': 'black'},
    'ConvTranspose3d': 
        {   'fill': 'yellow',
            'outline': 'black'},
    'BatchNorm1d': 
        {   'fill': 'green',
            'outline': 'black'},
    'BatchNorm2d': 
        {   'fill': 'green',
            'outline': 'black'},
    'BatchNorm3d': 
        {   'fill': 'green',
            'outline': 'black'},
    'MaxPool1d': 
        {   'fill': 'cyan',
            'outline': 'black'},
    'MaxPool2d': 
        {   'fill': 'red',
            'outline': 'black'},
    'MaxPool3d': 
        {   'fill': 'lime',
            'outline': 'black'},
    'AvgPool1d': 
        {   'fill': 'olive',
            'outline': 'black'},
    'AvgPool2d': 
        {   'fill': 'navy',
            'outline': 'black'},
    'AvgPool3d': 
        {   'fill': 'maroon',
            'outline': 'black'},
    'Dropout': 
        {   'fill': 'gray',
            'outline': 'black'},
    'Dropout2d': 
        {   'fill': 'gray',
            'outline': 'black'},
    'Dropout3d': 
        {   'fill': 'gray',
            'outline': 'black'},
    'ReLU': 
        {   'fill': 'darkblue',
            'outline': 'black'},
    'Sigmoid': 
        {   'fill': 'darkorange',
            'outline': 'black'},
    'Tanh': 
        {   'fill': 'darkred',
            'outline': 'black'},
    'Softmax': 
        {   'fill': 'darkgreen',
            'outline': 'black'},
    'Embedding': 
        {   'fill': 'darkviolet',
            'outline': 'black'},}

# def generate_network_diagram(df_summary):
#     fig = plt.figure(figsize=(10, 8))
#     ax = fig.add_subplot(111, projection='3d')

#     # Set node positions
#     x = df_summary.index
#     y = df_summary.index
#     z = df_summary['nb_params']

#     # Set node colors
#     colors = df_summary['layer_type'].map(color_map)

#     # Plot nodes
#     ax.scatter(x, y, z, c=colors, s=100)

#     # Set labels and title
#     ax.set_xlabel('Index')
#     ax.set_ylabel('Layer Type')
#     ax.set_zlabel('Number of Parameters')
#     ax.set_title('3D Network Diagram')

#     # Show the plot
#     plt.show()

# generate_network_diagram(df_summary)

In [4]:
from typing import Any
from math import ceil
from PIL import Image, ImageDraw

from PIL import ImageFont
from math import ceil
from utils import * # ColorWheel, Box, vertical_image_concat, linear_layout, self_multiply, get_rgba_tuple
import aggdraw

def layered_view(model: OrderedDict, to_file: str = None, min_z: int = 10, min_xy: int = 10, max_z: int = 400,
                 max_xy: int = 2000,
                 scale_z: float = 0.1, scale_xy: float = 4, type_ignore: list = None, index_ignore: list = None,
                 color_map: dict = None, one_dim_orientation: str = 'z', alpha: float = 0.6,
                 background_fill: Any = 'white', draw_volume: bool = True, padding: int = 10,
                 spacing: int = 10, draw_funnel: bool = True, shade_step=10, legend: bool = False,
                 font: ImageFont = None, font_color: Any = 'black') -> Image:
    """
    Generates a architecture visualization for a given linear keras model (i.e. one input and output tensor for each
    layer) in layered style (great for CNN).

    :param model: A keras model that will be visualized.
    :param to_file: Path to the file to write the created image to. If the image does not exist yet it will be created, else overwritten. Image type is inferred from the file ending. Providing None will disable writing.
    :param min_z: Minimum z size in pixel a layer will have.
    :param min_xy: Minimum x and y size in pixel a layer will have.
    :param max_z: Maximum z size in pixel a layer will have.
    :param max_xy: Maximum x and y size in pixel a layer will have.
    :param scale_z: Scalar multiplier for the z size of each layer.
    :param scale_xy: Scalar multiplier for the x and y size of each layer.
    :param type_ignore: List of layer types in the keras model to ignore during drawing.
    :param index_ignore: List of layer indexes in the keras model to ignore during drawing.
    :param color_map: Dict defining fill and outline for each layer by class type. Will fallback to default values for not specified classes.
    :param one_dim_orientation: Axis on which one dimensional layers should be drawn. Can  be 'x', 'y' or 'z'.
    :param alpha: Alpha value for the image background.
    :param background_fill: Color for the image background. Can be str or (R,G,B,A).
    :param draw_volume: Flag to switch between 3D volumetric view and 2D box view.
    :param padding: Distance in pixel before the first and after the last layer.
    :param spacing: Spacing in pixel between two layers
    :param draw_funnel: If set to True, a funnel will be drawn between consecutive layers
    :param shade_step: Deviation in lightness for drawing shades (only in volumetric view)
    :param legend: Add a legend of the layers to the image
    :param font: Font that will be used for the legend. Leaving this set to None, will use the default font.
    :param font_color: Color for the font if used. Can be str or (R,G,B,A).

    :return: Generated architecture image.
    """

    # Iterate over the model to compute bounds and generate boxes

    boxes = list()
    layer_y = list()
    color_wheel = ColorWheel()
    current_z = padding
    x_off = -1

    layer_types = list()

    img_height = 0
    max_right = 0

    if type_ignore is None:
        type_ignore = list()

    if index_ignore is None:
        index_ignore = list()

    if color_map is None:
        color_map = dict()

    for layer_name, layer_info in summary_nn.items():
        index = layer_info['index']
        layer_type = layer_info['layer_type']
        original_output_shape = tuple(layer_info['output_shape'])
    
        # Ignore layers that the user has opted out to
        if layer_type in type_ignore or index in index_ignore:
            continue

        if layer_type not in layer_types: 
            layer_types.append(layer_type)

        x = min_xy
        y = min_xy
        z = min_z

        output_shape = original_output_shape[1:]  # drop batch size
        if len(output_shape) != 1:
            output_shape = output_shape[1], output_shape[2], output_shape[0]  # move channels to end
        
        # print(f'{index} {layer_type} output_shape {output_shape}')

        if len(output_shape) == 1:
            if one_dim_orientation in ['x', 'y', 'z']:
                output_shape = (1, ) * "xyz".index(one_dim_orientation) + output_shape
            else:
                raise ValueError(f"unsupported orientation: {one_dim_orientation}")

        output_shape = output_shape + (1, ) * (4 - len(output_shape))  # expand 4D.

        x = min(max(output_shape[0] * scale_xy, x), max_xy) # -> 128 for 32 output size
        y = min(max(output_shape[1] * scale_xy, y), max_xy)  
        z = min(max(self_multiply(output_shape[2:]) * scale_z, z), max_z)

        box = Box(alpha=alpha)

        box.depth = 0

        if draw_volume:
            box.depth = x / 3

        if x_off == -1:
            x_off = box.depth / 2

        box.text = layer_type + '\n' + original_output_shape[1:].__str__().strip('()')
        
        # top left coordinate
        box.x1 = current_z - box.depth / 2
        box.y1 = box.depth

        # bottom right coordinate
        box.x2 = box.x1 + z
        box.y2 = box.y1 + y

        box.fill = color_map.get(layer_type, {'fill': get_random_color()})['fill']
        box.outline = color_map.get(layer_type, {'outline': get_random_color()})['fill']
        color_map[layer_type] = {'fill': box.fill, 'outline': box.outline}
        box.shade = shade_step
        boxes.append(box)
        layer_y.append(box.y2 - (box.y1 - box.depth))
        # Update image bounds
        hh = box.y2 - (box.y1 - box.depth)
        if hh > img_height:
            img_height = hh + padding
        if box.x2 + box.depth > max_right:
            max_right = box.x2 + box.depth
        current_z += z + spacing

    # Generate image
    img_width = max_right + x_off + padding
    img = Image.new('RGBA', (int(ceil(img_width)), int(ceil(img_height))), background_fill)
    draw = aggdraw.Draw(img)

    # x, y correction (centering)
    for i, node in enumerate(boxes):
        y_off = (img.height - layer_y[i]) / 2
        node.y1 += y_off
        node.y2 += y_off

        node.x1 += x_off
        node.x2 += x_off

    # Draw created boxes
    last_box = None
    for box in boxes:
        pen = aggdraw.Pen(get_rgba_tuple(box.outline))
        if last_box is not None and draw_funnel:
            draw.line([last_box.x2 + last_box.depth, last_box.y1 - last_box.depth,
                       box.x1 + box.depth, box.y1 - box.depth], pen)
            draw.line([last_box.x2 + last_box.depth, last_box.y2 - last_box.depth,
                       box.x1 + box.depth, box.y2 - box.depth], pen)
            draw.line([last_box.x2, last_box.y2,
                       box.x1, box.y2], pen)
            draw.line([last_box.x2, last_box.y1,
                       box.x1, box.y1], pen)
        box.draw(draw)
        last_box = box
    draw.flush()

    # Create layer color legend
    if legend:
        if font is None:
            font = ImageFont.load_default()

        if hasattr(font, 'getsize'):
            text_height = font.getsize("Ag")[1]
        else:
            text_height = font.getbbox("Ag")[3]
        cube_size = text_height

        depth = 0
        if draw_volume:
            depth = cube_size // 2

        patches = list()

        for layer_type in layer_types:
            if hasattr(font, 'getsize'):
                text_width = font.getsize(layer_type)[0]
            else:
                text_width = font.getbbox(layer_type)[2]
            label_patch_size = (cube_size + depth + spacing + text_width, cube_size + depth)
            # this only works if cube_size is bigger than text height

            img_box = Image.new('RGBA', label_patch_size, background_fill)
            img_text = Image.new('RGBA', label_patch_size, (0, 0, 0, 0))
            draw_box = aggdraw.Draw(img_box)
            draw_text = ImageDraw.Draw(img_text)

            box = Box(alpha=alpha)
            box.x1 = 0
            box.x2 = box.x1 + cube_size
            box.y1 = depth
            box.y2 = box.y1 + cube_size
            box.depth = depth
            box.shade = shade_step
            box.fill = color_map.get(layer_type, {'fill': get_random_color()})['fill']
            box.outline = color_map.get(layer_type, {'outline': get_random_color()})['fill']
            box.draw(draw_box)

            text_x = box.x2 + box.depth + spacing
            text_y = (label_patch_size[1] - text_height) / 2  # 2D center; use text_height and not the current label!
            draw_text.text((text_x, text_y), layer_type, font=font, fill=font_color)

            draw_box.flush()
            img_box.paste(img_text, mask=img_text)
            patches.append(img_box)

        legend_image = linear_layout(patches, max_width=img.width, max_height=img.height, padding=padding, spacing=spacing,
                                     background_fill=background_fill, horizontal=True)
        img = vertical_image_concat(img, legend_image, background_fill=background_fill)

    if to_file is not None:
        img.save(to_file)

    return img

In [5]:
type_ignore = []
layered_view(summary_nn, to_file='layered_view.png', type_ignore=type_ignore, legend=True, padding=25, spacing=20, color_map=color_map)

AttributeError: 'Draw' object has no attribute 'bitmap'