In [29]:
# Import necessary libraries
import torch
import torch.nn as nn
from torch.autograd import Variable
from collections import OrderedDict
import numpy as np

# Three-layer fully connected classifier
class Net(nn.Module):
    """Multilayer perceptron: 32 -> 256 -> 128 -> 10 with a softmax output.

    The two hidden layers are each followed by a ReLU; the final layer is
    followed by a softmax over dimension 1.
    """

    def __init__(self):
        super().__init__()
        # Hidden stage 1: 32 input features -> 256 units
        self.fc1 = nn.Linear(32, 256)
        self.relu1 = nn.ReLU()
        # Hidden stage 2: 256 -> 128 units
        self.fc2 = nn.Linear(256, 128)
        self.relu2 = nn.ReLU()
        # Output stage: 128 -> 10 scores, normalised along dim 1
        self.fc3 = nn.Linear(128, 10)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Run ``x`` through the three stages and return the softmax output."""
        hidden = self.relu1(self.fc1(x))
        hidden = self.relu2(self.fc2(hidden))
        return self.softmax(self.fc3(hidden))

# Pick the GPU when PyTorch reports CUDA as available; otherwise stay on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build the network and place it on the chosen device
# (nn.Module.to returns the module itself, so the two steps can be chained)
net = Net().to(device)

# Define a function to summarize the model
def summary(model, input_size, batch_size=-1, device="cuda") -> OrderedDict:
    """Print a per-layer summary table for ``model`` and return the collected data.

    Forward hooks are attached to every sub-module (except ``nn.Sequential``,
    ``nn.ModuleList`` and ``model`` itself), one forward pass is run on random
    input with a batch of 2, and the hooks are removed again, even when the
    forward pass raises.

    Args:
        model: The ``nn.Module`` to inspect. It is called as ``model(*inputs)``.
        input_size: Shape of one input without the batch dimension, given as a
            tuple, or a list of such tuples when the model takes several inputs.
        batch_size: Value written into the batch dimension of the reported
            shapes and used for the input-size estimate. Defaults to -1.
        device: ``"cuda"`` or ``"cpu"`` (case-insensitive). The random input is
            created on the CPU when CUDA is requested but not available. The
            model itself is not moved by this function.

    Returns:
        An ``OrderedDict`` keyed by ``"<ClassName>-<index>"`` in call order. Each
        value holds ``input_shape``, ``output_shape``, ``nb_params`` (a plain
        ``int``) and, for modules that have a weight tensor, ``trainable``.

    Raises:
        AssertionError: If ``device`` is neither ``"cuda"`` nor ``"cpu"``.
    """
    # Convert the device name to lowercase
    device = device.lower()

    # Check if the device is valid
    assert device in [
        "cuda",
        "cpu",
    ], "Input device is not valid, please specify 'cuda' or 'cpu'"

    # Fall back to the CPU when CUDA was requested but is not available
    if device == "cuda" and torch.cuda.is_available():
        run_device = torch.device("cuda")
    else:
        run_device = torch.device("cpu")

    # Convert input_size to a list if it is a tuple
    if isinstance(input_size, tuple):
        input_size = [input_size]

    # Per-layer records, filled in by the hooks in the order modules are called
    layer_info = OrderedDict()

    # Handles of the registered hooks, so they can be removed afterwards
    hooks = []

    def hook(module, input, output):
        # Key is "<ClassName>-<1-based call index>"
        m_key = "%s-%i" % (type(module).__name__, len(layer_info) + 1)
        entry = OrderedDict()
        layer_info[m_key] = entry

        # Store the input shape with the batch dimension replaced
        entry["input_shape"] = list(input[0].size())
        entry["input_shape"][0] = batch_size

        # Store the output shape(s) with the batch dimension replaced
        if isinstance(output, (list, tuple)):
            entry["output_shape"] = [
                [batch_size] + list(o.size())[1:] for o in output
            ]
        else:
            entry["output_shape"] = list(output.size())
            entry["output_shape"][0] = batch_size

        # Count weight and bias elements as plain ints
        params = 0
        weight = getattr(module, "weight", None)
        if isinstance(weight, torch.Tensor):
            params += weight.numel()
            entry["trainable"] = weight.requires_grad
        bias = getattr(module, "bias", None)
        if isinstance(bias, torch.Tensor):
            params += bias.numel()
        entry["nb_params"] = params

    def register_hook(module):
        # Containers and the root model are skipped; only their children report
        if isinstance(module, (nn.Sequential, nn.ModuleList)) or module is model:
            return
        hooks.append(module.register_forward_hook(hook))

    # Create random input tensors for each input size (batch of 2)
    x = [
        torch.rand(2, *in_size, dtype=torch.float32, device=run_device)
        for in_size in input_size
    ]

    try:
        # Register hooks for each module in the model, then run one forward pass
        model.apply(register_hook)
        model(*x)
    finally:
        # Always detach the hooks so a failed pass does not leave them on the model
        for h in hooks:
            h.remove()

    # Print the summary
    print("------------------------------------------------------------------------")
    line_new = "{:<5}  {:>20}  {:>25} {:>15}".format("Index", "Layer (type)", "Output Shape", "Param #")
    print(line_new)
    print("========================================================================")
    total_params = 0
    total_output = 0
    trainable_params = 0
    for layer, entry in layer_info.items():
        layer_name, layer_index = layer.split("-")[0], layer.split("-")[1]
        line_new = "{:^5}  {:>20}  {:>25} {:>15}".format(
            layer_index,
            layer_name,
            str(entry["output_shape"]),
            "{0:,}".format(entry["nb_params"]),
        )
        total_params += entry["nb_params"]

        # A module with several outputs contributes the sum of their sizes
        out_shape = entry["output_shape"]
        if out_shape and isinstance(out_shape[0], (list, tuple)):
            total_output += sum(int(np.prod(shape)) for shape in out_shape)
        else:
            total_output += int(np.prod(out_shape))

        if entry.get("trainable"):
            trainable_params += entry["nb_params"]
        print(line_new)

    # Sizes assume 4 bytes per element; abs() absorbs the sign of a -1 batch size
    input_elements = sum(int(np.prod(in_size)) for in_size in input_size)
    total_input_size = abs(input_elements * batch_size * 4. / (1024 ** 2.))
    total_output_size = abs(2. * total_output * 4. / (1024 ** 2.))
    total_params_size = abs(total_params * 4. / (1024 ** 2.))
    total_size = total_params_size + total_output_size + total_input_size

    print("========================================================================")
    print("Total params: {0:,}".format(total_params))
    print("Trainable params: {0:,}".format(trainable_params))
    print("Non-trainable params: {0:,}".format(total_params - trainable_params))
    print("------------------------------------------------------------------------")
    print("Input size (MB): %0.2f" % total_input_size)
    print("Forward/backward pass size (MB): %0.2f" % total_output_size)
    print("Params size (MB): %0.2f" % total_params_size)
    print("Estimated Total Size (MB): %0.2f" % total_size)
    print("-------------------------------------------------------------------------")

    return layer_info

# Summarize `net` for a 3 x 32 x 32 input and keep the returned per-layer mapping
summary_nn = summary(model=net, input_size=(3, 32, 32))

------------------------------------------------------------------------
Index          Layer (type)               Output Shape         Param #
  1                  Linear           [-1, 3, 32, 256]           8,448
  2                    ReLU           [-1, 3, 32, 256]               0
  3                  Linear           [-1, 3, 32, 128]          32,896
  4                    ReLU           [-1, 3, 32, 128]               0
  5                  Linear            [-1, 3, 32, 10]           1,290
  6                 Softmax            [-1, 3, 32, 10]               0
Total params: 42,634
Trainable params: 42,634
Non-trainable params: 0
------------------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.58
Params size (MB): 0.16
Estimated Total Size (MB): 0.75
-------------------------------------------------------------------------


In [19]:
# List the layer keys of the summary together with their 0-based position
for i, layer in enumerate(summary_nn.keys()):
    print(i, layer)

0 Linear-1
1 ReLU-2
2 Linear-3
3 ReLU-4
4 Linear-5
5 Softmax-6
