In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
# Deep variant: 2 input features -> two hidden layers of width 2 -> 3 outputs.
deep_layers = [
    nn.Linear(in_features=2, out_features=2),
    nn.ReLU(),
    nn.Linear(in_features=2, out_features=2),
    nn.ReLU(),
    nn.Linear(in_features=2, out_features=3),
]
deep_nn = nn.Sequential(*deep_layers)

# Wide variant: 2 input features -> one hidden layer of width 4 -> 3 outputs.
wide_layers = [
    nn.Linear(in_features=2, out_features=4),
    nn.ReLU(),
    nn.Linear(in_features=4, out_features=3),
]
wide_nn = nn.Sequential(*wide_layers)

In [13]:
# Show every (name, tensor) pair registered on the deep model; each entry is
# followed by a line holding a single space so consecutive entries are
# visually separated.
for named_param in deep_nn.named_parameters():
    print(named_param, ' ', sep='\n')

('0.weight', Parameter containing:
tensor([[-0.1687, -0.6446],
        [-0.4486, -0.4321]], requires_grad=True))
 
('0.bias', Parameter containing:
tensor([-0.4198,  0.1431], requires_grad=True))
 
('2.weight', Parameter containing:
tensor([[ 0.1877, -0.0377],
        [-0.4300,  0.0687]], requires_grad=True))
 
('2.bias', Parameter containing:
tensor([-0.4946, -0.2631], requires_grad=True))
 
('4.weight', Parameter containing:
tensor([[-0.5128, -0.2972],
        [-0.3715,  0.4236],
        [ 0.0981,  0.5582]], requires_grad=True))
 
('4.bias', Parameter containing:
tensor([ 0.2530, -0.3467,  0.5263], requires_grad=True))
 


In [18]:
# Estimate the number of nodes in each model by adding up the lengths of all
# parameters whose name contains 'bias'. Weight tensors are skipped entirely.
deep_bias_lengths = [
    len(tensor)
    for name, tensor in deep_nn.named_parameters()
    if 'bias' in name
]
wide_bias_lengths = [
    len(tensor)
    for name, tensor in wide_nn.named_parameters()
    if 'bias' in name
]

numNodes_deep = sum(deep_bias_lengths)
numNodes_wide = sum(wide_bias_lengths)

print(f"Total number of nodes in wide nn: {numNodes_wide}")
print(f"Total number of nodes in deep nn: {numNodes_deep}")


Total number of nodes in wide nn: 7
Total number of nodes in deep nn: 7


### **Trainable Parameters**

In [25]:
# Explicit-loop tally of trainable scalars: every parameter tensor adds its
# element count when it tracks gradients, and adds nothing otherwise.
numParams_wide = 0
for tensor in wide_nn.parameters():
    numParams_wide += tensor.numel() if tensor.requires_grad else 0

numParams_deep = 0
for tensor in deep_nn.parameters():
    numParams_deep += tensor.numel() if tensor.requires_grad else 0

print(f"Total number of trainable parameters in wide nn: {numParams_wide}")
print(f"Total number of trainable parameters in deep nn: {numParams_deep}")


Total number of trainable parameters in wide nn: 27
Total number of trainable parameters in deep nn: 21


In [34]:
# Compact version of the same count: first keep only the tensors that track
# gradients, then add up their element counts.
trainable_wide = (t for t in wide_nn.parameters() if t.requires_grad)
Params_wide = sum(t.numel() for t in trainable_wide)
print(f"Total number of trainable parameters in wide nn is {Params_wide}")

trainable_deep = (t for t in deep_nn.parameters() if t.requires_grad)
Params_deep = sum(t.numel() for t in trainable_deep)
print(f"Total number of trainable parameters in deep nn is {Params_deep}")


Total number of trainable parameters in wide nn is 27
Total number of trainable parameters in deep nn is 21


In [38]:
# torchsummary is used only by this cell.
from torchsummary import summary

# Print a per-layer table (output shape and parameter count) for wide_nn.
#
# device is pinned to "cpu": torchsummary defaults to device="cuda" and, when a
# GPU is visible, builds its dummy input as a CUDA tensor. No .to()/.cuda()
# call on wide_nn is visible in this notebook, so the model is presumably
# still on the CPU and the default would fail with a device mismatch on GPU
# machines.
#
# input_size is the per-sample shape without the batch dimension. With (2, 2)
# the reported output shapes carry an extra leading dimension of 2
# (e.g. [-1, 2, 4]); the parameter counts do not depend on it.
summary(wide_nn, (2, 2), device="cpu")


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                 [-1, 2, 4]              12
              ReLU-2                 [-1, 2, 4]               0
            Linear-3                 [-1, 2, 3]              15
Total params: 27
Trainable params: 27
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
----------------------------------------------------------------
