In [2]:
import numpy as np
import torch
from torch import nn


In [3]:
AlexNet = nn.Sequential(nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2), nn.ReLU(),
                    nn.MaxPool2d(kernel_size=3, stride=2),
                    nn.Conv2d(96, 256, kernel_size=5, padding=2), nn.ReLU(),
                    nn.MaxPool2d(kernel_size=3, stride=2),
                    nn.Conv2d(256, 384, kernel_size=3, padding=1), nn.ReLU(),
                    nn.Conv2d(384, 384, kernel_size=3, padding=1), nn.ReLU(),
                    nn.Conv2d(384, 256, kernel_size=3, padding=1), nn.ReLU(),
                    nn.MaxPool2d(kernel_size=3, stride=2),
                    nn.Flatten(),
                    nn.Linear(256 * 6 * 6, 4096), nn.ReLU(), nn.Dropout(p=0.5),
                    nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(p=0.5),
                    nn.Linear(4096, 1000))

In [4]:
X = torch.randn(1, 3, 224, 224)
for layer in AlexNet:
    X=layer(X)
    print(layer.__class__.__name__,'output shape:\t',X.shape)

Conv2d output shape:	 torch.Size([1, 96, 55, 55])
ReLU output shape:	 torch.Size([1, 96, 55, 55])
MaxPool2d output shape:	 torch.Size([1, 96, 27, 27])
Conv2d output shape:	 torch.Size([1, 256, 27, 27])
ReLU output shape:	 torch.Size([1, 256, 27, 27])
MaxPool2d output shape:	 torch.Size([1, 256, 13, 13])
Conv2d output shape:	 torch.Size([1, 384, 13, 13])
ReLU output shape:	 torch.Size([1, 384, 13, 13])
Conv2d output shape:	 torch.Size([1, 384, 13, 13])
ReLU output shape:	 torch.Size([1, 384, 13, 13])
Conv2d output shape:	 torch.Size([1, 256, 13, 13])
ReLU output shape:	 torch.Size([1, 256, 13, 13])
MaxPool2d output shape:	 torch.Size([1, 256, 6, 6])
Flatten output shape:	 torch.Size([1, 9216])
Linear output shape:	 torch.Size([1, 4096])
ReLU output shape:	 torch.Size([1, 4096])
Dropout output shape:	 torch.Size([1, 4096])
Linear output shape:	 torch.Size([1, 4096])
ReLU output shape:	 torch.Size([1, 4096])
Dropout output shape:	 torch.Size([1, 4096])
Linear output shape:	 torch.Size([1,

In [7]:
X = torch.randn(1, 3, 224, 224)

sum = 0
for layer in AlexNet:
    X = layer(X)
    l_name = layer.__class__.__name__
    num = 0
    if isinstance(layer, nn.Conv2d):
        w_size = list(layer.weight.shape)
        b_size = layer.bias.shape[0]
        num = np.prod(w_size) * np.prod(X.shape[2:]) + b_size * np.prod(X.shape[2:])
        
        print(l_name, end=':\t')
        print(*w_size, *X.shape[2:], sep='*', end='')
        print(f'+{b_size}*{X.shape[2]}*{X.shape[3]}\t= {num}')
    if isinstance(layer, nn.ReLU):
        num = np.prod(X.shape[1:])
        
        print(l_name, end=':\t')
        print(*X.shape[1:], sep='*', end='')
        print(f'\t= {num}')
        
    if isinstance(layer, nn.MaxPool2d):
        k = layer.kernel_size
        num = np.prod(X.shape[1:]) * k * k 
        
        print(l_name, end=':\t')
        print(*X.shape[1:], sep='*', end='')
        print(f'*{k}*{k}\t= {num}')
    if isinstance(layer, nn.Linear):
        w_size = list(layer.weight.shape)
        b_size = layer.bias.shape[0]
        parm_num = np.prod(w_size) + b_size
        
        print(l_name, end=':\t')
        print(*w_size, sep='*', end= '')
        print(f'+{b_size}\t = {parm_num}')
    sum += num
print(f'{sum} = {round(sum / 1000 / 1000 / 1000, 1)}G')

Conv2d:	96*3*11*11*55*55+96*55*55	= 105705600
ReLU:	96*55*55	= 290400
MaxPool2d:	96*27*27*3*3	= 629856
Conv2d:	256*96*5*5*27*27+256*27*27	= 448084224
ReLU:	256*27*27	= 186624
MaxPool2d:	256*13*13*3*3	= 389376
Conv2d:	384*256*3*3*13*13+384*13*13	= 149585280
ReLU:	384*13*13	= 64896
Conv2d:	384*384*3*3*13*13+384*13*13	= 224345472
ReLU:	384*13*13	= 64896
Conv2d:	256*384*3*3*13*13+256*13*13	= 149563648
ReLU:	256*13*13	= 43264
MaxPool2d:	256*6*6*3*3	= 82944
Linear:	4096*9216+4096	 = 37752832
ReLU:	4096	= 4096
Linear:	4096*4096+4096	 = 16781312
ReLU:	4096	= 4096
Linear:	1000*4096+1000	 = 4097000
1079044672 = 1.1G multiple-add operations


In [6]:
p_sum = 0
for layer in AlexNet:
    if isinstance(layer, (nn.Linear, nn.Conv2d)):        
        w_size = list(layer.weight.shape)
        b_size = layer.bias.shape[0]
        parm_num = np.prod(w_size) + b_size
        p_sum += parm_num
        
        print(layer.__class__.__name__, end=':\t')
        print(*w_size, sep='*', end= '')
        print(f'+{b_size}\t = {parm_num}')
print(f'{p_sum} = {round(p_sum / 1000 / 1000, 1)}M')

Conv2d:	96*3*11*11+96	 = 34944
Conv2d:	256*96*5*5+256	 = 614656
Conv2d:	384*256*3*3+384	 = 885120
Conv2d:	384*384*3*3+384	 = 1327488
Conv2d:	256*384*3*3+256	 = 884992
Linear:	4096*9216+4096	 = 37752832
Linear:	4096*4096+4096	 = 16781312
Linear:	1000*4096+1000	 = 4097000
62378344 = 62.4M
