In [None]:
##model 1
import numpy as np

# Define the network architecture
# Each tuple is (filters, kernel_size, stride, input_channels for conv layers or input_units for dense layers)
conv_layers = [
    (32, 3, 2, 3),  # First Conv2D layer
    (64, 3, 2, 32),  # Second Conv2D layer
    (128, 3, 2, 64),  # Third Conv2D layer
    (128, 3, 1, 128),  # Fourth to Seventh Conv2D layers repeated 4 times
    (128, 3, 1, 128),
    (128, 3, 1, 128),
    (128, 3, 1, 128)
]

dense_layers = [
    (128, 0),  # First Dense layer, input units will be calculated after flattening
    (10, 128)  # Second Dense layer
]

# Initialize variables to hold calculations
output_sizes = [32]  # Starting with input image size
params = []
macs = []
layer_names = []

# Calculate for Conv2D layers
for i, (filters, kernel_size, stride, input_channels) in enumerate(conv_layers):
    layer_name = f"Conv2D-{filters}f-{kernel_size}x{kernel_size}-s{stride}"
    layer_names.append(layer_name)
    params_conv = (kernel_size * kernel_size * input_channels + 1) * filters
    params.append(params_conv)
    
    output_size = np.ceil(output_sizes[-1] / stride)
    output_sizes.append(output_size)
    macs_conv = kernel_size * kernel_size * input_channels * filters * output_size * output_size
    macs.append(macs_conv)

# Add BatchNorm parameters and MACs after each Conv2D layer
for filters in [layer[0] for layer in conv_layers]:
    layer_names.append(f"BatchNorm-{filters}")
    params_bn = 2 * filters  # Scale and shift parameters
    params.append(params_bn)
    macs.append(0)  # BatchNorm does not contribute MACs in the traditional sense

# MaxPooling
layer_names.append("MaxPooling")
params.append(0)
output_size = np.ceil(output_sizes[-1] / 4)
output_sizes.append(output_size)
macs.append(0)

# Flatten
layer_names.append("Flatten")
params.append(0)
macs.append(0)
flattened_size = output_size * output_size * conv_layers[-1][0]  # Last Conv2D filters as channels
output_sizes.append(flattened_size)

# Calculate for Dense layers
for i, (output_units, input_units) in enumerate(dense_layers):
    if input_units == 0:  # Update input_units for the first dense layer
        input_units = int(flattened_size)
        dense_layers[i] = (output_units, input_units)
    layer_name = f"Dense-{output_units}u"
    layer_names.append(layer_name)
    params_dense = (input_units + 1) * output_units
    params.append(params_dense)
    macs_dense = input_units * output_units
    macs.append(macs_dense)

# Add BatchNorm parameters and MACs after each Dense layer
for output_units in [layer[0] for layer in dense_layers]:
    layer_names.append(f"BatchNorm-{output_units}")
    params_bn = 2 * output_units  # Scale and shift parameters
    params.append(params_bn)
    macs.append(0)  # BatchNorm does not contribute MACs in the traditional sense

# Compile results into a table
results_table = []
for i, layer_name in enumerate(layer_names):
    results_table.append((layer_name, params[i], macs[i], output_sizes[i] if i < len(output_sizes) else "N/A"))

results_table


[('Conv2D-32f-3x3-s2', 896, 221184.0, 32),
 ('Conv2D-64f-3x3-s2', 18496, 1179648.0, 16.0),
 ('Conv2D-128f-3x3-s2', 73856, 1179648.0, 8.0),
 ('Conv2D-128f-3x3-s1', 147584, 2359296.0, 4.0),
 ('Conv2D-128f-3x3-s1', 147584, 2359296.0, 4.0),
 ('Conv2D-128f-3x3-s1', 147584, 2359296.0, 4.0),
 ('Conv2D-128f-3x3-s1', 147584, 2359296.0, 4.0),
 ('BatchNorm-32', 64, 0, 4.0),
 ('BatchNorm-64', 128, 0, 1.0),
 ('BatchNorm-128', 256, 0, 128.0),
 ('BatchNorm-128', 256, 0, 'N/A'),
 ('BatchNorm-128', 256, 0, 'N/A'),
 ('BatchNorm-128', 256, 0, 'N/A'),
 ('BatchNorm-128', 256, 0, 'N/A'),
 ('MaxPooling', 0, 0, 'N/A'),
 ('Flatten', 0, 0, 'N/A'),
 ('Dense-128u', 16512, 16384, 'N/A'),
 ('Dense-10u', 1290, 1280, 'N/A'),
 ('BatchNorm-128', 256, 0, 'N/A'),
 ('BatchNorm-10', 20, 0, 'N/A')]

In [1]:
#model 2
import numpy as np

# Re-initialize calculations after reset
# Calculations for the depthwise-separable convolution layers
input_channels = 3  # Initial input channels for the first depthwise layer
output_channels = 64  # For the pointwise convolution

# Depthwise Convolution
kernel_size_dw = 3  # Kernel size for the depthwise convolution
stride_dw = 2  # Stride for the depthwise convolution
params_dw = kernel_size_dw * kernel_size_dw * input_channels  # No bias
output_size_dw = np.ceil(32 / stride_dw)  # Output size calculation for "same" padding
macs_dw = kernel_size_dw * kernel_size_dw * input_channels * output_size_dw * output_size_dw

# Pointwise Convolution (1x1 Convolution)
kernel_size_pw = 1  # Kernel size for the pointwise convolution
stride_pw = 1  # Stride for the pointwise convolution, but irrelevant for 1x1
params_pw = kernel_size_pw * kernel_size_pw * input_channels * output_channels  # No bias
macs_pw = kernel_size_pw * kernel_size_pw * input_channels * output_channels * output_size_dw * output_size_dw

# Compile results for depthwise and pointwise layers
depthwise_pointwise_results = {
    "Depthwise_Convolution": {
        "Parameters": params_dw,
        "MACs": macs_dw,
        "Output_Size": output_size_dw
    },
    "Pointwise_Convolution": {
        "Parameters": params_pw,
        "MACs": macs_pw,
        "Output_Size": output_size_dw  # Output size remains the same for pointwise
    }
}

depthwise_pointwise_results

{'Depthwise_Convolution': {'Parameters': 27,
  'MACs': 6912.0,
  'Output_Size': 16.0},
 'Pointwise_Convolution': {'Parameters': 192,
  'MACs': 49152.0,
  'Output_Size': 16.0}}