# 18 June - ConvNet sizes

## Goal : Modify the KazuNet architecture to remove the MaxPooling layers

In [1]:
# Imports
import math
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Add the path to the parent directory to augment search for module
par_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if par_dir not in sys.path:
    sys.path.append(par_dir)
    
# Import the custom plotting module
from plot_utils import plot_utils

In [2]:
# Helper that prints (and returns) the output shape of a convolution layer given the layer parameters and input size

def print_conv_output(input_size, output_channels, kernel_size, stride,
                      padding, dilation):
    """Print and return the output shape of a 2D convolution layer.

    Each spatial extent is computed with the usual convolution size formula
    ``(in + 2*padding - dilation*(kernel - 1) - 1) / stride + 1`` and then
    floored. Both the un-floored and the floored values are printed. No
    validation is performed, so a kernel larger than the (padded) input yields
    a non-positive size rather than an error.

    Args:
        input_size: Indexable ``(num_channels, height, width)`` of the input.
        output_channels: Number of channels the layer produces; passed through
            unchanged into the result.
        kernel_size: ``(height, width)`` of the kernel.
        stride: ``(height, width)`` stride.
        padding: ``(height, width)`` padding, counted twice per axis in the
            formula.
        dilation: ``(height, width)`` dilation.

    Returns:
        Tuple ``(output_channels, output_height, output_width)`` where the two
        spatial values are the floored results.
    """

    def _raw_extent(axis):
        # Spatial axis `axis` lives at index axis + 1 of input_size because
        # index 0 holds the channel count.
        return (input_size[axis + 1] + 2*padding[axis] -
                dilation[axis]*(kernel_size[axis] - 1) - 1)/stride[axis] + 1

    print("Input size : num_channels {0}, height {1}, width {2}".format(*input_size[:3]))

    raw_height = _raw_extent(0)
    raw_width = _raw_extent(1)

    # True division above means these are floats; show them before flooring so
    # a non-integer result (layer does not tile the input evenly) is visible.
    print("Actual output size : num_channels {0}, height {1}, width {2}".format(
        output_channels, raw_height, raw_width))

    output_shape = (output_channels, math.floor(raw_height), math.floor(raw_width))

    print("Floor Output size : num_channels {0}, height {1}, width {2}".format(*output_shape))

    return output_shape

In [3]:
# Helper that prints (and returns) the output shape of a transposed convolution layer given the layer parameters and input size

def print_conv_trans_output(input_size, output_channels, kernel_size, stride,
                            padding, dilation, out_padding=(0,0)):
    """Print and return the output shape of a 2D transposed convolution layer.

    Each spatial extent is computed as
    ``(in - 1)*stride - 2*padding + dilation*(kernel - 1) + out_padding + 1``.

    Args:
        input_size: Indexable ``(num_channels, height, width)`` of the input.
        output_channels: Number of channels the layer produces; passed through
            unchanged into the result.
        kernel_size: ``(height, width)`` of the kernel.
        stride: ``(height, width)`` stride.
        padding: ``(height, width)`` padding.
        dilation: ``(height, width)`` dilation.
        out_padding: ``(height, width)`` extra size added to the output;
            defaults to ``(0, 0)``.

    Returns:
        Tuple ``(output_channels, output_height, output_width)``.
    """
    print("Input size : num_channels {0}, height {1}, width {2}".format(*input_size[:3]))

    spatial = []
    for axis in (0, 1):
        # input_size[0] is the channel count, so spatial axis `axis` is at axis + 1.
        extent = ((input_size[axis + 1]-1)*stride[axis] - 2*padding[axis]
                  + dilation[axis]*(kernel_size[axis] - 1) + out_padding[axis] + 1)
        spatial.append(extent)

    output_shape = (output_channels, spatial[0], spatial[1])

    print("Output size : num_channels {0}, height {1}, width {2}".format(*output_shape))

    return output_shape

## Test the ConvNet output size

In [4]:
# Sanity check of the size helper: a single unpadded 3x3, stride-1 convolution
# applied to a (19, 16, 40) input. The returned shape is kept in `input_size`.
input_size = print_conv_output(
    input_size=(19, 16, 40),
    output_channels=128,
    kernel_size=(3, 3),
    stride=(1, 1),
    padding=(0, 0),
    dilation=(1, 1),
)

Input size : num_channels 19, height 16, width 40
Actual output size : num_channels 128, height 14.0, width 38.0
Floor Output size : num_channels 128, height 14, width 38


In [8]:
# Trace the feature-map size through the first encoder layers, with every
# convolution using padding (1, 1) and dilation (1, 1).
# Each row: (banner printed before the layer, output_channels, kernel_size,
#            stride, padding, whether the ReLu banner is printed afterwards)
_encoder_layers = [
    ("Applying en_conv1",     64,  (3,3), (1,1), (1,1), True),
    ("Applying en_max_conv1", 64,  (3,3), (1,1), (1,1), True),
    ("Applying en_conv2a",    64,  (4,4), (2,2), (1,1), False),
    ("Applying en_conv2b",    64,  (3,3), (1,1), (1,1), True),
    ("Applying en_max_conv2", 64,  (3,3), (1,1), (1,1), True),
    ("Applying en_conv3a",    128, (4,4), (2,2), (1,1), False),
]

# Input size
iwcd_curr_size = (19, 16, 40)

# Feed the shape returned by each layer into the next one.
for _banner, _channels, _kernel, _stride, _padding, _relu_after in _encoder_layers:
    print(_banner)
    iwcd_curr_size = print_conv_output(input_size=iwcd_curr_size,
                                       output_channels=_channels,
                                       kernel_size=_kernel,
                                       stride=_stride,
                                       padding=_padding,
                                       dilation=(1,1))
    if _relu_after:
        print("-----------------------------Applying ReLu----------------------------------")

Applying en_conv1
Input size : num_channels 19, height 16, width 40
Actual output size : num_channels 64, height 16.0, width 40.0
Floor Output size : num_channels 64, height 16, width 40
-----------------------------Applying ReLu----------------------------------
Applying en_max_conv1
Input size : num_channels 64, height 16, width 40
Actual output size : num_channels 64, height 16.0, width 40.0
Floor Output size : num_channels 64, height 16, width 40
-----------------------------Applying ReLu----------------------------------
Applying en_conv2a
Input size : num_channels 64, height 16, width 40
Actual output size : num_channels 64, height 8.0, width 20.0
Floor Output size : num_channels 64, height 8, width 20
Applying en_conv2b
Input size : num_channels 64, height 8, width 20
Actual output size : num_channels 64, height 8.0, width 20.0
Floor Output size : num_channels 64, height 8, width 20
-----------------------------Applying ReLu----------------------------------
Applying en_max_conv

In [10]:
# Sanity check of the transposed-convolution helper: with kernel (2, 2),
# stride (2, 2), no padding and dilation (1, 1) the formula reduces to
# 2 * input extent, i.e. each spatial dimension is doubled.
#
# The input shape is pinned to (128, 3, 6), the value shown in this cell's
# recorded output. Previously the cell read whatever `input_size` an earlier
# cell had left in the kernel and then overwrote it, so the result depended on
# execution order and changed every time the cell was re-run.
input_size = (128, 3, 6)

input_size = print_conv_trans_output(input_size=input_size,
                               output_channels=128,
                               kernel_size=(2,2),
                               stride=(2,2),
                               padding=(0,0),
                               dilation=(1,1))

Input size : num_channels 128, height 3, width 6
Output size : num_channels 128, height 6, width 12


In [15]:
# Trace the feature-map size through the full encoder variant in which the
# stride-2 layers en_conv2a / en_conv3a use no padding.
#
# NOTE(review): the output recorded below this cell starts from 38 input
# channels and shrinks to 14x38 after en_conv1, which does not match this code
# (19 channels, padding (1, 1)) -- the saved output appears to be stale.
# NOTE(review): tracing these parameters by hand, the input to en_conv7 is
# smaller than its (3, 9) kernel, so the final sizes come out non-positive --
# confirm the intended padding/kernel values.
#
# Each row: (banner printed before the layer, output_channels, kernel_size,
#            stride, padding, whether the ReLu banner is printed afterwards)
_encoder_layers = [
    ("Applying en_conv1",     64,  (3,3), (1,1), (1,1), True),
    ("Applying en_max_conv1", 64,  (3,3), (1,1), (1,1), True),
    ("Applying en_conv2a",    64,  (4,4), (2,2), (0,0), False),
    ("Applying en_conv2b",    64,  (3,3), (1,1), (1,1), True),
    ("Applying en_max_conv2", 64,  (3,3), (1,1), (1,1), True),
    ("Applying en_conv3a",    128, (4,4), (2,2), (0,0), False),
    ("Applying en_conv3b",    128, (3,3), (1,1), (1,1), True),
    ("Applying en_maxconv3",  128, (2,2), (2,2), (0,0), True),
    ("Applying en_conv4",     128, (3,3), (1,1), (1,1), True),
    ("Applying en_conv6",     128, (2,2), (2,2), (1,1), True),
    ("Applying en_conv7",     256, (3,9), (1,1), (0,0), False),
]

# Input size
iwcd_curr_size = (19, 16, 40)

# Feed the shape returned by each layer into the next one.
for _banner, _channels, _kernel, _stride, _padding, _relu_after in _encoder_layers:
    print(_banner)
    iwcd_curr_size = print_conv_output(input_size=iwcd_curr_size,
                                       output_channels=_channels,
                                       kernel_size=_kernel,
                                       stride=_stride,
                                       padding=_padding,
                                       dilation=(1,1))
    if _relu_after:
        print("-----------------------------Applying ReLu----------------------------------")

Applying en_conv1
Input size : num_channels 38, height 16, width 40
Actual output size : num_channels 64, height 14.0, width 38.0
Floor Output size : num_channels 64, height 14, width 38
-----------------------------Applying ReLu----------------------------------
Applying en_max_conv1
Input size : num_channels 64, height 14, width 38
Actual output size : num_channels 64, height 12.0, width 36.0
Floor Output size : num_channels 64, height 12, width 36
-----------------------------Applying ReLu----------------------------------
Applying en_conv2a
Input size : num_channels 64, height 12, width 36
Actual output size : num_channels 64, height 10.0, width 34.0
Floor Output size : num_channels 64, height 10, width 34
Applying en_conv2b
Input size : num_channels 64, height 10, width 34
Actual output size : num_channels 64, height 10.0, width 34.0
Floor Output size : num_channels 64, height 10, width 34
-----------------------------Applying ReLu----------------------------------
Applying en_max

In [18]:
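# Helper that prints (and returns) the output shape of a transposed convolution layer given the layer parameters and input size (re-definition of the helper from the earlier cell)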

def print_conv_trans_output(input_size, output_channels, kernel_size, stride,
                            padding, dilation, out_padding=(0,0)):
    """Print and return the output shape of a 2D transposed convolution layer.

    This cell re-defines the helper of the same name from an earlier cell
    using the same formula: per spatial axis,
    ``(in - 1)*stride - 2*padding + dilation*(kernel - 1) + out_padding + 1``.

    Args:
        input_size: Indexable ``(num_channels, height, width)`` of the input.
        output_channels: Number of channels the layer produces; returned as-is.
        kernel_size: ``(height, width)`` of the kernel.
        stride: ``(height, width)`` stride.
        padding: ``(height, width)`` padding.
        dilation: ``(height, width)`` dilation.
        out_padding: ``(height, width)`` extra size added to the output;
            defaults to ``(0, 0)``.

    Returns:
        Tuple ``(output_channels, output_height, output_width)``.
    """

    def _transposed_extent(extent, axis):
        # Output extent along one spatial axis (0 = height, 1 = width).
        return ((extent-1)*stride[axis] - 2*padding[axis]
                + dilation[axis]*(kernel_size[axis] - 1) + out_padding[axis] + 1)

    in_channels = input_size[0]
    in_height = input_size[1]
    in_width = input_size[2]

    print("Input size : num_channels {0}, height {1}, width {2}".format(in_channels,
                                                                        in_height,
                                                                        in_width))

    out_height = _transposed_extent(in_height, 0)
    out_width = _transposed_extent(in_width, 1)

    print("Output size : num_channels {0}, height {1}, width {2}".format(output_channels,
                                                                         out_height,
                                                                         out_width))

    return (output_channels, out_height, out_width)

In [19]:
# Trace the feature-map size through the decoder (transposed convolutions),
# starting from a 1x1 feature map with 128 channels. All layers use dilation
# (1, 1) and the helper's default out_padding.
# Each row: (banner printed before the layer, output_channels, kernel_size,
#            stride, padding, whether the ReLu banner is printed afterwards)
_decoder_layers = [
    ("Applying de_conv7",     256, (3,9), (1,1), (0,0), False),
    ("Applying de_conv6",     128, (2,2), (2,2), (1,1), True),
    ("Applying de_conv4",     128, (3,3), (1,1), (1,1), True),
    ("Applying de_maxconv3",  128, (2,2), (2,2), (0,0), True),
    ("Applying de_conv3b",    128, (3,3), (1,1), (1,1), True),
    ("Applying de_conv3a",    128, (3,3), (1,1), (0,0), False),
    ("Applying de_max_conv2", 64,  (3,3), (1,1), (1,1), True),
    ("Applying de_conv2b",    64,  (3,3), (1,1), (1,1), True),
    ("Applying de_conv2a",    64,  (3,3), (1,1), (0,0), False),
    ("Applying de_maxconv1",  64,  (3,3), (1,1), (0,0), True),
    ("Applying de_conv1",     64,  (3,3), (1,1), (0,0), True),
]

# Reshaped size
iwcd_curr_size = (128, 1, 1)

# Feed the shape returned by each layer into the next one.
for _banner, _channels, _kernel, _stride, _padding, _relu_after in _decoder_layers:
    print(_banner)
    iwcd_curr_size = print_conv_trans_output(input_size=iwcd_curr_size,
                                             output_channels=_channels,
                                             kernel_size=_kernel,
                                             stride=_stride,
                                             padding=_padding,
                                             dilation=(1,1))
    if _relu_after:
        print("-----------------------------Applying ReLu----------------------------------")

Applying de_conv7
Input size : num_channels 128, height 1, width 1
Output size : num_channels 256, height 3, width 9
Applying de_conv6
Input size : num_channels 256, height 3, width 9
Output size : num_channels 128, height 4, width 16
-----------------------------Applying ReLu----------------------------------
Applying de_conv4
Input size : num_channels 128, height 4, width 16
Output size : num_channels 128, height 4, width 16
-----------------------------Applying ReLu----------------------------------
Applying de_maxconv3
Input size : num_channels 128, height 4, width 16
Output size : num_channels 128, height 8, width 32
-----------------------------Applying ReLu----------------------------------
Applying de_conv3b
Input size : num_channels 128, height 8, width 32
Output size : num_channels 128, height 8, width 32
-----------------------------Applying ReLu----------------------------------
Applying de_conv3a
Input size : num_channels 128, height 8, width 32
Output size : num_channels 