# RESNET

In [206]:
import torch, math
import torch.nn as nn
from Pipeline._2020211A2 import ImageDataset, VGG_Q2
from torch.utils.data import DataLoader

# Training split of the project's ImageDataset, served in shuffled batches of 128.
dataset = ImageDataset(split="train")
loader = DataLoader(dataset=dataset, batch_size=128, shuffle=True)

Files already downloaded and verified


In [210]:
def get_resnet_image_block(in_channel, out_channel, keep_size_same=True):
    """Build the two halves of a 2-D residual block.

    Args:
        in_channel: Number of channels of the incoming feature map.
        out_channel: Number of channels produced by both halves.
        keep_size_same: When truthy, every convolution uses ``padding='same'``
            so the spatial size is preserved; otherwise the first main-path
            convolution and the 1x1 shortcut both use stride 2.

    Returns:
        ``(resnet_block, skip_connection)``: the main path
        (Conv-BN-ReLU-Conv-BN) and the 1x1-conv shortcut, each an
        ``nn.Sequential``. The caller is expected to add their outputs.
    """
    # Only the convolution hyper-parameters differ between the two modes;
    # the layer layout is shared.
    if keep_size_same:
        first_cfg = dict(kernel_size=3, padding='same')
        second_cfg = dict(kernel_size=3, padding='same')
        shortcut_cfg = dict(kernel_size=1, padding='same')
    else:
        first_cfg = dict(kernel_size=3, stride=2, padding=1)
        second_cfg = dict(kernel_size=3, stride=1, padding=1)
        shortcut_cfg = dict(kernel_size=1, stride=2)

    main_layers = [
        nn.Conv2d(in_channels=in_channel, out_channels=out_channel, **first_cfg),
        nn.BatchNorm2d(num_features=out_channel),
        nn.ReLU(),
        nn.Conv2d(in_channels=out_channel, out_channels=out_channel, **second_cfg),
        nn.BatchNorm2d(num_features=out_channel),
    ]
    shortcut = nn.Sequential(
        nn.Conv2d(in_channels=in_channel, out_channels=out_channel, **shortcut_cfg),
    )
    return nn.Sequential(*main_layers), shortcut

# Smoke test: build one down-sampling residual block (3 -> 64 channels) and
# push a single batch through it to check the tensor shapes.
dropout = nn.Dropout(p=0.5)
block, signal = get_resnet_image_block(3, 64, keep_size_same=False)

for image, label in loader:
    print(image.shape)
    # Main path and shortcut are evaluated on the same input ...
    b_out = block(image)
    s_out = signal(image)
    print(b_out.shape)
    print(s_out.shape)
    # ... and summed to form the residual output.
    out = b_out + s_out
    print(out.shape)
    # NOTE(review): drop_out is computed but not used further in this cell.
    drop_out = dropout(out)
    # Only the first batch is needed for the shape check.
    break

torch.Size([128, 3, 32, 32])
torch.Size([128, 64, 16, 16])
torch.Size([128, 64, 16, 16])
torch.Size([128, 64, 16, 16])


# VGG

In [1]:
import torch, math
import torch.nn as nn
from Pipeline._2020211A2 import ImageDataset, VGG_Q2
from torch.utils.data import DataLoader

# Training split of the project's ImageDataset, served in shuffled batches of 128.
dataset = ImageDataset(split="train")
loader = DataLoader(dataset=dataset, batch_size=128, shuffle=True)

  warn(


Files already downloaded and verified
Files already downloaded and verified


In [4]:
def get_vgg_image_block(prev_channel, channel, kernel_size, num_conv, padding=0):
    """Build a VGG-style 2-D block: ``num_conv`` convolutions, then a 2x2 max-pool.

    The first convolution maps ``prev_channel -> channel``; every following
    one maps ``channel -> channel``. All convolutions share ``kernel_size``
    and ``padding``.

    Args:
        prev_channel: Channels of the incoming feature map.
        channel: Channels produced by every convolution in the block.
        kernel_size: Kernel size shared by all convolutions.
        num_conv: Number of stacked convolutions. Any positive whole number
            is accepted (the original only handled 2 and 3).
        padding: Padding shared by all convolutions (default 0).

    Returns:
        An ``nn.Sequential`` of the convolutions followed by
        ``nn.MaxPool2d(kernel_size=2)``, or ``None`` when ``num_conv`` is not
        a positive whole number (kept for backward compatibility).

    NOTE(review): no activation is inserted between the convolutions; this
    matches the original layout and is left unchanged.
    """
    # Preserve the historical "return None on an unsupported count" contract.
    try:
        conv_count = int(num_conv)
    except (TypeError, ValueError):
        return None
    if conv_count != num_conv or conv_count < 1:
        return None

    layers = []
    in_ch = prev_channel
    for _ in range(conv_count):
        layers.append(
            nn.Conv2d(in_channels=in_ch, out_channels=channel, kernel_size=kernel_size, padding=padding)
        )
        # Only the first convolution changes the channel count.
        in_ch = channel
    layers.append(nn.MaxPool2d(kernel_size=2))
    return nn.Sequential(*layers)

# Derive the per-block channel and kernel schedules:
#   - each new channel count is the previous one reduced by 35% (rounded up);
#   - each new kernel size is the previous one increased by 25% (rounded up),
#     then bumped to the next odd number if it came out even.
channel, kernel_size = [3, 14], [1]
for i in range(4):
    channel.append(math.ceil(channel[-1] - 0.35*channel[-1]))
    new_kernel = math.ceil(kernel_size[-1] + 0.25*kernel_size[-1])
    kernel_size.append(new_kernel if new_kernel % 2 == 1 else new_kernel + 1)
print(channel, kernel_size)
# Five VGG blocks: two with 2 convolutions, three with 3. Paddings are hand-picked per block.
im_block1 = get_vgg_image_block(channel[0], channel[1], kernel_size[0], num_conv=2, padding=2)
im_block2 = get_vgg_image_block(channel[1], channel[2], kernel_size[1], num_conv=2, padding=3)
im_block3 = get_vgg_image_block(channel[2], channel[3], kernel_size[2], num_conv=3, padding=3)
im_block4 = get_vgg_image_block(channel[3], channel[4], kernel_size[3], num_conv=3, padding=4)
im_block5 = get_vgg_image_block(channel[4], channel[5], kernel_size[4], num_conv=3, padding=4)

# Shape check: run one batch through the blocks in order, printing after each.
for image, label in loader:
    print(image.shape)
    image = im_block1(image)
    print(image.shape)
    image = im_block2(image)
    print(image.shape)
    image = im_block3(image)
    print(image.shape)
    image = im_block4(image)
    print(image.shape)
    image = im_block5(image)
    print(image.shape)
    # Only the first batch is needed for the shape check.
    break

[3, 14, 10, 7, 5, 4] [1, 3, 5, 7, 9]
torch.Size([128, 3, 32, 32])
torch.Size([128, 14, 20, 20])
torch.Size([128, 10, 14, 14])
torch.Size([128, 7, 10, 10])
torch.Size([128, 5, 8, 8])
torch.Size([128, 4, 4, 4])


In [14]:
import torch, math
import torch.nn as nn
from Pipeline._2020211A2 import AudioDataset, VGG_Q2
from torch.utils.data import DataLoader

# Training split of the project's AudioDataset, served in shuffled batches of 128.
dataset = AudioDataset(split="train")
loader = DataLoader(dataset=dataset, batch_size=128, shuffle=True)

In [27]:
def get_vgg_audio_block(prev_channel, channel, kernel_size, num_conv):
    """Build a VGG-style 1-D block: ``num_conv`` convolutions, then a size-3 max-pool.

    The first convolution maps ``prev_channel -> channel``; every following
    one maps ``channel -> channel``. All convolutions share ``kernel_size``
    and use no padding.

    Args:
        prev_channel: Channels of the incoming signal.
        channel: Channels produced by every convolution in the block.
        kernel_size: Kernel size shared by all convolutions.
        num_conv: Number of stacked convolutions. Any positive whole number
            is accepted (the original only handled 2 and 3).

    Returns:
        An ``nn.Sequential`` of the convolutions followed by
        ``nn.MaxPool1d(kernel_size=3)``, or ``None`` when ``num_conv`` is not
        a positive whole number (kept for backward compatibility).

    NOTE(review): no activation is inserted between the convolutions; this
    matches the original layout and is left unchanged.
    """
    # Preserve the historical "return None on an unsupported count" contract.
    try:
        conv_count = int(num_conv)
    except (TypeError, ValueError):
        return None
    if conv_count != num_conv or conv_count < 1:
        return None

    layers = []
    in_ch = prev_channel
    for _ in range(conv_count):
        layers.append(nn.Conv1d(in_channels=in_ch, out_channels=channel, kernel_size=kernel_size))
        # Only the first convolution changes the channel count.
        in_ch = channel
    layers.append(nn.MaxPool1d(kernel_size=3))
    return nn.Sequential(*layers)

# Derive the per-block schedules: channels shrink by 35% (rounded up) and
# kernel sizes grow by 25% (rounded up). Unlike the image variant, kernel
# sizes are not forced to be odd here.
channel, kernel_size = [1, 8], [3]
for i in range(4):
    channel.append(math.ceil(channel[-1] - 0.35*channel[-1]))
    kernel_size.append(math.ceil(kernel_size[-1] + 0.25*kernel_size[-1]))
print(channel, kernel_size)
# Five VGG blocks: two with 2 convolutions, three with 3.
au_block1 = get_vgg_audio_block(channel[0], channel[1], kernel_size[0], 2)
au_block2 = get_vgg_audio_block(channel[1], channel[2], kernel_size[1], 2)
au_block3 = get_vgg_audio_block(channel[2], channel[3], kernel_size[2], 3)
au_block4 = get_vgg_audio_block(channel[3], channel[4], kernel_size[3], 3)
au_block5 = get_vgg_audio_block(channel[4], channel[5], kernel_size[4], 3)


# Shape check: run one batch through the blocks in order, printing after each.
for audio, label in loader:
    # NOTE(review): the input shape is printed twice in a row.
    print(audio.shape)
    print(audio.shape)
    audio = au_block1(audio)
    print(audio.shape)
    audio = au_block2(audio)
    print(audio.shape)
    audio = au_block3(audio)
    print(audio.shape)
    audio = au_block4(audio)
    print(audio.shape)
    audio = au_block5(audio)
    print(audio.shape)
    # Only the first batch is needed for the shape check.
    break

[1, 8, 6, 4, 3, 2] [3, 4, 5, 7, 9]
torch.Size([128, 1, 16000])
torch.Size([128, 1, 16000])
torch.Size([128, 8, 5332])
torch.Size([128, 6, 1775])
torch.Size([128, 4, 587])
torch.Size([128, 3, 189])
torch.Size([128, 2, 55])


# Inception

In [6]:
import torch, math
import torch.nn as nn
from Pipeline._2020211A2 import ImageDataset, VGG_Q2
from torch.utils.data import DataLoader

# Training split of the project's ImageDataset, served in shuffled batches of 128.
dataset = ImageDataset(split="train")
loader = DataLoader(dataset=dataset, batch_size=128, shuffle=True)

Files already downloaded and verified


In [15]:
def get_inception_image_block(in_channel, out_channel, keep_size_same=True):
    """Build the four parallel branches of a 2-D inception-style block.

    Branch layout (every convolution outputs ``out_channel`` channels):
      1. 1x1 conv -> BN -> ReLU
      2. 3x3 conv -> BN -> ReLU -> 5x5 conv -> BN -> ReLU
      3. 3x3 conv -> BN -> ReLU -> 5x5 conv -> BN -> ReLU
      4. 3x3 max-pool -> 1x1 conv

    Args:
        in_channel: Channels of the incoming feature map.
        out_channel: Channels produced by every branch.
        keep_size_same: When truthy, all layers are stride 1 with
            size-preserving padding. Otherwise each branch contains exactly
            one stride-2 layer (the second conv in branch 2, the first conv
            in branch 3).

    Returns:
        A 4-tuple of ``nn.Sequential`` modules, one per branch, in the order
        listed above.
    """
    if keep_size_same:
        pointwise_cfg = dict(kernel_size=1)
        branch2_cfgs = (dict(kernel_size=3, padding='same'), dict(kernel_size=5, padding='same'))
        branch3_cfgs = (dict(kernel_size=3, padding='same'), dict(kernel_size=5, padding='same'))
        pool_stride = 1
    else:
        pointwise_cfg = dict(kernel_size=1, stride=2)
        branch2_cfgs = (dict(kernel_size=3, stride=1, padding=2), dict(kernel_size=5, stride=2, padding=1))
        branch3_cfgs = (dict(kernel_size=3, stride=2, padding=1), dict(kernel_size=5, stride=1, padding=2))
        pool_stride = 2

    def conv_stack(conv_cfgs):
        # Chain Conv-BN-ReLU units; only the first unit sees in_channel.
        layers, c_in = [], in_channel
        for cfg in conv_cfgs:
            layers.append(nn.Conv2d(in_channels=c_in, out_channels=out_channel, **cfg))
            layers.append(nn.BatchNorm2d(num_features=out_channel))
            layers.append(nn.ReLU())
            c_in = out_channel
        return nn.Sequential(*layers)

    branch1 = conv_stack((pointwise_cfg,))
    branch2 = conv_stack(branch2_cfgs)
    branch3 = conv_stack(branch3_cfgs)
    branch4 = nn.Sequential(
        nn.MaxPool2d(kernel_size=3, stride=pool_stride, padding=1),
        nn.Conv2d(in_channels=in_channel, out_channels=out_channel, kernel_size=1),
    )
    return branch1, branch2, branch3, branch4
        

# Four inception blocks with growing channel counts; only the first one
# down-samples (keep_size_same=False), the rest use the default.
channel = [3, 8, 16, 32, 64]
im_b1_path1, im_b1_path2, im_b1_path3, im_b1_path4 = get_inception_image_block(channel[0], channel[1], keep_size_same=False)
im_b2_path1, im_b2_path2, im_b2_path3, im_b2_path4 = get_inception_image_block(channel[1], channel[2])
im_b3_path1, im_b3_path2, im_b3_path3, im_b3_path4 = get_inception_image_block(channel[2], channel[3])
im_b4_path1, im_b4_path2, im_b4_path3, im_b4_path4 = get_inception_image_block(channel[3], channel[4])

# Shape check on a random 4x4 input: all four branches of block 1 must agree
# in shape so they can be combined by element-wise addition.
image = torch.randn(128, 3, 4, 4)
p1_out = im_b1_path1(image)
p2_out = im_b1_path2(image)
p3_out = im_b1_path3(image)
p4_out = im_b1_path4(image)
print(p1_out.shape, p2_out.shape, p3_out.shape, p4_out.shape)
# Branch outputs are summed (not concatenated along the channel axis).
out = p1_out + p2_out + p3_out + p4_out
print(out.shape)

torch.Size([128, 8, 2, 2]) torch.Size([128, 8, 2, 2]) torch.Size([128, 8, 2, 2]) torch.Size([128, 8, 2, 2])
torch.Size([128, 8, 2, 2])


In [2]:
import torch, math
import torch.nn as nn
from Pipeline._2020211A2 import AudioDataset, VGG_Q2
from torch.utils.data import DataLoader

# Training split of the project's AudioDataset, served in shuffled batches of 128.
dataset = AudioDataset(split="train")
loader = DataLoader(dataset=dataset, batch_size=128, shuffle=True)

  warn(


Files already downloaded and verified


In [5]:
def get_inception_audio_block(in_channel, out_channel, keep_size_same=False):
    """Build the four parallel branches of a 1-D inception-style block.

    Branch layout (every convolution outputs ``out_channel`` channels):
      1. k=1 conv -> BN -> ReLU
      2. k=3 conv -> BN -> ReLU -> k=5 conv -> BN -> ReLU
      3. k=3 conv -> BN -> ReLU -> k=5 conv -> BN -> ReLU
      4. size-3 max-pool -> k=1 conv

    Args:
        in_channel: Channels of the incoming signal.
        out_channel: Channels produced by every branch.
        keep_size_same: When truthy, all layers are stride 1 with
            size-preserving padding. Otherwise (the default) each branch
            contains exactly one stride-2 layer (the first conv in branch 2,
            the second conv in branch 3).

    Returns:
        A 4-tuple of ``nn.Sequential`` modules, one per branch, in the order
        listed above.
    """
    if keep_size_same:
        pointwise_cfg = dict(kernel_size=1)
        branch2_cfgs = (dict(kernel_size=3, padding='same'), dict(kernel_size=5, padding='same'))
        branch3_cfgs = (dict(kernel_size=3, padding='same'), dict(kernel_size=5, padding='same'))
        pool_stride = 1
    else:
        pointwise_cfg = dict(kernel_size=1, stride=2)
        branch2_cfgs = (dict(kernel_size=3, stride=2, padding=1), dict(kernel_size=5, stride=1, padding=2))
        branch3_cfgs = (dict(kernel_size=3, stride=1, padding=2), dict(kernel_size=5, stride=2, padding=1))
        pool_stride = 2

    def conv_stack(conv_cfgs):
        # Chain Conv-BN-ReLU units; only the first unit sees in_channel.
        layers, c_in = [], in_channel
        for cfg in conv_cfgs:
            layers.append(nn.Conv1d(in_channels=c_in, out_channels=out_channel, **cfg))
            layers.append(nn.BatchNorm1d(num_features=out_channel))
            layers.append(nn.ReLU())
            c_in = out_channel
        return nn.Sequential(*layers)

    branch1 = conv_stack((pointwise_cfg,))
    branch2 = conv_stack(branch2_cfgs)
    branch3 = conv_stack(branch3_cfgs)
    branch4 = nn.Sequential(
        nn.MaxPool1d(kernel_size=3, stride=pool_stride, padding=1),
        nn.Conv1d(in_channels=in_channel, out_channels=out_channel, kernel_size=1),
    )
    return branch1, branch2, branch3, branch4
    
# Four inception blocks with growing channel counts. Block 1 passes
# keep_size_same=False explicitly; blocks 2-4 rely on the function's default.
channel = [1, 5, 10, 15, 20]
au_b1_path1, au_b1_path2, au_b1_path3, au_b1_path4 = get_inception_audio_block(channel[0], channel[1], keep_size_same=False)
au_b2_path1, au_b2_path2, au_b2_path3, au_b2_path4 = get_inception_audio_block(channel[1], channel[2])
au_b3_path1, au_b3_path2, au_b3_path3, au_b3_path4 = get_inception_audio_block(channel[2], channel[3])
au_b4_path1, au_b4_path2, au_b4_path3, au_b4_path4 = get_inception_audio_block(channel[3], channel[4])

# Shape check on a random length-16000 signal: all four branches of block 1
# must agree in shape so they can be combined by element-wise addition.
x = torch.randn(128, 1, 16000)
p1_out = au_b1_path1(x)
p2_out = au_b1_path2(x)
p3_out = au_b1_path3(x)
p4_out = au_b1_path4(x)
print(p1_out.shape, p2_out.shape, p3_out.shape, p4_out.shape)
# Branch outputs are summed (not concatenated along the channel axis).
out = p1_out + p2_out + p3_out + p4_out
print(out.shape)

torch.Size([128, 5, 8000]) torch.Size([128, 5, 8000]) torch.Size([128, 5, 8000]) torch.Size([128, 5, 8000])
torch.Size([128, 5, 8000])


In [53]:
def get_inception_audio_block(in_channel, out_channel, path1, path2, path3, path4):
    """Build a 1-D inception-style block whose strides/paddings are caller-supplied.

    Kernel sizes are fixed (1 for branch 1; 3 then 5 for branches 2 and 3;
    3 for the max-pool in branch 4). Stride, padding and dilation are read
    from the per-branch dictionaries.

    Args:
        in_channel: Channels of the incoming signal.
        out_channel: Channels produced by every branch.
        path1: Dict with keys ``'s'``, ``'p'``, ``'d'`` for the single conv
            of branch 1.
        path2: Dict with keys ``'s1'``, ``'p1'``, ``'d1'`` (first conv) and
            ``'s2'``, ``'p2'``, ``'d2'`` (second conv) for branch 2.
        path3: Same layout as ``path2``, for branch 3.
        path4: Dict with keys ``'s'``, ``'p'``, ``'d'`` for the max-pool of
            branch 4 (its trailing 1x1 conv uses defaults).

    Returns:
        A 4-tuple of ``nn.Sequential`` modules, one per branch.
    """
    def conv_unit(c_in, kernel, cfg, suffix=''):
        # One Conv-BN-ReLU unit; `suffix` selects the '1'/'2' keys of a two-conv branch.
        return (
            nn.Conv1d(
                in_channels=c_in,
                out_channels=out_channel,
                kernel_size=kernel,
                stride=cfg['s' + suffix],
                padding=cfg['p' + suffix],
                dilation=cfg['d' + suffix],
            ),
            nn.BatchNorm1d(num_features=out_channel),
            nn.ReLU(),
        )

    def two_conv_branch(cfg):
        return nn.Sequential(
            *conv_unit(in_channel, 3, cfg, '1'),
            *conv_unit(out_channel, 5, cfg, '2'),
        )

    branch1 = nn.Sequential(*conv_unit(in_channel, 1, path1))
    branch2 = two_conv_branch(path2)
    branch3 = two_conv_branch(path3)
    pool = nn.MaxPool1d(kernel_size=3, stride=path4['s'], padding=path4['p'], dilation=path4['d'])
    branch4 = nn.Sequential(
        pool,
        nn.Conv1d(in_channels=in_channel, out_channels=out_channel, kernel_size=1),
    )
    return branch1, branch2, branch3, branch4

# Chain four inception stages on a random length-16000 signal. Each stage
# builds its four branches, sums their outputs into `x`, and prints the shape.
channels = [1, 5, 10, 15, 20]
x = torch.randn(128, 1, 16000)

# Stage 1: branches 1 and 4 use stride 4; branches 2 and 3 use two stride-2 convs.
path1 = {'s': 4, 'p': 0, 'd': 1}
path2 = {'s1': 2, 'p1': 2, 'd1': 1, 's2': 2, 'p2': 1, 'd2': 1}
path3 = {'s1': 2, 'p1': 2, 'd1': 1, 's2': 2, 'p2': 1, 'd2': 1}
path4 = {'s': 4, 'p': 0, 'd': 1}
au_b1_path1, au_b1_path2, au_b1_path3, au_b1_path4 = get_inception_audio_block(channels[0], channels[1], path1, path2, path3, path4)
x = au_b1_path1(x) + au_b1_path2(x) + au_b1_path3(x) + au_b1_path4(x)
print(x.shape)

# Stage 2: same stride/padding settings as stage 1.
path1 = {'s': 4, 'p': 0, 'd': 1}
path2 = {'s1': 2, 'p1': 2, 'd1': 1, 's2': 2, 'p2': 1, 'd2': 1}
path3 = {'s1': 2, 'p1': 2, 'd1': 1, 's2': 2, 'p2': 1, 'd2': 1}
path4 = {'s': 4, 'p': 0, 'd': 1}
au_b2_path1, au_b2_path2, au_b2_path3, au_b2_path4 = get_inception_audio_block(channels[1], channels[2], path1, path2, path3, path4)
x = au_b2_path1(x) + au_b2_path2(x) + au_b2_path3(x) + au_b2_path4(x)
print(x.shape)

# Stage 3: same stride/padding settings as stage 1.
path1 = {'s': 4, 'p': 0, 'd': 1}
path2 = {'s1': 2, 'p1': 2, 'd1': 1, 's2': 2, 'p2': 1, 'd2': 1}
path3 = {'s1': 2, 'p1': 2, 'd1': 1, 's2': 2, 'p2': 1, 'd2': 1}
path4 = {'s': 4, 'p': 0, 'd': 1}
au_b3_path1, au_b3_path2, au_b3_path3, au_b3_path4 = get_inception_audio_block(channels[2], channels[3], path1, path2, path3, path4)
x = au_b3_path1(x) + au_b3_path2(x) + au_b3_path3(x) + au_b3_path4(x)
print(x.shape)

# Stage 4: 'p2' (branches 2/3) and the pool padding differ from the earlier
# stages; the values are hand-tuned so the four branch lengths still match.
path1 = {'s': 4, 'p': 0, 'd': 1}
path2 = {'s1': 2, 'p1': 2, 'd1': 1, 's2': 2, 'p2': 2, 'd2': 1}
path3 = {'s1': 2, 'p1': 2, 'd1': 1, 's2': 2, 'p2': 2, 'd2': 1}
path4 = {'s': 4, 'p': 1, 'd': 1}
au_b4_path1, au_b4_path2, au_b4_path3, au_b4_path4 = get_inception_audio_block(channels[3], channels[4], path1, path2, path3, path4)
x = au_b4_path1(x) + au_b4_path2(x) + au_b4_path3(x) + au_b4_path4(x)
print(x.shape)

torch.Size([128, 5, 4000])
torch.Size([128, 10, 1000])
torch.Size([128, 15, 250])
torch.Size([128, 20, 63])


# CustomNet

In [4]:

import torch, math
import torch.nn as nn
from Pipeline._2020211A2 import ImageDataset, VGG_Q2
from torch.utils.data import DataLoader

# Training split of the project's ImageDataset, served in shuffled batches of 128.
dataset = ImageDataset(split="train")
loader = DataLoader(dataset=dataset, batch_size=128, shuffle=True)
# Number of samples and number of batches per epoch.
print(len(dataset), len(loader))

  warn(


Files already downloaded and verified
Files already downloaded and verified
42500 333


In [36]:
import math
# Channel / kernel schedule for the image CustomNet.
#   channel:     starts at [3, 64]; each step keeps 65% of the previous width (rounded up).
#   kernel_size: starts at [3]; each step grows by 25% (rounded up), then is
#                bumped to the next odd number when the result is even.
channel = [3, 64]
kernel_size = [3]
for i in range(9):
    channel += [math.ceil(channel[-1] - 0.35*channel[-1])]
    new_kernel = math.ceil(kernel_size[-1] + 0.25*kernel_size[-1])
    # (1 - new_kernel % 2) is 1 for even values and 0 for odd ones.
    kernel_size += [new_kernel + (1 - new_kernel % 2)]
print(channel, kernel_size)
print(len(channel), len(kernel_size))

[3, 64, 42, 28, 19, 13, 9, 6, 4, 3, 2] [3, 5, 7, 9, 13, 17, 23, 29, 37, 47]
11 10


In [38]:
def get_resnet_image_block(in_channel, out_channel, kernel_size, block, skip):
    """Build the main path and shortcut of a configurable 2-D residual block.

    Args:
        in_channel: Channels of the incoming feature map.
        out_channel: Channels produced by both paths.
        kernel_size: Kernel size shared by the two main-path convolutions.
        block: Dict with keys ``'s1'``, ``'p1'``, ``'d1'`` (first conv) and
            ``'s2'``, ``'p2'``, ``'d2'`` (second conv): stride, padding and
            dilation of the main path.
        skip: Dict with keys ``'s'``, ``'p'``, ``'d'`` for the 1x1 shortcut conv.

    Returns:
        ``(main_path, shortcut)``: Conv-BN-ReLU-Conv-BN and a single 1x1
        convolution, each wrapped in ``nn.Sequential``.
    """
    first_conv = nn.Conv2d(
        in_channel, out_channel, kernel_size,
        stride=block['s1'], padding=block['p1'], dilation=block['d1'],
    )
    first_norm = nn.BatchNorm2d(out_channel)
    second_conv = nn.Conv2d(
        out_channel, out_channel, kernel_size,
        stride=block['s2'], padding=block['p2'], dilation=block['d2'],
    )
    second_norm = nn.BatchNorm2d(out_channel)
    main_path = nn.Sequential(first_conv, first_norm, nn.ReLU(), second_conv, second_norm)

    shortcut_conv = nn.Conv2d(
        in_channel, out_channel, kernel_size=1,
        stride=skip['s'], padding=skip['p'], dilation=skip['d'],
    )
    return main_path, nn.Sequential(shortcut_conv)

def get_inception_image_block(in_channel, out_channel, kernel_size, path1, path2, path3, path4):
    """Build a 2-D inception-style block with caller-supplied strides/paddings.

    All convolutions in branches 1-3 and the max-pool in branch 4 share
    ``kernel_size``; the trailing convolution of branch 4 is 1x1.

    Args:
        in_channel: Channels of the incoming feature map.
        out_channel: Channels produced by every branch.
        kernel_size: Kernel size shared by the branch convolutions and the pool.
        path1: Dict with keys ``'s'``, ``'p'``, ``'d'`` for the single conv
            of branch 1.
        path2: Dict with keys ``'s1'``, ``'p1'``, ``'d1'`` (first conv) and
            ``'s2'``, ``'p2'``, ``'d2'`` (second conv) for branch 2.
        path3: Same layout as ``path2``, for branch 3.
        path4: Dict with keys ``'s'`` and ``'p'`` for the max-pool of branch 4.
            An optional ``'d'`` key sets the pool dilation (default 1), which
            brings this builder in line with the 1-D audio variant.

    Returns:
        A 4-tuple of ``nn.Sequential`` modules, one per branch.
    """
    def conv_unit(c_in, cfg, suffix=''):
        # One Conv-BN-ReLU unit; `suffix` selects the '1'/'2' keys of a two-conv branch.
        return (
            nn.Conv2d(
                in_channels=c_in,
                out_channels=out_channel,
                kernel_size=kernel_size,
                stride=cfg['s' + suffix],
                padding=cfg['p' + suffix],
                dilation=cfg['d' + suffix],
            ),
            nn.BatchNorm2d(num_features=out_channel),
            nn.ReLU(),
        )

    def two_conv_branch(cfg):
        return nn.Sequential(
            *conv_unit(in_channel, cfg, '1'),
            *conv_unit(out_channel, cfg, '2'),
        )

    branch1 = nn.Sequential(*conv_unit(in_channel, path1))
    branch2 = two_conv_branch(path2)
    branch3 = two_conv_branch(path3)
    branch4 = nn.Sequential(
        # Existing callers pass no 'd' for the pool branch, so fall back to
        # MaxPool2d's own default dilation of 1.
        nn.MaxPool2d(
            kernel_size=kernel_size,
            stride=path4['s'],
            padding=path4['p'],
            dilation=path4.get('d', 1),
        ),
        nn.Conv2d(in_channels=in_channel, out_channels=out_channel, kernel_size=1),
    )
    return branch1, branch2, branch3, branch4

        
# Shape walk-through of the image CustomNet on a random 32x32 RGB batch.
# Residual and inception stages alternate (res, res, incep, incep, res, incep,
# res, incep, res, incep); each stage uses the next entry of `kernel_size`
# with hand-tuned paddings, and all stages keep 8 channels.
x = torch.randn(128, 3, 32, 32)
print(kernel_size, len(kernel_size))

# Residual stage 1 (kernel_size[0]): the only stage that changes the channel count (3 -> 8).
res_b1, res_s1 = get_resnet_image_block(3, 8, kernel_size[0], block={'p1': 1, 'p2': 1, 's1': 1, 's2': 1, 'd1': 1, 'd2': 1}, skip={'s': 1, 'p': 0, 'd': 1})
x = res_b1(x) + res_s1(x)
print(x.shape)

# Residual stage 2 (kernel_size[1]).
res_b2, res_s2 = get_resnet_image_block(8, 8, kernel_size[1], block={'p1': 2, 'p2': 2, 's1': 1, 's2': 1, 'd1': 1, 'd2': 1}, skip={'s': 1, 'p': 0, 'd': 1})
x = res_b2(x) + res_s2(x)
print(x.shape)

# Inception stage 1 (kernel_size[2]); the four branch outputs are summed.
path1 = {'s': 1, 'p': 3, 'd': 1}
path2 = {'s1': 1, 's2': 1, 'p1': 3, 'p2': 3, 'd1': 1, 'd2': 1}
path3 = {'s1': 1, 's2': 1, 'p1': 3, 'p2': 3, 'd1': 1, 'd2': 1}
path4 = {'s': 1, 'p': 3}
incep_b1_p1, incep_b1_p2, incep_b1_p3, incep_b1_p4 = get_inception_image_block(8, 8, kernel_size[2], path1, path2, path3, path4)
x = incep_b1_p1(x) + incep_b1_p2(x) + incep_b1_p3(x) + incep_b1_p4(x)
print(x.shape)

# Inception stage 2 (kernel_size[3]).
path1 = {'s': 1, 'p': 3, 'd': 1}
path2 = {'s1': 1, 's2': 1, 'p1': 3, 'p2': 4, 'd1': 1, 'd2': 1}
path3 = {'s1': 1, 's2': 1, 'p1': 3, 'p2': 4, 'd1': 1, 'd2': 1}
path4 = {'s': 1, 'p': 3}
incep_b2_p1, incep_b2_p2, incep_b2_p3, incep_b2_p4 = get_inception_image_block(8, 8, kernel_size[3], path1, path2, path3, path4)
x =incep_b2_p1(x) + incep_b2_p2(x) + incep_b2_p3(x) + incep_b2_p4(x)
print(x.shape)

# Residual stage 3 (kernel_size[4]).
res_b3, res_s3 = get_resnet_image_block(8, 8, kernel_size[4], block={'p1': 6, 'p2': 6, 's1': 1, 's2': 1, 'd1': 1, 'd2': 1}, skip={'s': 1, 'p': 0, 'd': 1})
x = res_b3(x) + res_s3(x)
print(x.shape)

# Inception stage 3 (kernel_size[5]).
# NOTE(review): unlike every other stage, no shape is printed after this one.
path1 = {'s': 1, 'p': 7, 'd': 1}
path2 = {'s1': 1, 's2': 1, 'p1': 8, 'p2': 7, 'd1': 1, 'd2': 1}
path3 = {'s1': 1, 's2': 1, 'p1': 8, 'p2': 7, 'd1': 1, 'd2': 1}
path4 = {'s': 1, 'p': 7}
incep_b3_p1, incep_b3_p2, incep_b3_p3, incep_b3_p4 = get_inception_image_block(8, 8, kernel_size[5], path1, path2, path3, path4)
x = incep_b3_p1(x) + incep_b3_p2(x) + incep_b3_p3(x) + incep_b3_p4(x)

# Residual stage 4 (kernel_size[6]).
res_b4, res_s4 = get_resnet_image_block(8, 8, kernel_size[6], block={'p1': 11, 'p2': 11, 's1': 1, 's2': 1, 'd1': 1, 'd2': 1}, skip={'s': 1, 'p': 0, 'd': 1})
x = res_b4(x) + res_s4(x)
print(x.shape)

# Inception stage 4 (kernel_size[7]).
path1 = {'s': 1, 'p': 10, 'd': 1}
path2 = {'s1': 1, 's2': 1, 'p1': 12, 'p2': 12, 'd1': 1, 'd2': 1}
path3 = {'s1': 1, 's2': 1, 'p1': 12, 'p2': 12, 'd1': 1, 'd2': 1}
path4 = {'s': 1, 'p': 10}
incep_b4_p1, incep_b4_p2, incep_b4_p3, incep_b4_p4 = get_inception_image_block(8, 8, kernel_size[7], path1, path2, path3, path4)
x = incep_b4_p1(x) + incep_b4_p2(x) + incep_b4_p3(x) + incep_b4_p4(x)
print(x.shape)

# Residual stage 5 (kernel_size[8]).
res_b5, res_s5 = get_resnet_image_block(8, 8, kernel_size[8], block={'p1': 18, 'p2': 18, 's1': 1, 's2': 1, 'd1': 1, 'd2': 1}, skip={'s': 1, 'p': 0, 'd': 1})
x = res_b5(x) + res_s5(x)
print(x.shape)

# Inception stage 5 (kernel_size[9]).
path1 = {'s': 1, 'p': 15, 'd': 1}
path2 = {'s1': 1, 's2': 1, 'p1': 19, 'p2': 19, 'd1': 1, 'd2': 1}
path3 = {'s1': 1, 's2': 1, 'p1': 19, 'p2': 19, 'd1': 1, 'd2': 1}
path4 = {'s': 1, 'p': 15}
incep_b5_p1, incep_b5_p2, incep_b5_p3, incep_b5_p4 = get_inception_image_block(8, 8, kernel_size[9], path1, path2, path3, path4)
x = incep_b5_p1(x) + incep_b5_p2(x) + incep_b5_p3(x) + incep_b5_p4(x)
print(x.shape)

[3, 5, 7, 9, 13, 17, 23, 29, 37, 47] 10
torch.Size([128, 8, 32, 32])
torch.Size([128, 8, 32, 32])
torch.Size([128, 8, 32, 32])
torch.Size([128, 8, 30, 30])
torch.Size([128, 8, 30, 30])
torch.Size([128, 8, 28, 28])
torch.Size([128, 8, 20, 20])
torch.Size([128, 8, 20, 20])
torch.Size([128, 8, 4, 4])


In [5]:
import math
# Channel / kernel schedule for the audio CustomNet.
#   channel:     starts at [1, 64]; each step keeps 65% of the previous width (rounded up).
#   kernel_size: starts at [3]; each step grows by 25% (rounded up), then is
#                bumped to the next odd number when the result is even.
channel = [1, 64]
kernel_size = [3]
for i in range(9):
    channel += [math.ceil(channel[-1] - 0.35*channel[-1])]
    new_kernel = math.ceil(kernel_size[-1] + 0.25*kernel_size[-1])
    # (1 - new_kernel % 2) is 1 for even values and 0 for odd ones.
    kernel_size += [new_kernel + (1 - new_kernel % 2)]
print(channel, kernel_size)
print(len(channel), len(kernel_size))

[1, 64, 42, 28, 19, 13, 9, 6, 4, 3, 2] [3, 5, 7, 9, 13, 17, 23, 29, 37, 47]
11 10


In [10]:
def get_resnet_audio_block(in_channel, out_channel, kernel_size, block, skip):
    """Build the main path and shortcut of a configurable 1-D residual block.

    Args:
        in_channel: Channels of the incoming signal.
        out_channel: Channels produced by both paths.
        kernel_size: Kernel size shared by the two main-path convolutions.
        block: Dict with keys ``'s1'``, ``'p1'``, ``'d1'`` (first conv) and
            ``'s2'``, ``'p2'``, ``'d2'`` (second conv): stride, padding and
            dilation of the main path.
        skip: Dict with keys ``'s'``, ``'p'``, ``'d'`` for the k=1 shortcut conv.

    Returns:
        ``(main_path, shortcut)``: Conv-BN-ReLU-Conv-BN and a single k=1
        convolution, each wrapped in ``nn.Sequential``.
    """
    first_conv = nn.Conv1d(
        in_channel, out_channel, kernel_size,
        stride=block['s1'], padding=block['p1'], dilation=block['d1'],
    )
    first_norm = nn.BatchNorm1d(out_channel)
    second_conv = nn.Conv1d(
        out_channel, out_channel, kernel_size,
        stride=block['s2'], padding=block['p2'], dilation=block['d2'],
    )
    second_norm = nn.BatchNorm1d(out_channel)
    main_path = nn.Sequential(first_conv, first_norm, nn.ReLU(), second_conv, second_norm)

    shortcut_conv = nn.Conv1d(
        in_channel, out_channel, kernel_size=1,
        stride=skip['s'], padding=skip['p'], dilation=skip['d'],
    )
    return main_path, nn.Sequential(shortcut_conv)
    
    
def get_inception_audio_block(in_channel, out_channel, kernel_size, path1, path2, path3, path4):
    """Build a 1-D inception-style block with a shared kernel size.

    All convolutions in branches 1-3 and the max-pool in branch 4 share
    ``kernel_size``; the trailing convolution of branch 4 uses kernel size 1.

    Args:
        in_channel: Channels of the incoming signal.
        out_channel: Channels produced by every branch.
        kernel_size: Kernel size shared by the branch convolutions and the pool.
        path1: Dict with keys ``'s'``, ``'p'``, ``'d'`` for the single conv
            of branch 1.
        path2: Dict with keys ``'s1'``, ``'p1'``, ``'d1'`` (first conv) and
            ``'s2'``, ``'p2'``, ``'d2'`` (second conv) for branch 2.
        path3: Same layout as ``path2``, for branch 3.
        path4: Dict with keys ``'s'``, ``'p'``, ``'d'`` for the max-pool of
            branch 4.

    Returns:
        A 4-tuple of ``nn.Sequential`` modules, one per branch.
    """
    def conv_unit(c_in, cfg, suffix=''):
        # One Conv-BN-ReLU unit; `suffix` selects the '1'/'2' keys of a two-conv branch.
        return (
            nn.Conv1d(
                in_channels=c_in,
                out_channels=out_channel,
                kernel_size=kernel_size,
                stride=cfg['s' + suffix],
                padding=cfg['p' + suffix],
                dilation=cfg['d' + suffix],
            ),
            nn.BatchNorm1d(num_features=out_channel),
            nn.ReLU(),
        )

    def two_conv_branch(cfg):
        return nn.Sequential(
            *conv_unit(in_channel, cfg, '1'),
            *conv_unit(out_channel, cfg, '2'),
        )

    branch1 = nn.Sequential(*conv_unit(in_channel, path1))
    branch2 = two_conv_branch(path2)
    branch3 = two_conv_branch(path3)
    pool = nn.MaxPool1d(kernel_size=kernel_size, stride=path4['s'], padding=path4['p'], dilation=path4['d'])
    branch4 = nn.Sequential(
        pool,
        nn.Conv1d(in_channels=in_channel, out_channels=out_channel, kernel_size=1),
    )
    return branch1, branch2, branch3, branch4

# Shape walk-through of the audio CustomNet on a random length-16000 batch.
# Residual and inception stages alternate (res, res, incep, incep, res, incep,
# res, incep, res, incep); stage k uses kernel_size[k] and maps
# channel[k] -> channel[k+1]. Strides and paddings are hand-tuned per stage
# so that the outputs being summed have matching lengths.
x = torch.randn(128, 1, 16000)

# Residual stage 1: stride 2 in the second conv and in the shortcut.
resnet_block1, resnet_skip1 = get_resnet_audio_block(channel[0], channel[1], kernel_size[0], block={'p1': 1, 'p2': 1, 's1': 1, 's2': 2, 'd1': 1, 'd2': 1}, skip={'s': 2, 'p': 0, 'd': 1})
x = resnet_block1(x) + resnet_skip1(x)
print(x.shape)

# Residual stage 2: stride 1 throughout.
resnet_block2, resnet_skip2 = get_resnet_audio_block(channel[1], channel[2], kernel_size[1], block={'p1': 2, 'p2': 2, 's1': 1, 's2': 1, 'd1': 1, 'd2': 1}, skip={'s': 1, 'p': 0, 'd': 1})
x = resnet_block2(x) + resnet_skip2(x)
print(x.shape)

# Inception stage 1: stride 1 in every branch; the four outputs are summed.
path1 = {'s': 1, 'p': 3, 'd': 1}
path2 = {'s1': 1, 's2': 1, 'p1': 3, 'p2': 3, 'd1': 1, 'd2': 1}
path3 = {'s1': 1, 's2': 1, 'p1': 3, 'p2': 3, 'd1': 1, 'd2': 1}
path4 = {'s': 1, 'p': 3, 'd' : 1}
inception_block1_path1, inception_block1_path2, inception_block1_path3, inception_block1_path4 = get_inception_audio_block(channel[2], channel[3], kernel_size[2], path1, path2, path3, path4)
x = inception_block1_path1(x) + inception_block1_path2(x) + inception_block1_path3(x) + inception_block1_path4(x)
print(x.shape)

# Inception stage 2: every branch has one stride-2 layer
# (first conv in branch 2, second conv in branch 3).
path1 = {'s': 2, 'p': 2, 'd': 1}
path2 = {'s1': 2, 's2': 1, 'p1': 4, 'p2': 3, 'd1': 1, 'd2': 1}
path3 = {'s1': 1, 's2': 2, 'p1': 3, 'p2': 3, 'd1': 1, 'd2': 1}
path4 = {'s': 2, 'p': 2, 'd' : 1}
inception_block2_path1, inception_block2_path2, inception_block2_path3, inception_block2_path4 = get_inception_audio_block(channel[3], channel[4], kernel_size[3], path1, path2, path3, path4)
x = inception_block2_path1(x) + inception_block2_path2(x) + inception_block2_path3(x) + inception_block2_path4(x)
print(x.shape)

# Residual stage 3: stride 2; note the asymmetric paddings (p1=7, p2=6) and
# the padded shortcut (p=1), unlike the other residual stages.
resnet_block3, resnet_skip3 = get_resnet_audio_block(channel[4], channel[5], kernel_size[4], block={'p1': 7, 'p2': 6, 's1': 1, 's2': 2, 'd1': 1, 'd2': 1}, skip={'s': 2, 'p': 1, 'd': 1})
x = resnet_block3(x) + resnet_skip3(x)
print(x.shape)

# Inception stage 3: stride 2.
path1 = {'s': 2, 'p': 8, 'd': 1}
path2 = {'s1': 2, 's2': 1, 'p1': 8, 'p2': 8, 'd1': 1, 'd2': 1}
path3 = {'s1': 1, 's2': 2, 'p1': 8, 'p2': 8, 'd1': 1, 'd2': 1}
path4 = {'s': 2, 'p': 8, 'd' : 1}
inception_block3_path1, inception_block3_path2, inception_block3_path3, inception_block3_path4 = get_inception_audio_block(channel[5], channel[6], kernel_size[5], path1, path2, path3, path4)
x = inception_block3_path1(x) + inception_block3_path2(x) + inception_block3_path3(x) + inception_block3_path4(x)
print(x.shape)

# Residual stage 4: stride 2.
resnet_block4, resnet_skip4 = get_resnet_audio_block(channel[6], channel[7], kernel_size[6], block={'p1': 11, 'p2': 11, 's1': 1, 's2': 2, 'd1': 1, 'd2': 1}, skip={'s': 2, 'p': 0, 'd': 1})
x = resnet_block4(x) + resnet_skip4(x)
print(x.shape)

# Inception stage 4: stride 2.
path1 = {'s': 2, 'p': 10, 'd': 1}
path2 = {'s1': 2, 's2': 1, 'p1': 12, 'p2': 13, 'd1': 1, 'd2': 1}
path3 = {'s1': 1, 's2': 2, 'p1': 12, 'p2': 12, 'd1': 1, 'd2': 1}
path4 = {'s': 2, 'p': 10, 'd' : 1}
inception_block4_path1, inception_block4_path2, inception_block4_path3, inception_block4_path4 = get_inception_audio_block(channel[7], channel[8], kernel_size[7], path1, path2, path3, path4)
x = inception_block4_path1(x) + inception_block4_path2(x) + inception_block4_path3(x) + inception_block4_path4(x)
print(x.shape)

# Residual stage 5: stride 2.
resnet_block5, resnet_skip5 = get_resnet_audio_block(channel[8], channel[9], kernel_size[8], block={'p1': 18, 'p2': 18, 's1': 1, 's2': 2, 'd1': 1, 'd2': 1}, skip={'s': 2, 'p': 0, 'd': 1})
x = resnet_block5(x) + resnet_skip5(x)
print(x.shape)

# Inception stage 5: stride 2.
path1 = {'s': 2, 'p': 11, 'd': 1}
path2 = {'s1': 2, 's2': 1, 'p1': 19, 'p2': 19, 'd1': 1, 'd2': 1}
path3 = {'s1': 1, 's2': 2, 'p1': 19, 'p2': 15, 'd1': 1, 'd2': 1}
path4 = {'s': 2, 'p': 11, 'd' : 1}
inception_block5_path1, inception_block5_path2, inception_block5_path3, inception_block5_path4 = get_inception_audio_block(channel[9], channel[10], kernel_size[9], path1, path2, path3, path4)
x = inception_block5_path1(x) + inception_block5_path2(x) + inception_block5_path3(x) + inception_block5_path4(x)
print(x.shape)


torch.Size([128, 64, 8000])
torch.Size([128, 42, 8000])
torch.Size([128, 28, 8000])
torch.Size([128, 19, 3998])
torch.Size([128, 13, 2000])
torch.Size([128, 9, 1000])
torch.Size([128, 6, 500])
torch.Size([128, 4, 246])
torch.Size([128, 3, 123])
torch.Size([128, 2, 50])


In [59]:
def get_resnet_audio_block(in_channel, out_channel, kernel_size, block, skip):
    """Build the main path and shortcut of a configurable 1-D residual block.

    Args:
        in_channel: Channels of the incoming signal.
        out_channel: Channels produced by both paths.
        kernel_size: Kernel size shared by the two main-path convolutions.
        block: Dict with keys ``'s1'``, ``'p1'``, ``'d1'`` (first conv) and
            ``'s2'``, ``'p2'``, ``'d2'`` (second conv): stride, padding and
            dilation of the main path.
        skip: Dict with keys ``'s'``, ``'p'``, ``'d'`` for the k=1 shortcut conv.

    Returns:
        ``(main_path, shortcut)``: Conv-BN-ReLU-Conv-BN and a single k=1
        convolution, each wrapped in ``nn.Sequential``.
    """
    first_conv = nn.Conv1d(
        in_channel, out_channel, kernel_size,
        stride=block['s1'], padding=block['p1'], dilation=block['d1'],
    )
    first_norm = nn.BatchNorm1d(out_channel)
    second_conv = nn.Conv1d(
        out_channel, out_channel, kernel_size,
        stride=block['s2'], padding=block['p2'], dilation=block['d2'],
    )
    second_norm = nn.BatchNorm1d(out_channel)
    main_path = nn.Sequential(first_conv, first_norm, nn.ReLU(), second_conv, second_norm)

    shortcut_conv = nn.Conv1d(
        in_channel, out_channel, kernel_size=1,
        stride=skip['s'], padding=skip['p'], dilation=skip['d'],
    )
    return main_path, nn.Sequential(shortcut_conv)
    
    
def get_inception_audio_block(in_channel, out_channel, kernel_size, path1, path2, path3, path4):
    """Build the four parallel branches of a 1-D inception-style stage.

    Args:
        in_channel: Number of channels of the incoming tensor.
        out_channel: Number of channels every branch produces.
        kernel_size: Kernel size shared by all convolutions in branches 1-3 and
            by the max-pool window in branch 4.
        path1: Dict with keys 's', 'p', 'd' (stride, padding, dilation) for the
            single convolution of branch 1.
        path2: Dict with keys 's1', 'p1', 'd1' and 's2', 'p2', 'd2' for the
            first and second convolution of branch 2.
        path3: Same key layout as ``path2``, for branch 3.
        path4: Dict with keys 's', 'p', 'd' for the max-pool of branch 4.

    Returns:
        A 4-tuple of ``nn.Sequential`` modules, one per branch, in the order
        (path1, path2, path3, path4). The branches are returned separately;
        combining their outputs is left to the caller.
    """

    def conv_norm_act(source_channels, stride, padding, dilation):
        # One Conv1d -> BatchNorm1d -> ReLU unit emitting out_channel channels.
        return [
            nn.Conv1d(
                in_channels=source_channels,
                out_channels=out_channel,
                kernel_size=kernel_size,
                stride=stride,
                padding=padding,
                dilation=dilation,
            ),
            nn.BatchNorm1d(num_features=out_channel),
            nn.ReLU(),
        ]

    # Branch 1: a single conv unit.
    single_conv_branch = nn.Sequential(
        *conv_norm_act(in_channel, path1['s'], path1['p'], path1['d'])
    )

    # Branches 2 and 3: two stacked conv units each, configured independently.
    stacked_branch_a = nn.Sequential(
        *conv_norm_act(in_channel, path2['s1'], path2['p1'], path2['d1']),
        *conv_norm_act(out_channel, path2['s2'], path2['p2'], path2['d2']),
    )
    stacked_branch_b = nn.Sequential(
        *conv_norm_act(in_channel, path3['s1'], path3['p1'], path3['d1']),
        *conv_norm_act(out_channel, path3['s2'], path3['p2'], path3['d2']),
    )

    # Branch 4: max-pool followed by a kernel-size-1 conv to reach out_channel.
    pooling = nn.MaxPool1d(kernel_size=kernel_size, stride=path4['s'], padding=path4['p'], dilation=path4['d'])
    channel_projection = nn.Conv1d(in_channels=in_channel, out_channels=out_channel, kernel_size=1)
    pool_branch = nn.Sequential(pooling, channel_projection)

    return single_conv_branch, stacked_branch_a, stacked_branch_b, pool_branch

# Shape walk-through: push one random batch through five residual stages and
# five inception stages (interleaved), printing the tensor shape after each one.
# `channel` and `kernel_size` are not defined in this cell; they must already
# exist in the kernel (presumably per-stage channel counts and kernel sizes --
# TODO confirm). In every dict below 's' = stride, 'p' = padding, 'd' = dilation,
# and a '1'/'2' suffix selects the first/second convolution of a two-conv branch.
# Branch outputs of a stage are added elementwise, so all branches of a stage
# must yield matching shapes; the per-stage stride/padding values are presumably
# tuned by hand for that. `x` is overwritten at every stage.
x = torch.randn(128, 1, 16000)  # Conv1d input layout: (batch, channels, length)

# Residual stage 1 (recorded run: torch.Size([128, 64, 8000])).
resnet_block1, resnet_skip1 = get_resnet_audio_block(channel[0], channel[1], kernel_size[0], block={'p1': 1, 'p2': 1, 's1': 1, 's2': 2, 'd1': 1, 'd2': 1}, skip={'s': 2, 'p': 0, 'd': 1})
x = resnet_block1(x) + resnet_skip1(x)
print(x.shape)

# Residual stage 2; unlike the other residual stages, the stride of 2 is on the
# first convolution here (recorded run: torch.Size([128, 42, 4000])).
resnet_block2, resnet_skip2 = get_resnet_audio_block(channel[1], channel[2], kernel_size[1], block={'p1': 2, 'p2': 2, 's1': 2, 's2': 1, 'd1': 1, 'd2': 1}, skip={'s': 2, 'p': 0, 'd': 1})
x = resnet_block2(x) + resnet_skip2(x)
print(x.shape)

# Inception stage 1: all strides are 1 (recorded run: torch.Size([128, 28, 4000])).
# path1..path4 are rebound before every inception stage below.
path1 = {'s': 1, 'p': 3, 'd': 1}
path2 = {'s1': 1, 's2': 1, 'p1': 3, 'p2': 3, 'd1': 1, 'd2': 1}
path3 = {'s1': 1, 's2': 1, 'p1': 3, 'p2': 3, 'd1': 1, 'd2': 1}
path4 = {'s': 1, 'p': 3, 'd' : 1}
inception_block1_path1, inception_block1_path2, inception_block1_path3, inception_block1_path4 = get_inception_audio_block(channel[2], channel[3], kernel_size[2], path1, path2, path3, path4)
x = inception_block1_path1(x) + inception_block1_path2(x) + inception_block1_path3(x) + inception_block1_path4(x)
print(x.shape)

# Inception stage 2: every branch applies a stride of 2 once
# (recorded run: torch.Size([128, 19, 1998])).
path1 = {'s': 2, 'p': 2, 'd': 1}
path2 = {'s1': 2, 's2': 1, 'p1': 4, 'p2': 3, 'd1': 1, 'd2': 1}
path3 = {'s1': 1, 's2': 2, 'p1': 3, 'p2': 3, 'd1': 1, 'd2': 1}
path4 = {'s': 2, 'p': 2, 'd' : 1}
inception_block2_path1, inception_block2_path2, inception_block2_path3, inception_block2_path4 = get_inception_audio_block(channel[3], channel[4], kernel_size[3], path1, path2, path3, path4)
x = inception_block2_path1(x) + inception_block2_path2(x) + inception_block2_path3(x) + inception_block2_path4(x)
print(x.shape)

# Residual stage 3; the shortcut uses padding 1 here
# (recorded run: torch.Size([128, 13, 1000])).
resnet_block3, resnet_skip3 = get_resnet_audio_block(channel[4], channel[5], kernel_size[4], block={'p1': 7, 'p2': 6, 's1': 1, 's2': 2, 'd1': 1, 'd2': 1}, skip={'s': 2, 'p': 1, 'd': 1})
x = resnet_block3(x) + resnet_skip3(x)
print(x.shape)

# Inception stage 3 (recorded run: torch.Size([128, 9, 500])).
path1 = {'s': 2, 'p': 8, 'd': 1}
path2 = {'s1': 2, 's2': 1, 'p1': 8, 'p2': 8, 'd1': 1, 'd2': 1}
path3 = {'s1': 1, 's2': 2, 'p1': 8, 'p2': 8, 'd1': 1, 'd2': 1}
path4 = {'s': 2, 'p': 8, 'd' : 1}
inception_block3_path1, inception_block3_path2, inception_block3_path3, inception_block3_path4 = get_inception_audio_block(channel[5], channel[6], kernel_size[5], path1, path2, path3, path4)
x = inception_block3_path1(x) + inception_block3_path2(x) + inception_block3_path3(x) + inception_block3_path4(x)
print(x.shape)

# Residual stage 4 (recorded run: torch.Size([128, 6, 250])).
resnet_block4, resnet_skip4 = get_resnet_audio_block(channel[6], channel[7], kernel_size[6], block={'p1': 11, 'p2': 11, 's1': 1, 's2': 2, 'd1': 1, 'd2': 1}, skip={'s': 2, 'p': 0, 'd': 1})
x = resnet_block4(x) + resnet_skip4(x)
print(x.shape)

# Inception stage 4 (recorded run: torch.Size([128, 4, 125])).
path1 = {'s': 2, 'p': 14, 'd': 1}
path2 = {'s1': 2, 's2': 1, 'p1': 16, 'p2': 13, 'd1': 1, 'd2': 1}
path3 = {'s1': 1, 's2': 2, 'p1': 15, 'p2': 13, 'd1': 1, 'd2': 1}
path4 = {'s': 2, 'p': 14, 'd' : 1}
inception_block4_path1, inception_block4_path2, inception_block4_path3, inception_block4_path4 = get_inception_audio_block(channel[7], channel[8], kernel_size[7], path1, path2, path3, path4)
x = inception_block4_path1(x) + inception_block4_path2(x) + inception_block4_path3(x) + inception_block4_path4(x)
print(x.shape)

# Residual stage 5; the shortcut uses padding 4 here
# (recorded run: torch.Size([128, 3, 67])).
resnet_block5, resnet_skip5 = get_resnet_audio_block(channel[8], channel[9], kernel_size[8], block={'p1': 20, 'p2': 20, 's1': 1, 's2': 2, 'd1': 1, 'd2': 1}, skip={'s': 2, 'p': 4, 'd': 1})
x = resnet_block5(x) + resnet_skip5(x)
print(x.shape)

# Inception stage 5: all strides are 1 (recorded run: torch.Size([128, 2, 41])).
path1 = {'s': 1, 'p': 10, 'd': 1}
path2 = {'s1': 1, 's2': 1, 'p1': 16, 'p2': 17, 'd1': 1, 'd2': 1}
path3 = {'s1': 1, 's2': 1, 'p1': 17, 'p2': 16, 'd1': 1, 'd2': 1}
path4 = {'s': 1, 'p': 10, 'd' : 1}
inception_block5_path1, inception_block5_path2, inception_block5_path3, inception_block5_path4 = get_inception_audio_block(channel[9], channel[10], kernel_size[9], path1, path2, path3, path4)
x = inception_block5_path1(x) + inception_block5_path2(x) + inception_block5_path3(x) + inception_block5_path4(x)
print(x.shape)

torch.Size([128, 64, 8000])
torch.Size([128, 42, 4000])
torch.Size([128, 28, 4000])
torch.Size([128, 19, 1998])
torch.Size([128, 13, 1000])
torch.Size([128, 9, 500])
torch.Size([128, 6, 250])
torch.Size([128, 4, 125])
torch.Size([128, 3, 67])
torch.Size([128, 2, 41])


[1, 64, 42, 28, 19, 13, 9, 6, 4, 3, 2]

# Done

In [1]:
# Print a completion marker.
print('Done')

Done
