In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports all
# modules before each cell execution so that edits to the project sources
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from pitch_tracker.ml.model.net import Audio_CNN, Audio_CRNN, create_conv2d_block, conv2d_output_shape
import torch
from torch import nn
from torchinfo import summary

from math import floor

  INST_TAXONOMY = yaml.load(fhandle)
  MIXING_COEFFICIENTS = yaml.load(fhandle)


In [56]:
class Test_Model(nn.Module):
    """Convolutional-recurrent test network: four conv blocks, a GRU and a linear layer.

    The conv blocks use ``padding='same'``, so the input height is preserved,
    while the two ``(1, 5)`` max-pools shrink the width. The final feature map
    is flattened from dimension 2 onward, so the GRU (``batch_first=True``)
    iterates over the 210 output channels of the last conv block and receives
    ``height * pooled_width`` features per step.

    With the default ``input_hw=(1050, 88)`` an input of shape
    ``(batch, 1, 1050, 88)`` yields an output of shape ``(batch, 210, 88)``.

    Args:
        input_hw: ``(height, width)`` of the inputs the model will be fed.
            It is only used to size the GRU input; the default reproduces the
            previously hard-coded ``input_size=3150`` (``1050 * (88 // 5 // 5)``).

    Raises:
        ValueError: if ``width`` is too small to survive both pooling stages.
    """

    def __init__(self, input_hw=(1050, 88)):
        # Zero-argument super() keeps working when the class is redefined in a
        # live kernel, unlike the explicit two-argument form.
        super().__init__()

        pool_width = 5       # width of the (1, pool_width) max-pool kernels
        n_pooled_blocks = 2  # conv2d_block2 and conv2d_block3 are pooled

        height, width = input_hw
        pooled_width = width
        for _ in range(n_pooled_blocks):
            # MaxPool2d with kernel == stride and no padding floors the width.
            pooled_width //= pool_width
        if pooled_width < 1:
            raise ValueError(
                f"input width {width} is too small for {n_pooled_blocks} "
                f"max-pools of width {pool_width}"
            )

        self.conv2d_block1 = create_conv2d_block(
            conv2d_input=(1, 64, 3),
            padding='same',
            maxpool_kernel_size=None,
        )

        self.conv2d_block2 = create_conv2d_block(
            conv2d_input=(64, 64, 3),
            padding='same',
            maxpool_kernel_size=(1, pool_width),
        )

        self.conv2d_block3 = create_conv2d_block(
            conv2d_input=(64, 64, 3),
            padding='same',
            maxpool_kernel_size=(1, pool_width),
        )

        # Pooling is left at create_conv2d_block's default here -- presumably
        # none, since the GRU input size below assumes the width is unchanged
        # by this block; verify against create_conv2d_block.
        self.conv2d_block4 = create_conv2d_block(
            conv2d_input=(64, 210, 3),
            padding='same',
        )

        # Collapse (height, width) into one feature axis: (batch, 210, features).
        self.flatten_layer = nn.Flatten(start_dim=2)

        # No `dropout` argument: nn.GRU only applies dropout between stacked
        # layers, so with the default num_layers=1 the former dropout=0.2 was
        # never applied and merely triggered a UserWarning. Add an explicit
        # nn.Dropout after the GRU if regularisation is wanted.
        self.gru = nn.GRU(
            input_size=height * pooled_width,
            hidden_size=88,
            batch_first=True,
        )
        self.dense_layer = nn.Linear(88, 88)

    def forward(self, x):
        """Run the conv stack, the GRU and the linear layer.

        Args:
            x: tensor of shape ``(batch, 1, height, width)`` whose spatial size
                matches the ``input_hw`` given at construction.

        Returns:
            Tensor of shape ``(batch, 210, 88)``.
        """
        x = self.conv2d_block1(x)
        x = self.conv2d_block2(x)
        x = self.conv2d_block3(x)
        x = self.conv2d_block4(x)
        flat = self.flatten_layer(x)
        # Only the per-step outputs are used; the final hidden state is dropped.
        sequence, _ = self.gru(flat)
        return self.dense_layer(sequence)
            



In [57]:
# Smoke test: build the network, print its layer structure, then push one
# random batch through it and print the resulting output shape.
model = Test_Model()
print(model)

# Batch of 4 single-channel inputs of spatial size 1050 x 88.
dummy_input = torch.randn(4, 1, 1050, 88)
out = model(dummy_input)
print(out.size())

Test_Model(
  (conv2d_block1): Sequential(
    (conv2d): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (relu): ReLU()
    (batch_norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv2d_block2): Sequential(
    (conv2d): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (relu): ReLU()
    (batch_norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (maxpool2d): MaxPool2d(kernel_size=(1, 5), stride=(1, 5), padding=0, dilation=1, ceil_mode=False)
  )
  (conv2d_block3): Sequential(
    (conv2d): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (relu): ReLU()
    (batch_norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (maxpool2d): MaxPool2d(kernel_size=(1, 5), stride=(1, 5), padding=0, dilation=1, ceil_mode=False)
  )
  (conv2d_block4): Sequential(
    (conv2d): Conv2d(64, 210, kernel_size=(3, 3), stride=(1, 1),

In [59]:
# Per-layer output shapes and parameter counts for a batch of 4 inputs.
summary(
    model,
    input_size=(4, 1, 1050, 88),
)

Layer (type:depth-idx)                   Output Shape              Param #
Test_Model                               [4, 210, 88]              --
├─Sequential: 1-1                        [4, 64, 1050, 88]         --
│    └─Conv2d: 2-1                       [4, 64, 1050, 88]         640
│    └─ReLU: 2-2                         [4, 64, 1050, 88]         --
│    └─BatchNorm2d: 2-3                  [4, 64, 1050, 88]         128
├─Sequential: 1-2                        [4, 64, 1050, 17]         --
│    └─Conv2d: 2-4                       [4, 64, 1050, 88]         36,928
│    └─ReLU: 2-5                         [4, 64, 1050, 88]         --
│    └─BatchNorm2d: 2-6                  [4, 64, 1050, 88]         128
│    └─MaxPool2d: 2-7                    [4, 64, 1050, 17]         --
├─Sequential: 1-3                        [4, 64, 1050, 3]          --
│    └─Conv2d: 2-8                       [4, 64, 1050, 17]         36,928
│    └─ReLU: 2-9                         [4, 64, 1050, 17]         --
│   

In [None]:
import yaml
from collections import OrderedDict

In [None]:
# Training hyper-parameters, assembled section by section so the key order in
# the written file follows the grouping below.
p = {}
p.update({'batch_size': 4})                                    # dataset
p.update({'n_epochs': 5, 'learning_rate': 1e-3})               # fit
p.update({                                                     # early stopping
    'es_patience': 10,
    'es_verbose': True,
    'es_dir_path': './checkpoints',
})
p.update({'ls_patience': 8, 'ls_factor': 0.2})                 # lr scheduler
# NOTE(review): 'device' is stored as the literal string 'DEVICE' --
# presumably a placeholder to be substituted later; confirm.
p.update({'device': 'DEVICE'})                                 # misc

# Write the settings to config.yaml (truncating any existing file);
# sort_keys=False keeps the insertion order instead of sorting alphabetically.
with open('config.yaml', 'w') as f:
    yaml.dump(p, stream=f, sort_keys=False)

In [None]:
# Show the shape of the dummy batch created earlier in the notebook.
dummy_input.size()

In [7]:
import torch

# Export the model to ONNX by tracing it with the dummy batch; the graph is
# written to dummy_model.onnx and, because verbose=True, also printed.

# Candidate input names. They are built but currently unused: input_names is
# not passed to the exporter below.
in_names = ["actual_input_1"]
in_names.extend("learned_%d" % idx for idx in range(16))
out_names = ["output1"]

torch.onnx.export(
    model,
    (dummy_input,),  # a lone tensor is equivalent to a 1-tuple of that tensor
    f="dummy_model.onnx",
    output_names=out_names,
    opset_version=12,
    do_constant_folding=True,
    verbose=True,
)

Exported graph: graph(%input.1 : Float(4, 1, 1050, 88, strides=[92400, 92400, 88, 1], requires_grad=0, device=cpu),
      %conv2d_block1.conv2d.weight : Float(256, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=1, device=cpu),
      %conv2d_block1.conv2d.bias : Float(256, strides=[1], requires_grad=1, device=cpu),
      %conv2d_block1.batch_norm.weight : Float(256, strides=[1], requires_grad=1, device=cpu),
      %conv2d_block1.batch_norm.bias : Float(256, strides=[1], requires_grad=1, device=cpu),
      %conv2d_block1.batch_norm.running_mean : Float(256, strides=[1], requires_grad=0, device=cpu),
      %conv2d_block1.batch_norm.running_var : Float(256, strides=[1], requires_grad=0, device=cpu),
      %conv2d_block2.conv2d.weight : Float(256, 256, 3, 3, strides=[2304, 9, 3, 1], requires_grad=1, device=cpu),
      %conv2d_block2.conv2d.bias : Float(256, strides=[1], requires_grad=1, device=cpu),
      %conv2d_block2.batch_norm.running_mean : Float(256, strides=[1], requires_grad=0, device

  _C._jit_pass_onnx_node_shape_type_inference(node, params_dict, opset_version)
  _C._jit_pass_onnx_graph_shape_type_inference(
  _C._jit_pass_onnx_graph_shape_type_inference(
