Summary:

1. Add a new file named `wavernn` in the `torchaudio/model` folder, and declare its public classes as:

`__all__ = ["ResBlock", "MelResNet", "UpsamplingNet", "WaveRNN"]`

2. Replace all `F.relu` calls with `nn.ReLU` modules

3. Use `nn.Sequential` blocks, as in the `wav2letter` model

4. Add argument comments in each class

5. Test the output tensor size with a random input tensor in `test/test_models.py`


In [7]:
import torch
import torch.nn as nn
from torch import Tensor
#import torch.nn.functional as F

__all__ = ["ResBlock", "MelResNet", "UpsamplingNet", "WaveRNN"]

In [8]:
class ResBlock(nn.Module):
    r"""Residual block built from two pointwise (``kernel_size=1``) 1-D convolutions.

    Computes ``conv1 -> batch_norm1 -> ReLU -> conv2 -> batch_norm2`` and adds the
    block input to the result. Both convolutions map ``dims`` channels to ``dims``
    channels with kernel size 1, so the output has the same shape as the input.

    Args:
        dims (int): Number of input and output channels of the block.
    """

    def __init__(self, dims: int) -> None:
        super().__init__()

        # Creation order is kept as-is: it fixes the parameter/state-dict ordering.
        self.conv1 = nn.Conv1d(dims, dims, kernel_size=1, bias=False)
        self.conv2 = nn.Conv1d(dims, dims, kernel_size=1, bias=False)
        self.batch_norm1 = nn.BatchNorm1d(dims)
        # In-place ReLU overwrites the batch-norm output only; the tensor kept
        # for the skip connection is the untouched block input.
        self.relu = nn.ReLU(inplace=True)
        self.batch_norm2 = nn.BatchNorm1d(dims)

    def forward(self, x: Tensor) -> Tensor:
        r"""
        Args:
            x (Tensor): Input whose channel dimension has size ``dims``.

        Returns:
            Tensor: ``x`` plus the output of the convolutional branch, same shape as ``x``.
        """
        residual = x
        x = self.conv1(x)
        x = self.batch_norm1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.batch_norm2(x)
        return x + residual

class MelResNet(nn.Module):
    r"""Stack of ResBlocks between an input and an output 1-D convolution.

    The input convolution uses ``kernel_size=5`` with no padding, so the output is
    4 steps shorter than the input along the last dimension.

    Args:
        res_blocks (int, optional): Number of ResBlocks. (Default: ``10``)
        input_dims (int, optional): Number of input dimensions. (Default: ``2``)
        hidden_dims (int, optional): Number of hidden dimensions. (Default: ``32``)
        output_dims (int, optional): Number of output dimensions. (Default: ``50``)
    """

    def __init__(self, res_blocks: int = 10,
                 input_dims: int = 2,
                 hidden_dims: int = 32,
                 output_dims: int = 50) -> None:
        super().__init__()

        self.conv_in = nn.Conv1d(input_dims, hidden_dims, kernel_size=5, bias=False)
        self.batch_norm = nn.BatchNorm1d(hidden_dims)
        self.relu = nn.ReLU(inplace=True)
        self.layers = nn.ModuleList(
            [ResBlock(hidden_dims) for _ in range(res_blocks)]
        )
        self.conv_out = nn.Conv1d(hidden_dims, output_dims, kernel_size=1)

    def forward(self, x: Tensor) -> Tensor:
        r"""
        Args:
            x (Tensor): Input of shape ``(batch, input_dims, length)``.

        Returns:
            Tensor: Output of shape ``(batch, output_dims, length - 4)``.
        """
        x = self.conv_in(x)
        x = self.batch_norm(x)
        x = self.relu(x)
        for layer in self.layers:
            x = layer(x)
        return self.conv_out(x)


In [9]:
# Random input batch; the second dimension (80) matches the `in_dims` value
# the model is built with in the next cell.
x = torch.rand(32, 80, 20)

In [10]:
# Build a MelResNet with explicit sizes (passed positionally) and run it on `x`.
res_block = 10  # passed as `res_blocks`
in_dims = 80  # passed as `input_dims`; must equal x.shape[1]
compute_dims = 10  # passed as `hidden_dims`
res_out_dims = 50  # passed as `output_dims`
model = MelResNet(res_block, in_dims, compute_dims, res_out_dims)
output = model(x)

In [11]:
output.shape

torch.Size([32, 50, 16])

In [12]:
import pytest

class TestMelResNet:
    r"""Output-shape check for the ``MelResNet`` model on a random input tensor.

    The class is named ``Test*`` so that pytest collects it and so that it does
    not shadow the ``MelResNet`` model it instantiates.
    """

    # Parametrize names must match the test function's argument names.
    @pytest.mark.parametrize('batch_size', [32])
    @pytest.mark.parametrize('in_dims', [50])
    @pytest.mark.parametrize('in_length', [20])
    @pytest.mark.parametrize('out_dims', [80])
    def test_waveform(self, batch_size, in_dims, in_length, out_dims):
        # Build the model with the channel counts under test; the defaults
        # (2 in, 50 out) would not accept this input.
        model = MelResNet(input_dims=in_dims, output_dims=out_dims)

        x = torch.rand(batch_size, in_dims, in_length)
        out = model(x)

        # conv_in has kernel_size=5 and no padding, which shortens the length by 4.
        assert out.size() == (batch_size, out_dims, in_length - 4)