In [16]:
import numpy as np
import torch
import torch.nn.functional as F
import torch.nn as nn
import sys
from torch.autograd import Variable
import math

In [17]:
import torch

# Build a 2x3 tensor of the integers 0..5.
a = torch.arange(6).view(2, 3)
print(a)
# tensor([[0, 1, 2],
#         [3, 4, 5]])

# Swap the two axes (transpose).
b = a.transpose(0, 1)
print(b.is_contiguous())
# False: the transposed tensor walks the same memory in a different order

# Ask for a contiguous version of the transposed tensor.
c = b.contiguous()
print(c.is_contiguous())
# True: contiguous() made a copy whose elements are laid out in memory without gaps


tensor([[0, 1, 2],
        [3, 4, 5]])
False
True


In [18]:
def flip(x, dim):
    """Return a copy of ``x`` with its entries reversed along axis ``dim``.

    Args:
        x: Input tensor; it is not modified.
        dim: Axis to reverse. Negative values count from the last axis.

    Returns:
        A new tensor with the same shape and dtype as ``x``.
    """
    # torch.flip performs the reversal on x's own device and accepts empty
    # tensors; the earlier view/index implementation crashed on empty input
    # and moved its index tensor with a bare .cuda() call.
    return torch.flip(x, dims=[dim])

In [19]:
# 1-D tensor holding 0..5; the following cells use it to step through flip().
a = torch.arange(6)
print(a)

tensor([0, 1, 2, 3, 4, 5])


In [20]:
# Shape of a, mirroring the `xsize = x.size()` step inside flip().
asize=a.size()

In [21]:
# dim is the number of axes of a (1 for the 1-D tensor above), so slicing the
# shape from that index onward leaves nothing: the result is an empty torch.Size.
dim = a.dim()
asize[dim:]

torch.Size([])

In [22]:
# asize[dim:] is empty here, so this reduces to a.view(-1): a flat 1-D view of a.
b=a.view(-1,*asize[dim:])

In [23]:
# Display b; the flat view holds the same values as a.
b

tensor([0, 1, 2, 3, 4, 5])

In [24]:
# Reverse a along axis 0 with the flip() helper; this rebinds the name c.
c = flip(a,0)

In [25]:
# Display the reversed tensor.
c

tensor([5, 4, 3, 2, 1, 0])

In [None]:
def sinc(band, t_right):
    """Build a symmetric sinc kernel from samples of its right half.

    Evaluates ``sin(2*pi*band*t) / (2*pi*band*t)`` at every entry of
    ``t_right``, mirrors those samples to obtain the left half, and puts the
    value 1 between the two halves as the centre tap.

    Args:
        band: Frequency factor; a Python number or a tensor that broadcasts
            against ``t_right``.
        t_right: 1-D tensor of sample positions for the right half. The
            product ``band * t_right`` must be non-zero everywhere, otherwise
            the division yields NaN.

    Returns:
        1-D tensor of length ``2 * len(t_right) + 1`` on the same device and
        with the same dtype as the computed right half.
    """
    arg = 2 * math.pi * band * t_right
    y_right = torch.sin(arg) / arg
    y_left = torch.flip(y_right, dims=[0])

    # Create the centre tap next to the data: a bare torch.ones(1) is always a
    # CPU float32 tensor and cannot be concatenated with CUDA halves.
    center = y_right.new_ones(1)

    return torch.cat([y_left, center, y_right])

In [32]:
# Demo: build a sinc kernel for band = 0.25 from the sample positions 1..5
# and print the intermediate pieces next to the final result.
band = 0.25
t_right = torch.arange(1, 6, dtype=torch.float32)

y = sinc(band, t_right)

# Right half of the kernel, recomputed here only so it can be displayed.
half_kernel = torch.sin(2*math.pi*band*t_right)/(2*math.pi*band*t_right)

for label, value in (
    ("t_right:", t_right),
    ("y_right:", half_kernel),
    ("y_left:", flip(half_kernel, 0)),
    ("sinc kernel:", y),
    ("shape:", y.shape),
):
    print(label, value)


t_right: tensor([1., 2., 3., 4., 5.])
y_right: tensor([ 6.3662e-01, -2.7828e-08, -2.1221e-01,  2.7828e-08,  1.2732e-01])
y_left: tensor([ 1.2732e-01,  2.7828e-08, -2.1221e-01, -2.7828e-08,  6.3662e-01])
sinc kernel: tensor([ 1.2732e-01,  2.7828e-08, -2.1221e-01, -2.7828e-08,  6.3662e-01,
         1.0000e+00,  6.3662e-01, -2.7828e-08, -2.1221e-01,  2.7828e-08,
         1.2732e-01])
shape: torch.Size([11])


In [28]:
class SincConv_fast(nn.Module):
    """1-D convolution whose kernels are windowed sinc band-pass filters.

    Every output channel is described by two learnable scalars: ``low_hz_``
    (lower cut-off) and ``band_hz_`` (band width). ``forward`` rebuilds the
    kernels from those parameters on each call and applies them with
    ``F.conv1d``.

    Args:
        out_channels: Number of filters.
        kernel_size: Filter length; an even value is increased by one so the
            kernel is symmetric around a centre tap.
        sample_rate: Sample rate used to scale the time axis and to clamp the
            upper cut-off at ``sample_rate / 2``.
        in_channels: Must be 1.
        stride, padding, dilation: Forwarded to ``F.conv1d``.
        bias: Must be falsy.
        groups: Must not exceed 1.
        min_low_hz: Offset added to ``|low_hz_|`` to obtain the lower cut-off.
        min_band_hz: Offset added to ``|band_hz_|`` to obtain the band width.

    Raises:
        ValueError: If ``in_channels != 1``, ``bias`` is truthy, or
            ``groups > 1``.
    """

    @staticmethod
    def to_mel(hz):
        """Convert a frequency in Hz to mel.

        Accepts Python numbers and NumPy arrays as well as torch tensors.
        """
        # torch.log10 only accepts tensors, but __init__ passes plain numbers,
        # so non-tensor input is routed through NumPy.
        if torch.is_tensor(hz):
            return 2595 * torch.log10(1 + hz / 700)
        return 2595 * np.log10(1 + hz / 700)

    @staticmethod
    def to_hz(mel):
        """Convert a mel value (number, array or tensor) back to Hz."""
        return 700 * (10 ** (mel / 2595) - 1)

    def __init__(self, out_channels, kernel_size, sample_rate=16000, in_channels=1, stride=1, padding=0, dilation=1, bias=False, groups=1, min_low_hz=50, min_band_hz=50):

        super(SincConv_fast, self).__init__()
        if in_channels != 1:
            msg = "SincConv only support one input channel (here, in_channels = {%i})" % (in_channels)
            raise ValueError(msg)

        self.out_channels = out_channels
        self.kernel_size = kernel_size

        # Force an odd kernel length so there is a single centre tap.
        if kernel_size % 2 == 0:
            self.kernel_size = kernel_size + 1

        self.stride = stride
        self.padding = padding
        self.dilation = dilation

        if bias:
            raise ValueError("SincConv does not bias")
        if groups > 1:
            raise ValueError('SincConv does not suppooer groups')

        self.sample_rate = sample_rate
        self.min_low_hz = min_low_hz
        self.min_band_hz = min_band_hz

        # Initial cut-offs: out_channels + 1 points equally spaced on the mel scale.
        low_hz = 30
        high_hz = self.sample_rate / 2 - (self.min_low_hz + self.min_band_hz)

        mel = np.linspace(self.to_mel(low_hz), self.to_mel(high_hz), self.out_channels + 1)
        hz = self.to_hz(mel)

        # filter lower frequency (out_channels, 1)
        self.low_hz_ = nn.Parameter(torch.Tensor(hz[:-1]).view(-1, 1))

        # filter frequency band (out_channels, 1)
        self.band_hz_ = nn.Parameter(torch.Tensor(np.diff(hz)).view(-1, 1))

        # Hamming window (only half of it is computed; the other half is mirrored in forward)
        n_lin = torch.linspace(0, (self.kernel_size / 2) - 1, steps=int((self.kernel_size / 2)))
        self.window_ = 0.54 - 0.46 * torch.cos(2 * math.pi * n_lin / self.kernel_size)

        # (1, kernel_size/2): thanks to symmetry only half of the time axis is needed
        n = (self.kernel_size - 1) / 2.0
        self.n_ = 2 * math.pi * torch.arange(-n, 0).view(1, -1) / self.sample_rate

    def forward(self, waveforms):
        """Filter ``waveforms`` with the current band-pass kernels.

        Args:
            waveforms: Tensor of shape (batch, 1, n_samples), as required by
                ``F.conv1d`` with kernels of shape (out_channels, 1, kernel_size).

        Returns:
            Tensor of shape (batch, out_channels, n_samples_out).
        """
        # The time axis and window are plain attributes, so they are moved to
        # the input's device by hand.
        self.n_ = self.n_.to(waveforms.device)
        self.window_ = self.window_.to(waveforms.device)

        low = self.min_low_hz + torch.abs(self.low_hz_)

        high = torch.clamp(low + self.min_band_hz + torch.abs(self.band_hz_), self.min_low_hz, self.sample_rate / 2)
        band = (high - low)[:, 0]

        f_times_t_low = torch.matmul(low, self.n_)
        f_times_t_high = torch.matmul(high, self.n_)

        # Left half of each kernel; the right half is its mirror image.
        band_pass_left = ((torch.sin(f_times_t_high) - torch.sin(f_times_t_low)) / (self.n_ / 2)) * self.window_
        band_pass_center = 2 * band.view(-1, 1)
        band_pass_right = torch.flip(band_pass_left, dims=[1])

        band_pass = torch.cat([band_pass_left, band_pass_center, band_pass_right], dim=1)

        # Normalise so that the centre tap of every filter equals 1.
        band_pass = band_pass / (2 * band[:, None])

        self.filters = (band_pass).view(self.out_channels, 1, self.kernel_size)

        return F.conv1d(waveforms, self.filters, stride=self.stride, padding=self.padding, dilation=self.dilation, bias=None, groups=1)

        



In [None]:
class sinc_conv(nn.Module):
    """1-D convolution with learnable, Hamming-windowed sinc band-pass filters.

    Each filter is the difference of two low-pass sinc kernels whose cut-offs
    come from the learnable parameters ``filt_b1`` (lower cut-off) and
    ``filt_band`` (band width), both stored divided by the sample rate.

    Args:
        N_filt: Number of filters (output channels).
        Filt_dim: Length of each filter in samples; must be odd so that the
            mirrored kernel plus its centre tap has exactly this length.
        fs: Sample rate in Hz.
    """

    def __init__(self, N_filt, Filt_dim, fs):
        super(sinc_conv, self).__init__()

        # Initial cut-offs: N_filt points equally spaced on the mel scale.
        low_freq_mel = 80
        high_freq_mel = (2595 * np.log10(1 + (fs / 2) / 700))  # convert Hz to Mel
        mel_points = np.linspace(low_freq_mel, high_freq_mel, N_filt)  # equally spaced in Mel scale
        f_cos = (700 * (10 ** (mel_points / 2595) - 1))  # convert Mel to Hz

        # Lower / upper edge of each filter: the neighbouring points, with the
        # wrapped-around ends overwritten by fixed values.
        b1 = np.roll(f_cos, 1)
        b2 = np.roll(f_cos, -1)
        b1[0] = 30
        b2[-1] = (fs / 2) - 100

        self.freq_scale = fs * 1.0
        self.filt_b1 = nn.Parameter(torch.from_numpy(b1 / self.freq_scale))
        self.filt_band = nn.Parameter(torch.from_numpy((b2 - b1) / self.freq_scale))

        self.N_filt = N_filt
        self.Filt_dim = Filt_dim
        self.fs = fs

    @staticmethod
    def _sinc(band, t_right):
        """Symmetric sinc kernels, one row per entry of ``band`` (shape (n, 1))."""
        arg = 2 * math.pi * band * t_right
        y_right = torch.sin(arg) / arg
        y_left = torch.flip(y_right, dims=[1])
        # Centre tap is 1; created next to the data so device and dtype match.
        center = y_right.new_ones(y_right.size(0), 1)
        return torch.cat([y_left, center, y_right], dim=1)

    def forward(self, x):
        """Filter ``x`` with the current band-pass kernels.

        Args:
            x: Tensor of shape (batch, 1, n_samples), as required by
                ``F.conv1d`` with kernels of shape (N_filt, 1, Filt_dim).

        Returns:
            Tensor of shape (batch, N_filt, n_samples - Filt_dim + 1).

        Raises:
            ValueError: If ``Filt_dim`` is even.
        """
        N = self.Filt_dim
        half = int((N - 1) / 2)
        if 2 * half + 1 != N:
            raise ValueError("sinc_conv requires an odd Filt_dim (got %d)" % N)

        # Work on whatever device the input lives on instead of assuming CUDA.
        device = x.device

        # Sample times of the right half of the kernel, in seconds.
        t_right = torch.linspace(1, (N - 1) / 2, steps=half, device=device) / self.fs

        min_freq = 50.0
        min_band = 50.0

        filt_beg_freq = torch.abs(self.filt_b1) + min_freq / self.freq_scale
        filt_end_freq = filt_beg_freq + (torch.abs(self.filt_band) + min_band / self.freq_scale)

        # Hamming window over the full kernel length.
        n = torch.linspace(0, N, steps=N, device=device)
        window = (0.54 - 0.46 * torch.cos(2 * math.pi * n / N)).float()

        # All filters at once: column vectors of cut-offs broadcast against t_right.
        beg = filt_beg_freq.float().view(-1, 1)
        end = filt_end_freq.float().view(-1, 1)
        low_pass1 = 2 * beg * self._sinc(beg * self.freq_scale, t_right)
        low_pass2 = 2 * end * self._sinc(end * self.freq_scale, t_right)
        band_pass = low_pass2 - low_pass1

        # Scale every filter by its own maximum, then apply the window.
        band_pass = band_pass / band_pass.max(dim=1, keepdim=True)[0]
        filters = (band_pass * window).to(dtype=x.dtype)

        out = F.conv1d(x, filters.view(self.N_filt, 1, self.Filt_dim))

        return out


        

In [1]:
def act_fun(act_type):
    """Return a newly constructed ``torch.nn`` activation module for ``act_type``.

    Names handled here: "relu", "tanh", "sigmoid", "leaky_relu" (negative
    slope 0.2), "elu", "softmax" (built as ``nn.LogSoftmax(dim=1)``) and
    "linear" (built as ``nn.LeakyReLU(1)``).

    When none of these names matches, execution falls through the lookup
    below without an explicit return.
    """
    # (name, constructor) pairs, compared in order with ==.
    choices = (
        ("relu", nn.ReLU),
        ("tanh", nn.Tanh),
        ("sigmoid", nn.Sigmoid),
        ("leaky_relu", lambda: nn.LeakyReLU(0.2)),
        ("elu", nn.ELU),
        ("softmax", lambda: nn.LogSoftmax(dim=1)),
        ("linear", lambda: nn.LeakyReLU(1)),
    )

    for name, build in choices:
        if act_type == name:
            return build()