In [1]:
!pip3 install torchinfo

Collecting torchinfo
  Downloading https://files.pythonhosted.org/packages/4f/b1/4b310bd715885636e7174b4b52817202fff0ae3609ca2bfb17f28e33e0a1/torchinfo-0.0.8-py3-none-any.whl
Installing collected packages: torchinfo
Successfully installed torchinfo-0.0.8


In [2]:
import numpy as np
import torch
import torch.nn.functional as F
import torch.nn as nn
import sys
from torch.autograd import Variable
import math
# from torchsummary import summary
from torchinfo import summary

import os
import soundfile as sf
from os import listdir
from os.path import isdir, join
import pathlib
import torch.optim as optim
import tensorflow as tf

output_folder = pathlib.Path('/content/output')

In [3]:
!mkdir output

In [4]:
 print(torch.cuda.device_count())

1


In [5]:
print(torch.cuda.get_device_name(0))

Tesla T4


**Data Import and Processing**

In [6]:
data_dir = pathlib.Path('/content/data')
if not data_dir.exists():
  tf.keras.utils.get_file(
      'speech_commands_v0.02.tar.gz',
      origin="http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz",
      extract=True,
      cache_dir='.', cache_subdir='data')

Downloading data from http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz


In [7]:
keywords = [name for name in listdir(data_dir) if isdir(join(data_dir, name))]
#remove bg noise as it not needed and requires extra pre-processing
keywords.remove('_background_noise_')
print(keywords)

['dog', 'follow', 'two', 'bird', 'marvin', 'bed', 'cat', 'four', 'learn', 'eight', 'off', 'left', 'go', 'forward', 'house', 'one', 'stop', 'no', 'backward', 'visual', 'zero', 'sheila', 'yes', 'up', 'wow', 'seven', 'right', 'six', 'happy', 'down', 'on', 'tree', 'nine', 'three', 'five']


In [8]:
word2index = {
    # core words
    "backward": 0,
    "bed": 1,
    "bird": 2,
    "cat": 3,
    "dog": 4,
    "down": 5,
    "eight": 6,
    "five": 7,
    "follow": 8,
    "forward": 9,
    "four": 10,
    "go": 11,
    "happy": 12,
    "house": 13,
    "learn": 14,
    "left": 15,
    "marvin": 16,
    "nine": 17,
    "no": 18,
    "off": 19,
    "on":20,
    "one":21,
    "right":22,
    "seven":23,
    "sheila":24,
    "six":25,
    "stop":26,
    "three":27,
    "tree":28,
    "two":29,
    "up":30,
    "visual":31,
    "wow":32,
    "yes":33,
    "zero":34
}

index2word = [word for word in word2index]

In [9]:
filenames = []
y = []
for word_class in word2index:
#     print(join(raw_data_path, trget))
#     filenames.append(listdir(join(raw_data_path, target)))
    for files in listdir(join(data_dir, word_class)):
        filenames.append(join(word_class, files))
        y.append(word2index[word_class]) 

In [12]:
#create a dictionary of the filenames and labels
combined_dict = dict(zip(filenames,y))
#save that dictionary
np.save('files_dict', combined_dict)

In [13]:
from sklearn.model_selection import train_test_split
train_data, validation_data, train_classes, validation_classes = train_test_split(filenames, y,
                                                                      test_size=0.2, random_state=42, shuffle=True)

In [14]:
def getLists(files_in, output_name):
  MyFile=open(output_name,'w')

  for element in files_in:
      MyFile.write(element)
      MyFile.write('\n')
  MyFile.close()

In [15]:
getLists(filenames, 'fileslist.scp')
getLists(validation_data, 'test.scp')
getLists(train_data, 'train.scp')

## Implementation



To Implement sinconv I need to:
1. Import all relevant pytorch modules
2. Correctly copy over all relevant variables that are used in cfg
3. Correctly implement the SincConv layer
4. Implement my own set of CNN layers.
5. Train and test

I must note that there are many different files and classes in SincNet and I have to adjust them to work properly in this .ipynb environment

In [16]:
def flip(x, dim):
    xsize = x.size()
    dim = x.dim() + dim if dim < 0 else dim
    x = x.contiguous()
    x = x.view(-1, *xsize[dim:])
    x = x.view(x.size(0), x.size(1), -1)[:, getattr(torch.arange(x.size(1)-1, 
                      -1, -1), ('cpu','cuda')[x.is_cuda])().long(), :]
    return x.view(xsize)


def sinc(band,t_right):
    y_right= torch.sin(2*math.pi*band*t_right)/(2*math.pi*band*t_right)
    y_left= flip(y_right,0)

    y=torch.cat([y_left,Variable(torch.ones(1)).cuda(),y_right])

    return y
    

class SincConv_fast(nn.Module):
    """Sinc-based convolution
    Parameters
    ----------
    in_channels : `int`
        Number of input channels. Must be 1.
    out_channels : `int`
        Number of filters.
    kernel_size : `int`
        Filter length.
    sample_rate : `int`, optional
        Sample rate. Defaults to 16000.
    Usage
    -----
    See `torch.nn.Conv1d`
    Reference
    ---------
    Mirco Ravanelli, Yoshua Bengio,
    "Speaker Recognition from raw waveform with SincNet".
    https://arxiv.org/abs/1808.00158
    """

    @staticmethod
    def to_mel(hz):
        return 2595 * np.log10(1 + hz / 700)

    @staticmethod
    def to_hz(mel):
        return 700 * (10 ** (mel / 2595) - 1)

    def __init__(self, out_channels, kernel_size, sample_rate=16000, in_channels=1,
                 stride=1, padding=0, dilation=1, bias=False, groups=1, min_low_hz=50, min_band_hz=50):

        super(SincConv_fast,self).__init__()

        if in_channels != 1:
            #msg = (f'SincConv only support one input channel '
            #       f'(here, in_channels = {in_channels:d}).')
            msg = "SincConv only support one input channel (here, in_channels = {%i})" % (in_channels)
            raise ValueError(msg)

        self.out_channels = out_channels
        self.kernel_size = kernel_size
        
        # Forcing the filters to be odd (i.e, perfectly symmetrics)
        if kernel_size%2==0:
            self.kernel_size=self.kernel_size+1
            
        self.stride = stride
        self.padding = padding
        self.dilation = dilation

        if bias:
            raise ValueError('SincConv does not support bias.')
        if groups > 1:
            raise ValueError('SincConv does not support groups.')

        self.sample_rate = sample_rate
        self.min_low_hz = min_low_hz
        self.min_band_hz = min_band_hz

        # initialize filterbanks such that they are equally spaced in Mel scale
        low_hz = 30
        high_hz = self.sample_rate / 2 - (self.min_low_hz + self.min_band_hz)

        mel = np.linspace(self.to_mel(low_hz),
                          self.to_mel(high_hz),
                          self.out_channels + 1)
        hz = self.to_hz(mel)
        

        # filter lower frequency (out_channels, 1)
        self.low_hz_ = nn.Parameter(torch.Tensor(hz[:-1]).view(-1, 1))

        # filter frequency band (out_channels, 1)
        self.band_hz_ = nn.Parameter(torch.Tensor(np.diff(hz)).view(-1, 1))

        # Hamming window
        #self.window_ = torch.hamming_window(self.kernel_size)
        n_lin=torch.linspace(0, (self.kernel_size/2)-1, steps=int((self.kernel_size/2))) # computing only half of the window
        self.window_=0.54-0.46*torch.cos(2*math.pi*n_lin/self.kernel_size);


        # (1, kernel_size/2)
        n = (self.kernel_size - 1) / 2.0
        self.n_ = 2*math.pi*torch.arange(-n, 0).view(1, -1) / self.sample_rate # Due to symmetry, I only need half of the time axes

 


    def forward(self, waveforms):
        """
        Parameters
        ----------
        waveforms : `torch.Tensor` (batch_size, 1, n_samples)
            Batch of waveforms.
        Returns
        -------
        features : `torch.Tensor` (batch_size, out_channels, n_samples_out)
            Batch of sinc filters activations.
        """

        self.n_ = self.n_.to(waveforms.device)

        self.window_ = self.window_.to(waveforms.device)

        low = self.min_low_hz  + torch.abs(self.low_hz_)
        
        high = torch.clamp(low + self.min_band_hz + torch.abs(self.band_hz_),self.min_low_hz,self.sample_rate/2)
        band=(high-low)[:,0]
        
        f_times_t_low = torch.matmul(low, self.n_)
        f_times_t_high = torch.matmul(high, self.n_)

        band_pass_left=((torch.sin(f_times_t_high)-torch.sin(f_times_t_low))/(self.n_/2))*self.window_ # Equivalent of Eq.4 of the reference paper (SPEAKER RECOGNITION FROM RAW WAVEFORM WITH SINCNET). I just have expanded the sinc and simplified the terms. This way I avoid several useless computations. 
        band_pass_center = 2*band.view(-1,1)
        band_pass_right= torch.flip(band_pass_left,dims=[1])
        
        
        band_pass=torch.cat([band_pass_left,band_pass_center,band_pass_right],dim=1)

        
        band_pass = band_pass / (2*band[:,None])
        

        self.filters = (band_pass).view(
            self.out_channels, 1, self.kernel_size)

        return F.conv1d(waveforms, self.filters, stride=self.stride,
                        padding=self.padding, dilation=self.dilation,
                         bias=None, groups=1) 

In [17]:
class LayerNorm(nn.Module):

    def __init__(self, features, eps=1e-6):
        super(LayerNorm,self).__init__()
        self.gamma = nn.Parameter(torch.ones(features))
        self.beta = nn.Parameter(torch.zeros(features))
        self.eps = eps

    def forward(self, x):
        mean = x.mean(-1, keepdim=True)
        std = x.std(-1, keepdim=True)
        return self.gamma * (x - mean) / (std + self.eps) + self.beta

In [18]:
def act_fun(act_type):

 if act_type=="relu":
    return nn.ReLU()
            
 if act_type=="tanh":
    return nn.Tanh()
            
 if act_type=="sigmoid":
    return nn.Sigmoid()
           
 if act_type=="leaky_relu":
    return nn.LeakyReLU(0.2)
            
 if act_type=="elu":
    return nn.ELU()
                     
 if act_type=="softmax":
    return nn.LogSoftmax()
        
 if act_type=="linear":
    return nn.LeakyReLU(1)

In [19]:
class SincNet(nn.Module):
    
    def __init__(self,options):
       super(SincNet,self).__init__()
    
       self.cnn_N_filt=options['cnn_N_filt'] #80,60,60
       self.cnn_len_filt=options['cnn_len_filt'] #251,5,5
       self.cnn_max_pool_len=options['cnn_max_pool_len']#3,3,3
       
       
       self.cnn_act=options['cnn_act']#leaky_relu
       self.cnn_drop=options['cnn_drop']#0.0
       
       self.cnn_use_laynorm=options['cnn_use_laynorm']#True
       self.cnn_use_batchnorm=options['cnn_use_batchnorm']
       self.cnn_use_laynorm_inp=options['cnn_use_laynorm_inp']#True
       self.cnn_use_batchnorm_inp=options['cnn_use_batchnorm_inp']
       
       self.input_dim=int(options['input_dim'])#3200 (wlen)
       
       self.fs=options['fs'] #16,000
       
       self.N_cnn_lay=len(options['cnn_N_filt'])#3
       self.conv  = nn.ModuleList([])
       self.bn  = nn.ModuleList([])
       self.ln  = nn.ModuleList([])
       self.act = nn.ModuleList([])
       self.drop = nn.ModuleList([])
       self.linear = nn.ModuleList([])
       
             
       if self.cnn_use_laynorm_inp:
           self.ln0=LayerNorm(self.input_dim)
           
       if self.cnn_use_batchnorm_inp:
           self.bn0=nn.BatchNorm1d([self.input_dim],momentum=0.05)
           
       current_input=self.input_dim 
       
       for i in range(self.N_cnn_lay):#loop len=3
         
         N_filt=int(self.cnn_N_filt[i])#80,60,60
         len_filt=int(self.cnn_len_filt[i])#251,5,5
         
         # dropout
         self.drop.append(nn.Dropout(p=self.cnn_drop[i]))#0.0
         
         # activation
         self.act.append(act_fun(self.cnn_act[i]))#leaky_Relu
                    
         # layer norm initialization         
         self.ln.append(LayerNorm([N_filt,int((current_input-self.cnn_len_filt[i]+1)/self.cnn_max_pool_len[i])]))

         self.bn.append(nn.BatchNorm1d(N_filt,int((current_input-self.cnn_len_filt[i]+1)/self.cnn_max_pool_len[i]),momentum=0.05))
            

         if i==0:
          self.conv.append(SincConv_fast(self.cnn_N_filt[0],self.cnn_len_filt[0],self.fs))#on first pass send params to SincConv
              
         else:
          self.conv.append(nn.Conv1d(self.cnn_N_filt[i-1], self.cnn_N_filt[i], self.cnn_len_filt[i]))#this has 2 passes - are a Conv1D 
         current_input=int((current_input-self.cnn_len_filt[i]+1)/self.cnn_max_pool_len[i])

       self.linear.append(nn.Linear(6420, 500))
       self.linear.append(nn.Linear(500, 35))
       self.drop.append(nn.Dropout(p=0.5))
       self.act.append(act_fun(self.cnn_act[3]))
      #  self.out_dim=current_input*N_filt



    def forward(self, x): #is x inherited from the above function i.e. is it out_dim?
       batch=x.shape[0]
       seq_len=x.shape[1]
       
       if bool(self.cnn_use_laynorm_inp):
        x=self.ln0((x))
        
       if bool(self.cnn_use_batchnorm_inp):
        x=self.bn0((x))
        
       x=x.view(batch,1,seq_len)

       
       for i in range(self.N_cnn_lay):
          
         if self.cnn_use_laynorm[i]:
          if i==0:
           x = self.drop[i](self.act[i](self.ln[i](F.max_pool1d(torch.abs(self.conv[i](x)), self.cnn_max_pool_len[i]))))  
          else:
           x = self.drop[i](self.act[i](self.ln[i](F.max_pool1d(self.conv[i](x), self.cnn_max_pool_len[i]))))   
          
         if self.cnn_use_batchnorm[i]:
          x = self.drop[i](self.act[i](self.bn[i](F.max_pool1d(self.conv[i](x), self.cnn_max_pool_len[i]))))

         if self.cnn_use_batchnorm[i]==False and self.cnn_use_laynorm[i]==False:
          x = self.drop[i](self.act[i](F.max_pool1d(self.conv[i](x), self.cnn_max_pool_len[i])))

         
      #  x=torch.flatten(x)
       x = x.view(batch,-1)
       x=self.linear[0](x)
       x=self.linear[1](x)
       x=self.drop[3](x) 
       x=self.act[3](x)
      

       return x
   

In [None]:
#variables for architectures:
wlen=3200
wshift = 160
fs=16000
cnn_N_filt=80,60,60
# linear_len=6420,500, 35
cnn_len_filt=251,5,5
cnn_max_pool_len=3,3,3
cnn_use_laynorm_inp=True
cnn_use_batchnorm_inp=False
cnn_use_laynorm=True,True,True
cnn_use_batchnorm=False,False,False
cnn_act='leaky_relu','leaky_relu','leaky_relu','softmax'
cnn_drop=0.0,0.0,0.0

In [None]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [None]:
CNN_arch = {'input_dim': wlen,
          'fs': fs,
          'cnn_N_filt': cnn_N_filt,
          'cnn_len_filt': cnn_len_filt,
          'cnn_max_pool_len':cnn_max_pool_len,
          'cnn_use_laynorm_inp': cnn_use_laynorm_inp,
          'cnn_use_batchnorm_inp': cnn_use_batchnorm_inp,
          'cnn_use_laynorm':cnn_use_laynorm,
          'cnn_use_batchnorm':cnn_use_batchnorm,
          'cnn_act': cnn_act,
          'cnn_drop':cnn_drop,          
          }

SincLayer=SincNet(CNN_arch)
SincLayer.cuda()
print(SincLayer)
n = count_parameters(SincLayer)
print("Number of parameters: %s" % n)

SincNet(
  (conv): ModuleList(
    (0): SincConv_fast()
    (1): Conv1d(80, 60, kernel_size=(5,), stride=(1,))
    (2): Conv1d(60, 60, kernel_size=(5,), stride=(1,))
  )
  (bn): ModuleList(
    (0): BatchNorm1d(80, eps=983, momentum=0.05, affine=True, track_running_stats=True)
    (1): BatchNorm1d(60, eps=326, momentum=0.05, affine=True, track_running_stats=True)
    (2): BatchNorm1d(60, eps=107, momentum=0.05, affine=True, track_running_stats=True)
  )
  (ln): ModuleList(
    (0): LayerNorm()
    (1): LayerNorm()
    (2): LayerNorm()
  )
  (act): ModuleList(
    (0): LeakyReLU(negative_slope=0.2)
    (1): LeakyReLU(negative_slope=0.2)
    (2): LeakyReLU(negative_slope=0.2)
    (3): LogSoftmax(dim=None)
  )
  (drop): ModuleList(
    (0): Dropout(p=0.0, inplace=False)
    (1): Dropout(p=0.0, inplace=False)
    (2): Dropout(p=0.0, inplace=False)
    (3): Dropout(p=0.5, inplace=False)
  )
  (linear): ModuleList(
    (0): Linear(in_features=6420, out_features=500, bias=True)
    (1): Linea

In [None]:
data_folder = data_dir
outdim = len(keywords)
lab_dict = np.load('files_dict.npy', allow_pickle=True).item()
N_batches=800
N_epochs=10

class_lay=35
class_drop=0.0
class_use_laynorm_inp=False
class_use_batchnorm_inp=False
class_use_batchnorm=False
class_use_laynorm=False
class_act='softmax'


lr=0.001
batch_size=128
Batch_dev=128
N_epochs=100
N_batches=800
N_eval_epoch=8
seed=42

torch.manual_seed(seed)
np.random.seed(seed)

# loss function
cost = nn.NLLLoss()

In [None]:
def ReadList(list_file):
 f=open(list_file,"r")
 lines=f.readlines()
 list_sig=[]
 for x in lines:
    list_sig.append(x.rstrip())
 f.close()
 return list_sig

In [None]:
wav_lst_tr=ReadList('train.scp')
snt_tr=len(wav_lst_tr)

# test list
wav_lst_te=ReadList('test.scp')
snt_te=len(wav_lst_te)

In [None]:
def create_batches_rnd(batch_size,data_folder,wav_lst,N_snt,wlen,lab_dict,fact_amp):
    
 # Initialization of the minibatch (batch_size,[0=>x_t,1=>x_t+N,1=>random_samp])
 sig_batch=np.zeros([batch_size,wlen])
 lab_batch=np.zeros(batch_size)
  
 snt_id_arr=np.random.randint(N_snt, size=batch_size)
 
 rand_amp_arr = np.random.uniform(1.0-fact_amp,1+fact_amp,batch_size)

 for i in range(batch_size):
     
  # select a random sentence from the list 
  #[fs,signal]=scipy.io.wavfile.read(data_folder+wav_lst[snt_id_arr[i]])
  #signal=signal.astype(float)/32768

  [signal, fs] = sf.read(str(data_folder)+'/'+wav_lst[snt_id_arr[i]])

  # print(str(data_folder)+'/'+wav_lst[snt_id_arr[i]])
  # print(signal.shape)
  # accesing to a random chunk
  snt_len=signal.shape[0]
  snt_beg=np.random.randint(snt_len-wlen-1) #randint(0, snt_len-2*wlen-1)
  snt_end=snt_beg+wlen

  channels = len(signal.shape)
  if channels == 2:
    print('WARNING: stereo to mono: '+data_folder+wav_lst[snt_id_arr[i]])
    signal = signal[:,0]
  
  sig_batch[i,:]=signal[snt_beg:snt_end]*rand_amp_arr[i]
  lab_batch[i]=lab_dict[wav_lst[snt_id_arr[i]]]
  
 inp=Variable(torch.from_numpy(sig_batch).float().cuda().contiguous())
 lab=Variable(torch.from_numpy(lab_batch).float().cuda().contiguous())
#  print(lab)
 return inp,lab  

In [None]:
[inp,lab]=create_batches_rnd(128,data_folder,wav_lst_tr,snt_tr,wlen,lab_dict,0.2)
print(wlen)
print(inp.shape)
summary(SincLayer, input_data=[inp])
# pout1 =SincLayer(inp)
# print('SINC +Conv OUTPUT TENSOR: ')
# print(pout1)
# pout1=pout1.reshape(128, 856, -1)
# print('SINC +Conv OUT SHAPE: ')
# print(pout1.shape)


3200
torch.Size([128, 3200])




Layer (type:depth-idx)                   Output Shape              Param #
├─LayerNorm: 1-1                         [128, 3200]               6,400
├─ModuleList: 1                          []                        --
|    └─SincConv_fast: 2-1                [128, 80, 2950]           160
├─ModuleList: 1                          []                        --
|    └─LayerNorm: 2-2                    [128, 80, 983]            157,280
├─ModuleList: 1                          []                        --
|    └─LeakyReLU: 2-3                    [128, 80, 983]            --
├─ModuleList: 1                          []                        --
|    └─Dropout: 2-4                      [128, 80, 983]            --
├─ModuleList: 1                          []                        --
|    └─Conv1d: 2-5                       [128, 60, 979]            24,060
├─ModuleList: 1                          []                        --
|    └─LayerNorm: 2-6                    [128, 60, 326]            39,12

In [None]:
pout1 =SincLayer(inp)
print('SINC +Conv OUTPUT TENSOR: ')
print(pout1)
# pout1=pout1.reshape(128, 856, -1)
print('SINC +Conv OUT SHAPE: ')
print(pout1.shape)

SINC +Conv OUTPUT TENSOR: 
tensor([[-3.6250, -3.8474, -3.2508,  ..., -3.6250, -2.9977, -3.6250],
        [-3.9154, -3.6919, -3.5398,  ..., -3.3466, -3.9104, -3.9750],
        [-3.5921, -3.5921, -3.5921,  ..., -3.1246, -4.1486, -3.0319],
        ...,
        [-2.8480, -3.7014, -3.1062,  ..., -4.5957, -3.6975, -4.0401],
        [-3.6221, -3.6221, -3.6221,  ..., -3.6221, -3.6221, -3.6221],
        [-3.7958, -4.1171, -3.6694,  ..., -3.4443, -2.8479, -3.8363]],
       device='cuda:0', grad_fn=<LogSoftmaxBackward>)
SINC +Conv OUT SHAPE: 
torch.Size([128, 35])




In [None]:
output_folder = str(output_folder)

In [None]:
optimizer_CNN = optim.RMSprop(SincLayer.parameters(), lr=lr,alpha=0.95, eps=1e-8) 
# optimizer_DNN1 = optim.RMSprop(ConvLayer.parameters(), lr=lr,alpha=0.95, eps=1e-8) 
# optimizer_DNN2 = optim.RMSprop(ConvLayer2.parameters(), lr=lr,alpha=0.95, eps=1e-8) 

for epoch in range(N_epochs):
  
  test_flag=0
  SincLayer.train()
  # ConvLayer.train()

 
  loss_sum=0
  err_sum=0

  for i in range(N_batches):

    [inp,lab]=create_batches_rnd(batch_size,data_folder,wav_lst_tr,snt_tr,wlen,lab_dict,0.2)
    # print(inp.shape)
    pout=SincLayer(inp)
    # print(pout.shape)
    
    pred=torch.max(pout,dim=1)[1]
    loss = cost(pout, lab.long())
    err = torch.mean((pred!=lab.long()).float())
    
   
    
    optimizer_CNN.zero_grad()
    # optimizer_DNN1.zero_grad() 
    # optimizer_DNN2.zero_grad() 

    loss.backward()
    optimizer_CNN.step()
    # optimizer_DNN1.step()
    # optimizer_DNN2.step()
    
    loss_sum=loss_sum+loss.detach()
    err_sum=err_sum+err.detach()
 

  loss_tot=loss_sum/N_batches
  err_tot=err_sum/N_batches
  
 
   
   
# Full Validation  new  
  if epoch%N_eval_epoch==0:
      
   SincLayer.eval()
  #  ConvLayer.eval()
   
   test_flag=1 
   loss_sum=0
   err_sum=0
   err_sum_snt=0
   
   with torch.no_grad():  
    for i in range(snt_te):
       
     #[fs,signal]=scipy.io.wavfile.read(data_folder+wav_lst_te[i])
     #signal=signal.astype(float)/32768

     [signal, fs] = sf.read(str(data_folder)+'/'+wav_lst_te[i])

     signal=torch.from_numpy(signal).float().cuda().contiguous()
     lab_batch=lab_dict[wav_lst_te[i]]
     
     # split signals into chunks
     beg_samp=0
     end_samp=wlen
     
     N_fr=int((signal.shape[0]-wlen)/(wshift))
     

     sig_arr=torch.zeros([Batch_dev,wlen]).float().cuda().contiguous()
     lab= Variable((torch.zeros(N_fr+1)+lab_batch).cuda().contiguous().long())
     pout=Variable(torch.zeros(N_fr+1,class_lay).float().cuda().contiguous())
     count_fr=0
     count_fr_tot=0
     while end_samp<signal.shape[0]:
         sig_arr[count_fr,:]=signal[beg_samp:end_samp]
         beg_samp=beg_samp+wshift
         end_samp=beg_samp+wlen
         count_fr=count_fr+1
         count_fr_tot=count_fr_tot+1
         if count_fr==Batch_dev:
             inp=Variable(sig_arr)
             pout[count_fr_tot-Batch_dev:count_fr_tot,:]=SincLayer(inp)
             count_fr=0
             sig_arr=torch.zeros([Batch_dev,wlen]).float().cuda().contiguous()
   
     if count_fr>0:
      inp=Variable(sig_arr[0:count_fr])
      pout[count_fr_tot-count_fr:count_fr_tot,:]=SincLayer(inp)

    
     pred=torch.max(pout,dim=1)[1]
     loss = cost(pout, lab.long())
     err = torch.mean((pred!=lab.long()).float())
    
     [val,best_class]=torch.max(torch.sum(pout,dim=0),0)
     err_sum_snt=err_sum_snt+(best_class!=lab[0]).float()
    
    
     loss_sum=loss_sum+loss.detach()
     err_sum=err_sum+err.detach()
    
    err_tot_dev_snt=err_sum_snt/snt_te
    loss_tot_dev=loss_sum/snt_te
    err_tot_dev=err_sum/snt_te

  
   print("epoch %i, loss_tr=%f err_tr=%f loss_te=%f err_te=%f err_te_snt=%f" % (epoch, loss_tot,err_tot,loss_tot_dev,err_tot_dev,err_tot_dev_snt))
  
   with open(str(output_folder)+"/res.res", "a") as res_file:
    res_file.write("epoch %i, loss_tr=%f err_tr=%f loss_te=%f err_te=%f err_te_snt=%f\n" % (epoch, loss_tot,err_tot,loss_tot_dev,err_tot_dev,err_tot_dev_snt))   

   checkpoint={'SincLayer_par': SincLayer.state_dict()
               }
   torch.save(checkpoint,output_folder+'/model_raw.pkl')
  
  else:
   print("epoch %i, loss_tr=%f err_tr=%f" % (epoch, loss_tot,err_tot))



epoch 0, loss_tr=3.250765 err_tr=0.861504 loss_te=2.955486 err_te=0.795545 err_te_snt=0.526316
epoch 1, loss_tr=3.190054 err_tr=0.843457
epoch 2, loss_tr=3.158640 err_tr=0.836406
epoch 3, loss_tr=3.125574 err_tr=0.827119
epoch 4, loss_tr=3.102938 err_tr=0.821416
epoch 5, loss_tr=3.092814 err_tr=0.818437
epoch 6, loss_tr=3.067305 err_tr=0.812676
epoch 7, loss_tr=3.057657 err_tr=0.809687
epoch 8, loss_tr=3.051307 err_tr=0.808281 loss_te=2.701588 err_te=0.716995 err_te_snt=0.339885
epoch 9, loss_tr=3.041698 err_tr=0.805771
epoch 10, loss_tr=3.032778 err_tr=0.804482
epoch 11, loss_tr=3.030358 err_tr=0.803281
epoch 12, loss_tr=3.017832 err_tr=0.800049
epoch 13, loss_tr=3.005198 err_tr=0.795693
epoch 14, loss_tr=3.006149 err_tr=0.798672
epoch 15, loss_tr=3.003389 err_tr=0.796133
epoch 16, loss_tr=2.996076 err_tr=0.798145 loss_te=2.598333 err_te=0.687237 err_te_snt=0.272513
epoch 17, loss_tr=2.990009 err_tr=0.794844
epoch 18, loss_tr=2.988358 err_tr=0.794297
epoch 19, loss_tr=2.979622 err_tr=

In [None]:
model = SincNet(CNN_arch)
model.load_state_dict(torch.load(output_folder+'/model_raw.pkl'), strict=False)
model.eval()

SincNet(
  (conv): ModuleList(
    (0): SincConv_fast()
    (1): Conv1d(80, 60, kernel_size=(5,), stride=(1,))
    (2): Conv1d(60, 60, kernel_size=(5,), stride=(1,))
  )
  (bn): ModuleList(
    (0): BatchNorm1d(80, eps=983, momentum=0.05, affine=True, track_running_stats=True)
    (1): BatchNorm1d(60, eps=326, momentum=0.05, affine=True, track_running_stats=True)
    (2): BatchNorm1d(60, eps=107, momentum=0.05, affine=True, track_running_stats=True)
  )
  (ln): ModuleList(
    (0): LayerNorm()
    (1): LayerNorm()
    (2): LayerNorm()
  )
  (act): ModuleList(
    (0): LeakyReLU(negative_slope=0.2)
    (1): LeakyReLU(negative_slope=0.2)
    (2): LeakyReLU(negative_slope=0.2)
    (3): LogSoftmax(dim=None)
  )
  (drop): ModuleList(
    (0): Dropout(p=0.0, inplace=False)
    (1): Dropout(p=0.0, inplace=False)
    (2): Dropout(p=0.0, inplace=False)
    (3): Dropout(p=0.5, inplace=False)
  )
  (linear): ModuleList(
    (0): Linear(in_features=6420, out_features=500, bias=True)
    (1): Linea

In [None]:
[inp,lab]=create_batches_rnd(1,data_folder,wav_lst_tr,snt_tr,wlen,lab_dict,0.2)
lab

tensor([15.], device='cuda:0')

In [None]:
inp=inp.cuda()
model = model.cuda()

In [None]:
prediction = model(inp)
pred = torch.max(prediction,dim=1)[1]



In [None]:
pred

tensor([4], device='cuda:0')