In [1]:
import torch
from torch import nn
import torch.nn.functional as F
import numpy as np
import math
import copy
import matplotlib.pyplot as plt
%matplotlib inline
from tensorboardX import SummaryWriter
import time


from scipy import fft
import librosa as lib

import torch.optim as optim
import pandas as pd
import numpy as np
import sys
import os
sys.path.append('/home/ilya/workspace/ESC-50')
from utils import ESC50

from sklearn.metrics import roc_auc_score

# Best validation ROC AUC seen so far; the training loop compares against it
# before writing a new checkpoint.
best_score = float("-inf")

In [2]:
class EnergyError(Exception):
    """Exception carrying an arbitrary payload in ``value``.

    The string form of the exception is the ``repr`` of that payload.
    """

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return "{!r}".format(self.value)

In [3]:
class Wavelet(nn.Module):
    """Multi-level 1-D filter bank generated from one pair of learnable kernels.

    Two parameters of shape ``(1, 1, kernel_size)`` are learned: ``weight_hi``
    and ``weight_lo``.  Before every encoding pass the per-level filters
    (``self.weights``) and their reversed counterparts (``self.weights_dec``)
    are rebuilt from these two kernels by repeated zero-insertion upsampling
    and convolution (see :meth:`reset_weights_enc`).

    Every tensor created internally follows the device and dtype of the tensor
    it is derived from, so the module runs on CPU as well as on CUDA.

    Args:
        num_layers: number of decomposition levels; the encoding has
            ``num_layers + 1`` channels.
        kernel_size: length of the two learnable base kernels.
        stride, padding, dilation, groups: stored on the instance but not used
            by any method of this class.
        disp: when true, the energy of every derived filter is printed each
            time the filters are rebuilt.
    """

    def __init__(self, num_layers, kernel_size, stride=1,
                 padding=1, dilation=1, groups=1, disp=False):
        super(Wavelet, self).__init__()

        self.num_layers = num_layers
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.groups = groups
        self.disp = disp

        self.weight_hi = nn.Parameter(torch.Tensor(1, 1, kernel_size))
        self.weight_lo = nn.Parameter(torch.Tensor(1, 1, kernel_size))

        # Uninitialised placeholders, one per output channel.  They are plain
        # list entries (not registered parameters) and are overwritten by
        # reset_weights_enc() before they are ever used for filtering.
        self.weights = []
        self.weights.append(self.weight_hi)
        for i in range(1, num_layers):
            self.weights.append(torch.Tensor(1, 1, kernel_size * (2 ** i)))
        self.weights.append(torch.Tensor(1, 1, kernel_size * 2 ** (num_layers - 1)))

        self.weights_dec = copy.deepcopy(self.weights)

        self.reset_parameters()

    @staticmethod
    def _flip(tensor):
        """Return a copy of a 3-D ``tensor`` reversed along its last dimension."""
        idx = torch.arange(tensor.size(2) - 1, -1, -1, device=tensor.device).long()
        return tensor[:, :, idx]

    def reset_parameters(self):
        """Randomly initialise ``weight_hi`` and derive ``weight_lo`` from it.

        ``weight_hi`` is drawn with Xavier-uniform initialisation, shifted to
        zero mean and scaled to unit energy.  ``weight_lo`` is the reversed
        ``weight_hi`` with every odd-indexed tap negated, i.e.
        ``weight_lo[n] = (-1)**n * weight_hi[L - 1 - n]``.
        """
        nn.init.xavier_uniform_(self.weight_hi)
        weight_hi = self.weight_hi.detach()
        weight_hi = weight_hi - weight_hi.mean()
        weight_hi = weight_hi / torch.sqrt(self.energy(weight_hi))
        self.weight_hi = nn.Parameter(weight_hi)

        weight_lo = self._flip(weight_hi)
        # Negate *all* odd taps.  The previous index range stopped at size-2
        # and therefore skipped the last tap whenever kernel_size was even.
        weight_lo[:, :, 1::2] *= -1
        self.weight_lo = nn.Parameter(weight_lo)

    def energy(self, tensor):
        """Return the sum of squared entries of ``tensor`` (0-dim tensor)."""
        return tensor.pow(2).sum()

    def upsample(self, weigth, filt, filt_rec):
        """Insert zeros between the taps of ``weigth`` and filter the result.

        Args:
            weigth: kernel of shape ``(1, 1, k)`` to be upsampled.
            filt: kernel the padded, upsampled signal is convolved with.
            filt_rec: kernel the *reversed* padded signal is convolved with.

        Returns:
            A 3-tuple ``(upsampled, filtered, filtered_reversed)`` where
            ``upsampled`` has length ``2 * k - 1`` (original taps at even
            positions, zeros in between).
        """
        kernel_size = weigth.shape[-1]
        filt_size = filt.shape[-1]
        # Allocate next to the source kernel instead of forcing CUDA.
        upsampeled = torch.zeros((1, 1, kernel_size * 2 - 1),
                                 dtype=weigth.dtype, device=weigth.device)
        upsampeled[:, :, ::2] = weigth
        upsampeled_pad = F.pad(upsampeled,
                               (math.ceil(filt_size / 2), math.floor(filt_size / 2)))
        return (upsampeled,
                F.conv1d(upsampeled_pad, filt),
                F.conv1d(self._flip(upsampeled_pad), filt_rec))

    def reset_weights_enc(self):
        """Rebuild ``self.weights`` and ``self.weights_dec`` from the two parameters.

        Channel 0 uses one base kernel directly; channels ``1 .. num_layers-1``
        are produced by :meth:`upsample` from the running upsampled kernel and
        the accumulated companion filter; the last channel is the accumulated
        companion filter itself.
        """
        # NOTE(review): the local names are bound to the opposite parameters
        # (local ``weight_lo`` is ``self.weight_hi`` and vice versa).  Kept
        # exactly as before because saved checkpoints depend on it -- confirm
        # whether the swap is intended.
        weight_lo, weight_hi = self.weight_hi, self.weight_lo
        self.weights[0] = weight_hi
        self.weights_dec[0] = self._flip(weight_hi)
        last_lo = weight_lo
        last_hi = weight_hi
        accumulated_lo = weight_lo
        accumulated_lo_dec = self._flip(weight_lo)

        for i in range(1, self.num_layers):
            # Both calls must see the accumulated filters of the previous
            # level, so the accumulated pair is reassigned only by the second.
            last_hi, self.weights[i], self.weights_dec[i] = self.upsample(
                last_hi, accumulated_lo, accumulated_lo_dec)
            last_lo, accumulated_lo, accumulated_lo_dec = self.upsample(
                last_lo, accumulated_lo, accumulated_lo_dec)
        self.weights[self.num_layers] = accumulated_lo
        self.weights_dec[self.num_layers] = accumulated_lo_dec
        if self.disp:
            for w in self.weights[:]:
                print (self.energy(w.data))

    def reset_weights_dec(self):
        """Set every decoding filter to the reversed encoding filter."""
        for i, weight in enumerate(self.weights):
            self.weights_dec[i] = self._flip(weight)

    def encoding(self, data):
        """Filter ``data`` with every filter of the bank.

        Args:
            data: tensor of shape ``(batch, 1, length)`` (the filters have a
                single input channel).

        Returns:
            Tensor of shape ``(batch, num_layers + 1, length)`` on the same
            device as ``data``; channel ``i`` is ``data`` filtered with
            ``self.weights[i]`` using "same"-length zero padding.
        """
        self.reset_weights_enc()
        bands = []
        for weight in self.weights:
            filt_size = weight.size(2) - 1
            left = math.ceil(filt_size / 2)
            rigth = math.floor(filt_size / 2)
            bands.append(F.conv1d(F.pad(data, (left, rigth)), weight))
        return torch.cat(bands, dim=1)

    def decoding(self, encoding):
        """Sum the channels of ``encoding`` after filtering each with its decoding filter.

        Uses ``self.weights_dec`` as last rebuilt by :meth:`reset_weights_enc`
        (called from :meth:`encoding` / :meth:`forward`); the filters are not
        rebuilt here, so call this only after an encoding pass.

        Args:
            encoding: tensor of shape ``(batch, num_layers + 1, length)``.

        Returns:
            Tensor of shape ``(batch, 1, length)``.
        """
        decoding = torch.zeros((encoding.size(0), 1, encoding.size(2)),
                               dtype=encoding.dtype, device=encoding.device)
        for i, weight in enumerate(self.weights_dec):
            filt_size = weight.size(2) - 1
            # Padding is mirrored with respect to encoding().
            left = math.floor(filt_size / 2)
            rigth = math.ceil(filt_size / 2)
            band = encoding[:, i, :].unsqueeze(1)
            decoding = decoding + F.conv1d(F.pad(band, (left, rigth)), weight)
        return decoding

    def forward(self, data):
        """Alias of :meth:`encoding` so the module can be called directly."""
        return self.encoding(data)
        
        


In [4]:
class classifier(nn.Module):
    """Wavelet front end followed by a small 1-D CNN producing 10 class scores.

    Args:
        wavelet_num_layers: number of levels of the ``Wavelet`` front end; the
            first convolution takes ``wavelet_num_layers + 1`` input channels.
        wavelet_kernel_size: length of the wavelet base kernels.
        stride, padding, dilation, groups: accepted for signature
            compatibility; not used by this class.
        disp: forwarded to ``Wavelet``.
    """

    def __init__(self, wavelet_num_layers, wavelet_kernel_size, stride=1,
                 padding=1, dilation=1, groups=1, disp=False):
        super(classifier, self).__init__()
        self.wavelet_num_layers = wavelet_num_layers
        self.wavelet_kernel_size = wavelet_kernel_size

        self.wavelet = Wavelet(wavelet_num_layers, wavelet_kernel_size, disp=disp)
        self.conv1 = nn.Conv1d(wavelet_num_layers + 1, 15, 3)
        self.pool = nn.MaxPool1d(2)
        self.conv2 = nn.Conv1d(15, 20, 3)
        self.conv3 = nn.Conv1d(20, 25, 3)
        self.lastpool = nn.AdaptiveAvgPool1d(1)
        self.fc1 = nn.Linear(25, 120)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(120, 10)

    def forward(self, x):
        """Return raw class scores of shape ``(batch, 10)`` for input ``x``."""
        encoded_x = self.wavelet(x)
        output = self.pool(self.conv1(encoded_x))
        output = self.pool(self.conv2(output))
        output = self.conv3(output)
        output = self.lastpool(output)  # (batch, 25, 1)
        # Drop only the pooled length axis.  A bare squeeze() would also remove
        # the batch axis for a batch of one and yield a 1-D output.
        output = self.relu(self.fc1(output.squeeze(-1)))
        output = self.fc2(output)

        return output

In [9]:
# Folds used for training and the single fold held out for evaluation.
train_splits = [1, 2, 3, 4]
test_split = 5

# Root of the ESC-50 checkout.  Defaults to the original location; set the
# ESC50_ROOT environment variable to run the notebook against another copy.
ESC50_ROOT = os.environ.get('ESC50_ROOT', '/home/ilya/workspace/ESC-50')

# Keyword arguments shared by every ESC50 generator built in this notebook.
shared_params = {'csv_path': os.path.join(ESC50_ROOT, 'esc50.csv'),
                 'wav_dir': os.path.join(ESC50_ROOT, 'audio'),
                 'dest_dir': os.path.join(ESC50_ROOT, 'audio', '16000'),
                 'audio_rate': 16000,
                 'only_ESC10': True,
                 'pad': 0,
                 'normalize': True}

# The training generator is rebuilt at the start of every epoch inside the
# training loop, so only the evaluation generator is created here.
test_gen = ESC50(folds=[test_split],
                 randomize=False,
                 strongAugment=False,
                 random_crop=False,
                 inputLength=4,
                 mix=False,
                 **shared_params).batch_gen(128)

In [None]:
# Classifier on top of a 5-level wavelet bank with length-10 base kernels,
# moved to the GPU.
net = classifier(wavelet_num_layers=5, wavelet_kernel_size=10)
net.cuda()

classifier(
  (wavelet): Wavelet()
  (conv1): Conv1d(6, 15, kernel_size=(3,), stride=(1,))
  (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv1d(15, 20, kernel_size=(3,), stride=(1,))
  (conv3): Conv1d(20, 25, kernel_size=(3,), stride=(1,))
  (lastpool): AdaptiveAvgPool1d(output_size=1)
  (fc1): Linear(in_features=25, out_features=120, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=120, out_features=10, bias=True)
)

In [None]:
# TensorBoard event writer; the scalars logged by the training loop go to runs/train.
writer = SummaryWriter('runs/train')
def criterion(outputs, labels, w_hi, w_lo, C):
    """Cross-entropy loss plus penalties that constrain the two wavelet kernels.

    Penalty terms (each a squared deviation):

    * ``L1`` / ``L2``: ``(energy(w) - 1)**2`` plus the square of the sum, over
      all non-zero even lags ``2m``, of ``sum_i w[i] * w[i + 2m]`` -- for
      ``w_hi`` and ``w_lo`` respectively.
    * ``L3``: square of the sum of the odd-indexed taps of ``w_hi``.
    * ``L4``: ``(sum(w_lo) - sqrt(2))**2``.

    A cross-filter (``w_lo`` against ``w_hi``) term existed only as disabled
    code and is not part of the loss.

    NOTE(review): the even-lag products are summed over all lags *before*
    squaring, so contributions of different lags can cancel.  Kept as is to
    leave the trained objective unchanged -- confirm this is intended.

    Args:
        outputs: class scores, passed to ``nn.CrossEntropyLoss``.
        labels: target class indices, passed to ``nn.CrossEntropyLoss``.
        w_hi, w_lo: kernels of shape ``(1, 1, kernel_size)``.
        C: weight of the summed penalty terms.

    Returns:
        ``(total_loss, en, ce_loss)`` where ``en`` is the sum of the two
        unit-energy deviations only and ``ce_loss`` is the plain cross-entropy.
    """
    def even_lag_sum(w):
        # sum_{m>=1} sum_i w[i] * w[i + 2m], accumulated on w's own device.
        size = w.size(2)
        total = torch.zeros((), dtype=w.dtype, device=w.device)
        for m in range(1, size // 2):
            total = total + (w[:, :, :size - 2 * m] * w[:, :, 2 * m:]).sum()
        return total

    cross_entropy = nn.CrossEntropyLoss()

    unit_hi = (w_hi.pow(2).sum() - 1).pow(2)
    unit_lo = (w_lo.pow(2).sum() - 1).pow(2)
    en = unit_hi + unit_lo

    L1 = even_lag_sum(w_hi).pow(2) + unit_hi
    L2 = even_lag_sum(w_lo).pow(2) + unit_lo
    L3 = (-w_hi[:, :, 1::2]).sum().pow(2)
    L4 = (w_lo.sum() - 2 ** (1 / 2)).pow(2)

    CE_Loss = cross_entropy(outputs, labels)
    return (CE_Loss + C * (L1 + L2 + L3 + L4), en, CE_Loss)

In [None]:
# SGD with momentum over all classifier parameters (wavelet kernels included).
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.95)

# Multiply the learning rate by 0.7 once the monitored value has not improved
# by the threshold for 200 consecutive scheduler steps.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    factor=0.7,
    patience=200,
    verbose=True,
    threshold=0.001,
)

In [14]:
C = 0.05  # weight of the wavelet-constraint penalties inside criterion()

# Run the batches wherever the model currently lives.
device = next(net.parameters()).device
# Monotonic x-axis for the TensorBoard scalars.
global_step = 0
# torch.save does not create missing directories.
os.makedirs('saves', exist_ok=True)

for epoch in range(100):  # loop over the dataset multiple times

    # Generators are rebuilt every epoch.
    train_gen = ESC50(folds=train_splits,
                      randomize=True,
                      strongAugment=False,
                      random_crop=False,
                      inputLength=2,
                      mix=False,
                      **shared_params).batch_gen(50)

    test_gen = ESC50(folds=[test_split],
                     randomize=True,
                     strongAugment=False,
                     random_crop=False,
                     inputLength=4,
                     mix=False,
                     **shared_params).batch_gen(50)

    for i, (inputs, labels) in enumerate(train_gen):
        global_step += 1

        inputs = torch.Tensor(inputs).transpose(1, 2).to(device)
        labels = torch.LongTensor(labels).to(device)
        # Reduce each label row to the index of its maximum
        # (assumes the generator yields one-hot rows -- TODO confirm).
        _, labels = torch.max(labels, 1)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward
        outputs = net(inputs)
        acc = (outputs.max(1)[1] == labels).float().mean()
        loss, en, ce_loss = criterion(outputs, labels,
                                      net.wavelet.weight_hi, net.wavelet.weight_lo, C)

        writer.add_scalar("loss", loss.item(), global_step)
        writer.add_scalar("en", en.item(), global_step)
        writer.add_scalar("Acc", acc.item(), global_step)

        if en.item() > 15:
            # The kernels drifted too far from unit energy: skip this update
            # and roll back to the best checkpoint.  The weights are loaded
            # into the *existing* model so the optimizer keeps pointing at the
            # live parameters (creating a new model here would leave the
            # optimizer updating the discarded one).
            # NOTE(review): the optimizer's momentum buffers are not reset
            # on rollback -- confirm whether they should be.
            print ('Energy of filteres occurred to be:', en, ce_loss, ' step is ', i)
            net.load_state_dict(torch.load('saves/best_model', map_location=device))
            continue

        # backward + optimize
        loss.backward()
        optimizer.step()
        scheduler.step(loss.item())

        if i % 100 == 0:
            print ('epoch = {}, iter = {}, energy = {:1.4}, CE_Loss = {:1.4}, Loss = {:1.4}'.format
                   (epoch, i, en.item(), ce_loss.item(), loss.item()))

            # Magnitude spectra of the (time-reversed) base kernels.
            # np.fft.fft is used because `from scipy import fft` binds a
            # module, not a function, in current SciPy releases.
            hi_f = np.abs(np.fft.fft(net.wavelet.weight_hi.detach().cpu().numpy()[0, 0, ::-1]))
            lo_f = np.abs(np.fft.fft(net.wavelet.weight_lo.detach().cpu().numpy()[0, 0, ::-1]))
            n = hi_f.shape[-1]
            plt.plot(range(n // 2), hi_f[:n // 2])
            plt.plot(range(n // 2), lo_f[:n // 2])
            plt.show()

        if i % 200 == 0:
            val_inputs, val_labels = next(test_gen)
            val_inputs = torch.Tensor(val_inputs).transpose(1, 2).to(device)
            val_labels = torch.LongTensor(val_labels)

            # Validation needs no autograd graph.
            with torch.no_grad():
                val_outputs = net(val_inputs)

            try:
                roc_auc = roc_auc_score(val_labels.numpy(), val_outputs.cpu().numpy())
            except ValueError:
                # roc_auc_score rejects label sets it cannot score (presumably
                # a batch where some class column holds a single value);
                # such a validation round is skipped.
                continue

            if best_score < roc_auc:
                best_score = roc_auc
                torch.save(net.state_dict(), 'saves/best_model')

            writer.add_scalar("ROC AUC Val", float(roc_auc), global_step)


In [6]:
# Rebuild the 5-level / length-10 model and restore stored weights.
# NOTE(review): nothing visible in this notebook writes 'saves/last_model5_10'
# (the training loop saves 'saves/best_model') -- confirm where it comes from.
net = classifier(wavelet_num_layers=5, wavelet_kernel_size=10)
net.load_state_dict(torch.load('saves/last_model5_10'))
net.cuda()
# Populate net.wavelet.weights so the per-level filters exist without a forward pass.
net.wavelet.reset_weights_enc()

In [13]:
def _magnitude_spectrum(filt):
    """Return |FFT| of the time-reversed 1-D filter held in a ``(1, 1, n)`` tensor.

    ``np.fft.fft`` is used because ``from scipy import fft`` binds a module
    (not a callable function) in current SciPy releases.
    """
    taps = filt[0, 0].detach().cpu().numpy()[::-1]
    return np.abs(np.fft.fft(taps))


# Spectra of the two learned base kernels on a normalised frequency axis.
hi, lo = net.wavelet.weight_hi, net.wavelet.weight_lo
hi_f, lo_f = _magnitude_spectrum(hi), _magnitude_spectrum(lo)
n = hi_f.shape[-1]
plt.grid(True)
plt.axis([0, 1, 0, 2.5])
plt.plot(np.arange(n//2+1)/(n//2), lo_f[:n//2+1])
plt.plot(np.arange(n//2+1)/(n//2), hi_f[:n//2+1])
plt.xlabel(r'$\frac{2f}{f_д}$', fontsize=20)
plt.ylabel('Амплитуда')
plt.savefig('learned.png', dpi=100)
plt.show()

# One figure per derived filter of the bank.
for i, w in enumerate(net.wavelet.weights):
    plt.grid(True)
    f = _magnitude_spectrum(w)
    n = w.shape[-1]
    plt.axis([0, 1, 0, 30])
    plt.xlabel(r'$\frac{2f}{f_д}$', fontsize=20)
    plt.ylabel('Амплитуда')
    plt.plot(np.array(range(n//2+1))/(n//2), f[:n//2+1])
    plt.savefig('layerslearned{}.png'.format(i))
    plt.show()

In [12]:
from IPython.display import clear_output

total = 0
correct = 0
test_gen = ESC50(folds=[test_split],
                 randomize=False,
                 strongAugment=False,
                 random_crop=False,
                 inputLength=4,
                 mix=False,
                 **shared_params).batch_gen(50)

# Evaluate on whichever device the model lives on.
eval_device = next(net.parameters()).device

for inputs, labels in test_gen:
    inputs = torch.Tensor(inputs).transpose(1, 2).to(eval_device)
    labels = torch.LongTensor(labels).to(eval_device)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        outputs = net(inputs)

    _, predicted = torch.max(outputs, 1)
    # Reduce each label row to the index of its maximum
    # (assumes one-hot rows from the generator -- TODO confirm).
    _, labels = torch.max(labels, 1)

    total += labels.size(0)
    correct += (predicted == labels).sum().item()

    # Keep a single, continuously updated running-accuracy line.
    clear_output(wait=True)
    print('Accuracy of the network on the test fold: %d %%' % (100 * correct / total))


In [None]:
# Final accuracy over everything the evaluation loop processed.
if total:
    print('Accuracy of the network on the test fold: %d %%' % (100 * correct / total))
else:
    # Avoid a ZeroDivisionError when the evaluation generator yielded nothing.
    print('No test samples were evaluated.')