In [1]:
import warnings
warnings.filterwarnings('ignore')

import glob
import os
import sys
import time

import IPython.display as ipd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import soundfile as sf
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchaudio
import torchaudio.transforms as transforms
import torchvision.models as vmodels
from scipy.interpolate import interp1d
from torch.utils.data import Dataset, DataLoader
from tqdm.notebook import tqdm

from nnutils import reshape_mel_spec, RainforestNetV2

In [2]:
# Hyper parameters
# N_MELS matches the number of mel bands requested from the mel-spectrogram
# transform in the inference cell.
N_MELS = 224

# NOTE(review): EPOCHS, TRAIN_BATCH_SIZE and MAXLEN are not referenced by the
# cells visible here - presumably carried over from the training notebook;
# confirm before removing.
MAXLEN = 800
TRAIN_BATCH_SIZE = 4
EPOCHS = 10

In [3]:
# CUDA
# Use the GPU when torch can see one, otherwise fall back to the CPU.
is_cuda = torch.cuda.is_available()
device = torch.device('cuda' if is_cuda else 'cpu')

print ('cuda: ', is_cuda)

# Hand unused cached GPU memory back before the model is loaded.
torch.cuda.empty_cache()

cuda:  True


In [4]:
# Every test recording, as a list of path strings. glob returns matches in
# arbitrary (filesystem-dependent) order, so sort them to make the row order of
# the results/submission frames reproducible across machines and runs.
test_files = sorted(glob.glob('data/test/*.flac'))

In [5]:
# Build the network and restore the trained weights from the checkpoint.
model = RainforestNetV2()

# map_location=device remaps the stored tensors onto the device selected
# earlier, so a checkpoint written on a GPU still loads when the notebook
# falls back to the CPU.
state_dict = torch.load('models/rainforest1608769852.3191528.pt', map_location=device)
model.load_state_dict(state_dict)

# Inference only: switch to eval mode and move the weights to the device.
model.eval()
model = model.to(device)

# The inference cell refers to the network as `test_res`; keep both names
# bound to the same module.
test_res = model

In [6]:
# test_files is a plain list of paths (the glob.glob result), so it has no .head(); preview it with test_files[:5]

In [7]:
# Show numbers in plain decimal notation (no scientific notation) for both
# numpy arrays and torch tensors.
np.set_printoptions(suppress=True)
torch.set_printoptions(sci_mode=False)

In [8]:
# Run the model over every test recording and keep one score vector per file.
# Produces `results`: a DataFrame with a single 'result' column whose cells are
# numpy arrays, in the same order as `test_files`. Also leaves `start_time` and
# `end_time` defined; `end_time` is reused for the submission file name.
print (device)

start_time = time.time()

# Built once and reused for every file. n_mels comes from the N_MELS hyper
# parameter rather than repeating the literal 224.
# NOTE(review): the transform is fixed at 48 kHz and the sample rate returned by
# torchaudio.load is not checked against it - confirm all test files are 48 kHz.
mel_trans = transforms.MelSpectrogram(sample_rate=48000, n_mels=N_MELS, n_fft=5000, f_min=90.0, f_max=14000.0).to(device)

# Collect rows in a list and build the frame once at the end: DataFrame.append
# copied the whole frame on every call and no longer exists in pandas 2.x.
rows = []

with torch.no_grad():
    for path in tqdm(test_files):
        waveform, sample_rate = torchaudio.load(path)
        waveform = waveform.to(device)

        mel_spec = mel_trans(waveform)

        # Tile the spectrogram along the first axis three times, then add a
        # leading batch axis of size 1.
        # NOTE(review): presumably this turns a mono spectrogram into a
        # 3-channel image-like input for the network - confirm the recordings
        # are single-channel.
        mel_spec = mel_spec.repeat(3, 1, 1).unsqueeze(0)

        # First (only) element of the batch output.
        scores = test_res(mel_spec)[0]

        rows.append({'result': scores.cpu().numpy()})

results = pd.DataFrame(rows, columns=['result'])

end_time = time.time()
print ('\n-------- DONE --------')
print ('start time: {}'.format(start_time))
print ('end time: {}\n'.format(end_time))

duration = end_time-start_time
print ('testing duration: {}'.format(duration))

cuda


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1992.0), HTML(value='')))



-------- DONE --------
start time: 1608770146.9352384
end time: 1608770290.2482183

testing duration: 143.31297993659973


In [9]:
# Print numpy arrays in full (no "..." summarisation) and without scientific
# notation, then show the first five result rows.
np.set_printoptions(threshold=sys.maxsize, suppress=True)
results.head(n=5)

Unnamed: 0,result
0,"[0.090642, 0.072187826, 0.062998205, 0.2130002..."
1,"[0.106719024, 0.21598183, 0.076697946, 0.13864..."
2,"[0.06689198, 0.053306475, 0.061983682, 0.18149..."
3,"[0.9976802, 0.007722797, 0.030833354, 0.010122..."
4,"[0.07713989, 0.11319038, 0.076140225, 0.128138..."


In [14]:
# Turn every score vector into one submission row: the recording id followed by
# one rounded score per species (columns s0, s1, ...).
# Rows are gathered in a list and the frame is built once; DataFrame.append in a
# loop is quadratic and no longer exists in pandas 2.x.
submission_rows = []

for path, scores in zip(test_files, results['result']):
    # File name without directory or extension. os.path handles the platform's
    # separator, whereas stripping the literal 'data/test\\' prefix only worked
    # on Windows and left 'data/test/' in the id everywhere else.
    recording_id = os.path.splitext(os.path.basename(path))[0]

    row = {'recording_id': recording_id}
    for species, score in enumerate(scores):
        # NOTE(review): rounding collapses each score to 0.0 / 1.0 and discards
        # the model's confidence - confirm this is what the scorer expects.
        row['s{}'.format(species)] = score.round()
    submission_rows.append(row)

submission = pd.DataFrame(submission_rows)

In [15]:
# Put recording_id first, followed by the species columns s0 .. s23 in
# numeric order.
cols = ['recording_id'] + ['s{}'.format(species) for species in range(24)]
submission = submission[cols]

In [16]:
# Eyeball the first five submission rows before writing the file.
submission.head(n=5)

Unnamed: 0,recording_id,s0,s1,s2,s3,s4,s5,s6,s7,s8,...,s14,s15,s16,s17,s18,s19,s20,s21,s22,s23
0,000316da7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,003bc2cb2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0061c037e,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,010eb14d3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,011318064,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
np.set_printoptions(suppress=True, threshold=sys.maxsize)

# Make sure the output folder exists so the export cannot fail on a fresh
# checkout after the (slow) inference run has already finished.
os.makedirs('submissions', exist_ok=True)

# The file name carries the inference end timestamp, so each run writes its own
# file. Values are written with five decimals and without the index column.
submission.to_csv("submissions/submission-{}.csv".format(end_time), index=False, float_format='%.5f')