In [1]:
import warnings
warnings.filterwarnings('ignore')

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.models as vmodels
import torchaudio.transforms as transforms
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
import glob
import soundfile as sf
from scipy.interpolate import interp1d
import IPython.display as ipd
import sys

In [2]:
# Hyper parameters
EPOCHS = 10
TRAIN_BATCH_SIZE = 4
N_MELS = 256
MAXLEN = 800

In [4]:
def reshape_mel_spec(mel_spec):
        diff = MAXLEN - mel_spec.shape[2]
        if diff > 0:
            # pad
            s = int(diff/2) 
            e = diff - s
            mel_spec = F.pad(input=mel_spec, pad=(s, e, 0, 0, 0, 0), mode='constant', value=0)
        else:
            # trim
            mel_spec = mel_spec.narrow(2, diff*-1, MAXLEN)

        mel_spec = mel_spec.repeat(3, 1, 1)
        return mel_spec

In [5]:
class RainforestNet(nn.Module):
    def __init__(self):
        super(RainforestNet, self).__init__()
        self.resnet = vmodels.resnet50(pretrained=True)
        self.fc1 = nn.Linear(1000, 500)
        self.fc2 = nn.Linear(500, 225)
        self.fc3 = nn.Linear(225, 24)

    def forward(self, x):
        x = self.resnet(x)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        #x = self.fc2(x)
        x = self.fc3(x)

        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [8]:
# CUDA
is_cuda = True and torch.cuda.is_available()
device = torch.device('cuda') if is_cuda else torch.device('cpu')

print ('cuda: ', is_cuda)

torch.cuda.empty_cache()

cuda:  True


In [6]:
test_files = glob.glob( 'data/test/*.flac' )

In [9]:
test_res = model = RainforestNet()
model.load_state_dict(torch.load('models/rainforest1608410308.1312122.pt'))
test_res = test_res.to(device)

In [10]:
#test_files.head()

In [11]:
torch.set_printoptions(sci_mode=False)
np.set_printoptions(suppress=True)

In [14]:
results = pd.DataFrame()
print (device)

for f in tqdm(range(len(test_files))):
    #ex = test_files.iloc[idx]
    path = test_files[f]
    seconds = 3
    waveform, sample_rate = torchaudio.load(path)
    
    clips = waveform.split(sample_rate*seconds, dim=1)
    tot = torch.zeros(24).to(device)
    with torch.no_grad():
        for clip in clips:
            mel_spec = transforms.MelSpectrogram(sample_rate=sample_rate, n_mels=N_MELS)(clip)
            mel_spec = reshape_mel_spec(mel_spec)
            mel_spec = mel_spec.unsqueeze(0)
            mel_spec = mel_spec.to(device)
            
            output = test_res(mel_spec)
            #print (F.relu(output))
            
            tot = tot+output[0]
    
    tot = F.sigmoid(tot)
    
    tot = tot.to(torch.device('cpu')).numpy()    
    results = results.append({ 'result': tot }, ignore_index=True)

cuda


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1992.0), HTML(value='')))




In [15]:
np.set_printoptions(suppress=True, threshold=sys.maxsize)
results.head()

Unnamed: 0,result
0,"[1.01427814e-19, 3.2249494e-38, 8.613344e-15, ..."
1,"[9.080033e-22, 2.0659597e-35, 9.0906026e-11, 4..."
2,"[6.5323896e-17, 2.259268e-37, 6.102157e-17, 1...."
3,"[8.0229635e-11, 1.1282021e-36, 1.8402546e-10, ..."
4,"[5.678724e-19, 9.963914e-36, 1.1447108e-09, 2...."


In [16]:
submission = pd.DataFrame()

for idx in range(len(results)):
    ex = results.iloc[idx]
    recording_id = test_files[idx].replace('.flac', '').replace('data/test\\', '')
    result = ex.result
    
    species = 0
    o = {}
    for r in result:
        s = 's{}'.format(species)
        o[s] = r
        species += 1 
    o["recording_id"] = recording_id
    submission = submission.append(o, ignore_index=True)

In [17]:
cols = ['recording_id', 's0', 's1', 's2', 's3', 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14', 's15', 's16', 's17', 's18', 's19', 's20', 's21', 's22', 's23']
submission = submission[cols]

In [18]:
submission.head()

Unnamed: 0,recording_id,s0,s1,s2,s3,s4,s5,s6,s7,s8,...,s14,s15,s16,s17,s18,s19,s20,s21,s22,s23
0,000316da7,1.0142779999999999e-19,3.224949e-38,8.613344e-15,3.781055e-35,1.24676e-08,0.999988,0.801565,0.124384,0.1310925,...,8.686459000000001e-39,0.117909,2.254558e-11,1.0,0.0,9.783792000000001e-23,0.263219,8.683350999999999e-30,1.845194e-12,0.999999
1,003bc2cb2,9.080033e-22,2.06596e-35,9.090603e-11,4.6379900000000004e-33,6.013069e-06,0.999567,0.786004,0.414118,0.1835082,...,4.223685e-38,0.987164,9.730175e-06,1.0,0.0,2.652655e-19,0.004357989,4.687677e-30,1.808737e-14,0.994527
2,0061c037e,6.532390000000001e-17,2.259268e-37,6.102157e-17,1.5402080000000001e-33,4.574869e-12,1.0,0.999994,0.172047,1.907333e-08,...,0.0,0.000301,4.262995e-08,1.0,0.0,1.098226e-16,0.2791064,2.120831e-30,2.069824e-07,0.99997
3,010eb14d3,8.022964e-11,1.128202e-36,1.840255e-10,8.747042000000001e-39,0.9999738,1.0,0.032535,0.015878,0.9994629,...,1.881186e-34,0.000134,1.320253e-10,0.005571,0.0,2.997318e-21,3.825161e-11,9.440732e-22,3.857265e-07,0.999775
4,011318064,5.678724e-19,9.963913999999999e-36,1.144711e-09,2.521035e-33,0.0002161447,0.999624,0.992177,0.130415,0.0088887,...,9.074977e-37,0.480112,3.838839e-07,1.0,0.0,7.164003e-18,0.001084353,6.133769e-28,6.729225e-11,0.995812


In [19]:
np.set_printoptions(suppress=True, threshold=sys.maxsize)
submission.to_csv("submissions/submission-resnet-3.csv", index=False, float_format='%.5f')