In [37]:
import gc
import glob
import importlib
import os
import sys
import time
from datetime import timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm

import nnutils_by
importlib.reload(nnutils_by)

In [38]:
import torch
import torchaudio
import torch.nn as nn
import torchaudio.transforms as transforms
from torch.utils.data import Dataset
from torch.utils.data.sampler import SubsetRandomSampler
import torch.nn.functional as F

For each audio file, store its mel spectrogram and the species that are present in it.

Loop through the audio files, compute the mel spectrogram of each one, and add it to the list.
Then, for each species entry belonging to a given audio file, mark that species in the file's label vector of length 24.


In [39]:
# Notebook-wide configuration: every tunable value lives in this cell.
SAMPLE_RATE = 48000       # sample rate handed to the mel-spectrogram parameters
BATCH_SIZE = 16           # mini-batch size used by both data loaders
VALIDATION_SPLIT = 0.2    # fraction of the dataset held out for validation
SHUFFLE = True            # shuffle the indices before splitting train/validation
RANDOM_SEED = 42          # seed for the NumPy shuffle of the indices
LR = 0.001                # learning rate for the optimizer

# Mel-spectrogram settings; this dict is passed to nnutils_by.MelDataset.
mel_params = dict(
    sample_rate=SAMPLE_RATE,
    n_fft=4096,
    hop_length=2048,
    n_mels=64,
    f_min=0,
    f_max=SAMPLE_RATE / 2,   # upper frequency bound: half the sample rate
    power=2,
)

In [40]:
# Device selection: run on the GPU when CUDA is available, otherwise on the CPU.
cpu = torch.device('cpu')
gpu = torch.device('cuda')

is_cuda = torch.cuda.is_available()
if is_cuda:
    device = gpu
else:
    device = cpu

print ('cuda: ', is_cuda)

# Hand cached, currently unused GPU memory back to the driver before the heavy cells run.
torch.cuda.empty_cache()

cuda:  True


In [25]:
# Build the dataset once, then split its indices into a training part and a
# validation part (the first VALIDATION_SPLIT fraction of the, optionally
# shuffled, index list becomes the validation set).
dataset = nnutils_by.MelDataset(mel_params)
dataset_size = len(dataset)
split = int(np.floor(VALIDATION_SPLIT * dataset_size))

indices = list(range(dataset_size))
if SHUFFLE:
    # Seed NumPy first so the train/validation partition is the same on every run.
    np.random.seed(RANDOM_SEED)
    np.random.shuffle(indices)

val_indices = indices[:split]
train_indices = indices[split:]

# Both loaders read from the same dataset object; each sampler restricts its
# loader to one of the two index subsets.
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

train_loader = torch.utils.data.DataLoader(
    dataset,
    sampler=train_sampler,
    batch_size=BATCH_SIZE,
)
validation_loader = torch.utils.data.DataLoader(
    dataset,
    sampler=valid_sampler,
    batch_size=BATCH_SIZE,
)

using device cuda


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4727.0), HTML(value='')))




In [26]:
# Number of mini-batches in the training and validation loaders.
tuple(len(loader) for loader in (train_loader, validation_loader))

(237, 60)

In [27]:
# Instantiate the recurrent model and place it on the selected device.
# 1407 is the size of each LSTM input step (see the printed architecture);
# presumably the number of spectrogram time frames per clip — TODO confirm.
rnn = nnutils_by.RNN(1407)
rnn = rnn.to(device)
rnn.train()   # training mode, so the dropout layer is active
print(rnn)

# Adam updates every parameter of the model.
optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)

# Binary cross-entropy over the model outputs.
# NOTE(review): BCELoss needs outputs in [0, 1]; confirm RNN.forward ends in a sigmoid.
loss_func = nn.BCELoss()

RNN(
  (rnn1): LSTM(1407, 64, num_layers=2, batch_first=True)
  (l1): Linear(in_features=64, out_features=64, bias=True)
  (d1): Dropout(p=0.2, inplace=False)
  (out): Linear(in_features=64, out_features=24, bias=True)
  (relu): ReLU()
)


In [None]:
# Train the model for `num_epochs` passes over `train_loader`, track the loss,
# then save the trained weights to models/rainforest<end timestamp>.pt.
num_epochs = 10
loss_interval = 50      # number of mini-batches averaged into one entry of `losses`
start_time = time.time()
losses = []             # mean loss of each completed window of `loss_interval` batches
losses_detail = []      # loss of every individual mini-batch

for epoch in tqdm(range(num_epochs)):
    running_loss = 0.0  # sum of the batch losses in the current window
    epoch_loss = 0.0    # sum of the batch losses over the whole epoch
    num_batches = 0

    for batch_index, (inputs, labels, _) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        # Flatten to (batch, 64, 1407): 64 steps of 1407 features for the LSTM.
        inputs = inputs.reshape(-1, 64, 1407)

        optimizer.zero_grad()

        output = rnn(inputs)
        loss = loss_func(output, labels)

        loss.backward()
        optimizer.step()

        del inputs
        del labels

        # NOTE(review): emptying the CUDA cache after every step is kept from
        # the original cell; it usually only slows training down — consider
        # removing it once memory usage is confirmed to be fine.
        torch.cuda.empty_cache()

        batch_loss = loss.item()
        running_loss += batch_loss
        epoch_loss += batch_loss
        num_batches += 1
        losses_detail.append(batch_loss)

        # Log only once a full window has been accumulated, so the sum really
        # covers `loss_interval` batches before it is divided by that number.
        if (batch_index + 1) % loss_interval == 0:
            interval_loss = running_loss / loss_interval
            losses.append(interval_loss)
            running_loss = 0.0

    # Report the mean over the whole epoch rather than the leftover partial window.
    mean_epoch_loss = epoch_loss / max(num_batches, 1)
    print(f"Epoch {epoch + 1}/{num_epochs}, mean loss: {mean_epoch_loss:.5f}")

end_time = time.time()
print ('\n-------- DONE --------')
print ('start time: {}'.format(start_time))
print ('end time: {}\n'.format(end_time))

duration = end_time-start_time
print ('training duration: {}'.format(str(timedelta(seconds=duration))))

model_path = 'models/rainforest{}.pt'.format(end_time)
# Make sure the target directory exists so the finished run is not lost on save.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
torch.save(rnn.state_dict(), model_path)


In [None]:
# Re-execute nnutils_by so edits made to the module are picked up without restarting the kernel.
importlib.reload(nnutils_by)

In [65]:
# Load a saved checkpoint, run it over the validation loader and collect the
# per-recording predictions into `submission` (recording_id, s0 ... s23).
model_path = "models/rainforest1609024664.0753264.pt"

valid_model = nnutils_by.RNN(1407)
# map_location lets a checkpoint saved from the GPU load on a CPU-only machine too.
valid_model.load_state_dict(torch.load(model_path, map_location=device))
valid_model = valid_model.to(device)
valid_model.eval()

cols = ['recording_id'] + ['s{}'.format(k) for k in range(24)]

# Collect one plain dict per sample and build the frame once at the end;
# growing a DataFrame row by row is quadratic and DataFrame.append no longer
# exists in pandas >= 2.0.
records = []
with torch.no_grad():
    for inputs, _, audio_ids in tqdm(validation_loader):
        inputs = inputs.to(device)
        for ind in range(inputs.shape[0]):
            # Each sample is fed on its own; the first output row is its prediction.
            outputs = valid_model(inputs[ind])
            res = outputs[0].to(cpu).numpy()

            record = {'recording_id': audio_ids[ind]}
            for species, prob in enumerate(res.tolist()):
                record['s{}'.format(species)] = prob
            records.append(record)

# Passing `columns` fixes the column order and still yields a well-formed
# (empty) frame when the loader produced no samples.
submission = pd.DataFrame(records, columns=cols)


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=945.0), HTML(value='')))




In [70]:
# Sanity check: expect one row per validation sample and 25 columns (recording_id plus s0..s23).
submission.shape

(945, 25)

In [71]:
from utils import score
# Score the validation predictions with the project's `score` helper and print the result.
# NOTE(review): the recorded run printed nan as the first element of the result;
# what each element means is not visible from this notebook — check utils.score.
s = score(submission)
print(s)

(nan, 0.23523449160696627)
