Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.

In [None]:
# Optional environment setup: uncomment the lines below to install the
# dependencies this notebook was run with (versions are pinned where given).
# pip install matplotlib
# !pip install opencv-python==4.5.5.64
# !pip install torch==1.10.2+cu113 torchvision==0.11.3+cu113 torchaudio==0.10.2+cu113 --extra-index-url https://download.pytorch.org/whl/cu113

In [1]:
import torch
import torch.optim as optim
import torch.utils.data as data_utils
import os
import numpy as np
from sklearn.preprocessing import LabelBinarizer, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold, StratifiedKFold
import csv
import pandas as pd
import matplotlib.pyplot as plt
import glob
import gc
import h5py
import pickle as pk

from utils import log_results, SaveBestModel, train_seq, test_seq
from utils import normalize_mel_sp_slides

from models import cnn_rnn

In [2]:
# Report whether PyTorch can see a CUDA device; the device selection below depends on it.
torch.cuda.is_available()

True

In [5]:
# Select the compute device for all experiments.
# The second GPU ("cuda:1") is preferred, as in the original setup, but a
# hard-coded index of 1 fails on machines that expose only one GPU, so fall
# back to "cuda:0" in that case and to the CPU when CUDA is unavailable.
if torch.cuda.is_available():
    _gpu_index = 1 if torch.cuda.device_count() > 1 else 0
    device = torch.device(f"cuda:{_gpu_index}")
else:
    device = torch.device("cpu")

In [6]:
# Display the device selected above.
device

device(type='cuda', index=1)

# Set directories

In [7]:
# Locations used throughout the notebook.
dataDir = './data'       # input data (mel spectrograms and metadata)
resultsDir = 'Results'   # per-experiment result logs
tempDir = 'temp'         # best-model checkpoints written during training

# Create the output directories up front. exist_ok=True makes the cell safe
# to re-run and avoids the check-then-create race of os.path.exists().
for _out_dir in (resultsDir, tempDir):
    os.makedirs(_out_dir, exist_ok=True)

# Load data

In [8]:
# Optional: PyTables is needed by pd.read_hdf in the (commented-out) HDF5
# conversion cell that follows.
# !conda install numpy pytables

In [9]:
# One-time conversion, kept for reference: read the mel spectrograms and the
# metadata table from the original HDF5 archive and re-save them as
# ./data/mel_sp.npy and ./data/metadata_total.pkl, which the next cell loads.
# fname = 'birds_xeno_spectr_slide_105_species_sr_32000_len_7_sec_500_250_New.h5'
# fileLoc = os.path.join(dataDir,fname)
# hf = h5py.File(fileLoc, 'r')
# mel_sp = hf.get('mel_spectr')[()]
# metadata_total = pd.read_hdf(fileLoc, 'info')
# hf.close()

# ##Saving files

# np.save('./data/mel_sp.npy', mel_sp)
# metadata_total.to_pickle('./data/metadata_total.pkl', protocol=4)

In [10]:
# Load the pre-computed mel-spectrogram array and the matching metadata table.
# numpy, pandas and os are already imported in the import cell at the top of
# the notebook, and the paths are built from dataDir so the data location is
# configured in a single place.
mel_sp = np.load(os.path.join(dataDir, 'mel_sp.npy'))

# NOTE(review): unpickling can execute arbitrary code; only load a
# metadata_total.pkl that comes from a trusted source.
metadata_total = pd.read_pickle(os.path.join(dataDir, 'metadata_total.pkl'))

# Labels are taken from metadata_total and paired with mel_sp row by row
# further down, so the two must describe the same number of samples.
assert len(mel_sp) == len(metadata_total), (
    f"mel_sp has {len(mel_sp)} samples but metadata_total has {len(metadata_total)} rows"
)


In [11]:
# Species name of every sample, in dataset order.
original_label = list(metadata_total['species'])

# Two encodings of the same labels: a binarized matrix (used as the training
# target) and integer class ids (used for stratified splitting).
lb_bin, lb_enc = LabelBinarizer(), LabelEncoder()
labels_one_hot = lb_bin.fit_transform(original_label)
labels_multi_lbl = lb_enc.fit_transform(original_label)

number_of_sample_classes = len(lb_enc.classes_)
print("Number of Species: ", number_of_sample_classes)

# Map each species name to its class index in the binarizer's class order.
species_id_class_dict_tp = {
    species_id: class_label
    for class_label, species_id in enumerate(lb_bin.classes_)
}

Number of Species:  105


In [12]:
# Normalize every sample with normalize_mel_sp_slides, cast it to float32 and
# insert a singleton axis at position -3, then stack the samples into one array.
mel_sp_normalized = np.array([
    np.expand_dims(normalize_mel_sp_slides(sample).astype('float32'), axis=-3)
    for sample in mel_sp
])

In [13]:
# Training hyper-parameters shared by every experiment below.
num_epoch = 50           # training epochs per cross-validation fold
batch_size = 2 * 16      # mini-batch size of the training loader
shuffleBatches = True    # passed as `shuffle` to the training DataLoader

## CNN configs

In [14]:
# Layer specifications for the CNN front-end, one list per candidate network.
# NOTE(review): presumably integers are conv output channels and 'M' marks a
# pooling step (VGG-style) - confirm against models.cnn_rnn.
# n_units is the CNN feature size fed to the first RNN layer; only the value
# matching the CNN actually used is left active.

# CNN1
cfg_cnn = [
    32, 'M',
    64, 64, 'M',
    128, 128, 128, 'M',
    128, 128, 128, 'M',
]
# n_units = 128*2

# CNN2
cfg_cnn2 = [
    32, 64, 'M',
    64, 64, 64, 'M',
    128, 128, 128, 'M',
    128, 128, 128, 'M',
    256, 256, 256, 'M',
]
# n_units = 256*2

# CNN3
cfg_cnn3 = [
    64, 64, 'M',
    128, 128, 'M',
    256, 256, 256, 'M',
    512, 512, 512, 'M',
    512, 512, 512, 'M',
]
n_units = 2 * 512

## RNN configs

For RNN, a list of configs could be provided for testing multiple configurations.

Each configuration is a dictionary whose keys are the ordered names of the RNN cells to stack. For example, to stack two GRU layers use 'GRU_0', 'GRU_1'; for one GRU followed by one LMU use 'GRU_0', 'LMU_1'; conversely, for an LMU followed by a GRU use 'LMU_0', 'GRU_1'. Currently supported RNN cells are LSTM, GRU, and LMU.

Each key maps to another dictionary with the following entries:
- `input_size`: input dimension of this RNN cell.
- `h_states_ctr`: number of internal states in the RNN cell (2 for LSTM, 1 for GRU, 2 for LMU).

In [15]:
hidden_size = 512

# Number of internal states carried by each supported cell type.
_h_states = {'LSTM': 2, 'LMU': 2, 'GRU': 1}

# Cell types of each stack to evaluate, listed from first to last layer.
_stacks = [
    ('LSTM', 'LSTM'),  # 2 layers of LSTM cell
    ('LMU', 'LMU'),    # 2 layers of LMU cell
    ('GRU', 'GRU'),    # 2 layers of GRU cell
    ('GRU', 'LMU'),    # 1 GRU cell and then 1 LMU cell
]

# One dict per stack, keyed '<CELL>_<layer index>'. The first layer takes the
# CNN features (n_units); every later layer takes the previous layer's
# hidden_size-dimensional output.
rnnConfigs = [
    {
        f'{cell}_{layer}': {
            'input_size': n_units if layer == 0 else hidden_size,
            'h_states_ctr': _h_states[cell],
        }
        for layer, cell in enumerate(stack)
    }
    for stack in _stacks
]

Make sure to assign a different `exp_no` to each experiment.

In [16]:
# Uncomment to force the experiments below to run on the CPU.
# device = torch.device('cpu')

In [None]:
def _build_cnn_rnn(rnn_cfg):
    """Return a freshly initialised CNN+RNN classifier for `rnn_cfg`, placed on `device`.

    Used both for training and for re-creating the architecture before the
    best checkpoint is loaded, so the two call sites cannot drift apart.
    The number of output classes comes from the fitted label encoder rather
    than a hard-coded literal.
    """
    net = cnn_rnn(cnnConfig=cfg_cnn3,
                  rnnConfig=rnn_cfg,
                  hidden_size=hidden_size,
                  # order=order,
                  # theta=theta,
                  num_classes=number_of_sample_classes)
    net.to(device)
    return net


# Batch size for the (unshuffled) validation and test loaders.
eval_batch_size = 32

# Run one 5-fold cross-validated experiment per RNN configuration.
exp_no_base = 0
exp_ctr = 0
for ii, cfg in enumerate(rnnConfigs):
    exp_ctr += 1
    exp_no = exp_no_base + exp_ctr

    # One result log per RNN configuration; every fold appends an entry to it.
    # NOTE(review): the file name says "100_species" although the data set has
    # 105 classes; left unchanged so existing result files keep their names.
    log_file_name = f'100_species_spectr_cnn_rnn_7sec_h_{hidden_size}_nl_{ii+1}_{exp_no}.p'
    store_ = log_results(file_name=log_file_name, results_dir = resultsDir)

    # shuffle is left at its default (False), so the folds are deterministic.
    skf = StratifiedKFold(n_splits=5, random_state=None)
    for exp_ind, (train_ind, test_ind) in enumerate(skf.split(mel_sp_normalized, labels_multi_lbl)):

        # Per-fold checkpoint of the model with the best validation accuracy.
        PATH_curr = os.path.join(tempDir, f'currentModel_cnn_rnn_{exp_no}_{exp_ind}.pt')
        saveModel = SaveBestModel(PATH=PATH_curr, monitor=-np.inf, verbose=True)

        # 4/5 of the data trains the model; the held-out fold is split in half
        # (stratified by class) into a validation set and a test set.
        X_train, X_test_p_valid = mel_sp_normalized[train_ind,:], mel_sp_normalized[test_ind,:]
        y_train, y_test_p_valid = labels_one_hot[train_ind], labels_one_hot[test_ind]
        y_test_p_valid_mlbl = labels_multi_lbl[test_ind]
        X_valid, X_test, y_valid, y_test = train_test_split(X_test_p_valid, y_test_p_valid,
                                                               test_size=0.5,
                                                               stratify=y_test_p_valid_mlbl,
                                                               random_state=42)

        print('X_train shape: ', X_train.shape)
        print('X_valid shape: ', X_valid.shape)
        print('X_test shape: ', X_test.shape)

        X_train, X_valid  = torch.from_numpy(X_train).float(), torch.from_numpy(X_valid).float()
        y_train, y_valid = torch.from_numpy(y_train).float(), torch.from_numpy(y_valid).float()

        train_use = data_utils.TensorDataset(X_train, y_train)
        train_loader = data_utils.DataLoader(train_use, batch_size=batch_size, shuffle=shuffleBatches)

        val_use = data_utils.TensorDataset(X_valid, y_valid)
        val_loader = data_utils.DataLoader(val_use, batch_size=eval_batch_size, shuffle=False)

        model = _build_cnn_rnn(cfg)
        optimizer = torch.optim.Adam(model.parameters(), lr = 0.0001, weight_decay=1e-7)

        val_acc_epochs = []
        val_loss_epochs = []
        for epoch in range(1, num_epoch+1):
            train_loss = train_seq(model, train_loader, optimizer, epoch,
                                    device,
                                    verbose=1, loss_fn = 'bceLogit')
            val_loss, val_acc = test_seq(model, val_loader,
                                        device,
                                        loss_fn = 'bceLogit')
            val_acc_epochs.append(val_acc)
            val_loss_epochs.append(val_loss)
            print('val loss = %f, val acc = %f'%(val_loss, val_acc))
            # Checkpoint whenever the validation accuracy improves.
            saveModel.check(model, val_acc, comp='max')

        # Evaluate the best validated model, not the last-epoch weights.
        # map_location keeps the load working when the checkpoint was written
        # from a different device than the one currently selected.
        model = _build_cnn_rnn(cfg)
        model.load_state_dict(torch.load(PATH_curr, map_location=device))

        X_test, y_test  = torch.from_numpy(X_test).float(), torch.from_numpy(y_test).float()

        test_use = data_utils.TensorDataset(X_test, y_test)
        test_loader = data_utils.DataLoader(test_use, batch_size=eval_batch_size, shuffle=False)
        test_loss, test_acc = test_seq(model, test_loader,
                                    device,
                                    loss_fn = 'bceLogit')
        print('test loss = %f, test acc = %f'%(test_loss, test_acc))

        log_ = dict(
                exp_ind = exp_ind,
                epochs = num_epoch,
                validation_accuracy = val_acc_epochs,
                validation_loss = val_loss_epochs,
                test_loss = test_loss,
                test_accuracy = test_acc,
                X_train_shape = X_train.shape,
                X_valid_shape = X_valid.shape,
                batch_size =batch_size,
        )
        store_.update(log_)

X_train shape:  (42000, 26, 1, 128, 32)
X_valid shape:  (5250, 26, 1, 128, 32)
X_test shape:  (5250, 26, 1, 128, 32)


val loss = 0.038860, val acc = 0.244762
saving best model...


val loss = 0.024702, val acc = 0.542857
saving best model...


val loss = 0.022304, val acc = 0.601524
saving best model...


val loss = 0.015481, val acc = 0.728381
saving best model...


val loss = 0.014521, val acc = 0.747048
saving best model...


val loss = 0.014108, val acc = 0.754857
saving best model...


val loss = 0.012254, val acc = 0.786286
saving best model...


val loss = 0.011131, val acc = 0.815810
saving best model...


val loss = 0.012750, val acc = 0.789524


val loss = 0.010584, val acc = 0.821524
saving best model...


val loss = 0.010612, val acc = 0.834857
saving best model...


val loss = 0.014679, val acc = 0.768000


val loss = 0.011761, val acc = 0.821143


val loss = 0.009722, val acc = 0.853143
saving best model...


val loss = 0.012079, val acc = 0.822667


val loss = 0.012077, val acc = 0.842857


val loss = 0.010506, val acc = 0.849905


val loss = 0.012220, val acc = 0.823810


val loss = 0.009672, val acc = 0.864000
saving best model...


val loss = 0.011039, val acc = 0.850857


val loss = 0.010547, val acc = 0.861524


val loss = 0.012299, val acc = 0.835619


val loss = 0.009719, val acc = 0.871810
saving best model...


val loss = 0.010453, val acc = 0.869905


val loss = 0.010665, val acc = 0.868571


val loss = 0.010755, val acc = 0.871429


val loss = 0.010496, val acc = 0.862095


val loss = 0.010263, val acc = 0.864762


val loss = 0.011242, val acc = 0.866095


val loss = 0.011428, val acc = 0.873905
saving best model...


val loss = 0.009773, val acc = 0.876571
saving best model...


val loss = 0.011596, val acc = 0.852000


val loss = 0.012463, val acc = 0.870286


val loss = 0.010631, val acc = 0.869524


val loss = 0.012047, val acc = 0.872381


val loss = 0.011697, val acc = 0.864571


val loss = 0.010548, val acc = 0.881714
saving best model...


val loss = 0.011496, val acc = 0.868571


val loss = 0.010650, val acc = 0.876571


val loss = 0.011759, val acc = 0.865333


val loss = 0.010640, val acc = 0.879810


val loss = 0.010600, val acc = 0.877905


val loss = 0.011122, val acc = 0.872190


val loss = 0.010579, val acc = 0.886095
saving best model...


val loss = 0.011996, val acc = 0.873143


val loss = 0.010349, val acc = 0.892952
saving best model...


val loss = 0.011645, val acc = 0.877714


val loss = 0.010728, val acc = 0.888381


val loss = 0.010696, val acc = 0.884571


val loss = 0.012079, val acc = 0.872571
test loss = 0.010501, test acc = 0.889905
X_train shape:  (42000, 26, 1, 128, 32)
X_valid shape:  (5250, 26, 1, 128, 32)
X_test shape:  (5250, 26, 1, 128, 32)


val loss = 0.035972, val acc = 0.317714
saving best model...


val loss = 0.028280, val acc = 0.474095
saving best model...


val loss = 0.018952, val acc = 0.664381
saving best model...


val loss = 0.015322, val acc = 0.732952
saving best model...


val loss = 0.016094, val acc = 0.718857


val loss = 0.012095, val acc = 0.783429
saving best model...


val loss = 0.012482, val acc = 0.781143


val loss = 0.013142, val acc = 0.775429


val loss = 0.015584, val acc = 0.752571


val loss = 0.009613, val acc = 0.841333
saving best model...


val loss = 0.012193, val acc = 0.812381


val loss = 0.010992, val acc = 0.827238


val loss = 0.009722, val acc = 0.848190
saving best model...


val loss = 0.012454, val acc = 0.817905


val loss = 0.010429, val acc = 0.837333


val loss = 0.011370, val acc = 0.832000


val loss = 0.010029, val acc = 0.855048
saving best model...


val loss = 0.009721, val acc = 0.857714
saving best model...


val loss = 0.013439, val acc = 0.830286


val loss = 0.009535, val acc = 0.859810
saving best model...


val loss = 0.011122, val acc = 0.858667


val loss = 0.010997, val acc = 0.858476


val loss = 0.010665, val acc = 0.862095
saving best model...


val loss = 0.009888, val acc = 0.874095
saving best model...


val loss = 0.012198, val acc = 0.842476


val loss = 0.010075, val acc = 0.871429


val loss = 0.013796, val acc = 0.816571


val loss = 0.014064, val acc = 0.840571


val loss = 0.011900, val acc = 0.855619


val loss = 0.009766, val acc = 0.885524
saving best model...


val loss = 0.009916, val acc = 0.872952


val loss = 0.011713, val acc = 0.850286


val loss = 0.011486, val acc = 0.871810


val loss = 0.011549, val acc = 0.877905


val loss = 0.010853, val acc = 0.864000


val loss = 0.010972, val acc = 0.870095


val loss = 0.010546, val acc = 0.877143


val loss = 0.009906, val acc = 0.868571


val loss = 0.009601, val acc = 0.882286


val loss = 0.009679, val acc = 0.887810
saving best model...


val loss = 0.010279, val acc = 0.876000


val loss = 0.009180, val acc = 0.881714


val loss = 0.010423, val acc = 0.880571


val loss = 0.011756, val acc = 0.883810


val loss = 0.012211, val acc = 0.847048


val loss = 0.010886, val acc = 0.870476


val loss = 0.010720, val acc = 0.875429


val loss = 0.010952, val acc = 0.886286


val loss = 0.010068, val acc = 0.878286


val loss = 0.011871, val acc = 0.872190
test loss = 0.009896, test acc = 0.888000
X_train shape:  (42000, 26, 1, 128, 32)
X_valid shape:  (5250, 26, 1, 128, 32)
X_test shape:  (5250, 26, 1, 128, 32)


val loss = 0.034818, val acc = 0.319619
saving best model...


val loss = 0.025530, val acc = 0.522476
saving best model...


val loss = 0.019456, val acc = 0.643429
saving best model...


val loss = 0.015217, val acc = 0.729143
saving best model...


val loss = 0.014579, val acc = 0.744190
saving best model...


val loss = 0.012153, val acc = 0.781905
saving best model...


val loss = 0.012441, val acc = 0.784000
saving best model...


val loss = 0.010070, val acc = 0.824190
saving best model...


