Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.

In [None]:
# pip install matplotlib
# !pip install opencv-python==4.5.5.64
# pip install torch==1.8.1+cu102 torchvision==0.9.1+cu102 torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html

In [1]:
import torch
import torch.optim as optim
import torch.utils.data as data_utils
import os
import numpy as np
from sklearn.preprocessing import LabelBinarizer, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold, StratifiedKFold
import csv
import pandas as pd
import matplotlib.pyplot as plt
import glob
import gc
import h5py
import pickle as pk

from utils import log_results, SaveBestModel, train_seq, test_seq
from utils import normalize_mel_sp_slides

from models import cnn_rnn

In [2]:
# Sanity check: report whether PyTorch can see a CUDA device.
torch.cuda.is_available()

True

In [3]:
# Run on the first CUDA GPU when one is available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [4]:
# Display the device selected above.
device

device(type='cuda', index=0)

# Set directories

In [5]:
# Directory layout used by the rest of the notebook.
dataDir = './data'      # the input array and metadata are read from here
resultsDir = 'Results'  # passed to log_results as results_dir
tempDir = 'temp'        # best-model checkpoints are written here

# exist_ok=True makes the creation idempotent and avoids the check-then-create
# race of calling os.path.exists() before os.makedirs().
for _out_dir in (resultsDir, tempDir):
    os.makedirs(_out_dir, exist_ok=True)

# Load data

In [5]:
# !conda install numpy pytables

In [None]:
# fname = 'birds_xeno_spectr_slide_105_species_sr_32000_len_7_sec_500_250_New.h5'
# fileLoc = os.path.join(dataDir,fname)
# hf = h5py.File(fileLoc, 'r')
# mel_sp = hf.get('mel_spectr')[()]
# metadata_total = pd.read_hdf(fileLoc, 'info')
# hf.close()

# ## Saving files (one-time conversion to the .npy / .pkl files loaded in the next cell)

# np.save('./data/mel_sp.npy', mel_sp)
# metadata_total.to_pickle('./data/metadata_total.pkl', protocol=4)

In [6]:
# Load the pre-extracted mel-spectrogram array and the per-sample metadata.
# numpy, pandas and os are already imported in the imports cell at the top of
# the notebook, so they are not re-imported here; paths are built from dataDir
# so the data location is configured in one place.
mel_sp = np.load(os.path.join(dataDir, 'mel_sp.npy'))

# NOTE(security): unpickling can execute arbitrary code -- only load
# metadata_total.pkl when it comes from a trusted source.
metadata_total = pd.read_pickle(os.path.join(dataDir, 'metadata_total.pkl'))


In [7]:
# Encode the species column twice: as a binarised (one-vs-all) matrix used as
# the training target, and as integer class ids used for stratified splitting.
original_label = list(metadata_total['species'])

lb_bin, lb_enc = LabelBinarizer(), LabelEncoder()
labels_one_hot = lb_bin.fit_transform(original_label)
labels_multi_lbl = lb_enc.fit_transform(original_label)

number_of_sample_classes = len(lb_enc.classes_)
print("Number of Species: ", number_of_sample_classes)

# Lookup from species id to its class index (position in lb_bin.classes_).
species_id_class_dict_tp = {
    species_id: class_label
    for class_label, species_id in enumerate(lb_bin.classes_)
}

Number of Species:  105


In [8]:
# Normalise every sample with normalize_mel_sp_slides, cast it to float32 and
# insert a singleton axis third from the end (the shapes printed by the
# training cell are (N, 26, 1, 128, 32)).
mel_sp_normalized = np.array([
    np.expand_dims(normalize_mel_sp_slides(sample).astype('float32'), axis=-3)
    for sample in mel_sp
])

In [9]:
# Training hyper-parameters shared by every experiment below.
batch_size = 32         # mini-batch size of the training DataLoader
shuffleBatches = True   # passed as `shuffle` to the training DataLoader
num_epoch = 50          # epochs trained per cross-validation fold

## CNN configs

In [10]:
# Candidate CNN layer specifications; the training loop passes cfg_cnn3 to
# cnn_rnn as `cnnConfig`.
# NOTE(review): presumably integers are conv output channels and 'M' marks a
# max-pool (the torchvision-VGG convention) -- confirm in models.cnn_rnn.

# CNN1 (original note: n_units = 128*2 for this config)
cfg_cnn = [
    32, 'M',
    64, 64, 'M',
    128, 128, 128, 'M',
    128, 128, 128, 'M',
]

# CNN2 (original note: n_units = 256*2 for this config)
cfg_cnn2 = [
    32, 64, 'M',
    64, 64, 64, 'M',
    128, 128, 128, 'M',
    128, 128, 128, 'M',
    256, 256, 256, 'M',
]

# CNN3
cfg_cnn3 = [
    64, 64, 'M',
    128, 128, 'M',
    256, 256, 256, 'M',
    512, 512, 512, 'M',
    512, 512, 512, 'M',
]
n_units = 1024  # 512*2; used as input_size of the first RNN layer in rnnConfigs

## RNN configs

For the RNN, a list of configs can be provided so that multiple configurations are tested in one run.

Each configuration element is a dictionary whose keys are the *ordered* names of the required RNN cells. For example, to stack two GRU layers, use 'GRU_0', 'GRU_1'; for one GRU followed by one LMU, use 'GRU_0', 'LMU_1'; conversely, for an LMU followed by a GRU, use 'LMU_0', 'GRU_1'. The currently supported RNN cells are LSTM, GRU, and LMU.

The value for each key is another dictionary with the entries:
- `input_size`: input dimension of this RNN cell
- `h_states_ctr`: number of internal states in the RNN cell (2 for LSTM, 1 for GRU, 2 for LMU)

In [11]:
hidden_size = 512

# One dictionary per experiment. Keys are '<CELL>_<layer index>' in stacking
# order; the first layer takes n_units inputs, later layers take hidden_size.
rnnConfigs = [
    # 2 layers of LSTM cell
    {
        'LSTM_0': dict(input_size=n_units, h_states_ctr=2),
        'LSTM_1': dict(input_size=hidden_size, h_states_ctr=2),
    },
    # 2 layers of LMU cell
    {
        'LMU_0': dict(input_size=n_units, h_states_ctr=2),
        'LMU_1': dict(input_size=hidden_size, h_states_ctr=2),
    },
    # 2 layers of GRU cell
    {
        'GRU_0': dict(input_size=n_units, h_states_ctr=1),
        'GRU_1': dict(input_size=hidden_size, h_states_ctr=1),
    },
    # 1 GRU cell and then 1 LMU cell
    {
        'GRU_0': dict(input_size=n_units, h_states_ctr=1),
        'LMU_1': dict(input_size=hidden_size, h_states_ctr=2),
    },
]

Make sure each experiment gets a different `exp_no` (change `exp_no_base` before re-running), since `exp_no` is part of the log and checkpoint file names.

In [12]:
# device = torch.device('cpu')

In [14]:
# Experiment driver.
#
# For every RNN configuration in rnnConfigs:
#   * run 5-fold stratified cross-validation over the normalised spectrograms;
#   * split each held-out fold 50/50 (stratified) into validation and test;
#   * train for num_epoch epochs, handing the validation accuracy of every
#     epoch to SaveBestModel.check(..., comp='max');
#   * reload the checkpoint stored at PATH_curr and evaluate it on the test
#     half, then append the fold's metrics to the experiment log.
exp_no_base = 0
eval_batch_size = 32  # batch size of the (unshuffled) validation / test loaders

# The network needs one output per column of the one-hot targets, so derive the
# class count from the labels instead of hard-coding it.
num_classes = labels_one_hot.shape[1]


def _build_model(rnn_cfg):
    """Return a freshly initialised cnn_rnn (cfg_cnn3 + `rnn_cfg`) moved to `device`."""
    net = cnn_rnn(cnnConfig=cfg_cnn3,
                  rnnConfig=rnn_cfg,
                  hidden_size=hidden_size,
                  # order=order,
                  # theta=theta,
                  num_classes=num_classes)
    net.to(device)
    return net


for exp_ctr, cfg in enumerate(rnnConfigs, start=1):
    exp_no = exp_no_base + exp_ctr
    log_file_name = f'100_species_spectr_cnn_rnn_7sec_h_{hidden_size}_nl_{exp_ctr}_{exp_no}.p'
    store_ = log_results(file_name=log_file_name, results_dir=resultsDir)

    skf = StratifiedKFold(n_splits=5, random_state=None)
    for exp_ind, (train_ind, test_ind) in enumerate(skf.split(mel_sp_normalized, labels_multi_lbl)):

        # One checkpoint file and one best-score tracker per (experiment, fold).
        PATH_curr = os.path.join(tempDir, f'currentModel_cnn_rnn_{exp_no}_{exp_ind}.pt')
        saveModel = SaveBestModel(PATH=PATH_curr, monitor=-np.inf, verbose=True)

        X_train, X_test_p_valid = mel_sp_normalized[train_ind, :], mel_sp_normalized[test_ind, :]
        y_train, y_test_p_valid = labels_one_hot[train_ind], labels_one_hot[test_ind]

        # Integer class ids of the held-out fold, needed only to stratify the
        # validation / test split below.
        y_test_p_valid_mlbl = labels_multi_lbl[test_ind]
        X_valid, X_test, y_valid, y_test = train_test_split(X_test_p_valid, y_test_p_valid,
                                                            test_size=0.5,
                                                            stratify=y_test_p_valid_mlbl,
                                                            random_state=42)

        print('X_train shape: ', X_train.shape)
        print('X_valid shape: ', X_valid.shape)
        print('X_test shape: ', X_test.shape)

        X_train, X_valid = torch.from_numpy(X_train).float(), torch.from_numpy(X_valid).float()
        y_train, y_valid = torch.from_numpy(y_train).float(), torch.from_numpy(y_valid).float()

        train_use = data_utils.TensorDataset(X_train, y_train)
        train_loader = data_utils.DataLoader(train_use, batch_size=batch_size, shuffle=shuffleBatches)

        val_use = data_utils.TensorDataset(X_valid, y_valid)
        val_loader = data_utils.DataLoader(val_use, batch_size=eval_batch_size, shuffle=False)

        model = _build_model(cfg)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-7)

        val_acc_epochs = []
        val_loss_epochs = []
        for epoch in range(1, num_epoch + 1):
            train_loss = train_seq(model, train_loader, optimizer, epoch,
                                   device,
                                   verbose=1, loss_fn='bceLogit')
            val_loss, val_acc = test_seq(model, val_loader,
                                         device,
                                         loss_fn='bceLogit')
            val_acc_epochs.append(val_acc)
            val_loss_epochs.append(val_loss)
            print('val loss = %f, val acc = %f'%(val_loss, val_acc))
            saveModel.check(model, val_acc, comp='max')

        # Load the best validated model into a fresh network. map_location lets
        # a checkpoint written on GPU be restored when `device` is the CPU.
        model = _build_model(cfg)
        model.load_state_dict(torch.load(PATH_curr, map_location=device))

        X_test, y_test = torch.from_numpy(X_test).float(), torch.from_numpy(y_test).float()

        test_use = data_utils.TensorDataset(X_test, y_test)
        test_loader = data_utils.DataLoader(test_use, batch_size=eval_batch_size, shuffle=False)
        test_loss, test_acc = test_seq(model, test_loader,
                                       device,
                                       loss_fn='bceLogit')
        print('test loss = %f, test acc = %f'%(test_loss, test_acc))

        # Per-fold record appended to this experiment's log.
        log_ = dict(
            exp_ind=exp_ind,
            epochs=num_epoch,
            validation_accuracy=val_acc_epochs,
            validation_loss=val_loss_epochs,
            test_loss=test_loss,
            test_accuracy=test_acc,
            X_train_shape=X_train.shape,
            X_valid_shape=X_valid.shape,
            batch_size=batch_size,
        )
        store_.update(log_)

X_train shape:  (42000, 26, 1, 128, 32)
X_valid shape:  (5250, 26, 1, 128, 32)
X_test shape:  (5250, 26, 1, 128, 32)


val loss = 0.037570, val acc = 0.270857
saving best model...


val loss = 0.024697, val acc = 0.553714
saving best model...


val loss = 0.019482, val acc = 0.660381
saving best model...


val loss = 0.015762, val acc = 0.722095
saving best model...


val loss = 0.019093, val acc = 0.656571


val loss = 0.012661, val acc = 0.781333
saving best model...


val loss = 0.011663, val acc = 0.804190
saving best model...


val loss = 0.011292, val acc = 0.801333


val loss = 0.010838, val acc = 0.815429
saving best model...


val loss = 0.011219, val acc = 0.814667


val loss = 0.012430, val acc = 0.809143


val loss = 0.009820, val acc = 0.838286
saving best model...


val loss = 0.009624, val acc = 0.850095
saving best model...


val loss = 0.010892, val acc = 0.828381


val loss = 0.010692, val acc = 0.846286


val loss = 0.012060, val acc = 0.832762


val loss = 0.010211, val acc = 0.862857
saving best model...


val loss = 0.010944, val acc = 0.852571


val loss = 0.010393, val acc = 0.866667
saving best model...


val loss = 0.011170, val acc = 0.855429


val loss = 0.010519, val acc = 0.859810


val loss = 0.011629, val acc = 0.841905


val loss = 0.010919, val acc = 0.855048


val loss = 0.012207, val acc = 0.841714


val loss = 0.011714, val acc = 0.841143


val loss = 0.013304, val acc = 0.834095


val loss = 0.010333, val acc = 0.874857
saving best model...


val loss = 0.009868, val acc = 0.882286
saving best model...


val loss = 0.011386, val acc = 0.857333


val loss = 0.011736, val acc = 0.866857


val loss = 0.013163, val acc = 0.828952


val loss = 0.011225, val acc = 0.864952


val loss = 0.011841, val acc = 0.868000


val loss = 0.010781, val acc = 0.882667
saving best model...


val loss = 0.010921, val acc = 0.868381


val loss = 0.012240, val acc = 0.864000


val loss = 0.010242, val acc = 0.878667


val loss = 0.011030, val acc = 0.875619


val loss = 0.010102, val acc = 0.883238
saving best model...


val loss = 0.011630, val acc = 0.876381


val loss = 0.011220, val acc = 0.884381
saving best model...


val loss = 0.010124, val acc = 0.883810


val loss = 0.011933, val acc = 0.872381


val loss = 0.011099, val acc = 0.880000


val loss = 0.011030, val acc = 0.877333


val loss = 0.011861, val acc = 0.860190


val loss = 0.011029, val acc = 0.888000
saving best model...


val loss = 0.011205, val acc = 0.878857


val loss = 0.010770, val acc = 0.891810
saving best model...


val loss = 0.014506, val acc = 0.836381


  model.load_state_dict(torch.load(PATH_curr))


test loss = 0.010446, test acc = 0.894286
X_train shape:  (42000, 26, 1, 128, 32)
X_valid shape:  (5250, 26, 1, 128, 32)
X_test shape:  (5250, 26, 1, 128, 32)


val loss = 0.036948, val acc = 0.276762
saving best model...


val loss = 0.025510, val acc = 0.520381
saving best model...


val loss = 0.020149, val acc = 0.641143
saving best model...


val loss = 0.016943, val acc = 0.693333
saving best model...


val loss = 0.015007, val acc = 0.731810
saving best model...


val loss = 0.014004, val acc = 0.769143
saving best model...


val loss = 0.012046, val acc = 0.796952
saving best model...


val loss = 0.012349, val acc = 0.791619


val loss = 0.011356, val acc = 0.804000
saving best model...


val loss = 0.010823, val acc = 0.827619
saving best model...


val loss = 0.010444, val acc = 0.829905
saving best model...


val loss = 0.009728, val acc = 0.842667
saving best model...


val loss = 0.010151, val acc = 0.844381
saving best model...


val loss = 0.010311, val acc = 0.838286


val loss = 0.010457, val acc = 0.848000
saving best model...


val loss = 0.011000, val acc = 0.850095
saving best model...


val loss = 0.009538, val acc = 0.859048
saving best model...


val loss = 0.009916, val acc = 0.871619
saving best model...


val loss = 0.020102, val acc = 0.734476


val loss = 0.011175, val acc = 0.843048


val loss = 0.011428, val acc = 0.858476


val loss = 0.010088, val acc = 0.871238


  if comp is 'min':
  elif comp is 'max':
  if comp is 'min':
  elif comp is 'max':
  if comp is 'min':
  elif comp is 'max':
  if comp is 'min':
  elif comp is 'max':
  if comp is 'min':
  elif comp is 'max':
  if comp is 'min':
  elif comp is 'max':


KeyboardInterrupt: 