# Feature Class

In [None]:
# Contains routines for labels creation, features extraction and normalization
import os
import numpy as np
import scipy.io.wavfile as wav
from sklearn import preprocessing
import joblib
from IPython import embed
import matplotlib.pyplot as plot
import librosa
plot.switch_backend('agg')


class FeatureClass:
    """Routines for label creation, spectrogram feature extraction and feature normalization.

    One instance is bound to a single dataset combination ('<dataset>_dev' or '<dataset>_eval').
    Features are multichannel STFT spectrograms saved as one .npy file per recording; labels are
    frame-wise sound-event activity plus azimuth/elevation regression targets.
    """

    def __init__(self, dataset_dir='', feat_label_dir='', dataset='mic', is_eval=False):
        """Set up directory names and signal-processing constants.

        :param dataset_dir: base folder containing the '<dataset>_dev' / '<dataset>_eval' audio
            folders and the 'metadata_dev' folder
        :param feat_label_dir: folder where extracted features, labels and scaler weights are written
        :param dataset: dataset name used as folder/file prefix (default 'mic')
        :param is_eval: True for the evaluation split (no metadata, hence no labels)
        """
        # Input directories
        self._feat_label_dir = feat_label_dir
        self._dataset_dir = dataset_dir
        self._dataset_combination = '{}_{}'.format(dataset, 'eval' if is_eval else 'dev')
        self._aud_dir = os.path.join(self._dataset_dir, self._dataset_combination)

        # Metadata is only available for the development split
        self._desc_dir = None if is_eval else os.path.join(self._dataset_dir, 'metadata_dev')

        # Output directories (filled in lazily by the extract/preprocess methods)
        self._label_dir = None
        self._feat_dir = None
        self._feat_dir_norm = None

        # Local parameters
        self._is_eval = is_eval

        self._fs = 48000
        self._hop_len_s = 0.02
        self._hop_len = int(self._fs * self._hop_len_s)
        self._frame_res = self._fs / float(self._hop_len)  # frames per second
        self._nb_frames_1s = int(self._frame_res)

        self._win_len = 2 * self._hop_len
        self._nfft = self._next_greater_power_of_2(self._win_len)

        self._dataset = dataset
        # `np.float` was only an alias of the builtin `float` and was removed in NumPy 1.24
        self._eps = np.spacing(float(1e-16))
        self._nb_channels = 4

        # Sound event class name -> column index in the label matrices
        self._unique_classes = {
            'clearthroat': 2,
            'cough': 8,
            'doorslam': 9,
            'drawer': 1,
            'keyboard': 6,
            'keysDrop': 4,
            'knock': 0,
            'laughter': 10,
            'pageturn': 7,
            'phone': 3,
            'speech': 5
        }

        # Direction-of-arrival grid (degrees)
        self._doa_resolution = 10
        self._azi_list = range(-180, 180, self._doa_resolution)
        self._length = len(self._azi_list)
        self._ele_list = range(-40, 50, self._doa_resolution)
        self._height = len(self._ele_list)

        # Every recording is padded/truncated to 60 seconds
        self._audio_max_len_samples = 60 * self._fs

        # For regression task only: values written where a class is inactive.
        # They must lie outside the valid azimuth/elevation grids.
        self._default_azi = 180
        self._default_ele = 50

        if self._default_azi in self._azi_list:
            print('ERROR: chosen default_azi value {} should not exist in azi_list'.format(self._default_azi))
            exit()
        if self._default_ele in self._ele_list:
            print('ERROR: chosen default_ele value {} should not exist in ele_list'.format(self._default_ele))
            exit()

        self._max_frames = int(np.ceil(self._audio_max_len_samples / float(self._hop_len)))

    def _load_audio(self, audio_path):
        """Read a wav file and return it with exactly `_audio_max_len_samples` rows.

        :param audio_path: path of the wav file
        :return: (audio, fs) where audio has shape (samples, channels) and fs is the file's sample rate
        """
        fs, audio = wav.read(audio_path)
        # Scale by 32768 (assumes 16-bit PCM input - TODO confirm) and add eps to avoid exact zeros
        audio = audio[:, :self._nb_channels] / 32768.0 + self._eps
        if audio.shape[0] < self._audio_max_len_samples:
            zero_pad = np.zeros((self._audio_max_len_samples - audio.shape[0], audio.shape[1]))
            audio = np.vstack((audio, zero_pad))
        elif audio.shape[0] > self._audio_max_len_samples:
            audio = audio[:self._audio_max_len_samples, :]
        return audio, fs

    # INPUT FEATURES
    @staticmethod
    def _next_greater_power_of_2(x):
        """Return the smallest power of two that is >= the integer x."""
        return 2 ** (x - 1).bit_length()

    def _spectrogram(self, audio_input):
        """Compute the complex STFT of every channel.

        :param audio_input: array of shape (samples, channels)
        :return: complex array of shape (max_frames, nfft // 2, channels)
        """
        _nb_ch = audio_input.shape[1]
        nb_bins = self._nfft // 2
        spectra = np.zeros((self._max_frames, nb_bins, _nb_ch), dtype=complex)
        for ch_cnt in range(_nb_ch):
            stft_ch = librosa.core.stft(audio_input[:, ch_cnt], n_fft=self._nfft, hop_length=self._hop_len,
                                        win_length=self._win_len, window='hann')
            # Drop the first frequency bin and any frames beyond max_frames, then put time first
            spectra[:, :, ch_cnt] = stft_ch[1:, :self._max_frames].T
        return spectra

    def _extract_spectrogram_for_file(self, audio_filename):
        """Compute the spectrogram of one file in the audio folder and save it as .npy in `_feat_dir`."""
        audio_in, _ = self._load_audio(os.path.join(self._aud_dir, audio_filename))
        audio_spec = self._spectrogram(audio_in)
        # Flatten (bins, channels) into a single feature axis before saving
        np.save(os.path.join(self._feat_dir, '{}.npy'.format(audio_filename.split('.')[0])),
                audio_spec.reshape(self._max_frames, -1))

    # OUTPUT LABELS
    def read_desc_file(self, desc_filename, in_sec=False):
        """Parse a comma-separated metadata file into a dict of lists.

        The first line is skipped as a header. The columns read are, in order:
        class, onset, offset, elevation, azimuth.

        :param desc_filename: path of the metadata file
        :param in_sec: if True onsets/offsets are returned in seconds, otherwise in frame indices
        :return: dict with keys 'class', 'start', 'end', 'ele', 'azi'
        """
        desc_file = {
            'class': list(), 'start': list(), 'end': list(), 'ele': list(), 'azi': list()
        }
        # Context manager guarantees the file is closed even when a line fails to parse
        with open(desc_filename, 'r') as fid:
            next(fid)  # skip the header line
            for line in fid:
                stripped = line.strip()
                if not stripped:
                    # Tolerate blank (e.g. trailing) lines
                    continue
                split_line = stripped.split(',')
                desc_file['class'].append(split_line[0])
                if in_sec:
                    # return onset-offset time in seconds
                    desc_file['start'].append(float(split_line[1]))
                    desc_file['end'].append(float(split_line[2]))
                else:
                    # return onset-offset time in frames
                    desc_file['start'].append(int(np.floor(float(split_line[1]) * self._frame_res)))
                    desc_file['end'].append(int(np.ceil(float(split_line[2]) * self._frame_res)))
                desc_file['ele'].append(int(split_line[3]))
                desc_file['azi'].append(int(split_line[4]))
        return desc_file

    def get_list_index(self, azi, ele):
        """Map an (azimuth, elevation) pair in degrees to its flat index on the DOA grid."""
        azi = (azi - self._azi_list[0]) // self._doa_resolution
        ele = (ele - self._ele_list[0]) // self._doa_resolution
        return azi * self._height + ele

    def get_matrix_index(self, ind):
        """Inverse of `get_list_index`: map a flat grid index back to (azimuth, elevation) in degrees."""
        azi, ele = ind // self._height, ind % self._height
        azi = (azi * self._doa_resolution + self._azi_list[0])
        ele = (ele * self._doa_resolution + self._ele_list[0])
        return azi, ele

    def _get_doa_labels_regr(self, _desc_file):
        """Build frame-wise DOA regression targets.

        :param _desc_file: dict as returned by `read_desc_file` (frame units)
        :return: array of shape (max_frames, 2 * nb_classes); the first half holds azimuths, the
            second half elevations; inactive frames hold the default azimuth/elevation
        """
        azi_label = self._default_azi * np.ones((self._max_frames, len(self._unique_classes)))
        ele_label = self._default_ele * np.ones((self._max_frames, len(self._unique_classes)))
        for i, ele_ang in enumerate(_desc_file['ele']):
            start_frame = _desc_file['start'][i]
            end_frame = self._max_frames if _desc_file['end'][i] > self._max_frames else _desc_file['end'][i]
            azi_ang = _desc_file['azi'][i]
            class_ind = self._unique_classes[_desc_file['class'][i]]
            azi_ok = self._azi_list[0] <= azi_ang <= self._azi_list[-1]
            ele_ok = self._ele_list[0] <= ele_ang <= self._ele_list[-1]
            if azi_ok and ele_ok:
                # The offset frame itself is included in the active region
                azi_label[start_frame:end_frame + 1, class_ind] = azi_ang
                ele_label[start_frame:end_frame + 1, class_ind] = ele_ang
            else:
                print('bad_angle {} {}'.format(azi_ang, ele_ang))
        doa_label_regr = np.concatenate((azi_label, ele_label), axis=1)
        return doa_label_regr

    def _get_se_labels(self, _desc_file):
        """Build the frame-wise binary sound-event activity matrix of shape (max_frames, nb_classes)."""
        se_label = np.zeros((self._max_frames, len(self._unique_classes)))
        for i, se_class in enumerate(_desc_file['class']):
            start_frame = _desc_file['start'][i]
            end_frame = self._max_frames if _desc_file['end'][i] > self._max_frames else _desc_file['end'][i]
            se_label[start_frame:end_frame + 1, self._unique_classes[se_class]] = 1
        return se_label

    def get_labels_for_file(self, _desc_file):
        """Return the regression label matrix [activity | azimuth | elevation].

        :param _desc_file: dict as returned by `read_desc_file` (frame units)
        :return: array of shape (max_frames, 3 * nb_classes)
        """
        se_label = self._get_se_labels(_desc_file)
        doa_label = self._get_doa_labels_regr(_desc_file)
        label_mat = np.concatenate((se_label, doa_label), axis=1)
        return label_mat

    def get_clas_labels_for_file(self, _desc_file):
        """Return classification labels on the DOA grid.

        :param _desc_file: dict as returned by `read_desc_file` (frame units)
        :return: binary array of shape (max_frames, nb_classes, nb_azimuths * nb_elevations)
        """
        _labels = np.zeros((self._max_frames, len(self._unique_classes), len(self._azi_list) * len(self._ele_list)))
        for _ind, _start_frame in enumerate(_desc_file['start']):
            _tmp_class = self._unique_classes[_desc_file['class'][_ind]]
            _tmp_azi = _desc_file['azi'][_ind]
            _tmp_ele = _desc_file['ele'][_ind]
            _tmp_end = self._max_frames if _desc_file['end'][_ind] > self._max_frames else _desc_file['end'][_ind]
            _tmp_ind = self.get_list_index(_tmp_azi, _tmp_ele)
            _labels[_start_frame:_tmp_end + 1, _tmp_class, _tmp_ind] = 1

        return _labels

    # ------------------------------- EXTRACT FEATURE AND PREPROCESS IT -------------------------------
    def extract_all_feature(self):
        """Extract and save the spectrogram of every file found in the audio folder."""
        # setting up folders
        self._feat_dir = self.get_unnormalized_feat_dir()
        create_folder(self._feat_dir)

        # extraction starts
        print('Extracting spectrogram:')
        print('\t\taud_dir {}\n\t\tdesc_dir {}\n\t\tfeat_dir {}'.format(
            self._aud_dir, self._desc_dir, self._feat_dir))

        for file_cnt, file_name in enumerate(os.listdir(self._aud_dir)):
            print('{}: {}'.format(file_cnt, file_name))
            wav_filename = '{}.wav'.format(file_name.split('.')[0])
            self._extract_spectrogram_for_file(wav_filename)

    def preprocess_features(self):
        """Normalize the extracted features and write them to the normalized feature folder.

        For the development split a StandardScaler is fitted on [magnitude | phase] of all feature
        files and saved to the weights file; for the evaluation split the saved scaler is loaded.
        """
        # Setting up folders and filenames
        self._feat_dir = self.get_unnormalized_feat_dir()
        self._feat_dir_norm = self.get_normalized_feat_dir()
        create_folder(self._feat_dir_norm)
        normalized_features_wts_file = self.get_normalized_wts_file()
        spec_scaler = None

        # pre-processing starts
        if self._is_eval:
            # NOTE: joblib.load unpickles the file - only load weights files you produced yourself
            spec_scaler = joblib.load(normalized_features_wts_file)
            print('Normalized_features_wts_file: {}. Loaded.'.format(normalized_features_wts_file))

        else:
            print('Estimating weights for normalizing feature files:')
            print('\t\tfeat_dir: {}'.format(self._feat_dir))

            spec_scaler = preprocessing.StandardScaler()
            for file_cnt, file_name in enumerate(os.listdir(self._feat_dir)):
                print('{}: {}'.format(file_cnt, file_name))
                feat_file = np.load(os.path.join(self._feat_dir, file_name))
                # Incremental fit keeps only one file in memory at a time
                spec_scaler.partial_fit(np.concatenate((np.abs(feat_file), np.angle(feat_file)), axis=1))
                del feat_file
            joblib.dump(
                spec_scaler,
                normalized_features_wts_file
            )
            print('Normalized_features_wts_file: {}. Saved.'.format(normalized_features_wts_file))

        print('Normalizing feature files:')
        print('\t\tfeat_dir_norm {}'.format(self._feat_dir_norm))
        for file_cnt, file_name in enumerate(os.listdir(self._feat_dir)):
            print('{}: {}'.format(file_cnt, file_name))
            feat_file = np.load(os.path.join(self._feat_dir, file_name))
            feat_file = spec_scaler.transform(np.concatenate((np.abs(feat_file), np.angle(feat_file)), axis=1))
            np.save(
                os.path.join(self._feat_dir_norm, file_name),
                feat_file
            )
            del feat_file

        print('normalized files written to {}'.format(self._feat_dir_norm))

    # ------------------------------- EXTRACT LABELS AND PREPROCESS IT -------------------------------
    def extract_all_labels(self):
        """Create and save the regression label matrix for every metadata file.

        Only meaningful for the development split: in eval mode both the label folder and the
        metadata folder are None.
        """
        self._label_dir = self.get_label_dir()

        print('Extracting labels:')
        print('\t\taud_dir {}\n\t\tdesc_dir {}\n\t\tlabel_dir {}'.format(
            self._aud_dir, self._desc_dir, self._label_dir))
        create_folder(self._label_dir)

        for file_cnt, file_name in enumerate(os.listdir(self._desc_dir)):
            print('{}: {}'.format(file_cnt, file_name))
            wav_filename = '{}.wav'.format(file_name.split('.')[0])
            desc_file = self.read_desc_file(os.path.join(self._desc_dir, file_name))
            label_mat = self.get_labels_for_file(desc_file)
            np.save(os.path.join(self._label_dir, '{}.npy'.format(wav_filename.split('.')[0])), label_mat)

    # ------------------------------- Misc public functions -------------------------------
    def get_classes(self):
        """Return the class-name -> index dictionary."""
        return self._unique_classes

    def get_normalized_feat_dir(self):
        """Return the folder holding normalized features."""
        return os.path.join(
            self._feat_label_dir,
            '{}_norm'.format(self._dataset_combination)
        )

    def get_unnormalized_feat_dir(self):
        """Return the folder holding raw (unnormalized) features."""
        return os.path.join(
            self._feat_label_dir,
            '{}'.format(self._dataset_combination)
        )

    def get_label_dir(self):
        """Return the label folder, or None for the evaluation split."""
        if self._is_eval:
            return None
        else:
            return os.path.join(
                self._feat_label_dir, '{}_label'.format(self._dataset_combination)
            )

    def get_normalized_wts_file(self):
        """Return the path of the saved scaler (shared by the dev and eval splits of a dataset)."""
        return os.path.join(
            self._feat_label_dir,
            '{}_wts'.format(self._dataset)
        )

    def get_default_azi_ele_regr(self):
        """Return the (azimuth, elevation) values used for inactive frames."""
        return self._default_azi, self._default_ele

    def get_nb_channels(self):
        """Return the number of audio channels used."""
        return self._nb_channels

    def nb_frames_1s(self):
        """Return the number of feature frames per second."""
        return self._nb_frames_1s

    def get_hop_len_sec(self):
        """Return the hop length in seconds."""
        return self._hop_len_s

    def get_azi_ele_list(self):
        """Return the azimuth and elevation grids (ranges in degrees)."""
        return self._azi_list, self._ele_list

    def get_nb_frames(self):
        """Return the number of feature frames per recording."""
        return self._max_frames
    

def create_folder(folder_name):
    """Create `folder_name` (including missing parents) unless the path already exists."""
    if os.path.exists(folder_name):
        # Nothing to do: the path is already there
        return
    print('{} folder does not exist, creating it.'.format(folder_name))
    os.makedirs(folder_name)

# Batch Feature extraction

In [None]:
# Which splits to process; each split is enabled by a substring check below
process_str = 'dev, eval'

dataset_name = 'mic'
# Raw strings keep the Windows backslashes literal ('\s' is an invalid escape sequence
# that newer Python versions warn about)
dataset_dir = r'D:\sap'   # Base folder containing the mic and metadata folders
feat_label_dir = r'D:\sap\sap_feat'  # Directory to dump extracted features and labels


if 'dev' in process_str:
    # -------------- Extract features and labels for development set -----------------------------
    dev_feat_cls = FeatureClass(dataset=dataset_name, dataset_dir=dataset_dir,
                                feat_label_dir=feat_label_dir)

    # Extract features and normalize them (this also fits and saves the scaler)
    dev_feat_cls.extract_all_feature()
    dev_feat_cls.preprocess_features()

    # Extract labels in regression mode
    dev_feat_cls.extract_all_labels()


if 'eval' in process_str:
    # -----------------------------Extract ONLY features for evaluation set-----------------------------
    eval_feat_cls = FeatureClass(dataset=dataset_name, dataset_dir=dataset_dir,
                                 feat_label_dir=feat_label_dir, is_eval=True)

    # Extract features and normalize them (loads the scaler saved by the dev run)
    eval_feat_cls.extract_all_feature()
    eval_feat_cls.preprocess_features()

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  self._eps = np.spacing(np.float(1e-16))


D:\sap\sap_feat\mic_dev folder does not exist, creating it.
Extracting spectrogram:
		aud_dir D:\sap\mic_dev
		desc_dir D:\sap\metadata_dev
		feat_dir D:\sap\sap_feat\mic_dev
0: split1_ir0_ov1_1.wav
1: split1_ir0_ov1_10.wav
2: split1_ir0_ov1_2.wav
3: split1_ir0_ov1_3.wav
4: split1_ir0_ov1_4.wav
5: split1_ir0_ov1_5.wav
6: split1_ir0_ov1_6.wav
7: split1_ir0_ov1_7.wav
8: split1_ir0_ov1_8.wav
9: split1_ir0_ov1_9.wav
10: split1_ir0_ov2_11.wav
11: split1_ir0_ov2_12.wav
12: split1_ir0_ov2_13.wav
13: split1_ir0_ov2_14.wav
14: split1_ir0_ov2_15.wav
15: split1_ir0_ov2_16.wav
16: split1_ir0_ov2_17.wav
17: split1_ir0_ov2_18.wav
18: split1_ir0_ov2_19.wav
19: split1_ir0_ov2_20.wav
20: split1_ir1_ov1_21.wav
21: split1_ir1_ov1_22.wav
22: split1_ir1_ov1_23.wav
23: split1_ir1_ov1_24.wav
24: split1_ir1_ov1_25.wav
25: split1_ir1_ov1_26.wav
26: split1_ir1_ov1_27.wav
27: split1_ir1_ov1_28.wav
28: split1_ir1_ov1_29.wav
29: split1_ir1_ov1_30.wav
30: split1_ir1_ov2_31.wav
31: split1_ir1_ov2_32.wav
32: split1_i

  fs, audio = wav.read(audio_path)


71: split1_ir3_ov2_72.wav
72: split1_ir3_ov2_73.wav
73: split1_ir3_ov2_74.wav
74: split1_ir3_ov2_75.wav
75: split1_ir3_ov2_76.wav
76: split1_ir3_ov2_77.wav
77: split1_ir3_ov2_78.wav
78: split1_ir3_ov2_79.wav
79: split1_ir3_ov2_80.wav
80: split1_ir4_ov1_81.wav
81: split1_ir4_ov1_82.wav
82: split1_ir4_ov1_83.wav
83: split1_ir4_ov1_84.wav
84: split1_ir4_ov1_85.wav
85: split1_ir4_ov1_86.wav
86: split1_ir4_ov1_87.wav
87: split1_ir4_ov1_88.wav
88: split1_ir4_ov1_89.wav
89: split1_ir4_ov1_90.wav
90: split1_ir4_ov2_100.wav
91: split1_ir4_ov2_91.wav
92: split1_ir4_ov2_92.wav
93: split1_ir4_ov2_93.wav
94: split1_ir4_ov2_94.wav
95: split1_ir4_ov2_95.wav
96: split1_ir4_ov2_96.wav
97: split1_ir4_ov2_97.wav
98: split1_ir4_ov2_98.wav
99: split1_ir4_ov2_99.wav
100: split2_ir0_ov1_1.wav
101: split2_ir0_ov1_10.wav
102: split2_ir0_ov1_2.wav
103: split2_ir0_ov1_3.wav
104: split2_ir0_ov1_4.wav
105: split2_ir0_ov1_5.wav
106: split2_ir0_ov1_6.wav
107: split2_ir0_ov1_7.wav
108: split2_ir0_ov1_8.wav
109: split

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  self._eps = np.spacing(np.float(1e-16))


1: split0_10.wav
2: split0_100.wav
3: split0_11.wav
4: split0_12.wav
5: split0_13.wav
6: split0_14.wav
7: split0_15.wav
8: split0_16.wav
9: split0_17.wav
10: split0_18.wav
11: split0_19.wav
12: split0_2.wav
13: split0_20.wav
14: split0_21.wav
15: split0_22.wav
16: split0_23.wav
17: split0_24.wav
18: split0_25.wav
19: split0_26.wav
20: split0_27.wav
21: split0_28.wav
22: split0_29.wav
23: split0_3.wav
24: split0_30.wav
25: split0_31.wav
26: split0_32.wav
27: split0_33.wav
28: split0_34.wav
29: split0_35.wav
30: split0_36.wav
31: split0_37.wav
32: split0_38.wav
33: split0_39.wav
34: split0_4.wav
35: split0_40.wav
36: split0_41.wav
37: split0_42.wav
38: split0_43.wav
39: split0_44.wav
40: split0_45.wav
41: split0_46.wav
42: split0_47.wav
43: split0_48.wav
44: split0_49.wav
45: split0_5.wav
46: split0_50.wav
47: split0_51.wav
48: split0_52.wav
49: split0_53.wav
50: split0_54.wav
51: split0_55.wav
52: split0_56.wav
53: split0_57.wav
54: split0_58.wav
55: split0_59.wav
56: split0_6.wav
57: s

# Parameters

In [None]:

def get_params(argv):
    """Return the experiment configuration for the given preset id.

    :param argv: preset id as a string: '1' defaults, '2' mic/dev, '3' mic/eval, '4' foa/dev,
        '5' foa/eval, '999' quick test. Any other value prints an error and exits.
    :return: dict of parameters (paths, dataset selection and DNN hyper-parameters)
    """
    print("SET: {}".format(argv))
    # Windows paths are raw strings so the backslashes are never read as escape sequences
    params = dict(
        quick_test=False,

        # INPUT PATH
        dataset_dir=r'D:\sap',

        # OUTPUT PATH
        feat_label_dir=r'D:\sap\sap_feat',
        model_dir=r'D:\sap\model_lab',
        dcase_output=True,

        dcase_dir=r'D:\sap\seld-dcase2019-master\dcase',

        # DATASET LOADING PARAMETERS
        mode='dev',
        dataset='mic',

        # DNN MODEL PARAMETERS
        sequence_length=128,        # Feature sequence length
        batch_size=16,              # Batch size
        dropout_rate=0,             # Dropout rate, constant for all layers
        nb_cnn2d_filt=64,           # Number of CNN nodes, constant for each layer
        pool_size=[8, 8, 4],        # CNN pooling, length of list = number of CNN layers, list value = pooling per layer
        rnn_size=[128, 128],        # RNN contents, length of list = number of layers, list value = number of nodes
        fnn_size=[128],             # FNN contents, length of list = number of layers, list value = number of nodes
        loss_weights=[1., 50.],     # [sed, doa] weight for scaling the DNN outputs
        nb_epochs=20,               # Train for maximum epochs
        epochs_per_fit=5,           # Number of epochs per fit

    )
    # Patience is derived as 10% of the maximum number of epochs
    params['patience'] = int(0.1 * params['nb_epochs'])

    if argv == '1':
        print("USING DEFAULT PARAMETERS\n")

    elif argv == '2':
        params['mode'] = 'dev'
        params['dataset'] = 'mic'

    elif argv == '3':
        params['mode'] = 'eval'
        params['dataset'] = 'mic'

    elif argv == '4':
        params['mode'] = 'dev'
        params['dataset'] = 'foa'

    elif argv == '5':
        params['mode'] = 'eval'
        params['dataset'] = 'foa'

    # Quick test
    elif argv == '999':
        print("QUICK TEST MODE\n")
        params['quick_test'] = True
        params['epochs_per_fit'] = 1

    else:
        print('ERROR: unknown argument {}'.format(argv))
        exit()

    for key, value in params.items():
        print("\t{}: {}".format(key, value))
    return params

# Data Generator

In [None]:

import os
import numpy as np
from IPython import embed
from collections import deque
import random


class DataGenerator(object):
    """Endless batch generator over the normalized feature files (and labels for the dev split).

    Files are streamed through circular buffers and cut into batches of `batch_size` sequences
    of `seq_len` frames each.
    """

    def __init__(
            self, dataset='mic', feat_label_dir='', is_eval=False, split=1, batch_size=32, seq_len=64,
            shuffle=True, per_file=False
    ):
        """Collect the file list and derive feature/label sizes from the first file.

        :param dataset: dataset name passed on to FeatureClass
        :param feat_label_dir: folder containing the normalized features and labels
        :param is_eval: True for the evaluation split (features only, no labels)
        :param split: split number or list of split numbers to load
        :param batch_size: number of sequences per batch (overridden when per_file is True)
        :param seq_len: number of frames per sequence
        :param shuffle: shuffle the file order at the start of every pass
        :param per_file: if True, every batch holds exactly one (padded) file
        """
        self._per_file = per_file
        self._is_eval = is_eval
        self._splits = np.array(split)
        self._batch_size = batch_size
        self._seq_len = seq_len
        self._shuffle = shuffle
        self._feat_cls = FeatureClass(feat_label_dir=feat_label_dir, dataset=dataset, is_eval=is_eval)
        self._label_dir = self._feat_cls.get_label_dir()
        self._feat_dir = self._feat_cls.get_normalized_feat_dir()

        self._filenames_list = list()
        self._nb_frames_file = 0
        self._feat_len = None
        # Magnitude and phase are stored for every channel, hence twice the channel count
        self._2_nb_ch = 2 * self._feat_cls.get_nb_channels()
        self._label_len = None
        self._doa_len = None
        self._class_dict = self._feat_cls.get_classes()
        self._nb_classes = len(self._class_dict.keys())
        self._default_azi, self._default_ele = self._feat_cls.get_default_azi_ele_regr()
        self._get_filenames_list_and_feat_label_sizes()

        # Computed after the call above because per_file mode rewrites the batch size there
        self._batch_seq_len = self._batch_size*self._seq_len
        self._circ_buf_feat = None
        self._circ_buf_label = None

        if self._per_file:
            self._nb_total_batches = len(self._filenames_list)
        else:
            self._nb_total_batches = int(np.floor((len(self._filenames_list) * self._nb_frames_file /
                                                   float(self._seq_len * self._batch_size))))

        print(
            '\tDatagen_mode: {}, nb_files: {}, nb_classes:{}\n'
            '\tnb_frames_file: {}, feat_len: {}, nb_ch: {}, label_len:{}\n'.format(
                'eval' if self._is_eval else 'dev', len(self._filenames_list),  self._nb_classes,
                self._nb_frames_file, self._feat_len, self._2_nb_ch, self._label_len
                )
        )

        print(
            '\tDataset: {}, split: {}\n'
            '\tbatch_size: {}, seq_len: {}, shuffle: {}\n'
            '\tlabel_dir: {}\n '
            '\tfeat_dir: {}\n'.format(
                dataset, split,
                self._batch_size, self._seq_len, self._shuffle,
                self._label_dir, self._feat_dir
            )
        )

    def get_data_sizes(self):
        """Return (feature batch shape, [sed label shape, doa label shape]); labels are None in eval mode."""
        feat_shape = (self._batch_size, self._2_nb_ch, self._seq_len, self._feat_len)
        if self._is_eval:
            label_shape = None
        else:
            label_shape = [
                (self._batch_size, self._seq_len, self._nb_classes),
                (self._batch_size, self._seq_len, self._nb_classes*2)
            ]
        return feat_shape, label_shape

    def get_total_batches_in_data(self):
        """Return the number of batches produced in one pass over the data."""
        return self._nb_total_batches

    def _get_filenames_list_and_feat_label_sizes(self):
        """Select the files of the requested splits and read sizes from the first selected file."""
        for filename in os.listdir(self._feat_dir):
            # The 6th character of the file name is taken as the split number ('splitN...')
            if int(filename[5]) in self._splits:
                self._filenames_list.append(filename)

        temp_feat = np.load(os.path.join(self._feat_dir, self._filenames_list[0]))
        self._nb_frames_file = temp_feat.shape[0]
        self._feat_len = temp_feat.shape[1] // self._2_nb_ch

        if not self._is_eval:
            temp_label = np.load(os.path.join(self._label_dir, self._filenames_list[0]))
            self._label_len = temp_label.shape[-1]
            self._doa_len = (self._label_len - self._nb_classes)//self._nb_classes

        if self._per_file:
            # One batch must be able to hold a whole file
            self._batch_size = int(np.ceil(temp_feat.shape[0]/float(self._seq_len)))

        return

    def _drain(self, circ_buf, width):
        """Pop `_batch_seq_len` rows from the left of `circ_buf` into a (batch_seq_len, width) array."""
        out = np.zeros((self._batch_seq_len, width))
        for j in range(self._batch_seq_len):
            out[j, :] = circ_buf.popleft()
        return out

    def _feat_to_batch(self, feat):
        """Turn flat feature rows into a (batch, channels, seq_len, feat_len) array."""
        feat = np.reshape(feat, (self._batch_seq_len, self._feat_len, self._2_nb_ch))
        feat = self._split_in_seqs(feat)
        return np.transpose(feat, (0, 3, 1, 2))

    def generate(self):
        """
        Generates batches of samples
        :return: in eval mode the feature batch; otherwise (feature batch, [sed labels, doa labels])
        """

        while 1:
            if self._shuffle:
                random.shuffle(self._filenames_list)

            # Fresh buffers for every pass over the data
            self._circ_buf_feat = deque()
            self._circ_buf_label = deque()

            file_cnt = 0
            if self._is_eval:
                for i in range(self._nb_total_batches):
                    # Load files until the buffer holds at least one full batch
                    while len(self._circ_buf_feat) < self._batch_seq_len:
                        temp_feat = np.load(os.path.join(self._feat_dir, self._filenames_list[file_cnt]))

                        for row_cnt, row in enumerate(temp_feat):
                            self._circ_buf_feat.append(row)

                        # In per-file mode pad the file with small constant frames up to one batch
                        if self._per_file:
                            extra_frames = self._batch_seq_len - temp_feat.shape[0]
                            extra_feat = np.ones((extra_frames, temp_feat.shape[1])) * 1e-6

                            for row_cnt, row in enumerate(extra_feat):
                                self._circ_buf_feat.append(row)

                        file_cnt = file_cnt + 1

                    feat = self._drain(self._circ_buf_feat, self._feat_len * self._2_nb_ch)
                    feat = self._feat_to_batch(feat)

                    yield feat

            else:
                for i in range(self._nb_total_batches):
                    # Load files until the buffer holds at least one full batch
                    while len(self._circ_buf_feat) < self._batch_seq_len:
                        temp_feat = np.load(os.path.join(self._feat_dir, self._filenames_list[file_cnt]))
                        temp_label = np.load(os.path.join(self._label_dir, self._filenames_list[file_cnt]))

                        for row_cnt, row in enumerate(temp_feat):
                            self._circ_buf_feat.append(row)
                            self._circ_buf_label.append(temp_label[row_cnt])

                        # In per-file mode pad features with small constant frames and labels with
                        # "no event" rows (zero activity, default azimuth/elevation)
                        if self._per_file:
                            extra_frames = self._batch_seq_len - temp_feat.shape[0]
                            extra_feat = np.ones((extra_frames, temp_feat.shape[1])) * 1e-6

                            extra_labels = np.zeros((extra_frames, temp_label.shape[1]))
                            extra_labels[:, self._nb_classes:2 * self._nb_classes] = self._default_azi
                            extra_labels[:, 2 * self._nb_classes:] = self._default_ele

                            for row_cnt, row in enumerate(extra_feat):
                                self._circ_buf_feat.append(row)
                                self._circ_buf_label.append(extra_labels[row_cnt])

                        file_cnt = file_cnt + 1

                    feat = self._drain(self._circ_buf_feat, self._feat_len * self._2_nb_ch)
                    label = self._drain(self._circ_buf_label, self._label_len)

                    feat = self._feat_to_batch(feat)
                    label = self._split_in_seqs(label)

                    # Azimuth in radians
                    azi_rad = label[:, :, self._nb_classes:2 * self._nb_classes] * np.pi / 180

                    # Elevation is divided by the default elevation rather than 180
                    # NOTE(review): presumably this rescales elevation to the azimuth range - confirm
                    ele_rad = label[:, :, 2 * self._nb_classes:] * np.pi / self._default_ele

                    label = [
                        label[:, :, :self._nb_classes],
                        np.concatenate((azi_rad, ele_rad), -1)
                         ]

                    yield feat, label

    def _split_in_seqs(self, data):
        """Cut the first axis of `data` into chunks of `seq_len` frames.

        Trailing frames that do not fill a whole sequence are dropped.

        :param data: 1-D, 2-D or 3-D array with time on the first axis
        :return: array of shape (nb_seqs, seq_len, ...); 1-D input gets a trailing axis of size 1
        """
        nb_dims = len(data.shape)
        if nb_dims not in (1, 2, 3):
            print('ERROR: Unknown data dimensions: {}'.format(data.shape))
            exit()

        remainder = data.shape[0] % self._seq_len
        if remainder:
            # Slicing only the first axis works for every supported rank (including 1-D)
            data = data[:-remainder]

        trailing = (1,) if nb_dims == 1 else tuple(data.shape[1:])
        return data.reshape((data.shape[0] // self._seq_len, self._seq_len) + trailing)

    @staticmethod
    def split_multi_channels(data, num_channels):
        """Move channels that are concatenated along the last axis to their own axis.

        :param data: 3-D array (batch, time, num_channels * hop), or 4-D array with num_channels == 1
        :param num_channels: number of channels packed in the last axis
        :return: array of shape (batch, num_channels, time, hop) for 3-D input, or the 4-D input
            with a channel axis of size 1 inserted at position 1
        """
        tmp = None
        in_shape = data.shape
        if len(in_shape) == 3:
            # Integer division: the result is used as an array dimension and as slice bounds
            hop = in_shape[2] // num_channels
            tmp = np.zeros((in_shape[0], num_channels, in_shape[1], hop))
            for i in range(num_channels):
                tmp[:, i, :, :] = data[:, :, i * hop:(i + 1) * hop]
        elif len(in_shape) == 4 and num_channels == 1:
            tmp = np.zeros((in_shape[0], 1, in_shape[1], in_shape[2], in_shape[3]))
            tmp[:, 0, :, :, :] = data
        else:
            print('ERROR: The input should be a 3D matrix but it seems to have dimensions: {}'.format(in_shape))
            exit()
        return tmp

    def get_default_elevation(self):
        """Return the elevation value used for inactive frames."""
        return self._default_ele

    def get_azi_ele_list(self):
        """Return the azimuth and elevation grids of the feature class."""
        return self._feat_cls.get_azi_ele_list()

    def get_list_index(self, azi, ele):
        """Map (azimuth, elevation) to the flat grid index of the feature class."""
        return self._feat_cls.get_list_index(azi, ele)

    def get_matrix_index(self, ind):
        """Map a flat grid index back to (azimuth, elevation)."""
        return self._feat_cls.get_matrix_index(ind)

    def get_nb_classes(self):
        """Return the number of sound event classes."""
        return self._nb_classes

    def nb_frames_1s(self):
        """Return the number of feature frames per second."""
        return self._feat_cls.nb_frames_1s()

    def get_hop_len_sec(self):
        """Return the hop length in seconds."""
        return self._feat_cls.get_hop_len_sec()

    def get_classes(self):
        """Return the class-name -> index dictionary."""
        return self._feat_cls.get_classes()

    def get_filelist(self):
        """Return the list of feature file names in their current order."""
        return self._filenames_list

    def get_frame_per_file(self):
        """Return the number of frames in one batch (batch_size * seq_len)."""
        return self._batch_seq_len

    def get_nb_frames(self):
        """Return the number of feature frames per recording."""
        return self._feat_cls.get_nb_frames()

# Keras model

In [None]:
#
# The SELDnet architecture
#

from keras.layers import Bidirectional, Conv2D, MaxPooling2D, Input
from keras.layers.core import Dense, Activation, Dropout, Reshape, Permute
from keras.layers.recurrent import GRU
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.layers.wrappers import TimeDistributed
from keras.optimizers import Adam
import keras
keras.backend.set_image_data_format('channels_first')
from IPython import embed


def get_model(data_in, data_out, dropout_rate, nb_cnn2d_filt, pool_size,
              rnn_size, fnn_size, weights):
    """Build and compile the two-output (SED + DOA) convolutional recurrent model.

    :param data_in: input shape; only its last three entries are used as the
        shape of the model input (the batch axis is left to Keras).
    :param data_out: pair of output shapes; ``data_out[0][-1]`` is the number of
        SED output units and ``data_out[1][-1]`` the number of DOA output units.
    :param dropout_rate: dropout rate used after every CNN block, inside the
        GRUs (input and recurrent dropout) and after every hidden dense layer.
    :param nb_cnn2d_filt: number of filters of every 3x3 convolution.
    :param pool_size: iterable with one entry per CNN block; each entry is the
        max-pooling factor applied along the last axis only.
    :param rnn_size: iterable with the number of units of each bidirectional GRU.
    :param fnn_size: iterable with the width of each hidden time-distributed
        dense layer; the same widths are used for both output branches.
    :param weights: loss weights passed to ``compile`` for [SED, DOA].
    :return: the compiled Keras ``Model`` with outputs ``[sed_out, doa_out]``.
    """
    spec_start = Input(shape=(data_in[-3], data_in[-2], data_in[-1]))

    # CNN: conv -> batch-norm -> ReLU -> pooling on the last axis only -> dropout
    spec_cnn = spec_start
    for last_axis_pool in pool_size:
        spec_cnn = Conv2D(filters=nb_cnn2d_filt, kernel_size=(3, 3), padding='same')(spec_cnn)
        spec_cnn = BatchNormalization()(spec_cnn)
        spec_cnn = Activation('relu')(spec_cnn)
        spec_cnn = MaxPooling2D(pool_size=(1, last_axis_pool))(spec_cnn)
        spec_cnn = Dropout(dropout_rate)(spec_cnn)
    # Swap the first two non-batch axes so that the axis of length data_in[-2]
    # comes first (presumably the time axis -- confirm against the data generator).
    spec_cnn = Permute((2, 1, 3))(spec_cnn)

    # RNN: flatten everything but that leading axis, then stack bidirectional GRUs
    spec_rnn = Reshape((data_in[-2], -1))(spec_cnn)
    for nb_rnn_filt in rnn_size:
        spec_rnn = Bidirectional(
            GRU(nb_rnn_filt, activation='tanh', dropout=dropout_rate, recurrent_dropout=dropout_rate,
                return_sequences=True),
            merge_mode='mul'
        )(spec_rnn)

    # FC - DOA (linear output, trained with MSE)
    doa = spec_rnn
    for nb_fnn_filt in fnn_size:
        doa = TimeDistributed(Dense(nb_fnn_filt))(doa)
        doa = Dropout(dropout_rate)(doa)

    doa = TimeDistributed(Dense(data_out[1][-1]))(doa)
    doa = Activation('linear', name='doa_out')(doa)

    # FC - SED (sigmoid output, trained with binary cross-entropy)
    sed = spec_rnn
    for nb_fnn_filt in fnn_size:
        sed = TimeDistributed(Dense(nb_fnn_filt))(sed)
        sed = Dropout(dropout_rate)(sed)
    sed = TimeDistributed(Dense(data_out[0][-1]))(sed)
    sed = Activation('sigmoid', name='sed_out')(sed)

    model = Model(inputs=spec_start, outputs=[sed, doa])
    model.compile(optimizer=Adam(), loss=['binary_crossentropy', 'mse'], loss_weights=weights)

    model.summary()
    return model

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


# Evaluation metric

In [None]:

import numpy as np
from scipy.optimize import linear_sum_assignment
from IPython import embed
# Machine epsilon of the builtin float. ``np.float`` was only an alias of
# ``float``; it was deprecated in NumPy 1.20 and removed in 1.24.
eps = np.finfo(float).eps

class evaluation_metrics(object):
    """Accumulate SED and DOA statistics over several calls and report scores.

    Call ``update_sed_scores`` / ``update_doa_scores`` once per recording (or
    batch), then ``compute_sed_scores`` / ``compute_doa_scores`` to read the
    aggregated results. ``reset`` clears every accumulator.
    """

    def __init__(self, nb_frames_1s=None, data_gen=None):
        """
        :param nb_frames_1s: number of frames pooled into one block by the
            ``*_1sec`` methods.
        :param data_gen: object providing ``get_matrix_index``; only used by
            ``update_doa_scores``.
        """
        self._block_size = nb_frames_1s
        self._data_gen = data_gen
        self.reset()

    def _max_pool_blocks(self, O, T):
        """Max-pool ``O`` and ``T`` along axis 0 in blocks of ``self._block_size`` frames.

        The number of blocks and columns is taken from ``O``. The slice stop is
        ``start + block_size`` because Python slices exclude the stop index; the
        earlier ``- 1`` dropped the last frame of every block (and produced an
        empty slice for a block size of 1).
        """
        block = self._block_size
        nb_blocks = int(np.ceil(float(O.shape[0]) / block))
        O_block = np.zeros((nb_blocks, O.shape[1]))
        T_block = np.zeros((nb_blocks, O.shape[1]))
        for i in range(nb_blocks):
            start = int(i * block)
            stop = int(i * block + block)
            O_block[i, :] = np.max(O[start:stop, :], axis=0)
            T_block[i, :] = np.max(T[start:stop, :], axis=0)
        return O_block, T_block

    def f1_overall_framewise(self, O, T):
        """Add true positives, reference count and system count of (O, T) to the totals."""
        # 2 * T - O equals 1 only where both T and O are 1
        TP = ((2 * T - O) == 1).sum()
        Nref, Nsys = T.sum(), O.sum()
        self._TP += TP
        self._Nref += Nref
        self._Nsys += Nsys

    def er_overall_framewise(self, O, T):
        """Add substitutions, deletions and insertions of (O, T) to the totals."""
        FP = np.logical_and(T == 0, O == 1).sum(1)
        FN = np.logical_and(T == 1, O == 0).sum(1)
        # Per row: paired FP/FN count as substitutions, the surplus as D or I
        S = np.minimum(FP, FN).sum()
        D = np.maximum(0, FN - FP).sum()
        I = np.maximum(0, FP - FN).sum()
        self._S += S
        self._D += D
        self._I += I

    def f1_overall_1sec(self, O, T):
        """Pool (O, T) into blocks and accumulate the F1 counters on the blocks."""
        O_block, T_block = self._max_pool_blocks(O, T)
        return self.f1_overall_framewise(O_block, T_block)

    def er_overall_1sec(self, O, T):
        """Pool (O, T) into blocks and accumulate the error-rate counters on the blocks."""
        O_block, T_block = self._max_pool_blocks(O, T)
        return self.er_overall_framewise(O_block, T_block)

    def update_sed_scores(self, pred, gt):
        """Accumulate both F1 and error-rate counters for one (pred, gt) pair."""
        self.f1_overall_1sec(pred, gt)
        self.er_overall_1sec(pred, gt)

    def compute_sed_scores(self):
        """Return ``(ER, F)`` computed from the accumulated SED counters."""
        ER = (self._S + self._D + self._I) / (self._Nref + 0.0)

        prec = float(self._TP) / float(self._Nsys + eps)
        recall = float(self._TP) / float(self._Nref + eps)
        F = 2 * prec * recall / (prec + recall + eps)

        return ER, F

    def update_doa_scores(self, pred_doa_thresholded, gt_doa):
        """Accumulate DOA distance and peak-count statistics frame by frame.

        Each frame of both inputs is indexed with ``np.where(...)[1]``, so a
        frame must be at least 2-D; the second-axis indices are passed to
        ``self._data_gen.get_matrix_index``.
        """
        self._doa_loss_pred_cnt += np.sum(pred_doa_thresholded)
        self._nb_frames += pred_doa_thresholded.shape[0]

        for frame in range(pred_doa_thresholded.shape[0]):
            nb_gt_peaks = int(np.sum(gt_doa[frame, :]))
            nb_pred_peaks = int(np.sum(pred_doa_thresholded[frame, :]))

            # Compare the number of estimated and reference peaks
            if nb_gt_peaks == nb_pred_peaks:
                self._nb_good_pks += 1
            elif nb_gt_peaks > nb_pred_peaks:
                self._less_est_frame_cnt += 1
                self._less_est_cnt += (nb_gt_peaks - nb_pred_peaks)
            elif nb_pred_peaks > nb_gt_peaks:
                self._more_est_frame_cnt += 1
                self._more_est_cnt += (nb_pred_peaks - nb_gt_peaks)

            # The distance is only defined when both sides have at least one peak
            if nb_gt_peaks and nb_pred_peaks:
                pred_ind = np.where(pred_doa_thresholded[frame] == 1)[1]
                pred_list_rad = np.array(self._data_gen.get_matrix_index(pred_ind)) * np.pi / 180

                gt_ind = np.where(gt_doa[frame] == 1)[1]
                gt_list_rad = np.array(self._data_gen.get_matrix_index(gt_ind)) * np.pi / 180

                frame_dist = distance_between_gt_pred(gt_list_rad.T, pred_list_rad.T)
                self._doa_loss_pred += frame_dist

    def compute_doa_scores(self):
        """Return ``(doa_error, frame_recall)`` from the accumulated DOA counters.

        When no prediction (or no frame) has been accumulated the respective
        division is skipped instead of raising ``ZeroDivisionError``; the
        unnormalised loss and a recall of 0.0 are returned in that case.
        """
        doa_error = self._doa_loss_pred
        if self._doa_loss_pred_cnt:
            doa_error = self._doa_loss_pred / self._doa_loss_pred_cnt
        frame_recall = self._nb_good_pks / float(self._nb_frames) if self._nb_frames else 0.0
        return doa_error, frame_recall

    def reset(self):
        """Set every accumulator back to zero (block size and data_gen are kept)."""
        # SED params
        self._S = 0
        self._D = 0
        self._I = 0
        self._TP = 0
        self._Nref = 0
        self._Nsys = 0

        # DOA params
        self._doa_loss_pred_cnt = 0
        self._nb_frames = 0

        self._doa_loss_pred = 0
        self._nb_good_pks = 0

        self._less_est_cnt, self._less_est_frame_cnt = 0, 0
        self._more_est_cnt, self._more_est_frame_cnt = 0, 0



def reshape_3Dto2D(A):
    """Merge the first two axes of ``A``: (d0, d1, d2) becomes (d0 * d1, d2)."""
    dim0, dim1, dim2 = A.shape[0], A.shape[1], A.shape[2]
    return A.reshape(dim0 * dim1, dim2)


def f1_overall_framewise(O, T):
    """Return the F1 score of binary output ``O`` against binary target ``T``.

    3-D inputs are first flattened to 2-D with ``reshape_3Dto2D``.
    """
    if len(O.shape) == 3:
        O = reshape_3Dto2D(O)
        T = reshape_3Dto2D(T)

    # 2 * T - O equals 1 only where both T and O are 1
    true_pos = ((2 * T - O) == 1).sum()
    nb_ref = T.sum()
    nb_sys = O.sum()

    precision = float(true_pos) / float(nb_sys + eps)
    recall = float(true_pos) / float(nb_ref + eps)
    return 2 * precision * recall / (precision + recall + eps)


def er_overall_framewise(O, T):
    """Return the error rate (S + D + I) / Nref of binary ``O`` against binary ``T``.

    3-D inputs are first flattened to 2-D with ``reshape_3Dto2D``.
    """
    if len(O.shape) == 3:
        O = reshape_3Dto2D(O)
        T = reshape_3Dto2D(T)

    false_pos = np.logical_and(T == 0, O == 1).sum(1)
    false_neg = np.logical_and(T == 1, O == 0).sum(1)

    # Per row: paired FP/FN are substitutions, the surplus is a deletion or insertion
    substitutions = np.minimum(false_pos, false_neg).sum()
    deletions = np.maximum(0, false_neg - false_pos).sum()
    insertions = np.maximum(0, false_pos - false_neg).sum()

    return (substitutions + deletions + insertions) / (T.sum() + 0.0)


def f1_overall_1sec(O, T, block_size):
    """Return the F1 score after max-pooling ``O`` and ``T`` in blocks of ``block_size`` frames.

    3-D inputs are first flattened to 2-D with ``reshape_3Dto2D``. The number
    of blocks and columns is taken from ``O``.
    """
    if len(O.shape) == 3:
        O, T = reshape_3Dto2D(O), reshape_3Dto2D(T)
    new_size = int(np.ceil(float(O.shape[0]) / block_size))
    O_block = np.zeros((new_size, O.shape[1]))
    T_block = np.zeros((new_size, O.shape[1]))
    for i in range(new_size):
        start = int(i * block_size)
        # Slices exclude the stop index, so the stop is start + block_size; the
        # previous "- 1" silently dropped the last frame of every block.
        stop = int(i * block_size + block_size)
        O_block[i, :] = np.max(O[start:stop, :], axis=0)
        T_block[i, :] = np.max(T[start:stop, :], axis=0)
    return f1_overall_framewise(O_block, T_block)


def er_overall_1sec(O, T, block_size):
    """Return the error rate after max-pooling ``O`` and ``T`` in blocks of ``block_size`` frames.

    3-D inputs are first flattened to 2-D with ``reshape_3Dto2D``. The number
    of blocks and columns is taken from ``O``.
    """
    if len(O.shape) == 3:
        O, T = reshape_3Dto2D(O), reshape_3Dto2D(T)
    new_size = int(np.ceil(float(O.shape[0]) / block_size))
    O_block = np.zeros((new_size, O.shape[1]))
    T_block = np.zeros((new_size, O.shape[1]))
    for i in range(new_size):
        start = int(i * block_size)
        # Slices exclude the stop index, so the stop is start + block_size; the
        # previous "- 1" silently dropped the last frame of every block.
        stop = int(i * block_size + block_size)
        O_block[i, :] = np.max(O[start:stop, :], axis=0)
        T_block[i, :] = np.max(T[start:stop, :], axis=0)
    return er_overall_framewise(O_block, T_block)


def compute_sed_scores(pred, gt, nb_frames_1s):
    """Return ``[error_rate, f1]`` computed on blocks of ``nb_frames_1s`` frames."""
    f1_score = f1_overall_1sec(pred, gt, nb_frames_1s)
    error_rate = er_overall_1sec(pred, gt, nb_frames_1s)
    return [error_rate, f1_score]


def compute_doa_scores_regr(pred_doa_rad, gt_doa_rad, pred_sed, gt_sed):
    """Score regression-format DOA output against the reference, frame by frame.

    In every DOA row the first ``gt_sed.shape[-1]`` columns are read as
    azimuths and the remaining columns as elevations; only the entries whose
    SED flag equals 1 are compared.

    :return: ``[doa_error, frame_recall, nb_predicted_sources, nb_good_frames,
        over_estimated_sources, under_estimated_sources]``. ``doa_error`` is the
        summed assignment distance divided by the number of predicted sources
        (left undivided when that number is zero).
    """
    nb_frames = gt_doa_rad.shape[0]
    nb_sed = gt_sed.shape[-1]
    gt_counts = np.zeros(nb_frames).astype(int)
    pred_counts = np.zeros(nb_frames).astype(int)

    matched_frames = 0
    total_dist = 0.0
    under_est, over_est = 0, 0

    for idx, gt_active in enumerate(gt_sed):
        pred_active = pred_sed[idx]
        gt_counts[idx] = int(np.sum(gt_active))
        pred_counts[idx] = int(np.sum(pred_active))
        n_gt, n_pred = gt_counts[idx], pred_counts[idx]

        # Bookkeeping on how many sources were estimated versus referenced
        if n_gt > n_pred:
            under_est = under_est + n_gt - n_pred
        elif n_gt < n_pred:
            over_est = over_est + n_pred - n_gt
        else:
            matched_frames = matched_frames + 1

        # A distance only exists when both sides have at least one active source
        if not (n_gt and n_pred):
            continue

        gt_row, pred_row = gt_doa_rad[idx], pred_doa_rad[idx]
        gt_mask, pred_mask = gt_active == 1, pred_active == 1
        gt_pairs = np.vstack((gt_row[:nb_sed][gt_mask], gt_row[nb_sed:][gt_mask])).T
        pred_pairs = np.vstack((pred_row[:nb_sed][pred_mask], pred_row[nb_sed:][pred_mask])).T
        total_dist += distance_between_gt_pred(gt_pairs, pred_pairs)

    nb_pred_sources = np.sum(pred_counts)
    if nb_pred_sources:
        total_dist /= nb_pred_sources

    frame_recall = matched_frames / float(gt_sed.shape[0])
    return [total_dist, frame_recall, nb_pred_sources, matched_frames, over_est, under_est]


def compute_doa_scores_clas(pred_doa_thresholded, gt_doa, data_gen_test):
    """Score classification-format DOA output against the reference, frame by frame.

    Each frame is indexed with ``np.where(...)[1]``, so a frame must be at
    least 2-D; those second-axis indices are converted with
    ``data_gen_test.get_matrix_index`` and scaled by pi / 180.

    :return: ``[doa_error, frame_recall, nb_predicted_peaks, nb_good_frames,
        over_estimated_peaks, under_estimated_peaks]``. ``doa_error`` is the
        summed assignment distance divided by the number of predicted peaks
        (left undivided when that number is zero).
    """
    nb_frames = pred_doa_thresholded.shape[0]
    nb_pred_total = np.sum(pred_doa_thresholded)

    total_dist = 0
    matched_frames = 0
    under_est, over_est = 0, 0

    for frame in range(nb_frames):
        n_gt = int(np.sum(gt_doa[frame, :]))
        n_pred = int(np.sum(pred_doa_thresholded[frame, :]))

        # Bookkeeping on how many peaks were estimated versus referenced
        if n_gt > n_pred:
            under_est += n_gt - n_pred
        elif n_pred > n_gt:
            over_est += n_pred - n_gt
        else:
            matched_frames += 1

        # A distance only exists when both sides have at least one peak
        if not (n_gt and n_pred):
            continue

        pred_ind = np.where(pred_doa_thresholded[frame] == 1)[1]
        pred_list_rad = np.array(data_gen_test.get_matrix_index(pred_ind)) * np.pi / 180

        gt_ind = np.where(gt_doa[frame] == 1)[1]
        gt_list_rad = np.array(data_gen_test.get_matrix_index(gt_ind)) * np.pi / 180

        total_dist += distance_between_gt_pred(gt_list_rad.T, pred_list_rad.T)

    if nb_pred_total:
        total_dist /= nb_pred_total

    frame_recall = matched_frames / float(nb_frames)
    return [total_dist, frame_recall, nb_pred_total, matched_frames, over_est, under_est]


def distance_between_gt_pred(gt_list_rad, pred_list_rad):
    """Return the total distance of the cheapest one-to-one reference/prediction pairing.

    Both inputs are 2-D with one row per direction; column 0 is passed as
    azimuth and column 1 as elevation to
    ``distance_between_spherical_coordinates_rad``. The pairing is solved with
    ``linear_sum_assignment`` on the full pairwise cost matrix.
    """
    nb_gt = gt_list_rad.shape[0]
    nb_pred = pred_list_rad.shape[0]
    cost_mat = np.zeros((nb_gt, nb_pred))

    if nb_gt and nb_pred:
        # Every (reference, prediction) combination, reference index varying fastest
        gt_idx = np.tile(np.arange(nb_gt), nb_pred)
        pred_idx = np.repeat(np.arange(nb_pred), nb_gt)
        cost_mat[gt_idx, pred_idx] = distance_between_spherical_coordinates_rad(
            gt_list_rad[gt_idx, 0], gt_list_rad[gt_idx, 1],
            pred_list_rad[pred_idx, 0], pred_list_rad[pred_idx, 1])

    rows, cols = linear_sum_assignment(cost_mat)
    return cost_mat[rows, cols].sum()


def distance_between_spherical_coordinates_rad(az1, ele1, az2, ele2):
    """Return the angle, in degrees, between two directions given in radians."""
    vertical = np.sin(ele1) * np.sin(ele2)
    horizontal = np.cos(ele1) * np.cos(ele2) * np.cos(np.abs(az1 - az2))
    # Clip so rounding error cannot push the cosine outside arccos' domain
    cos_angle = np.clip(vertical + horizontal, -1, 1)
    return np.arccos(cos_angle) * 180 / np.pi


def distance_between_cartesian_coordinates(x1, y1, z1, x2, y2, z2):
    """Return ``2 * arcsin(d / 2)`` in degrees, where ``d`` is the Euclidean distance of the points."""
    dx, dy, dz = x1 - x2, y1 - y2, z1 - z2
    chord = np.sqrt(dx ** 2 + dy ** 2 + dz ** 2)
    return 2 * np.arcsin(chord / 2.0) * 180 / np.pi


def sph2cart(azimuth, elevation, r):
    """Convert spherical (azimuth, elevation, radius) to Cartesian ``(x, y, z)``."""
    horizontal = r * np.cos(elevation)
    return horizontal * np.cos(azimuth), horizontal * np.sin(azimuth), r * np.sin(elevation)


def cart2sph(x, y, z):
    """Convert Cartesian ``(x, y, z)`` to spherical ``(azimuth, elevation, r)``; angles in radians."""
    planar_sq = x**2 + y**2
    azimuth = np.arctan2(y, x)
    elevation = np.arctan2(z, np.sqrt(planar_sq))
    return azimuth, elevation, np.sqrt(planar_sq + z**2)


###############################################################
# SELD scoring functions
###############################################################


def compute_seld_metric(sed_error, doa_error):
    """Return the mean of ER, 1 - F1, DOA error / 180 and 1 - frame recall.

    :param sed_error: sequence whose first two entries are [error rate, F1].
    :param doa_error: sequence whose first two entries are [DOA error, frame recall].
    """
    components = [
        sed_error[0],
        1 - sed_error[1],
        doa_error[0] / 180,
        1 - doa_error[1],
    ]
    return np.mean(components)


def compute_seld_metrics_from_output_format_dict(_pred_dict, _gt_dict, _feat_cls):
    """Score a predicted output-format dictionary against a reference one.

    Both dictionaries are converted to classification labels; SED scores are
    computed on the labels reduced with ``max`` over axis 2, DOA scores on the
    full labels.

    :return: ``(seld_score, error_rate, f_score, doa_error, frame_recall)``.
    """
    gt_labels = output_format_dict_to_classification_labels(_gt_dict, _feat_cls)
    pred_labels = output_format_dict_to_classification_labels(_pred_dict, _feat_cls)

    error_rate, f_score = compute_sed_scores(
        pred_labels.max(2), gt_labels.max(2), _feat_cls.nb_frames_1s())
    # Only the first two of the six DOA statistics are reported
    doa_err, frame_recall, _, _, _, _ = compute_doa_scores_clas(pred_labels, gt_labels, _feat_cls)

    seld_score = compute_seld_metric([error_rate, f_score], [doa_err, frame_recall])
    return seld_score, error_rate, f_score, doa_err, frame_recall



def output_format_dict_to_classification_labels(_output_dict, _feat_cls):
    """Convert an output-format dictionary into a binary label array.

    :param _output_dict: maps a frame index to a list of
        ``[class_index, azimuth, elevation]`` entries. The dictionary is not
        modified (the previous implementation wrote the clipped angles back
        into the caller's entries).
    :param _feat_cls: object providing ``get_classes``, ``get_azi_ele_list``,
        ``get_nb_frames`` and ``get_list_index``.
    :return: array of shape ``(nb_frames, nb_classes, nb_azimuths * nb_elevations)``
        holding 1 at every listed (frame, class, direction) and 0 elsewhere.
        Frames at or beyond ``get_nb_frames()`` are ignored.
    """
    _nb_classes = len(_feat_cls.get_classes())
    _azi_list, _ele_list = _feat_cls.get_azi_ele_list()
    _max_frames = _feat_cls.get_nb_frames()
    _labels = np.zeros((_max_frames, _nb_classes, len(_azi_list) * len(_ele_list)))

    for _frame_cnt, _frame_doas in _output_dict.items():
        if _frame_cnt >= _max_frames:
            continue
        for _tmp_doa in _frame_doas:
            # Clip into locals so that angles outside the grid map to its border
            # without altering the caller's data
            _azi = np.clip(_tmp_doa[1], _azi_list[0], _azi_list[-1])
            _ele = np.clip(_tmp_doa[2], _ele_list[0], _ele_list[-1])

            _labels[_frame_cnt, _tmp_doa[0], int(_feat_cls.get_list_index(_azi, _ele))] = 1

    return _labels


def regression_label_format_to_output_format(_feat_cls, _sed_labels, _doa_labels_deg):
    """Convert regression-format labels into an output-format dictionary.

    The first ``len(_feat_cls.get_classes())`` columns of ``_doa_labels_deg``
    are read as azimuths, the remaining columns as elevations.

    :return: dict mapping each frame with at least one non-zero SED entry to a
        list of ``[class_index, azimuth, elevation]``; other frames are absent.
    """
    _nb_classes = len(_feat_cls.get_classes())
    _azi_labels = _doa_labels_deg[:, :_nb_classes]
    _ele_labels = _doa_labels_deg[:, _nb_classes:]

    _output_dict = {}
    for _frame_ind in range(_sed_labels.shape[0]):
        _active_classes = np.where(_sed_labels[_frame_ind, :])[0]
        if not len(_active_classes):
            continue
        _output_dict[_frame_ind] = [
            [_cls, _azi_labels[_frame_ind, _cls], _ele_labels[_frame_ind, _cls]]
            for _cls in _active_classes
        ]
    return _output_dict


def classification_label_format_to_output_format(_feat_cls, _labels):
    """Convert a (frame, class, direction) label array into an output-format dictionary.

    :return: dict mapping each frame with at least one active class to a list
        of ``[class_index, azimuth, elevation]``, where azimuth and elevation
        come from ``_feat_cls.get_matrix_index(direction_index)``.
    """
    _output_dict = {}
    for _frame_ind, _frame_labels in enumerate(_labels):
        _active_classes = np.where(_frame_labels.sum(1))[0]
        if not len(_active_classes):
            continue

        _entries = []
        _output_dict[_frame_ind] = _entries
        for _cls in _active_classes:
            for _spatial in np.where(_frame_labels[_cls])[0]:
                _azi, _ele = _feat_cls.get_matrix_index(_spatial)
                _entries.append([_cls, _azi, _ele])

    return _output_dict


def description_file_to_output_format(_desc_file_dict, _unique_classes, _hop_length_sec):
    """Expand event descriptions into a per-frame output-format dictionary.

    :param _desc_file_dict: dict with parallel lists under the keys ``'start'``,
        ``'end'``, ``'class'``, ``'azi'`` and ``'ele'``.
    :param _unique_classes: maps a class name to its class index.
    :param _hop_length_sec: frame hop; start and end times are divided by it
        and truncated with ``int`` to obtain frame indices.
    :return: dict mapping every frame from the start frame to the end frame
        (inclusive) to a list of ``[class_index, azimuth, elevation]``.
    """
    _output_dict = {}
    for _ind, _start_sec in enumerate(_desc_file_dict['start']):
        _cls = _unique_classes[_desc_file_dict['class'][_ind]]
        _azi = _desc_file_dict['azi'][_ind]
        _ele = _desc_file_dict['ele'][_ind]
        _end_sec = _desc_file_dict['end'][_ind]

        _first_frame = int(_start_sec / _hop_length_sec)
        _last_frame = int(_end_sec / _hop_length_sec)
        for _frame_ind in range(_first_frame, _last_frame + 1):
            # A fresh list per frame, so entries are never shared between frames
            _output_dict.setdefault(_frame_ind, []).append([_cls, _azi, _ele])

    return _output_dict


def load_output_format_file(_output_format_file):
    """Read a ``frame,class,azimuth,elevation`` CSV file into an output-format dictionary.

    :param _output_format_file: path of the file to read; every non-blank line
        must hold at least four comma-separated integers.
    :return: dict mapping a frame index to a list of
        ``[class_index, azimuth, elevation]`` (all ints), in file order.
    :raises ValueError: if a field of a non-blank line is not an integer.
    """
    _output_dict = {}
    # The context manager closes the file even when a line fails to parse
    with open(_output_format_file, 'r') as _fid:
        for _line in _fid:
            _line = _line.strip()
            if not _line:
                # Tolerate blank lines (e.g. a trailing newline at end of file)
                continue
            _words = _line.split(',')
            _frame_ind = int(_words[0])
            _output_dict.setdefault(_frame_ind, []).append(
                [int(_words[1]), int(_words[2]), int(_words[3])])
    return _output_dict


def write_output_format_file(_output_format_file, _output_format_dict):
    """Write an output-format dictionary as ``frame,class,azimuth,elevation`` lines.

    :param _output_format_file: path of the file to create or overwrite.
    :param _output_format_dict: maps a frame index to a list of
        ``[class_index, azimuth, elevation]``; every value is converted with
        ``int`` before writing. Frames are written in dictionary order.
    """
    # The context manager closes the file even when a value cannot be converted
    with open(_output_format_file, 'w') as _fid:
        for _frame_ind, _values in _output_format_dict.items():
            for _value in _values:
                _fid.write('{},{},{},{}\n'.format(
                    int(_frame_ind), int(_value[0]), int(_value[1]), int(_value[2])))

# SELD

In [None]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plot
from keras.models import load_model
import time
# Aliases for the classes defined earlier in this notebook; the names presumably
# mirror the modules these classes were imported from in a script version of
# this code -- confirm before removing, nothing in this cell reads them.
cls_data_generator = DataGenerator
cls_feature_class = FeatureClass

# Select matplotlib's 'agg' backend for the plots saved by plot_functions()
plot.switch_backend('agg')


def collect_test_labels(_data_gen_test, _data_out, quick_test):
    """Gather the reference SED and DOA labels produced by a data generator.

    :param _data_gen_test: object providing ``get_total_batches_in_data()`` and
        ``generate()``; the latter yields ``(features, [sed_labels, doa_labels])``.
    :param _data_out: pair of output shapes ``[sed_shape, doa_shape]``; the batch
        size and sequence length are taken from ``sed_shape``.
    :param quick_test: when true only two batches are collected.
    :return: ``(gt_sed, gt_doa)`` with the batches stacked along axis 0;
        ``gt_sed`` is cast to int.
    """
    # Collecting ground truth for test data
    nb_batch = 2 if quick_test else _data_gen_test.get_total_batches_in_data()

    sed_shape, doa_shape = _data_out[0], _data_out[1]
    batch_size = sed_shape[0]
    gt_sed = np.zeros((nb_batch * batch_size, sed_shape[1], sed_shape[2]))
    gt_doa = np.zeros((nb_batch * batch_size, sed_shape[1], doa_shape[2]))

    print("nb_batch in test: {}".format(nb_batch))
    for batch_ind, (_, batch_labels) in enumerate(_data_gen_test.generate()):
        rows = slice(batch_ind * batch_size, (batch_ind + 1) * batch_size)
        gt_sed[rows, :, :] = batch_labels[0]
        gt_doa[rows, :, :] = batch_labels[1]
        # Stop pulling from the generator once enough batches are stored
        if batch_ind + 1 == nb_batch:
            break
    return gt_sed.astype(int), gt_doa


def plot_functions(fig_name, _tr_loss, _val_loss, _sed_loss, _doa_loss, _epoch_metric_loss):
    """Save a three-panel figure of the training curves to ``fig_name``.

    Panel 1 shows train and validation loss; panel 2 the SED error rate and F1,
    the DOA error divided by 180, the DOA frame recall and the SELD score;
    panel 3 columns 2 and 3 of ``_doa_loss`` (labelled 'pred_pks' / 'good_pks').
    """
    plot.figure()
    epochs = range(len(_tr_loss))

    panels = (
        (311, (
            (_tr_loss, 'train loss'),
            (_val_loss, 'val loss'),
        )),
        (312, (
            (_sed_loss[:, 0], 'sed er'),
            (_sed_loss[:, 1], 'sed f1'),
            (_doa_loss[:, 0]/180., 'doa er / 180'),
            (_doa_loss[:, 1], 'doa fr'),
            (_epoch_metric_loss, 'seld'),
        )),
        (313, (
            (_doa_loss[:, 2], 'pred_pks'),
            (_doa_loss[:, 3], 'good_pks'),
        )),
    )
    for position, curves in panels:
        plot.subplot(position)
        for values, label in curves:
            plot.plot(epochs, values, label=label)
        plot.legend()
        plot.grid(True)

    plot.savefig(fig_name)
    plot.close()


def main(argv):
    """Train the SELD model, keep the best epoch on validation, then score the test split.

    :param argv: command-line style argument list; ``argv[1]`` (optional) selects
        the parameter set passed to ``get_params`` and the last entry (when at
        least three are given) is used as job id in the output file names.
    :raises ValueError: if ``params['mode']`` is neither ``'dev'`` nor ``'eval'``.

    The scoring helpers (``compute_sed_scores``, ``compute_doa_scores_regr``,
    ``compute_seld_metric``, ``regression_label_format_to_output_format`` and
    ``write_output_format_file``) are called as the module-level functions of
    this file: here ``evaluation_metrics`` names a class, and that class has no
    compatible attributes of those names.
    """
    if len(argv) != 3:
        print('\n\n')
        print('-------------------------------------------------------------------------------------------------------')
        print('The code expected two optional inputs')
        print('\t>> python seld.py <task-id> <job-id>')
        print('\t\t<task-id> is used to choose the user-defined parameter set from parameter.py')
        print('Using default inputs for now')
        print('\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
              'You can use any number or string for this.')
        print('-------------------------------------------------------------------------------------------------------')
        print('\n\n')

    # use parameter set defined by user
    task_id = '1' if len(argv) < 2 else argv[1]
    params = get_params(task_id)

    job_id = 1 if len(argv) < 3 else argv[-1]

    # Strings are compared with ==; identity ("is") only works by accident of interning
    is_dev = params['mode'] == 'dev'
    is_eval = params['mode'] == 'eval'

    if is_dev:
        test_splits = [3]
        val_splits = [2]
        train_splits = [[1]]
    elif is_eval:
        test_splits = [0]
        val_splits = [1]
        train_splits = [[2, 3, 4]]
    else:
        raise ValueError("Unknown mode {!r}: expected 'dev' or 'eval'".format(params['mode']))

    avg_scores_val = []
    avg_scores_test = []
    for split_cnt, split in enumerate(test_splits):
        print('\n\n---------------------------------------------------------------------------------------------------')
        print('------------------------------------      SPLIT {}   -----------------------------------------------'.format(split))
        print('---------------------------------------------------------------------------------------------------')

        # Unique name for the run
        create_folder(params['model_dir'])
        unique_name = '{}_{}_{}_{}_split{}'.format(
            task_id, job_id, params['dataset'], params['mode'], split
        )
        unique_name = os.path.join(params['model_dir'], unique_name)
        model_name = '{}_model.h5'.format(unique_name)
        print("unique_name: {}\n".format(unique_name))

        # Load train and validation data
        print('Loading training dataset:')
        data_gen_train = DataGenerator(
            dataset=params['dataset'], split=train_splits[split_cnt], batch_size=params['batch_size'],
            seq_len=params['sequence_length'], feat_label_dir=params['feat_label_dir']
        )

        print('Loading validation dataset:')
        data_gen_val = DataGenerator(
            dataset=params['dataset'], split=val_splits[split_cnt], batch_size=params['batch_size'],
            seq_len=params['sequence_length'], feat_label_dir=params['feat_label_dir'], shuffle=False
        )

        # Collect the reference labels of the validation data
        data_in, data_out = data_gen_train.get_data_sizes()
        print('FEATURES:\n\tdata_in: {}\n\tdata_out: {}\n'.format(data_in, data_out))

        gt = collect_test_labels(data_gen_val, data_out, params['quick_test'])
        sed_gt = reshape_3Dto2D(gt[0])
        doa_gt = reshape_3Dto2D(gt[1])

        # rescaling the reference elevation data from [-180 180] to [-def_elevation def_elevation] for scoring purpose
        nb_classes = data_gen_train.get_nb_classes()
        def_elevation = data_gen_train.get_default_elevation()
        doa_gt[:, nb_classes:] = doa_gt[:, nb_classes:] / (180. / def_elevation)

        print('MODEL:\n\tdropout_rate: {}\n\tCNN: nb_cnn_filt: {}, pool_size{}\n\trnn_size: {}, fnn_size: {}\n'.format(
            params['dropout_rate'], params['nb_cnn2d_filt'], params['pool_size'], params['rnn_size'],
            params['fnn_size']))

        model = get_model(data_in=data_in, data_out=data_out, dropout_rate=params['dropout_rate'],
                          nb_cnn2d_filt=params['nb_cnn2d_filt'], pool_size=params['pool_size'],
                          rnn_size=params['rnn_size'], fnn_size=params['fnn_size'],
                          weights=params['loss_weights'])

        best_seld_metric = 99999
        best_epoch = -1
        patience_cnt = 0
        seld_metric = np.zeros(params['nb_epochs'])
        tr_loss = np.zeros(params['nb_epochs'])
        val_loss = np.zeros(params['nb_epochs'])
        doa_metric = np.zeros((params['nb_epochs'], 6))
        sed_metric = np.zeros((params['nb_epochs'], 2))
        nb_epoch = 2 if params['quick_test'] else params['nb_epochs']

        # start training
        for epoch_cnt in range(nb_epoch):
            start = time.time()

            # train once per epoch
            hist = model.fit_generator(
                generator=data_gen_train.generate(),
                steps_per_epoch=2 if params['quick_test'] else data_gen_train.get_total_batches_in_data(),
                validation_data=data_gen_val.generate(),
                validation_steps=2 if params['quick_test'] else data_gen_val.get_total_batches_in_data(),
                epochs=params['epochs_per_fit'],
                verbose=2
            )
            tr_loss[epoch_cnt] = hist.history.get('loss')[-1]
            val_loss[epoch_cnt] = hist.history.get('val_loss')[-1]

            # predict once per epoch
            pred = model.predict_generator(
                generator=data_gen_val.generate(),
                steps=2 if params['quick_test'] else data_gen_val.get_total_batches_in_data(),
                verbose=2
            )

            # Calculate the metrics
            sed_pred = reshape_3Dto2D(pred[0]) > 0.5
            doa_pred = reshape_3Dto2D(pred[1])

            # rescaling the elevation data from [-180 180] to [-def_elevation def_elevation] for scoring purpose
            doa_pred[:, nb_classes:] = doa_pred[:, nb_classes:] / (180. / def_elevation)

            sed_metric[epoch_cnt, :] = compute_sed_scores(sed_pred, sed_gt, data_gen_val.nb_frames_1s())
            doa_metric[epoch_cnt, :] = compute_doa_scores_regr(doa_pred, doa_gt, sed_pred, sed_gt)
            seld_metric[epoch_cnt] = compute_seld_metric(sed_metric[epoch_cnt, :], doa_metric[epoch_cnt, :])

            # Visualize the metrics with respect to epochs
            plot_functions(unique_name, tr_loss, val_loss, sed_metric, doa_metric, seld_metric)

            # Keep the model of the best epoch; count epochs without improvement
            patience_cnt += 1
            if seld_metric[epoch_cnt] < best_seld_metric:
                best_seld_metric = seld_metric[epoch_cnt]
                best_epoch = epoch_cnt
                model.save(model_name)
                patience_cnt = 0

            print(
                'epoch_cnt: %d, time: %.2fs, tr_loss: %.2f, val_loss: %.2f, '
                'ER_overall: %.2f, F1_overall: %.2f, '
                'doa_error_pred: %.2f, good_pks_ratio:%.2f, '
                'seld_score: %.2f, best_seld_score: %.2f, best_epoch : %d\n' %
                (
                    epoch_cnt, time.time() - start, tr_loss[epoch_cnt], val_loss[epoch_cnt],
                    sed_metric[epoch_cnt, 0], sed_metric[epoch_cnt, 1],
                    doa_metric[epoch_cnt, 0], doa_metric[epoch_cnt, 1],
                    seld_metric[epoch_cnt], best_seld_metric, best_epoch
                )
            )
            if patience_cnt > params['patience']:
                break

        avg_scores_val.append([sed_metric[best_epoch, 0], sed_metric[best_epoch, 1], doa_metric[best_epoch, 0],
                               doa_metric[best_epoch, 1], best_seld_metric])
        print('\nResults on validation split:')
        print('\tUnique_name: {} '.format(unique_name))
        print('\tSaved model for the best_epoch: {}'.format(best_epoch))
        print('\tSELD_score: {}'.format(best_seld_metric))
        print('\tDOA Metrics: DOA_error: {}, frame_recall: {}'.format(doa_metric[best_epoch, 0],
                                                                      doa_metric[best_epoch, 1]))
        print('\tSED Metrics: ER_overall: {}, F1_overall: {}\n'.format(sed_metric[best_epoch, 0],
                                                                       sed_metric[best_epoch, 1]))

        # ------------------  Calculate metric scores for unseen test split ---------------------------------
        print('Loading testing dataset:')
        data_gen_test = DataGenerator(
            dataset=params['dataset'], split=split, batch_size=params['batch_size'], seq_len=params['sequence_length'],
            feat_label_dir=params['feat_label_dir'], shuffle=False, per_file=params['dcase_output'],
            is_eval=is_eval
        )

        print('\nLoading the best model and predicting results on the testing split')
        model = load_model('{}_model.h5'.format(unique_name))
        pred_test = model.predict_generator(
            generator=data_gen_test.generate(),
            steps=2 if params['quick_test'] else data_gen_test.get_total_batches_in_data(),
            verbose=2
        )

        test_sed_pred = reshape_3Dto2D(pred_test[0]) > 0.5
        test_doa_pred = reshape_3Dto2D(pred_test[1])

        # rescaling the elevation data from [-180 180] to [-def_elevation def_elevation] for scoring purpose
        test_doa_pred[:, nb_classes:] = test_doa_pred[:, nb_classes:] / (180. / def_elevation)

        if params['dcase_output']:
            # Dump the results recording-wise, one CSV file per test recording
            dcase_dump_folder = os.path.join(params['dcase_dir'], '{}_{}_{}'.format(task_id, params['dataset'], params['mode']))
            create_folder(dcase_dump_folder)
            print('Dumping recording-wise results in: {}'.format(dcase_dump_folder))

            test_filelist = data_gen_test.get_filelist()
            max_frames_with_content = data_gen_test.get_nb_frames()
            frames_per_file = data_gen_test.get_frame_per_file()

            for file_cnt in range(test_sed_pred.shape[0]//frames_per_file):
                output_file = os.path.join(dcase_dump_folder, test_filelist[file_cnt].replace('.npy', '.csv'))
                dc = file_cnt * frames_per_file
                # Only the first max_frames_with_content frames of each file are written
                output_dict = regression_label_format_to_output_format(
                    data_gen_test,
                    test_sed_pred[dc:dc + max_frames_with_content, :],
                    test_doa_pred[dc:dc + max_frames_with_content, :] * 180 / np.pi
                )
                write_output_format_file(output_file, output_dict)

        if is_dev:
            test_data_in, test_data_out = data_gen_test.get_data_sizes()
            test_gt = collect_test_labels(data_gen_test, test_data_out, params['quick_test'])
            test_sed_gt = reshape_3Dto2D(test_gt[0])
            test_doa_gt = reshape_3Dto2D(test_gt[1])

            # rescaling the reference elevation data from [-180 180] to [-def_elevation def_elevation] for scoring purpose
            test_doa_gt[:, nb_classes:] = test_doa_gt[:, nb_classes:] / (180. / def_elevation)

            test_sed_loss = compute_sed_scores(test_sed_pred, test_sed_gt, data_gen_test.nb_frames_1s())
            test_doa_loss = compute_doa_scores_regr(test_doa_pred, test_doa_gt, test_sed_pred, test_sed_gt)
            test_metric_loss = compute_seld_metric(test_sed_loss, test_doa_loss)

            avg_scores_test.append([test_sed_loss[0], test_sed_loss[1], test_doa_loss[0], test_doa_loss[1], test_metric_loss])
            print('Results on test split:')
            print('\tSELD_score: {},  '.format(test_metric_loss))
            print('\tDOA Metrics: DOA_error: {}, frame_recall: {}'.format(test_doa_loss[0], test_doa_loss[1]))
            print('\tSED Metrics: ER_overall: {}, F1_overall: {}\n'.format(test_sed_loss[0], test_sed_loss[1]))

    print('\n\nValidation split scores per fold:\n')
    for cnt, scores in enumerate(avg_scores_val):
        print('\tSplit {} - SED ER: {} F1: {}; DOA error: {} frame recall: {}; SELD score: {}'.format(cnt, scores[0], scores[1], scores[2], scores[3], scores[4]))

    if is_dev:
        print('\n\nTesting split scores per fold:\n')
        for cnt, scores in enumerate(avg_scores_test):
            print('\tSplit {} - SED ER: {} F1: {}; DOA error: {} frame recall: {}; SELD score: {}'.format(cnt, scores[0], scores[1], scores[2], scores[3], scores[4]))


if __name__ == "__main__":
    # Run the pipeline; a ValueError or IOError raised by main() becomes the
    # exit message, otherwise main()'s return value is the exit status.
    try:
        exit_status = main(sys.argv)
    except (ValueError, IOError) as err:
        sys.exit(err)
    else:
        sys.exit(exit_status)



# Visualizing Output

In [None]:


import os
import PyQt5
import numpy as np
import librosa.display
import sys
sys.path.append(os.path.join(sys.path[0], '..'))
from metrics import evaluation_metrics
import cls_feature_class
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plot
# PyQt5 is imported above, so select the matching Qt5 backend. 'Qt4Agg' needs
# Qt4 bindings, which newer Matplotlib releases no longer support.
plot.switch_backend('Qt5Agg')



def collect_classwise_data(_in_dict):
    """Regroup a dictionary of per-key entries by the first field of each entry.

    :param _in_dict: mapping whose values are iterables of entries; positions
        0, 1 and 2 of every entry are read. (In this script the keys are used
        as frame indices and the fields as class, azimuth and elevation.)
    :return: dict keyed by entry[0]; each value is a list of
        [key, entry[0], entry[1], entry[2]] rows in iteration order.
    """
    _out_dict = {}
    for _frame, _entries in _in_dict.items():
        for _entry in _entries:
            _group = _entry[0]
            _row = [_frame, _group, _entry[1], _entry[2]]
            _out_dict.setdefault(_group, []).append(_row)
    return _out_dict


def plot_func(plot_data, hop_len_s, ind, plot_x_ax=False):
    """Scatter one column of class-wise data against time on the current axes.

    :param plot_data: dict mapping a class index to a list of rows, as built
        by collect_classwise_data(); the class index selects the marker colour.
    :param hop_len_s: factor applied to column 0 of each row to obtain the
        x coordinate (the caller passes the hop length in seconds).
    :param ind: column of each row drawn on the y axis.
    :param plot_x_ax: when False, the x-axis ticks and tick labels are hidden.
    """
    cmap = ['b', 'r', 'g', 'y', 'k', 'c', 'm', 'b', 'r', 'g', 'y', 'k', 'c', 'm']
    for class_ind, rows in plot_data.items():
        # Convert once and slice both columns from the same array
        data = np.array(rows)
        time_ax = data[:, 0] * hop_len_s
        y_ax = data[:, ind]
        plot.plot(time_ax, y_ax, marker='.', color=cmap[class_ind], linestyle='None', markersize=4)
    plot.grid()
    plot.xlim([0, 60])
    if not plot_x_ax:
        # tick_params expects booleans; a non-empty string such as 'off' is
        # truthy, so the legacy strings would leave the ticks visible.
        plot.tick_params(
            axis='x',
            which='both',
            bottom=False,
            top=False,
            labelbottom=False)


# --------------------------------- MAIN SCRIPT STARTS HERE -----------------------------------------

# Plots the spectrogram of one recording next to the reference and predicted
# SED / azimuth / elevation tracks.

# Hop length in seconds; converts frame indices to time on the x axis
hop_s = 0.02

# Raw strings: the backslashes in these Windows paths must stay literal and
# not be parsed as (invalid) escape sequences.
pred = r'D:\sap\seld-dcase2019-master\dcase\mic_dev\split1_ir0_ov1_2.csv'

ref_dir = r'D:\sap\metadata_dev'
aud_dir = r'D:\sap\mic_dev'

# Predicted events in output-file format
pred_dict = evaluation_metrics.load_output_format_file(pred)

# Reference events: the description file shares the prediction's base name
feat_cls = cls_feature_class.FeatureClass()
ref_filename = os.path.basename(pred)
ref_desc_dict = feat_cls.read_desc_file(os.path.join(ref_dir, ref_filename), in_sec=True)
ref_dict = evaluation_metrics.description_file_to_output_format(ref_desc_dict, feat_cls.get_classes(), hop_s)

# Regroup both dictionaries by class for plotting
pred_data = collect_classwise_data(pred_dict)
ref_data = collect_classwise_data(ref_dict)

nb_classes = len(feat_cls.get_classes())

# Spectrogram (in dB relative to the maximum) of the first audio channel
ref_filename = os.path.basename(pred).replace('.csv', '.wav')
audio, fs = feat_cls._load_audio(os.path.join(aud_dir, ref_filename))
stft = np.abs(np.squeeze(feat_cls._spectrogram(audio[:, :1])))
stft = librosa.amplitude_to_db(stft, ref=np.max)

plot.figure()
gs = gridspec.GridSpec(4, 4)

# Row 0: spectrogram, centred.
# NOTE(review): specshow is called without hop_length, so its time axis uses
# librosa's default hop - confirm this matches the hop used by _spectrogram.
ax0 = plot.subplot(gs[0, 1:3])
librosa.display.specshow(stft.T, sr=fs, x_axis='time', y_axis='linear')
plot.title('Spectrogram')

# Row 1: sound event detection, reference (left) vs. prediction (right)
ax1 = plot.subplot(gs[1, :2])
plot_func(ref_data, hop_s, ind=1)
plot.ylim([-1, nb_classes + 1])
plot.title('SED reference')

ax2 = plot.subplot(gs[1, 2:])
plot_func(pred_data, hop_s, ind=1)
plot.ylim([-1, nb_classes + 1])
plot.title('SED predicted')

# Row 2: azimuth
ax3 = plot.subplot(gs[2, :2])
plot_func(ref_data, hop_s, ind=2)
plot.ylim([-190, 190])
plot.title('Azimuth DOA reference')

ax4 = plot.subplot(gs[2, 2:])
plot_func(pred_data, hop_s, ind=2)
plot.ylim([-190, 190])
plot.title('Azimuth DOA predicted')

# Row 3: elevation; only this bottom row keeps its x-axis ticks
ax5 = plot.subplot(gs[3, :2])
plot_func(ref_data, hop_s, ind=3, plot_x_ax=True)
plot.ylim([-50, 50])
plot.title('Elevation DOA reference')

ax6 = plot.subplot(gs[3, 2:])
plot_func(pred_data, hop_s, ind=3, plot_x_ax=True)
plot.ylim([-50, 50])
plot.title('Elevation DOA predicted')

ax_lst = [ax0, ax1, ax2, ax3, ax4, ax5, ax6]
plot.show()



# SELD Metrics

In [None]:
import os
from metrics import evaluation_metrics
import cls_feature_class
import numpy as np


def get_nb_files(_pred_file_list, _group='split'):
    """Bucket file names by a single digit read from a fixed position in the name.

    :param _pred_file_list: iterable of file names
    :param _group: 'split', 'ir' or 'ov' reads the character at index 5, 9 or
        13 of each name and converts it to int; 'all' puts every file under key 0.
    :return: dict mapping the integer group id to the list of file names,
        in input order
    """
    _char_pos = {'split': 5, 'ir': 9, 'ov': 13}
    _cnt_dict = {}
    for _filename in _pred_file_list:
        _ind = 0 if _group == 'all' else int(_filename[_char_pos[_group]])
        _cnt_dict.setdefault(_ind, []).append(_filename)
    return _cnt_dict



# Computes SED / DOA / SELD scores for a folder of prediction files against
# the reference description files, overall and grouped by split / ov / ir.

# Raw strings keep the Windows backslashes literal; without the r-prefix the
# '\r' and '\2' in the results path are read as a carriage return and chr(2).
ref_desc_files = r'D:\sap\metadata_dev'
pred_output_format_files = r'D:\sap\results\2_mic_dev'

feat_cls = cls_feature_class.FeatureClass()
max_frames = feat_cls.get_nb_frames()
unique_classes = feat_cls.get_classes()
nb_classes = len(unique_classes)
azi_list, ele_list = feat_cls.get_azi_ele_list()

ref_files = os.listdir(ref_desc_files)
nb_ref_files = len(ref_files)

pred_files = os.listdir(pred_output_format_files)
nb_pred_files = len(pred_files)

# Every prediction file is paired with a reference file below, so the two
# folders must hold the same number of files.
if nb_ref_files != nb_pred_files:
    print('ERROR: Mismatch. Reference has {} and prediction has {} files'.format(nb_ref_files, nb_pred_files))
    # exit() is an interactive helper from the site module; raise SystemExit
    # directly and report failure through a non-zero status.
    raise SystemExit(1)

# Named seld_eval rather than eval so the builtin eval() is not shadowed
seld_eval = evaluation_metrics.SELDMetrics(nb_frames_1s=feat_cls.nb_frames_1s(), data_gen=feat_cls)


score_type_list = ['all', 'split', 'ov', 'ir']

print('\nCalculating {} scores for {}'.format(score_type_list, os.path.basename(pred_output_format_files)))

for score_type in score_type_list:
    print('\n\n---------------------------------------------------------------------------------------------------')
    print('------------------------------------  {}   ---------------------------------------------'.format('Total score' if score_type=='all' else 'score per {}'.format(score_type)))
    print('---------------------------------------------------------------------------------------------------')

    # Group the prediction files by the requested criterion
    split_cnt_dict = get_nb_files(pred_files, _group=score_type)

    # Score each group separately, in ascending order of its id
    for split_key in sorted(split_cnt_dict):
        # Start each group from a reset metrics object
        seld_eval.reset()
        for pred_file in split_cnt_dict[split_key]:
            # Load the predicted output-format file
            pred_dict = evaluation_metrics.load_output_format_file(os.path.join(pred_output_format_files, pred_file))

            # Load the matching reference description file
            gt_desc_file_dict = feat_cls.read_desc_file(os.path.join(ref_desc_files, pred_file.replace('.npy', '.csv')))

            # Convert both to classification-label arrays
            gt_labels = feat_cls.get_clas_labels_for_file(gt_desc_file_dict)
            pred_labels = evaluation_metrics.output_format_dict_to_classification_labels(pred_dict, feat_cls)

            # SED scores use the labels reduced with max over axis 2;
            # DOA scores use the full label arrays
            seld_eval.update_sed_scores(pred_labels.max(2), gt_labels.max(2))
            seld_eval.update_doa_scores(pred_labels, gt_labels)

        # Overall scores for this group
        er, f = seld_eval.compute_sed_scores()
        doa_err, frame_recall = seld_eval.compute_doa_scores()
        seld_scr = evaluation_metrics.compute_seld_metric([er, f], [doa_err, frame_recall])

        print('\nAverage score for {} {} data'.format(score_type, 'fold' if score_type=='all' else split_key))
        print('SELD score: {}'.format(seld_scr))
        print('SED metrics: er: {}, f:{}'.format(er, f))
        print('DOA metrics: doa error: {}, frame recall:{}'.format(doa_err, frame_recall))

