# Wave classification using BERT

In [2]:
# import os
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
# import tensorflow as tf
# gpus = tf.config.experimental.list_physical_devices('GPU')
# tf.config.experimental.set_memory_growth(gpus[0], True)

# tf.config.experimental.set_virtual_device_configuration(
#           gpus[0],
#             [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1024 * 32)])

In [3]:
import os
import functools

import tensorflow as tf
from tensorflow.python.ops.signal import shape_ops
import numpy as np


In [4]:
fold_index = 0
task = 'home'
model_type = 'attention'

model_dir = './model'
data_dir = './data'



In [5]:
# tf.config.experimental.set_memory_growth(
#     tf.config.list_physical_devices('GPU')[0], allow_memory_growth
# )

In [6]:
from pathlib import Path
model_dir = Path(model_dir)
data_dir = Path(data_dir)
model_dir.mkdir(exist_ok=True, parents=True)

In [7]:
# Load training setup
import pandas as pd
from pathlib import Path
from libs.misc import wavio
from tqdm.notebook import tqdm

root_dir = data_dir / 'evaluation_setup'

data_cache = {}

def get_annotation(task, fold_index, target):
    df = pd.read_csv(
        root_dir / f'{task}_fold{fold_index+1}_{target}.txt', sep='\t', 
        header=None, names=['file', 'class', 'start', 'end', 'event']
    )
    df['id'] = df['file'].apply(lambda x: Path(x).stem)
    return df

def load_dataset(target):
    if target not in data_cache:
        df = get_annotation(task, fold_index, target)
        wav_dict = {}
        for file in tqdm(df['file'].unique()):
            wav_dict[Path(file).stem] = wavio.readwav(str(data_dir / file))
        data_cache[target] = (df, wav_dict)
        return df, wav_dict
    else:
        return data_cache[target]
df, wav_dict = load_dataset('train')

  0%|          | 0/7 [00:00<?, ?it/s]

In [8]:
from itertools import product
import numpy as np

sound_events = ['nan']

for fold, task_name in product([0,1,2,3], ['train', 'evaluate', 'test']):
    sound_events.extend(get_annotation('home', fold, task_name)['event'].unique())
sound_events = list(filter(lambda x: x != 'nan', np.unique(sound_events)))
sound_events = ['nan'] + sound_events
print(sound_events)

['nan', '(object) rustling', '(object) snapping', 'cupboard', 'cutlery', 'dishes', 'drawer', 'glass jingling', 'object impact', 'people walking', 'washing dishes', 'water tap running']


In [9]:
from functools import partial

n_augmentation = 5
perturbation = (0.0, 0.05)

def parse_wave(series, wav_dict, start=0.0, end=5.0, duration=5.0):    
    result = series.to_dict()
    sr, bw, audio = wav_dict[series['id']]
    
    data = np.zeros((int(duration * sr), 2), dtype=np.float32)
    mask = np.zeros((int(duration * sr), 1), dtype=np.bool)
    
    start = int(start * sr)
    end = int(end * sr)
    data[start:end] = audio[start:end]
    mask[start:end] = True
    
    result['id'] = series['id']
    result['start_index'] = max(start, 0)
    result['end_index'] = min(end, len(audio))
    result['sr'] = sr
    result['bw'] = bw
    result['audio'] = data
    result['mask'] = mask
    
    return pd.Series(result)

_id = df.loc[0, 'id']
sr = wav_dict[_id][0]
# display(Audio(wav_dict[_id][2][:, 0], rate=sr))
# display(Audio(
#     parse_wave(df.loc[0], wav_dict=wav_dict)['audio'][:, 0], rate=sr
# ))
# audio_df = df.apply(partial(parse_wave, wav_dict=wav_dict, window_size=5.0, hop_size=1.25), axis=1)

In [10]:
cache = {}

In [42]:
from functools import partial
import librosa
import joblib
import random 

n_mels = 128
n_sampling=4096
hop_length = n_sampling // 4
window='hann'
pad='constant'
max_time = 0.5
batch_size = 2

def preprocess(wav, sampling_rate):
    return np.concatenate([
        mono_preprocess(wav[..., 0], sampling_rate)[..., np.newaxis],
        mono_preprocess(wav[..., 1], sampling_rate)[..., np.newaxis],
    ], axis=-1)

def mono_preprocess(wav, sampling_rate):
    mag = librosa.feature.melspectrogram(
        wav, sr=sampling_rate, hop_length=hop_length, n_mels=n_mels,
        fmin=0.0, fmax=20e3,
    )
    logmag = np.log(mag + 1e8)
    return logmag

def normalize_time(audio_df, max_time):
    results = []
    for audio, sr in zip(audio_df['audio'], audio_df['sr']):
        max_len = int(sr * max_time)

        pos = min(len(audio), max_len)
        result = np.zeros((max_len, 2), np.float32)
        result[:pos, :] = audio[:pos]
        results.append(result)
    return results


def get_dataset(target):
    df, wav_dict = load_dataset(target)
    encoding_dict = {event: i for i, event in enumerate(sound_events)}
    
    def yield_wave():
        def _parse_wave(series):
            start = random.uniform(0.0, 0.1 * max_time)
            end = random.uniform(0.1 * max_time, max_time)
            end = end if (max_time - start) > (start + end) else (max_time - start)
            return parse_wave(series, wav_dict, start=start, end=end, duration=max_time)
        
        def normalize(audio):
            max_value = np.amax(audio)
            min_value = np.amin(audio)
            return (audio - min_value) / (max_value - min_value)
        
        for i in df.index:
            series = _parse_wave(df.loc[i])
            event = encoding_dict[series['event']]
            indices = np.linspace(0, series['audio'].shape[0] // 16 * 16, int(max_time * 16000), dtype=np.int32)
            yield normalize(series['audio'])[indices, :], series['mask'][indices, :], event
    return yield_wave
    
def get_tf_dataset(target, batch_size=32, shuffle=False, **kwargs):
    ds = tf.data.Dataset.from_generator(
        get_dataset(target, **kwargs),
        output_types=(tf.float32, tf.bool, tf.int32)
    )
    if shuffle:
        ds = ds.shuffle(buffer_size=1000, reshuffle_each_iteration=True)
    ds = ds.batch(batch_size, drop_remainder=shuffle)
    return ds

In [43]:
train_dataset = get_tf_dataset('train', batch_size=batch_size, shuffle=True)
test_dataset = get_tf_dataset('evaluate', batch_size=batch_size)

In [44]:
next(train_dataset.as_numpy_iterator())


(array([[[0.5414052 , 0.5414052 ],
         [0.5414052 , 0.5414052 ],
         [0.5414052 , 0.5414052 ],
         ...,
         [0.5414052 , 0.5414052 ],
         [0.5414052 , 0.5414052 ],
         [0.5414052 , 0.5414052 ]],
 
        [[0.42060146, 0.42060146],
         [0.42060146, 0.42060146],
         [0.42060146, 0.42060146],
         ...,
         [0.42060146, 0.42060146],
         [0.42060146, 0.42060146],
         [0.42060146, 0.42060146]]], dtype=float32),
 array([[[False],
         [False],
         [False],
         ...,
         [False],
         [False],
         [False]],
 
        [[False],
         [False],
         [False],
         ...,
         [False],
         [False],
         [False]]]),
 array([4, 1]))

In [45]:
from collections import defaultdict

class PlotCallback(tf.keras.callbacks.Callback):
    is_higher_better = {
        'accuracy'
    }
    is_linear = {
        'accuracy',
        'sparse_categorical_accuracy',
    }
    def __init__(self, targets=None, n_step=1):
        super().__init__()
        self.fig = None
        self.axes = None
        self.axes_index = {}
        self.n_step = n_step
        self.targets = targets
        self.epochs = []
        self.history = defaultdict(list)

    def plot_and_display(self):
        for ax in self.axes.flat:
            ax.clear()
        for i, (label, values) in enumerate(self.history.items()):
            if any(name in label for name in self.is_higher_better):
                get_best_value = np.amax
            else:
                get_best_value = np.amin
            
            if label.startswith('val_'):
                _label = label[4:]
            else:
                _label = label
            
            ax = self.axes.flat[self.axes_index[_label]]
            ax.plot(self.epochs, values, label=label, color=f'C{i}')
            best_value = get_best_value(values)
            ax.axhline(best_value, linestyle='--', color=f'C{i}')
            ax.text(0.0, best_value, f'{best_value:.3f}')
            
            if _label not in self.is_linear:
                ax.set_yscale('log')

        if self.epochs[-1] == 0:
            self.fig.legend()

        io = BytesIO()
        self.fig.savefig(io, format='png')

        clear_output(wait=True)
        display_png(Image(io.getvalue()))
        
    def on_epoch_end(self, epoch, logs):
        if epoch == 0:
            self.fig, self.axes = plt.subplots(len(logs) // 2, 1, figsize=(8, 4 * len(logs) // 2))
            self.axes_index = {}
            for label in logs:
                if label.startswith('val_'):
                    _label = label[4:]
                else:
                    _label = label
                if _label not in self.axes_index:
                    self.axes_index[_label] = len(self.axes_index)

        for key, value in logs.items():
            self.history[key].append(value)

        self.epochs.append(epoch)
        if (epoch % self.n_step) == 0:
            self.plot_and_display()

class BalancedSparseCategoricalAccuracy(tf.keras.metrics.SparseCategoricalAccuracy):
    def __init__(self, name='balanced_sparse_categorical_accuracy', dtype=None):
        super().__init__(name, dtype=dtype)

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_flat = y_true
        if y_true.shape.ndims == y_pred.shape.ndims:
            y_flat = tf.squeeze(y_flat, axis=[-1])
        y_true_int = tf.cast(y_flat, tf.int32)

        cls_counts = tf.math.bincount(y_true_int)
        cls_counts = tf.math.reciprocal_no_nan(tf.cast(cls_counts, self.dtype))
        weight = tf.gather(cls_counts, y_true_int)
        return super().update_state(y_true, y_pred, sample_weight=weight)
    
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, d_model, warmup_steps=4000):
        super(CustomSchedule, self).__init__()

        self.d_model = d_model
        self.d_model = tf.cast(self.d_model, tf.float32)

        self.warmup_steps = warmup_steps

    def __call__(self, step):
        arg1 = tf.math.rsqrt(step)
        arg2 = step * (self.warmup_steps ** -1.5)

        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)

In [56]:
import tensorflow as tf
from bigbird.core.encoder import EncoderStack
from bigbird.core import utils
from dataclasses import dataclass
from typing import Union

@dataclass
class EncoderConfig:
    hidden_size: int = 768
    seq_size: int = 4096
    intermediate_size: int = 3072
    hidden_mask_prob: float = 0.0 # Dropout probability for training
    num_attention_heads: int = 12
    num_hidden_layers: int = 8

    embedding_kernel:int = None # If None, use positional encoding instead of convolution.
    clip_embedding:int = None # Clip input value
    
def get_angles(pos, i, d_model):
    angle_rates = 1.0 / np.power(10000, (2 * (i // 2)) / np.float32(d_model))
    return pos * angle_rates

def positional_encoding(position, d_model):
    angle_rads = get_angles(np.arange(position)[:, np.newaxis],
                            np.arange(d_model)[np.newaxis, :],
                            d_model)

    # 配列中の偶数インデックスにはsinを適用; 2i
    angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])

    # 配列中の奇数インデックスにはcosを適用; 2i+1
    angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])

    pos_encoding = angle_rads[np.newaxis, ...]

    return tf.cast(pos_encoding, dtype=tf.float32)

class WaveEncoder(tf.keras.Model):
    def __init__(self, config: Union[EncoderConfig,dict]):
        super().__init__()

        if isinstance(config, dict):
            config = EncoderConfig(**config)
        self.config = config
        
        embedding_layers = []
        if config.hidden_mask_prob > 0:
            embedding_layers.append(tf.keras.layers.Dropout(config.hidden_mask_prob))
        if config.embedding_kernel is not None:
            embedding_layers.append(
                tf.keras.layers.Conv1D(config.hidden_size, kernel_size=config.embedding_kernel)
            )
        
        self.embedding = tf.keras.Sequential(embedding_layers)
        self.positional_encoding = positional_encoding(config.seq_size, config.hidden_size)

        _config = utils.get_default_config()
        _config.update({
            'num_attention_heads': config.num_attention_heads, 
            'num_hidden_layers': config.num_hidden_layers,
            'intermediate_size': config.intermediate_size,
            'hidden_size': config.hidden_size
        })
        self.encoder = EncoderStack(_config)

    def call(self, xs, mask=None, training=False):
        assert mask is not None
        embedding_output = self.embedding(xs, training=training)
        sequence_output = self.encoder(embedding_output, mask, training=training)
        return sequence_output

class PretrainingModel(tf.keras.Model):
    def __init__(self, n_ch: int, config:EncoderConfig = EncoderConfig()):
        super().__init__()
        self.model = tf.keras.Sequential([
            WaveEncoder(config),
            tf.keras.layers.Conv1D(64, kernel_size=21, padding='SAME'),
            tf.keras.layers.Conv1D(64, kernel_size=11, padding='SAME'),
            tf.keras.layers.Conv1D(n_ch, kernel_size=3, padding='SAME'),
            tf.keras.layers.Activation('tanh'),
        ])

    def call(self, xs, training=False, mask=None):
        if mask is None:
            mask = tf.ones_like(xs)
        return self.model(xs, mask=mask, training=training)

    def make_random_mask(self, xs):
        prob = tf.random.uniform((xs.shape[0], xs.shape[1], 1), 0.0, 1.0)
        drop_mask = prob <= 0.1
        random_mask = tf.math.logical_and(prob > 0.1, prob < 0.9)
        estimate_mask = prob > 0.1

        xs = tf.where(drop_mask, xs, 0.0)
        xs = tf.where(random_mask, xs, tf.random.uniform(tf.shape(xs), 0.0, 1.0))
        return xs, estimate_mask[..., 0]

    def train_step(self, data):
        xs = data
        with tf.GradientTape() as tape:
            xs_input, mask = self.make_random_mask(xs)
            ys_pred = self(xs_input, mask=mask, training=True)  # Forward pass
            # Compute the loss value.
            # The loss function is configured in `compile()`.
            
            ys = tf.boolean_mask(xs, mask)
            ys_pred = tf.boolean_mask(ys_pred, mask)
            loss = self.compiled_loss(
                ys, ys_pred,
                regularization_losses=self.losses,
            )

        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)

        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        self.compiled_metrics.update_state(ys, ys_pred)
        return {m.name: m.result() for m in self.metrics}   

    def test_step(self, data):
        xs = data
        xs_input, mask = self.make_random_mask(xs)
        # Compute predictions
        ys_pred = self(xs_input, mask=mask, training=False)
        # print(xs.shape, mask.shape, ys_pred.shape)
        # Updates the metrics tracking the loss
        ys = tf.boolean_mask(xs, mask)
        ys_pred = tf.boolean_mask(ys_pred, mask)
        self.compiled_loss(ys, ys_pred, regularization_losses=self.losses)
        # Update the metrics.
        self.compiled_metrics.update_state(ys, ys_pred)
        # Return a dict mapping metric names to current value.
        # Note that it will include the loss (tracked in self.metrics).
        return {m.name: m.result() for m in self.metrics}


In [57]:
#pretrain
import IPython 
from collections import defaultdict 
from itertools import product
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm, trange
import time
import seaborn as sns
from io import BytesIO
import imageio
from IPython.display import Image, display_png, clear_output

learning_rate = 1e-4

if 'model' in globals():
    del model

epochs = 2000
max_attention_heads = 8

model = PretrainingModel(
    2, 
    {
        # 'attention_type': 'simulated_sparse',
        'embedding_kernel':1, 
        'seq_size': 16000,
        'intermediate_size': 1024, 
        'hidden_size': max_attention_heads * 15 ,
        "num_attention_heads": max_attention_heads,
        "num_hidden_layers": 3,
    }
)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.5, beta_2=0.99)

checkpoint_name = f'{task}_pretrain_fold{fold_index}'
cur_model_dir = model_dir / checkpoint_name
cur_model_dir.mkdir(exist_ok=True, parents=True)

def get_train_dataset(dataset):
    return dataset.map(
        lambda wave, mask, label: wave
    )
tf.config.run_functions_eagerly(False)
plot_callback = PlotCallback(n_step=3)
with tf.device('/GPU:0'):
    model.compile(
        loss=tf.keras.losses.MeanSquaredError(),
        optimizer=optimizer,
        metrics=tf.keras.losses.MeanAbsoluteError(),
    )
    
    mode = 'min'
    model.fit(
        get_train_dataset(train_dataset),
        batch_size=batch_size, epochs=2000, shuffle=True,
        validation_data=get_train_dataset(test_dataset),
        callbacks=[
            tf.keras.callbacks.EarlyStopping(
                patience=50, 
                monitor='val_loss',
                #monitor='val_balanced_sparse_categorical_accuracy',
                mode=mode
            ),
            plot_callback,
            tf.keras.callbacks.TerminateOnNaN(),
            tf.keras.callbacks.ModelCheckpoint(
                str(cur_model_dir / (checkpoint_name + '.model')),
                monitor='val_loss',
                #monitor='val_balanced_sparse_categorical_accuracy', 
                save_best_only=True,
                save_weights_only=True,
                mode=mode, 
            )
        ]
    )

plot_callback.fig.tight_layout()
plot_callback.fig.savefig(str(cur_model_dir / (checkpoint_name + '.png')))

Epoch 1/2000


TypeError: in user code:

    c:\users\kazbi\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:805 train_function  *
        return step_function(self, iterator)
    c:\users\kazbi\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:795 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    c:\users\kazbi\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1259 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    c:\users\kazbi\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2730 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    c:\users\kazbi\anaconda3\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3417 _call_for_each_replica
        return fn(*args, **kwargs)
    c:\users\kazbi\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:788 run_step  **
        outputs = model.train_step(data)
    <ipython-input-56-7e2ea005bcbb>:101 train_step
        xs_input, mask = self.make_random_mask(xs)
    <ipython-input-56-7e2ea005bcbb>:89 make_random_mask
        prob = tf.random.uniform((xs.shape[0], xs.shape[1], 1), 0.0, 1.0)
    c:\users\kazbi\anaconda3\lib\site-packages\tensorflow\python\util\dispatch.py:201 wrapper
        return target(*args, **kwargs)
    c:\users\kazbi\anaconda3\lib\site-packages\tensorflow\python\ops\random_ops.py:289 random_uniform
        shape = tensor_util.shape_tensor(shape)
    c:\users\kazbi\anaconda3\lib\site-packages\tensorflow\python\framework\tensor_util.py:1035 shape_tensor
        return ops.convert_to_tensor(shape, dtype=dtype, name="shape")
    c:\users\kazbi\anaconda3\lib\site-packages\tensorflow\python\profiler\trace.py:163 wrapped
        return func(*args, **kwargs)
    c:\users\kazbi\anaconda3\lib\site-packages\tensorflow\python\framework\ops.py:1540 convert_to_tensor
        ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
    c:\users\kazbi\anaconda3\lib\site-packages\tensorflow\python\framework\constant_op.py:339 _constant_tensor_conversion_function
        return constant(v, dtype=dtype, name=name)
    c:\users\kazbi\anaconda3\lib\site-packages\tensorflow\python\framework\constant_op.py:265 constant
        allow_broadcast=True)
    c:\users\kazbi\anaconda3\lib\site-packages\tensorflow\python\framework\constant_op.py:283 _constant_impl
        allow_broadcast=allow_broadcast))
    c:\users\kazbi\anaconda3\lib\site-packages\tensorflow\python\framework\tensor_util.py:553 make_tensor_proto
        "supported type." % (type(values), values))

    TypeError: Failed to convert object of type <class 'tuple'> to Tensor. Contents: (None, None, 1). Consider casting elements to a supported type.


In [None]:
# utils.get_default_config()
next(get_train_dataset(train_dataset).as_numpy_iterator()).shape

In [None]:
import IPython 
from collections import defaultdict 
from itertools import product
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm, trange
import time
import seaborn as sns
from io import BytesIO
import imageio
from IPython.display import Image, display_png, clear_output

learning_rate = 1e-4

if 'model' in globals():
    del model
epochs = 2000
base_net = create_model()
base_net.add(tf.keras.layers.Activation('softmax'))
model = WavegramCNN(base_net)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.5, beta_2=0.99)

checkpoint_name = f'{task}_cnn_{model_type}_fold{fold_index}'
cur_model_dir = model_dir / checkpoint_name
cur_model_dir.mkdir(exist_ok=True, parents=True)

plot_callback = PlotCallback(n_step=3)
with tf.device('/GPU:0'):
    model.compile(
        loss=tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=False, reduction=tf.keras.losses.Reduction.SUM),
        optimizer=optimizer,
        metrics=BalancedSparseCategoricalAccuracy(),
#         options=tf.distribute.RunOptions(report_tensor_allocations_upon_oom = True)
    )
    
    mode = 'min'
    model.fit(
        train_dataset.map(lambda *vars_list: (vars_list[0], vars_list[-1])),
        batch_size=batch_size, epochs=2000, shuffle=True,
        validation_data=test_dataset.map(lambda *vars_list: (vars_list[0], vars_list[-1])),
        callbacks=[
            tf.keras.callbacks.EarlyStopping(
                patience=50, 
                monitor='val_loss',
                #monitor='val_balanced_sparse_categorical_accuracy',
                mode=mode
            ),
            plot_callback,
            tf.keras.callbacks.TerminateOnNaN(),
            tf.keras.callbacks.ModelCheckpoint(
                str(cur_model_dir / (checkpoint_name + '.model')),
                monitor='val_loss',
                #monitor='val_balanced_sparse_categorical_accuracy', 
                save_best_only=True,
                save_weights_only=True,
                mode=mode, 
            )
        ]
    )

plot_callback.fig.tight_layout()
plot_callback.fig.savefig(str(cur_model_dir / (checkpoint_name + '.png')))

Epoch 1/2000
    155/Unknown - 5s 35ms/step - loss: 4.7111 - balanced_sparse_categorical_accuracy: 0.2301

In [None]:
from itertools import product
from sklearn.metrics import accuracy_score
accuracy_data = []
model = create_model()
model.load_weights(str(cur_model_dir / (checkpoint_name + '.model')))
model.compile()

results = {}
for target_name, dataset in zip(['train', 'test'], (train_dataset, test_dataset)):
    pred_logits = model.predict(
        dataset.map(lambda audios, norm_audios, labels: (audios, labels))
    )
    pred_labels = tf.argmax(tf.nn.softmax(pred_logits, axis=1), axis=1)
    
    audio = []
    truth_labels = []
    for batch in dataset:
        audio.extend(batch[1].numpy())
        truth_labels.extend(batch[2].numpy())
    metric = tf.reduce_mean(tf.keras.metrics.sparse_categorical_accuracy(
        tf.convert_to_tensor(np.array(truth_labels).astype(np.int32)), 
        tf.convert_to_tensor(pred_logits.astype(np.float32))
    )).numpy()

    truth_labels = np.array(sound_events).take(truth_labels)
    pred_labels = np.array(sound_events).take(pred_labels)
    
    agg_df = pd.crosstab(
        pd.Series(truth_labels, name='Truth'),
        pd.Series(pred_labels, name='Prediction'),
    )
    agg_df = agg_df.reindex(columns=sound_events, index=sound_events, fill_value=0)
    display(target_name)
    display(agg_df)
    
    accuracy = {}
    for name in sound_events:
        mask = truth_labels == name
        accuracy[name] = accuracy_score(truth_labels[mask], pred_labels[mask]) 
    accuracy['Metric'] = metric
    accuracy_data.append(pd.Series(accuracy, name=target_name))
    
    results[target_name] = {
        'Audio': None if save_without_train and target_name == 'train' else audio,
        'Prediction': pd.DataFrame({
            'Prediction': pred_labels,
            'Truth': truth_labels,
        }),
        'Agg': agg_df,
        'Accuracy': accuracy_data,
    }
accuracy_df = pd.DataFrame(accuracy_data)
accuracy_df['Mean'] = accuracy_df.mean(axis=1)
display(accuracy_df.T)

In [None]:
import cloudpickle as pickle
with open(cur_model_dir / f'result_metric.pickle', 'wb+') as fp:
    pickle.dump(results, fp)

In [75]:
save_without_train and 'train' == 'train'

True