In [1]:
import time
from collections import defaultdict
from pathlib import Path
from statistics import mean

import pandas as pd
import torch
from scipy.io import wavfile
from scipy.signal import freqz
from torch import Tensor
from tqdm.notebook import tqdm

from s4drc.src.dataset import SignalTrainDatasetModule
from s4drc.src.model import S4Model
from s4drc.src.loss import forge_validation_criterions_by
from s4drc.src.module.db import convert_to_decibel

# Model Evaluation

This Jupyter notebook contains the routines used to evaluate the trained models.

Each individual evaluation task is wrapped in a function to prevent its variables from leaking into the global namespace.
All functions save their results to the local file system.

## <a id="preparatory-work">Preparatory Work</a>

In [2]:
# One checkpoint per experiment run (run-01 ... run-08) taking part in the evaluation.
# NOTE: despite the `_DIRS` suffix, every entry points at a `.ckpt` *file*;
# results are later written two levels up, i.e. into the run directory itself.
ALL_MODEL_DIRS = [
    Path(checkpoint_path)
    for checkpoint_path in (
        './experiment-result/run-01/checkpoints/epoch=41-step=61908.ckpt',
        './experiment-result/run-02/checkpoints/epoch=46-step=69278.ckpt',
        './experiment-result/run-03/checkpoints/epoch=58-step=86966.ckpt',
        './experiment-result/run-04/checkpoints/epoch=50-step=75174.ckpt',
        './experiment-result/run-05/checkpoints/epoch=13-step=20636.ckpt',
        './experiment-result/run-06/checkpoints/epoch=53-step=79596.ckpt',
        './experiment-result/run-07/checkpoints/epoch=49-step=73700.ckpt',
        './experiment-result/run-08/checkpoints/epoch=55-step=82544.ckpt',
    )
]

# Use the GPU whenever PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [3]:
def get_model(ckpt_dir: Path):
    """Restore an `S4Model` from a checkpoint and put it into evaluation mode.

    Args:
        ckpt_dir: Path handed to `S4Model.load_from_checkpoint`.

    Returns:
        Whatever `.eval()` returns on the restored model.
    """
    restored_model = S4Model.load_from_checkpoint(ckpt_dir)
    return restored_model.eval()

def get_testing_dataloader(ckpt_dir: Path, batch_size: int, testing_segment_length: int):
    """Build the test dataloader of the data module stored in a checkpoint.

    Args:
        ckpt_dir: Path handed to `SignalTrainDatasetModule.load_from_checkpoint`.
        batch_size: Forwarded to `load_from_checkpoint` as a keyword argument.
        testing_segment_length: Forwarded to `load_from_checkpoint` as a keyword
            argument (presumably the segment length in samples - TODO confirm).

    Returns:
        The object returned by the data module's `test_dataloader()`.
    """
    loading_overrides = {
        'batch_size': batch_size,
        'testing_segment_length': testing_segment_length,
    }
    testing_data_module = SignalTrainDatasetModule.load_from_checkpoint(ckpt_dir, **loading_overrides)
    # `prepare_data()` has to run before the dataloader is requested.
    testing_data_module.prepare_data()
    return testing_data_module.test_dataloader()

## Loss Value (CUDA)

In [4]:
@torch.no_grad()
def _average_validation_losses(model, testing_dataloader, testing_dataloader_name: str) -> dict:
    """Average every validation criterion of `model` over one testing dataloader.

    Args:
        model: Model already placed on the global `device`; called as
            `model(x, parameters)`.
        testing_dataloader: Iterable of `(x, y, parameters)` batches.
        testing_dataloader_name: Label shown in the progress bar.

    Returns:
        Mapping from criterion name to the mean of its per-batch loss values.

    Raises:
        ZeroDivisionError: If `testing_dataloader` contains no batch.
    """
    validation_criterions = forge_validation_criterions_by(
        model.hparams['loss_filter_coef']
    ).to(device)
    loss_sums = dict.fromkeys(validation_criterions.keys(), 0.0)

    for x, y, parameters in tqdm(
        testing_dataloader,
        desc=f'Testing {testing_dataloader_name} dataset.',
        total=len(testing_dataloader)
    ):
        x: Tensor = x.to(device)
        y: Tensor = y.to(device)
        parameters: Tensor = parameters.to(device)

        y_hat: Tensor = model(x, parameters)

        for criterion_name, validation_criterion in validation_criterions.items():
            # The criterions are fed with an extra dimension inserted at index 1.
            loss: Tensor = validation_criterion(y_hat.unsqueeze(1), y.unsqueeze(1))
            loss_sums[criterion_name] += loss.item()

    # NOTE(review): this is a mean of per-batch values. If the last batch is
    # smaller than the others and the criterions reduce by mean, it is not
    # exactly the per-sample mean - confirm this is the intended metric.
    batch_count = len(testing_dataloader)
    return {criterion_name: loss_sum / batch_count for criterion_name, loss_sum in loss_sums.items()}


@torch.no_grad()
def test():
    """Compute the testing losses of every checkpoint in `ALL_MODEL_DIRS`.

    Two test dataloaders ('10s' and 'long') are built once from the first
    checkpoint and reused for all models. For each model a `loss.csv` with one
    row per dataloader is written two directory levels above the checkpoint
    file, i.e. into the run directory.

    Each model is explicitly released at the end of its iteration so that
    `torch.cuda.empty_cache()` can actually return its memory before the next
    checkpoint is loaded.
    """
    testing_dataloaders = {
        '10s': get_testing_dataloader(ALL_MODEL_DIRS[0], 16, 524288),  # 2 ** 19, close to 10 * 44100
        # 'long': get_testing_dataloader(ALL_MODEL_DIRS[0], 1, 8388608),  # 2 ** 23, used by Steinmetz and Reiss
        'long': get_testing_dataloader(ALL_MODEL_DIRS[0], 1, 2 ** 22),  # 2 ** 22, trying to match Steinmetz and Reiss
    }

    for model_dir in ALL_MODEL_DIRS:
        torch.cuda.empty_cache()

        print(f'Calculating {model_dir} model testing loss.')
        model = get_model(model_dir).to(device)
        losses = defaultdict(list)

        for testing_dataloader_name, testing_dataloader in testing_dataloaders.items():
            averaged_losses = _average_validation_losses(model, testing_dataloader, testing_dataloader_name)

            losses['dataset-name'].append(testing_dataloader_name)
            for criterion_name, averaged_loss in averaged_losses.items():
                losses[criterion_name].append(averaged_loss)

        pd.DataFrame(losses).to_csv(model_dir.parent.parent / 'loss.csv')

        # Drop the last reference to this model; otherwise it stays alive until
        # the next checkpoint has already been loaded onto the device and the
        # `empty_cache()` call at the top of the loop frees nothing.
        del model

    torch.cuda.empty_cache()

# Run the loss evaluation for every checkpoint; a `loss.csv` is written into each run directory.
test()

The SignalTrain dataset has been downloaded. Skipping ... 


Loading dataset from data/SignalTrain/Test.:   0%|          | 0/6 [00:00<?, ?it/s]

The SignalTrain dataset has been downloaded. Skipping ... 


Loading dataset from data/SignalTrain/Test.:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating experiment-result/run-01/checkpoints/epoch=41-step=61908.ckpt model testing loss.


Testing 10s dataset.:   0%|          | 0/15 [00:00<?, ?it/s]

Testing long dataset.:   0%|          | 0/27 [00:00<?, ?it/s]

Calculating experiment-result/run-02/checkpoints/epoch=46-step=69278.ckpt model testing loss.


Testing 10s dataset.:   0%|          | 0/15 [00:00<?, ?it/s]

Testing long dataset.:   0%|          | 0/27 [00:00<?, ?it/s]

Calculating experiment-result/run-03/checkpoints/epoch=58-step=86966.ckpt model testing loss.


Testing 10s dataset.:   0%|          | 0/15 [00:00<?, ?it/s]

Testing long dataset.:   0%|          | 0/27 [00:00<?, ?it/s]

Calculating experiment-result/run-04/checkpoints/epoch=50-step=75174.ckpt model testing loss.


Testing 10s dataset.:   0%|          | 0/15 [00:00<?, ?it/s]

Testing long dataset.:   0%|          | 0/27 [00:00<?, ?it/s]

Calculating experiment-result/run-05/checkpoints/epoch=13-step=20636.ckpt model testing loss.


Testing 10s dataset.:   0%|          | 0/15 [00:00<?, ?it/s]

Testing long dataset.:   0%|          | 0/27 [00:00<?, ?it/s]

Calculating experiment-result/run-06/checkpoints/epoch=53-step=79596.ckpt model testing loss.


Testing 10s dataset.:   0%|          | 0/15 [00:00<?, ?it/s]

Testing long dataset.:   0%|          | 0/27 [00:00<?, ?it/s]

Calculating experiment-result/run-07/checkpoints/epoch=49-step=73700.ckpt model testing loss.


Testing 10s dataset.:   0%|          | 0/15 [00:00<?, ?it/s]

Testing long dataset.:   0%|          | 0/27 [00:00<?, ?it/s]

Calculating experiment-result/run-08/checkpoints/epoch=55-step=82544.ckpt model testing loss.


Testing 10s dataset.:   0%|          | 0/15 [00:00<?, ?it/s]

Testing long dataset.:   0%|          | 0/27 [00:00<?, ?it/s]

## Evaluate Direct Inference Efficiency (CUDA and CPU)

In [None]:
%%sh
# Print the host CPU specification so the CPU timings in this section can be put into context.
lscpu

Architecture:                    x86_64
CPU op-mode(s):                  32-bit, 64-bit
Byte Order:                      Little Endian
Address sizes:                   46 bits physical, 57 bits virtual
CPU(s):                          30
On-line CPU(s) list:             0-29
Thread(s) per core:              1
Core(s) per socket:              1
Socket(s):                       30
NUMA node(s):                    1
Vendor ID:                       GenuineIntel
CPU family:                      6
Model:                           106
Model name:                      Intel(R) Xeon(R) Platinum 8358 CPU @ 2.60GHz
Stepping:                        6
CPU MHz:                         2593.898
BogoMIPS:                        5187.79
Virtualization:                  VT-x
Hypervisor vendor:               KVM
Virtualization type:             full
L1d cache:                       960 KiB
L1i cache:                       960 KiB
L2 cache:                        120 MiB
L3 cache:                        

In [None]:
def _timed_forward_ns(model, x: Tensor, cond: Tensor, local_device: torch.device) -> int:
    """Return the wall-clock duration in nanoseconds of one `model(x, cond)` call.

    CUDA kernels are launched asynchronously, so on a CUDA device the host is
    synchronised with the GPU right before each clock reading. Without this the
    timer would stop before the queued GPU work has finished.
    """
    runs_on_cuda = local_device.type == 'cuda'

    if runs_on_cuda:
        torch.cuda.synchronize(local_device)
    tic = time.perf_counter_ns()
    model(x, cond)
    if runs_on_cuda:
        torch.cuda.synchronize(local_device)
    toc = time.perf_counter_ns()

    return toc - tic


@torch.no_grad()
def evaluate_inference_efficiency(num_runs: int = 20, num_warmup_runs: int = 1):
    """Measure the real-time ratio of every checkpoint on the CPU and, if available, on CUDA.

    For input lengths 2 ** 5 ... 2 ** 19 samples a random signal of shape
    `(1, length)` is passed through the model `num_runs` times. The real-time
    ratio is the signal duration (at `SignalTrainDatasetModule.sample_rate`)
    divided by the mean inference time, so values above 1 mean faster than
    real time. One `efficiency.csv` per model, with one row per device, is
    written two directory levels above the checkpoint file.

    Args:
        num_runs: Number of timed forward passes per input length.
        num_warmup_runs: Number of untimed forward passes executed before the
            timed ones for each input length, so that one-time initialisation
            is not included in the mean.

    Raises:
        ValueError: If `num_runs` is smaller than 1 or `num_warmup_runs` is negative.
        NotImplementedError: If a device other than CPU/CUDA is encountered.
    """
    if num_runs < 1:
        raise ValueError('num_runs must be at least 1')
    if num_warmup_runs < 0:
        raise ValueError('num_warmup_runs must not be negative')

    sample_lengths: list[int] = [2 ** i for i in range(5, 20)]

    local_devices = [torch.device('cpu')]
    if torch.cuda.is_available():
        local_devices.append(torch.device('cuda'))

    for model_dir in ALL_MODEL_DIRS:
        print(f'Calculating {model_dir} model inference efficiency.')
        model = get_model(model_dir)

        real_time_ratio_dict = defaultdict(list)
        for local_device in local_devices:
            if local_device.type == 'cpu':
                device_name = local_device.type
                print(f'Doing inference speed test on {device_name.upper()}...')
            elif local_device.type == 'cuda':
                device_name = torch.cuda.get_device_name()
                print(f'Doing inference speed test on {device_name}.')
            else:
                raise NotImplementedError('Inference efficiency test can only run on CPU/CUDA')

            model = model.to(local_device)

            real_time_ratio_dict['device-name'].append(device_name)

            # Fixed conditioning vector used for every timed call
            # (presumably switch = 1 and peak reduction = 65 - TODO confirm).
            cond = torch.tensor([[1, 65]]).to(local_device, torch.float32)

            for dataset_sample_length in sample_lengths:
                dataset_sample_time_ns = dataset_sample_length * 1e9 / SignalTrainDatasetModule.sample_rate

                for _ in range(num_warmup_runs):
                    warmup_x = torch.rand(1, dataset_sample_length).to(local_device, torch.float32)
                    model(warmup_x, cond)

                inference_time_ns: list[int] = []
                for _ in range(num_runs):
                    # A fresh random input per run, created outside the timed region.
                    x = torch.rand(1, dataset_sample_length).to(local_device, torch.float32)
                    inference_time_ns.append(_timed_forward_ns(model, x, cond, local_device))

                real_time_ratio = dataset_sample_time_ns / mean(inference_time_ns)
                real_time_ratio_dict[str(dataset_sample_length)].append(real_time_ratio)

        pd.DataFrame(real_time_ratio_dict).to_csv(model_dir.parent.parent / 'efficiency.csv')


# Benchmark every checkpoint on the CPU and, when available, on CUDA; an `efficiency.csv` is written into each run directory.
evaluate_inference_efficiency()

Calculating experiment-result/run-01/checkpoints/epoch=41-step=61908.ckpt model inference efficiency.
Doing inference speed test on CPU...
Doing inference speed test on NVIDIA A10.
Calculating experiment-result/run-02/checkpoints/epoch=46-step=69278.ckpt model inference efficiency.
Doing inference speed test on CPU...
Doing inference speed test on NVIDIA A10.
Calculating experiment-result/run-03/checkpoints/epoch=58-step=86966.ckpt model inference efficiency.
Doing inference speed test on CPU...
Doing inference speed test on NVIDIA A10.
Calculating experiment-result/run-04/checkpoints/epoch=50-step=75174.ckpt model inference efficiency.
Doing inference speed test on CPU...
Doing inference speed test on NVIDIA A10.
Calculating experiment-result/run-05/checkpoints/epoch=13-step=20636.ckpt model inference efficiency.
Doing inference speed test on CPU...
Doing inference speed test on NVIDIA A10.
Calculating experiment-result/run-06/checkpoints/epoch=53-step=79596.ckpt model inference effic

## Generate Output Audio (CUDA)

In [4]:
@torch.no_grad()
def evaluate_output_audio():
    """Render the output audio of every checkpoint in `ALL_MODEL_DIRS` as WAV files.

    A test dataloader with segments of `10 * sample_rate` samples is built once
    from the first checkpoint and reused for all models. For every test item
    four files are written to `<run directory>/output-audio-10s`:

    * `*-x.wav`      - the model input,
    * `*-y.wav`      - the target,
    * `*-y-hat.wav`  - the model prediction for the input,
    * `*-y-diff.wav` - target minus prediction.

    The prediction is computed from the input `x` (not from the target `y`),
    matching how the model is called in the loss evaluation.
    """
    sample_rate = SignalTrainDatasetModule.sample_rate
    testing_dataloader = get_testing_dataloader(ALL_MODEL_DIRS[0], 16, 10 * sample_rate)

    for model_dir in ALL_MODEL_DIRS:
        print(f'Generating {model_dir} model output audio.')
        model = get_model(model_dir).to(device)

        output_audio_dir = model_dir.parent.parent / 'output-audio-10s'
        output_audio_dir.mkdir(exist_ok=True)

        # Running index over all test items; used as the file name prefix.
        ii = 0
        for x, y, cond in tqdm(testing_dataloader, desc='Generating output audio', total=len(testing_dataloader)):
            x: Tensor = x.to(device)
            cond: Tensor = cond.to(device)

            # The network must be driven by the input signal, not by the target.
            y_hat: Tensor = model(x, cond)

            # Move each batch to the host once, not once per item.
            x_cpu = x.cpu()
            y_cpu = y.cpu()
            y_hat_cpu = y_hat.cpu()
            cond_cpu = cond.cpu()

            for i in range(y_hat_cpu.size(0)):
                # Unpacking requires exactly two conditioning values per item.
                switch, peak_reduction = cond_cpu[i, :].flatten().tolist()
                prefix = f'{str(ii).zfill(3)}-switch={switch}-peak-reduction={peak_reduction}'

                x_audio = x_cpu[i, :].flatten().numpy()
                y_audio = y_cpu[i, :].flatten().numpy()
                y_hat_audio = y_hat_cpu[i, :].flatten().numpy()
                y_diff_audio = y_audio - y_hat_audio

                wavfile.write(output_audio_dir / f'{prefix}-x.wav', sample_rate, x_audio)
                wavfile.write(output_audio_dir / f'{prefix}-y.wav', sample_rate, y_audio)
                wavfile.write(output_audio_dir / f'{prefix}-y-hat.wav', sample_rate, y_hat_audio)
                wavfile.write(output_audio_dir / f'{prefix}-y-diff.wav', sample_rate, y_diff_audio)

                ii += 1

# Render the audio of every checkpoint; WAV files are written to `output-audio-10s` inside each run directory.
evaluate_output_audio()

The SignalTrain dataset has been downloaded. Skipping ... 


Loading dataset from data/SignalTrain/Test.:   0%|          | 0/6 [00:00<?, ?it/s]

Generating experiment-result/run-01/checkpoints/epoch=41-step=61908.ckpt model output audio.


Generating output audio:   0%|          | 0/17 [00:00<?, ?it/s]

Generating experiment-result/run-02/checkpoints/epoch=46-step=69278.ckpt model output audio.


Generating output audio:   0%|          | 0/17 [00:00<?, ?it/s]

Generating experiment-result/run-03/checkpoints/epoch=58-step=86966.ckpt model output audio.


Generating output audio:   0%|          | 0/17 [00:00<?, ?it/s]

Generating experiment-result/run-04/checkpoints/epoch=50-step=75174.ckpt model output audio.


Generating output audio:   0%|          | 0/17 [00:00<?, ?it/s]

Generating experiment-result/run-05/checkpoints/epoch=13-step=20636.ckpt model output audio.


Generating output audio:   0%|          | 0/17 [00:00<?, ?it/s]

Generating experiment-result/run-06/checkpoints/epoch=53-step=79596.ckpt model output audio.


Generating output audio:   0%|          | 0/17 [00:00<?, ?it/s]

Generating experiment-result/run-07/checkpoints/epoch=49-step=73700.ckpt model output audio.


Generating output audio:   0%|          | 0/17 [00:00<?, ?it/s]

Generating experiment-result/run-08/checkpoints/epoch=55-step=82544.ckpt model output audio.


Generating output audio:   0%|          | 0/17 [00:00<?, ?it/s]