In [1]:
import os
import random
import numpy as np
import torch
from tqdm import tqdm

In [2]:
from speechbrain.pretrained import SepformerSeparation as separator
from utils import select_random_files, form_pairs_mf, separate_pair, evaluate_separation

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Generating random pairs

In [4]:
selected_files_m = select_random_files(files_per_folder=2, seconds_considered=5, specify_sex='M')
selected_files_f = select_random_files(files_per_folder=2, seconds_considered=5, specify_sex='F')
formed_pairs = form_pairs_mf(selected_files_m, selected_files_f)

In [5]:
formed_pairs = formed_pairs[:3]

## Loading model and separating audios

In [6]:
if device == torch.device("cuda"):
    model = separator.from_hparams(source="speechbrain/sepformer-whamr", savedir='pretrained_models/sepformer-whamr', run_opts={"device":"cuda"})
else:
    model = separator.from_hparams(source="speechbrain/sepformer-whamr", savedir='pretrained_models/sepformer-whamr')

In [7]:
si_sdr_list = []
for pair in tqdm(formed_pairs, "Evaluating pairs"):
    _, sources, predictions = separate_pair(pair, model, device)
    si_sdr1, si_sdr2 = evaluate_separation(sources, predictions)
    si_sdr_list.extend([si_sdr1, si_sdr2])

Evaluating pairs: 100%|███████████████████████████████████████████████████████████████████| 3/3 [00:31<00:00, 10.65s/it]


In [8]:
si_sdr_np = np.stack(si_sdr_list)
np.save("si_sdr_mf", si_sdr_np)

### SI-SDR Median

In [9]:
np.median(si_sdr_np)

11.430128

### SI-SDR Mean

In [10]:
np.mean(si_sdr_np)

10.9900255