In [1]:
%cd '/home/giorgio_mariani/Documents/audio-diffusion-pytorch-trainer/'

/home/giorgio_mariani/Documents/audio-diffusion-pytorch-trainer


In [2]:
pwd

'/home/giorgio_mariani/Documents/audio-diffusion-pytorch-trainer'

In [3]:
from collections import defaultdict
import json
import os
from pathlib import Path
from pathlib import Path
import re
from typing import List, Mapping, Optional, Tuple, Union
import main.module_base
from script.misc import hparams
import math
import numpy as np
#import museval
import pandas as pd
import torch
import torchaudio
import torchmetrics.functional.audio as tma
#from evaluation.evaluate_separation import evaluate_data
from tqdm import tqdm
from torchaudio.transforms import Resample

from main.dataset import is_silent
from main.likelihood import log_likelihood_song

In [4]:
def sdr(preds: torch.Tensor, target: torch.Tensor, eps: float = 1e-5) -> torch.Tensor:
    s_target = torch.norm(target, dim=-1)**2 + eps
    s_error = torch.norm(target - preds, dim=-1)**2 + eps
    return 10 * torch.log10(s_target/s_error)


In [5]:
def sisnr(preds: torch.Tensor, target: torch.Tensor, eps: float = 1e-8) -> torch.Tensor:
    alpha = (torch.sum(preds * target, dim=-1, keepdim=True) + eps) / (torch.sum(target**2, dim=-1, keepdim=True) + eps)
    target_scaled = alpha * target
    noise = target_scaled - preds
    s_target = torch.sum(target_scaled**2, dim=-1) + eps
    s_error = torch.sum(noise**2, dim=-1) + eps
    return 10 * torch.log10(s_target / s_error)

In [6]:
def get_rms(source_waveforms):
  """Return shape (source,) weights for signals that are nonzero."""
  return torch.sqrt(torch.mean(source_waveforms ** 2, dim=-1))
  #return source_norms <= 1e-8

In [7]:
def load_chunks(chunk_folder: Path) -> Tuple[Mapping[str, torch.Tensor], Mapping[str, torch.Tensor], int]:
    original_tracks_and_rate = {ori.name.split(".")[0][3:]: torchaudio.load(ori) for ori in sorted(list(chunk_folder.glob("ori*.wav")))}
    separated_tracks_and_rate = {sep.name.split(".")[0][3:]: torchaudio.load(sep) for sep in sorted(list(chunk_folder.glob("sep*.wav")))}
    assert tuple(original_tracks_and_rate.keys()) == tuple(separated_tracks_and_rate.keys())

    original_tracks = {k:t for k, (t,_) in original_tracks_and_rate.items()}
    sample_rates_ori = [s for (_,s) in original_tracks_and_rate.values()]

    separated_tracks = {k:t for k, (t,_) in separated_tracks_and_rate.items()}
    sample_rates_sep = [s for (_,s) in separated_tracks_and_rate.values()]

    assert len({*sample_rates_ori, *sample_rates_sep}) == 1, print({*sample_rates_ori, *sample_rates_sep})
    assert len(original_tracks) == len(separated_tracks)
    sr = sample_rates_ori[0]

    return original_tracks, separated_tracks, sr

In [8]:
def evaluate_chunks(separation_path: Union[str, Path], filter_silence: bool = True, batch_size: int = 512, orig_sr: int = 44100, resample_sr: Optional[int] = None):
    separation_folder = Path(separation_path)
    seps, oris, ms = defaultdict(list), defaultdict(list), []
    
    chunks = list(separation_folder.glob("*"))
    complete_results = defaultdict(list)

    resample_fn = Resample(orig_freq=orig_sr, new_freq=resample_sr) if resample_sr is not None else lambda x: x
    
    for ci, chunk_folder in enumerate((chunks)):
        if not chunk_folder.is_dir():
            continue
        
        original_tracks, separated_tracks, sr = load_chunks(chunk_folder)
       
        assert sr == orig_sr, f"chunk [{chunk_folder.name}]: expected freq={orig_sr}, track freq={sr}"

        m = sum(original_tracks.values())

        for k in original_tracks.keys():
            ori_t = original_tracks[k]
            sep_t = separated_tracks[k]

            rms = get_rms(ori_t)
            if rms <= 1e-8 and filter_silence:
                ori_t[:] = torch.nan

            ori_t = resample_fn(ori_t)
            sep_t = resample_fn(sep_t)
            
            oris[k].append(ori_t)
            seps[k].append(sep_t)

        ms.append(resample_fn(m))
        
        if (ci+1) % batch_size == 0 or (ci+1) == len(chunks):
            oris = {k: torch.stack(t, dim=0) for k,t in oris.items()}
            seps = {k: torch.stack(t, dim=0) for k,t in seps.items()}
            ms = torch.stack(ms, dim=0)

            results = {f"SISNRi_{k}": (sisnr(seps[k], oris[k]) - sisnr(ms, oris[k])).view(-1).tolist() for k in oris}
            #results = {f"SDR_{k}": sdr(seps[k], oris[k]).view(-1).tolist() for k in oris}
            for k,v in results.items():
                complete_results[k].extend(v)
            
            seps, oris, ms = defaultdict(list), defaultdict(list), []

    df = pd.DataFrame(complete_results).mean()
    return df.to_dict()

In [9]:
def evaluate_tracks(separation_path: Union[str, Path], orig_sr: int = 44100, resample_sr: Optional[int] = None):
    separation_folder = Path(separation_path)
    assert separation_folder.exists(), separation_folder
    assert (separation_folder.parent / "chunk_data.json").exists(), separation_folder

    with open(separation_folder.parent / "chunk_data.json") as f:
        chunk_data = json.load(f)

    resample_fn = Resample(orig_freq=orig_sr, new_freq=resample_sr) if resample_sr is not None else lambda x: x

    track_to_chunks = defaultdict(list)
    for chunk_data in chunk_data:
        track = chunk_data["track"]
        chunk_idx = chunk_data["chunk_index"]
        start_sample = chunk_data["start_chunk_sample"]
        #start_sample_sec = chunk_data["start_chunk_seconds"]
        #assert abs(start_sample / orig_sr  - start_sample_sec) <= 1e-12, abs(start_sample / orig_sr  - start_sample_sec)
        track_to_chunks[track].append( (start_sample, chunk_idx) )

    # reorder chunks into ascending order and compute sdr
    track_to_sdr = {}
    for track, chunks in tqdm(track_to_chunks.items()):
        sorted_chunks = sorted(chunks)

        separated_wavs = defaultdict(list)
        original_wavs = defaultdict(list)

        for _, chunk_idx in sorted_chunks:
            chunk_folder = separation_folder / str(chunk_idx)
            original_tracks, separated_tracks, sr = load_chunks(chunk_folder)
            assert sr == orig_sr, f"chunk [{chunk_folder.name}]: expected freq={orig_sr}, track freq={sr}"

            for k in separated_tracks:
                separated_wavs[k].append(separated_tracks[k])
                original_wavs[k].append(original_tracks[k])

        for k in separated_wavs:
            separated_wavs[k] = resample_fn(torch.cat(separated_wavs[k], dim=-1))
            original_wavs[k] = resample_fn(torch.cat(original_wavs[k], dim=-1))

        mixture = sum([owav for owav in original_wavs.values()])

        #track_to_sdr[track] = {f"SDR_{k}": sdr(separated_wavs[k], original_wavs[k]).item() for k in separated_wavs}
        track_to_sdr[track] = {f"SISNRi_{k}": (sisnr(separated_wavs[k],  original_wavs[k]) - sisnr(mixture, original_wavs[k])).item() for k in separated_wavs}
    return pd.DataFrame.from_records(track_to_sdr).transpose()


In [12]:
def evaluate_tracks_chunks(separation_path: Union[str, Path], chunk_prop: int, 
                           orig_sr: int = 44100, resample_sr: Optional[int] = None, 
                           filter_single_source: bool = True, eps: float = 10-8):

    separation_folder = Path(separation_path)
    assert separation_folder.exists(), separation_folder
    assert (separation_folder.parent / "chunk_data.json").exists(), separation_folder

    with open(separation_folder.parent / "chunk_data.json") as f:
        chunk_data = json.load(f)
        
    def load_model(path):
        model = main.module_base.Model(**{**hparams, "in_channels": 4})
        model.load_state_dict(torch.load(path)["state_dict"])
        model.to("cuda:0")
        return model
    
    ckpts_path = Path("/home/irene/Documents/audio-diffusion-pytorch-trainer/logs/ckpts")
    model = load_model(ckpts_path / "avid-darkness-164_epoch=419-valid_loss=0.015.ckpt")
    denoise_fn = model.model.diffusion.denoise_fn
    
    resample_fn = Resample(orig_freq=orig_sr, new_freq=resample_sr) if resample_sr is not None else lambda x: x

    track_to_chunks = defaultdict(list)
    for chunk_data in chunk_data:
        track = chunk_data["track"]
        chunk_idx = chunk_data["chunk_index"]
        start_sample = chunk_data["start_chunk_sample"]
        track_to_chunks[track].append( (start_sample, chunk_idx) )

    # reorder chunks into ascending order and compute sdr
    results = defaultdict(list)
    for track, chunks in tqdm(track_to_chunks.items()):
        sorted_chunks = sorted(chunks)
        print(chunks)

        separated_wavs, original_wavs = defaultdict(list), defaultdict(list)
        for _, chunk_idx in sorted_chunks:
            #print(len(sorted_chunks))
            chunk_folder = separation_folder / str(chunk_idx)
            original_tracks, separated_tracks, sr = load_chunks(chunk_folder)
            #print(separated_tracks)
            assert sr == orig_sr, f"chunk [{chunk_folder.name}]: expected freq={orig_sr}, track freq={sr}"

            for k in separated_tracks:  #DA STAMPARE QUESTI 3 PRINT PER VEDERE IL BUUUUUUUGGGGGG!!!!!!
                print(k)
                separated_wavs[k].append(separated_tracks[k])
                print(separated_tracks)
                print(separated_wavs)
                
                original_wavs[k].append(original_tracks[k])

            
        original_tensors, separated_tensors = {}, {}
        for k in separated_wavs:
            
            separated_tensors[k] = resample_fn(torch.cat(separated_wavs[k], dim=-1).view(-1))
            #print(torch.cat(separated_wavs[k], dim=-1).view(-1).shape)
            original_tensors[k] = resample_fn(torch.cat(original_wavs[k], dim=-1).view(-1))
     
        mixture = sum([owav for owav in original_tensors.values()])
        
        
        chunk_size = int(separated_tensors["1"].shape[0] * chunk_prop)
        #print(chunk_size)
        generated_mixture = torch.stack([separated_tensors[k] for k in separated_tensors]).unsqueeze(0).to("cuda:0")
        #print(generated_mixture)
        for k in separated_tensors:
            o = original_tensors[k]
            s = separated_tensors[k]
            m = mixture
            padded_source = torch.zeros((1, 4, s.shape[-1]))
            j = int(k) -1
            padded_source[:, j:j+1, :] = s
            #lik = torch.zeros(1)
            #lik, _, _ = log_likelihood_song(denoise_fn, padded_source.to("cuda:0"), sigma_max=1.0) # Hard coded sigma_max
            #for i in range(mixture.shape[-1] // chunk_size):
            #    results[f"log_lik_{k}"].append(lik.item())
        #lik, _, _ = log_likelihood_song(denoise_fn, generated_mixture, sigma_max=1.0)
        
        #for i in range(mixture.shape[-1] // chunk_size):
        #    results[f"log_lik_mixture"].append(lik.item())
            
        for i in range(mixture.shape[-1] // chunk_size):
            num_silent_signals = 0
            for k in separated_tensors:
                o = original_tensors[k][i*chunk_size:(i+1)*chunk_size]
                print(o.unsqueeze(0).shape)
                if is_silent(o.unsqueeze(0)) and filter_single_source:
                    num_silent_signals += 1
            if num_silent_signals > 3:
                continue
            else:
                for k in separated_tensors:
                    o = original_tensors[k][i*chunk_size:(i+1)*chunk_size]
                    s = separated_tensors[k][i*chunk_size: (i+1)*chunk_size]
                    m = mixture[i*chunk_size: (i+1)*chunk_size]
                    results[k].append((sisnr(s, o, eps) - sisnr(m, o, eps)).item())
    #print(results)
    return pd.DataFrame(results)

In [13]:
results = evaluate_tracks_chunks("separations/debug/sep_round_0", chunk_prop=1/3, orig_sr=22050, eps=1e-8)


  7%|█████████▋                                                                                                                               | 5/71 [00:00<00:01, 44.12it/s]

1
{'1': tensor([[-0.0374, -0.0252, -0.0255,  ...,  0.0216,  0.0237,  0.0265]]), '2': tensor([[ 2.0195e-02, -2.8032e-04, -3.0696e-05,  ...,  1.9598e-02,
         -3.6496e-04,  4.1126e-03]]), '3': tensor([[-0.0005, -0.0014, -0.0003,  ...,  0.0298,  0.0267,  0.0206]]), '4': tensor([[-0.0016, -0.0013, -0.0010,  ...,  0.0173,  0.0228,  0.0060]])}
defaultdict(<class 'list'>, {'1': [tensor([[-0.0374, -0.0252, -0.0255,  ...,  0.0216,  0.0237,  0.0265]])]})
2
{'1': tensor([[-0.0374, -0.0252, -0.0255,  ...,  0.0216,  0.0237,  0.0265]]), '2': tensor([[ 2.0195e-02, -2.8032e-04, -3.0696e-05,  ...,  1.9598e-02,
         -3.6496e-04,  4.1126e-03]]), '3': tensor([[-0.0005, -0.0014, -0.0003,  ...,  0.0298,  0.0267,  0.0206]]), '4': tensor([[-0.0016, -0.0013, -0.0010,  ...,  0.0173,  0.0228,  0.0060]])}
defaultdict(<class 'list'>, {'1': [tensor([[-0.0374, -0.0252, -0.0255,  ...,  0.0216,  0.0237,  0.0265]])], '2': [tensor([[ 2.0195e-02, -2.8032e-04, -3.0696e-05,  ...,  1.9598e-02,
         -3.6496e-04, 

 14%|███████████████████▏                                                                                                                    | 10/71 [00:00<00:01, 46.76it/s]

torch.Size([1, 87381])
torch.Size([1, 87381])
torch.Size([1, 87381])
torch.Size([1, 87381])
torch.Size([1, 87381])
torch.Size([1, 87381])
torch.Size([1, 87381])
torch.Size([1, 87381])
torch.Size([1, 87381])
torch.Size([1, 87381])
torch.Size([1, 87381])
torch.Size([1, 87381])
1
{'1': tensor([[-0.0465, -0.0097, -0.0098,  ...,  0.0210,  0.0296,  0.0183]]), '2': tensor([[ 0.0504,  0.0153,  0.0087,  ...,  0.0002, -0.0180,  0.0095]]), '3': tensor([[ 0.0008, -0.0023,  0.0015,  ...,  0.0142,  0.0087, -0.0018]]), '4': tensor([[-0.0047, -0.0034, -0.0003,  ...,  0.0295,  0.0363,  0.0315]])}
defaultdict(<class 'list'>, {'1': [tensor([[-0.0465, -0.0097, -0.0098,  ...,  0.0210,  0.0296,  0.0183]])]})
2
{'1': tensor([[-0.0465, -0.0097, -0.0098,  ...,  0.0210,  0.0296,  0.0183]]), '2': tensor([[ 0.0504,  0.0153,  0.0087,  ...,  0.0002, -0.0180,  0.0095]]), '3': tensor([[ 0.0008, -0.0023,  0.0015,  ...,  0.0142,  0.0087, -0.0018]]), '4': tensor([[-0.0047, -0.0034, -0.0003,  ...,  0.0295,  0.0363,  0.03

 27%|████████████████████████████████████▍                                                                                                   | 19/71 [00:00<00:01, 32.27it/s]

1
{'1': tensor([[ 0.0667,  0.0197,  0.0133,  ..., -0.0160, -0.0047, -0.0012]]), '2': tensor([[-0.0735, -0.0300, -0.0215,  ...,  0.0233, -0.0162, -0.0188]]), '3': tensor([[-0.0107, -0.0037, -0.0010,  ...,  0.0044,  0.0048, -0.0036]]), '4': tensor([[ 0.0103,  0.0038, -0.0003,  ..., -0.0022, -0.0035,  0.0217]])}
defaultdict(<class 'list'>, {'1': [tensor([[ 0.0667,  0.0197,  0.0133,  ..., -0.0160, -0.0047, -0.0012]])]})
2
{'1': tensor([[ 0.0667,  0.0197,  0.0133,  ..., -0.0160, -0.0047, -0.0012]]), '2': tensor([[-0.0735, -0.0300, -0.0215,  ...,  0.0233, -0.0162, -0.0188]]), '3': tensor([[-0.0107, -0.0037, -0.0010,  ...,  0.0044,  0.0048, -0.0036]]), '4': tensor([[ 0.0103,  0.0038, -0.0003,  ..., -0.0022, -0.0035,  0.0217]])}
defaultdict(<class 'list'>, {'1': [tensor([[ 0.0667,  0.0197,  0.0133,  ..., -0.0160, -0.0047, -0.0012]])], '2': [tensor([[-0.0735, -0.0300, -0.0215,  ...,  0.0233, -0.0162, -0.0188]])]})
3
{'1': tensor([[ 0.0667,  0.0197,  0.0133,  ..., -0.0160, -0.0047, -0.0012]]), '

 44%|███████████████████████████████████████████████████████████▍                                                                            | 31/71 [00:00<00:00, 41.02it/s]

torch.Size([1, 87381])
torch.Size([1, 87381])
torch.Size([1, 87381])
1
{'1': tensor([[-0.0130, -0.0154, -0.0113,  ..., -0.0652, -0.0549, -0.0574]]), '2': tensor([[-0.0018, -0.0438, -0.0412,  ..., -0.0014, -0.0132,  0.0042]]), '3': tensor([[-0.0141, -0.0174, -0.0140,  ...,  0.0020,  0.0052,  0.0011]]), '4': tensor([[-0.0597, -0.0643, -0.0564,  ..., -0.0206, -0.0090, -0.0101]])}
defaultdict(<class 'list'>, {'1': [tensor([[-0.0130, -0.0154, -0.0113,  ..., -0.0652, -0.0549, -0.0574]])]})
2
{'1': tensor([[-0.0130, -0.0154, -0.0113,  ..., -0.0652, -0.0549, -0.0574]]), '2': tensor([[-0.0018, -0.0438, -0.0412,  ..., -0.0014, -0.0132,  0.0042]]), '3': tensor([[-0.0141, -0.0174, -0.0140,  ...,  0.0020,  0.0052,  0.0011]]), '4': tensor([[-0.0597, -0.0643, -0.0564,  ..., -0.0206, -0.0090, -0.0101]])}
defaultdict(<class 'list'>, {'1': [tensor([[-0.0130, -0.0154, -0.0113,  ..., -0.0652, -0.0549, -0.0574]])], '2': [tensor([[-0.0018, -0.0438, -0.0412,  ..., -0.0014, -0.0132,  0.0042]])]})
3
{'1': tens

 51%|████████████████████████████████████████████████████████████████████▉                                                                   | 36/71 [00:00<00:00, 38.09it/s]

1
{'1': tensor([[ 0.0015, -0.0016,  0.0014,  ...,  0.0221,  0.0266,  0.0254]]), '2': tensor([[ 0.0039,  0.0048,  0.0009,  ...,  0.0013, -0.0105,  0.0056]]), '3': tensor([[-0.0026, -0.0017, -0.0003,  ...,  0.0160,  0.0191,  0.0153]]), '4': tensor([[-0.0032, -0.0014, -0.0009,  ..., -0.0003,  0.0078,  0.0044]])}
defaultdict(<class 'list'>, {'1': [tensor([[ 0.0015, -0.0016,  0.0014,  ...,  0.0221,  0.0266,  0.0254]])]})
2
{'1': tensor([[ 0.0015, -0.0016,  0.0014,  ...,  0.0221,  0.0266,  0.0254]]), '2': tensor([[ 0.0039,  0.0048,  0.0009,  ...,  0.0013, -0.0105,  0.0056]]), '3': tensor([[-0.0026, -0.0017, -0.0003,  ...,  0.0160,  0.0191,  0.0153]]), '4': tensor([[-0.0032, -0.0014, -0.0009,  ..., -0.0003,  0.0078,  0.0044]])}
defaultdict(<class 'list'>, {'1': [tensor([[ 0.0015, -0.0016,  0.0014,  ...,  0.0221,  0.0266,  0.0254]])], '2': [tensor([[ 0.0039,  0.0048,  0.0009,  ...,  0.0013, -0.0105,  0.0056]])]})
3
{'1': tensor([[ 0.0015, -0.0016,  0.0014,  ...,  0.0221,  0.0266,  0.0254]]), '

 65%|████████████████████████████████████████████████████████████████████████████████████████                                                | 46/71 [00:01<00:00, 41.28it/s]

defaultdict(<class 'list'>, {'1': [tensor([[-0.0031,  0.0020, -0.0012,  ..., -0.0060,  0.0005,  0.0009]])], '2': [tensor([[ 0.0046, -0.0017, -0.0003,  ...,  0.0266,  0.0180,  0.0316]])], '3': [tensor([[-2.6695e-03, -2.5066e-03,  7.6294e-05,  ...,  1.4094e-02,
          1.2485e-02,  7.9007e-03]])]})
4
{'1': tensor([[-0.0031,  0.0020, -0.0012,  ..., -0.0060,  0.0005,  0.0009]]), '2': tensor([[ 0.0046, -0.0017, -0.0003,  ...,  0.0266,  0.0180,  0.0316]]), '3': tensor([[-2.6695e-03, -2.5066e-03,  7.6294e-05,  ...,  1.4094e-02,
          1.2485e-02,  7.9007e-03]]), '4': tensor([[ 0.0012,  0.0022,  0.0014,  ..., -0.0084, -0.0086, -0.0187]])}
defaultdict(<class 'list'>, {'1': [tensor([[-0.0031,  0.0020, -0.0012,  ..., -0.0060,  0.0005,  0.0009]])], '2': [tensor([[ 0.0046, -0.0017, -0.0003,  ...,  0.0266,  0.0180,  0.0316]])], '3': [tensor([[-2.6695e-03, -2.5066e-03,  7.6294e-05,  ...,  1.4094e-02,
          1.2485e-02,  7.9007e-03]])], '4': [tensor([[ 0.0012,  0.0022,  0.0014,  ..., -0.0084, 

 82%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████                         | 58/71 [00:01<00:00, 48.48it/s]

defaultdict(<class 'list'>, {'1': [tensor([[-0.0374, -0.0644, -0.0684,  ..., -0.0180, -0.0135,  0.0030]]), tensor([[-0.0174, -0.0218, -0.0257,  ...,  0.0027,  0.0090,  0.0168]]), tensor([[ 0.0513,  0.0427,  0.0434,  ..., -0.0356, -0.0251, -0.0190]])], '2': [tensor([[-0.0506, -0.0491, -0.0363,  ..., -0.0147, -0.0245, -0.0223]]), tensor([[-0.0059, -0.0058, -0.0042,  ...,  0.0036, -0.0016,  0.0020]]), tensor([[-0.0137, -0.0003, -0.0035,  ..., -0.0501, -0.0582, -0.0585]])], '3': [tensor([[-0.0172, -0.0219, -0.0194,  ..., -0.0008, -0.0006, -0.0005]]), tensor([[ 0.0072,  0.0052,  0.0082,  ...,  0.0076,  0.0039, -0.0009]]), tensor([[-0.0004,  0.0105,  0.0059,  ..., -0.0157, -0.0156, -0.0265]])], '4': [tensor([[-0.0090, -0.0244, -0.0187,  ...,  0.0175,  0.0203,  0.0296]]), tensor([[ 0.0040,  0.0027,  0.0010,  ...,  0.0021,  0.0013, -0.0055]]), tensor([[ 0.0135,  0.0172,  0.0151,  ..., -0.0329, -0.0238, -0.0253]])]})
torch.Size([1, 262144])
torch.Size([1, 262144])
torch.Size([1, 262144])
torch.

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 71/71 [00:01<00:00, 43.61it/s]

1
{'1': tensor([[-0.0178,  0.0024,  0.0016,  ...,  0.0350,  0.0486,  0.0434]]), '2': tensor([[ 0.0245,  0.0033,  0.0023,  ...,  0.0053, -0.0117,  0.0111]]), '3': tensor([[-0.0006, -0.0022,  0.0005,  ...,  0.0114,  0.0076,  0.0050]]), '4': tensor([[-0.0043, -0.0006, -0.0023,  ...,  0.0133,  0.0171,  0.0075]])}
defaultdict(<class 'list'>, {'1': [tensor([[-0.0178,  0.0024,  0.0016,  ...,  0.0350,  0.0486,  0.0434]])]})
2
{'1': tensor([[-0.0178,  0.0024,  0.0016,  ...,  0.0350,  0.0486,  0.0434]]), '2': tensor([[ 0.0245,  0.0033,  0.0023,  ...,  0.0053, -0.0117,  0.0111]]), '3': tensor([[-0.0006, -0.0022,  0.0005,  ...,  0.0114,  0.0076,  0.0050]]), '4': tensor([[-0.0043, -0.0006, -0.0023,  ...,  0.0133,  0.0171,  0.0075]])}
defaultdict(<class 'list'>, {'1': [tensor([[-0.0178,  0.0024,  0.0016,  ...,  0.0350,  0.0486,  0.0434]])], '2': [tensor([[ 0.0245,  0.0033,  0.0023,  ...,  0.0053, -0.0117,  0.0111]])]})
3
{'1': tensor([[-0.0178,  0.0024,  0.0016,  ...,  0.0350,  0.0486,  0.0434]]), '




In [134]:
def evaluate_tracks_chunks_mike(separation_path: Union[str, Path], chunk_prop: int, 
                           orig_sr: int = 44100, resample_sr: Optional[int] = None, 
                           filter_single_source: bool = True, eps: float = 10-8):

    separation_folder = Path(separation_path)
    assert separation_folder.exists(), separation_folder
    assert (separation_folder.parent / "chunk_data.json").exists(), separation_folder

    with open(separation_folder.parent / "chunk_data.json") as f:
        chunk_data = json.load(f)
        
    def load_model(path):
        model = main.module_base.Model(**{**hparams, "in_channels": 4})
        model.load_state_dict(torch.load(path)["state_dict"])
        model.to("cuda:0")
        return model
    
    ckpts_path = Path("/home/irene/Documents/audio-diffusion-pytorch-trainer/logs/ckpts")
    model = load_model(ckpts_path / "avid-darkness-164_epoch=419-valid_loss=0.015.ckpt")
    denoise_fn = model.model.diffusion.denoise_fn
    
    resample_fn = Resample(orig_freq=orig_sr, new_freq=resample_sr) if resample_sr is not None else lambda x: x

    track_to_chunks = defaultdict(list)
    for chunk_data in chunk_data:
        track = chunk_data["track"]
        chunk_idx = chunk_data["chunk_index"]
        start_sample = chunk_data["start_chunk_sample"]
        track_to_chunks[track].append( (start_sample, chunk_idx) )

    # reorder chunks into ascending order and compute sdr
    results = defaultdict(list)
    for track, chunks in tqdm(track_to_chunks.items()):
        sorted_chunks = sorted(chunks)

        separated_wavs, original_wavs = defaultdict(list), defaultdict(list)
        for _, chunk_idx in sorted_chunks:
                        
            chunk_folder = separation_folder / str(chunk_idx)
            original_tracks, separated_tracks, sr = load_chunks(chunk_folder)
           
            assert sr == orig_sr, f"chunk [{chunk_folder.name}]: expected freq={orig_sr}, track freq={sr}"
            
            
        mixture = sum([owav for owav in original_tracks.values()])
        
        chunk_size = int(separated_tracks["1"].shape[-1] * chunk_prop)
        
        for k in separated_tracks:
            o = original_tracks[k]
            
            s = separated_tracks[k]
            
            m = mixture
            
            
        for i in range(mixture.shape[-1] // chunk_size):
            
            num_silent_signals = 0
            for k in separated_tracks:
                
                o = original_tracks[k][:,i*chunk_size:(i+1)*chunk_size]
               
                if is_silent(o) and filter_single_source:
                    num_silent_signals += 1
            if num_silent_signals > 3:
                continue
            else:
                for k in separated_tracks:
                    o = original_tracks[k][:,i*chunk_size:(i+1)*chunk_size]
                    s = separated_tracks[k][:,i*chunk_size: (i+1)*chunk_size]
                    m = mixture[:,i*chunk_size: (i+1)*chunk_size]
                    results[k].append((sisnr(s, o, eps) - sisnr(m, o, eps)).item())
                    
    return pd.DataFrame(results)

In [135]:
def evaluate_tracks_chunks_mike_with_overlap(separation_path: Union[str, Path],
                           orig_sr: int = 44100, resample_sr: Optional[int] = None, 
                           filter_single_source: bool = True, eps: float = 10-8, chunk_duration: float = 4.0, overlap_duration: float = 2.0):

    separation_folder = Path(separation_path)
    assert separation_folder.exists(), separation_folder
    assert (separation_folder.parent / "chunk_data.json").exists(), separation_folder

    with open(separation_folder.parent / "chunk_data.json") as f:
        chunk_data = json.load(f)
        
    def load_model(path):
        model = main.module_base.Model(**{**hparams, "in_channels": 4})
        model.load_state_dict(torch.load(path)["state_dict"])
        model.to("cuda:0")
        return model
    
    ckpts_path = Path("/home/irene/Documents/audio-diffusion-pytorch-trainer/logs/ckpts")
    model = load_model(ckpts_path / "avid-darkness-164_epoch=419-valid_loss=0.015.ckpt")
    denoise_fn = model.model.diffusion.denoise_fn
    
    resample_fn = Resample(orig_freq=orig_sr, new_freq=resample_sr) if resample_sr is not None else lambda x: x

    track_to_chunks = defaultdict(list)
    for chunk_data in chunk_data:
        track = chunk_data["track"]
        chunk_idx = chunk_data["chunk_index"]
        start_sample = chunk_data["start_chunk_sample"]
        track_to_chunks[track].append( (start_sample, chunk_idx) )

    # reorder chunks into ascending order and compute sdr
    results = defaultdict(list)
    for track, chunks in tqdm(track_to_chunks.items()):
        sorted_chunks = sorted(chunks)

        separated_wavs, original_wavs = defaultdict(list), defaultdict(list)
        for _, chunk_idx in sorted_chunks:
                        
            chunk_folder = separation_folder / str(chunk_idx)
            original_tracks, separated_tracks, sr = load_chunks(chunk_folder)
            #print(separated_tracks['1'].shape)
           
            assert sr == orig_sr, f"chunk [{chunk_folder.name}]: expected freq={orig_sr}, track freq={sr}"
            
            
        mixture = sum([owav for owav in original_tracks.values()])
        
        chunk_samples = chunk_duration * orig_sr
        overlap_samples = overlap_duration * orig_sr
        
        chunk_samples = int(chunk_duration * orig_sr)
        
        overlap_samples = int(overlap_duration * orig_sr)

        # Calculate the step size between consecutive chunks
        step_size = chunk_samples - overlap_samples

        # Determine the number of chunks based on step_size
        num_chunks = math.ceil((mixture.shape[-1] - overlap_samples) / step_size)
        #print(mixture.shape)
        

        for i in range(num_chunks):
            start_sample = i * step_size
            end_sample = start_sample + chunk_samples

            num_silent_signals = 0
            for k in separated_tracks:
                o = original_tracks[k][:,start_sample:end_sample]
                #print(o.shape)
                if is_silent(o) and filter_single_source:
                    num_silent_signals += 1
            if num_silent_signals > 3:
                continue
            else:
                for k in separated_tracks:
                    o = original_tracks[k][:,start_sample:end_sample]
                    s = separated_tracks[k][:,start_sample:end_sample]
                    m = mixture[:,start_sample:end_sample]
                    results[k].append((sisnr(s, o, eps) - sisnr(m, o, eps)).item())


    return pd.DataFrame(results)

In [136]:
results = evaluate_tracks_chunks_mike("separations/debug/sep_round_0", chunk_prop=1/3, orig_sr=22050, eps=1e-8)
results.shape

100%|███████████████████████████████████████████| 71/71 [00:00<00:00, 75.29it/s]


(213, 4)

In [137]:
results_wo = evaluate_tracks_chunks_mike_with_overlap("separations/debug/sep_round_0",chunk_duration=4.0, overlap_duration=2.0 , orig_sr=22050, eps=1e-8)
results_wo.shape

100%|███████████████████████████████████████████| 71/71 [00:01<00:00, 66.42it/s]


(355, 4)

In [138]:
mean_results = results.mean()
print("Mean results:")
#print(mean_results)

# Access mean values by column name
mean_col1 = mean_results['1']
mean_col2 = mean_results['2']
mean_col3 = mean_results['3']
mean_col4 = mean_results['4']

print("\nMean values by column name:")
print("Column 1:", mean_col1)
print("Column 2:", mean_col2)
print("Column 3:", mean_col3)
print("Column 4:", mean_col4)

mean_of_means = mean_results.mean()

print("Mean of means:", mean_of_means)

Mean results:

Mean values by column name:
Column 1: 15.575636171958816
Column 2: 16.88300941919497
Column 3: 13.593393392965828
Column 4: 14.085952395564513
Mean of means: 15.034497844921031


In [139]:
mean_results = results_wo.mean()
print("Mean results:")
#print(mean_results)

# Access mean values by column name
mean_col1 = mean_results['1']
mean_col2 = mean_results['2']
mean_col3 = mean_results['3']
mean_col4 = mean_results['4']

print("\nMean values by column name:")
print("Column 1:", mean_col1)
print("Column 2:", mean_col2)
print("Column 3:", mean_col3)
print("Column 4:", mean_col4)

mean_of_means = mean_results.mean()

print("Mean of means:", mean_of_means)

Mean results:

Mean values by column name:
Column 1: 15.875413102163396
Column 2: 16.880386715203944
Column 3: 13.490192078200865
Column 4: 14.140144774947368
Mean of means: 15.096534167628892


In [11]:
def evaluate_tracks_chunks_with_overlap(separation_path: Union[str, Path], 
                           orig_sr: int = 44100, resample_sr: Optional[int] = None, 
                           filter_single_source: bool = True, eps: float = 10-8, 
                           chunk_duration: float = 4.0, overlap_duration: float = 2.0):

    separation_folder = Path(separation_path)
    assert separation_folder.exists(), separation_folder
    assert (separation_folder.parent / "chunk_data.json").exists(), separation_folder

    with open(separation_folder.parent / "chunk_data.json") as f:
        chunk_data = json.load(f)
        
    def load_model(path):
        model = main.module_base.Model(**{**hparams, "in_channels": 4})
        model.load_state_dict(torch.load(path)["state_dict"])
        model.to("cuda:0")
        return model
    
    ckpts_path = Path("/home/irene/Documents/audio-diffusion-pytorch-trainer/logs/ckpts")
    model = load_model(ckpts_path / "avid-darkness-164_epoch=419-valid_loss=0.015.ckpt")
    denoise_fn = model.model.diffusion.denoise_fn

    resample_fn = Resample(orig_freq=orig_sr, new_freq=resample_sr) if resample_sr is not None else lambda x: x

    track_to_chunks = defaultdict(list)
    for chunk_data in chunk_data:
        track = chunk_data["track"]
        chunk_idx = chunk_data["chunk_index"]
        start_sample = chunk_data["start_chunk_sample"]
        track_to_chunks[track].append( (start_sample, chunk_idx) )

    results = defaultdict(list)
    
    for track, chunks in tqdm(track_to_chunks.items()):
        
        #print(track)
        sorted_chunks = sorted(chunks)

        separated_wavs, original_wavs = defaultdict(list), defaultdict(list)
        for _, chunk_idx in sorted_chunks:
            chunk_folder = separation_folder / str(chunk_idx)
            original_tracks, separated_tracks, sr = load_chunks(chunk_folder)
            assert sr == orig_sr, f"chunk [{chunk_folder.name}]: expected freq={orig_sr}, track freq={sr}"

            for k in separated_tracks:
                separated_wavs[k].append(separated_tracks[k])
                original_wavs[k].append(original_tracks[k])

        original_tensors, separated_tensors = {}, {}
        for k in separated_wavs:
            separated_tensors[k] = resample_fn(torch.cat(separated_wavs[k], dim=-1).view(-1))
            original_tensors[k] = resample_fn(torch.cat(original_wavs[k], dim=-1).view(-1))
       
        mixture = sum([owav for owav in original_tensors.values()])
        #print(mixture.shape)

        # The new code for handling overlapping chunks starts here
        chunk_samples = chunk_duration * orig_sr
        overlap_samples = overlap_duration * orig_sr
        
        chunk_samples = int(chunk_duration * orig_sr)
        
        overlap_samples = int(overlap_duration * orig_sr)

        # Calculate the step size between consecutive chunks
        step_size = chunk_samples - overlap_samples

        # Determine the number of chunks based on step_size
        num_chunks = math.ceil((mixture.shape[-1] - overlap_samples) / step_size)
        #print(num_chunks)
        

        for i in range(num_chunks):
            start_sample = i * step_size
            end_sample = start_sample + chunk_samples

            num_silent_signals = 0
            for k in separated_tensors:
                o = original_tensors[k][start_sample:end_sample]
                if is_silent(o.unsqueeze(0)) and filter_single_source:
                    num_silent_signals += 1
            if num_silent_signals > 3:
                continue
            else:
                for k in separated_tensors:
                    o = original_tensors[k][start_sample:end_sample]
                    s = separated_tensors[k][start_sample:end_sample]
                    m = mixture[start_sample:end_sample]
                    results[k].append((sisnr(s, o, eps) - sisnr(m, o, eps)).item())


    return pd.DataFrame(results)

In [12]:
results = evaluate_tracks_chunks("separations/debug/sep_round_0", chunk_prop=1/3, orig_sr=22050, eps=1e-8)
#results.shape

  7%|███                                         | 5/71 [00:00<00:01, 44.78it/s]

1
{'1': tensor([[-0.0374, -0.0252, -0.0255,  ...,  0.0216,  0.0237,  0.0265]]), '2': tensor([[ 2.0195e-02, -2.8032e-04, -3.0696e-05,  ...,  1.9598e-02,
         -3.6496e-04,  4.1126e-03]]), '3': tensor([[-0.0005, -0.0014, -0.0003,  ...,  0.0298,  0.0267,  0.0206]]), '4': tensor([[-0.0016, -0.0013, -0.0010,  ...,  0.0173,  0.0228,  0.0060]])}
defaultdict(<class 'list'>, {'1': [tensor([[-0.0374, -0.0252, -0.0255,  ...,  0.0216,  0.0237,  0.0265]])]})
2
{'1': tensor([[-0.0374, -0.0252, -0.0255,  ...,  0.0216,  0.0237,  0.0265]]), '2': tensor([[ 2.0195e-02, -2.8032e-04, -3.0696e-05,  ...,  1.9598e-02,
         -3.6496e-04,  4.1126e-03]]), '3': tensor([[-0.0005, -0.0014, -0.0003,  ...,  0.0298,  0.0267,  0.0206]]), '4': tensor([[-0.0016, -0.0013, -0.0010,  ...,  0.0173,  0.0228,  0.0060]])}
defaultdict(<class 'list'>, {'1': [tensor([[-0.0374, -0.0252, -0.0255,  ...,  0.0216,  0.0237,  0.0265]])], '2': [tensor([[ 2.0195e-02, -2.8032e-04, -3.0696e-05,  ...,  1.9598e-02,
         -3.6496e-04, 

 15%|██████▋                                    | 11/71 [00:00<00:01, 39.54it/s]

1
{'1': tensor([[-0.0465, -0.0097, -0.0098,  ...,  0.0210,  0.0296,  0.0183]]), '2': tensor([[ 0.0504,  0.0153,  0.0087,  ...,  0.0002, -0.0180,  0.0095]]), '3': tensor([[ 0.0008, -0.0023,  0.0015,  ...,  0.0142,  0.0087, -0.0018]]), '4': tensor([[-0.0047, -0.0034, -0.0003,  ...,  0.0295,  0.0363,  0.0315]])}
defaultdict(<class 'list'>, {'1': [tensor([[-0.0465, -0.0097, -0.0098,  ...,  0.0210,  0.0296,  0.0183]])]})
2
{'1': tensor([[-0.0465, -0.0097, -0.0098,  ...,  0.0210,  0.0296,  0.0183]]), '2': tensor([[ 0.0504,  0.0153,  0.0087,  ...,  0.0002, -0.0180,  0.0095]]), '3': tensor([[ 0.0008, -0.0023,  0.0015,  ...,  0.0142,  0.0087, -0.0018]]), '4': tensor([[-0.0047, -0.0034, -0.0003,  ...,  0.0295,  0.0363,  0.0315]])}
defaultdict(<class 'list'>, {'1': [tensor([[-0.0465, -0.0097, -0.0098,  ...,  0.0210,  0.0296,  0.0183]])], '2': [tensor([[ 0.0504,  0.0153,  0.0087,  ...,  0.0002, -0.0180,  0.0095]])]})
3
{'1': tensor([[-0.0465, -0.0097, -0.0098,  ...,  0.0210,  0.0296,  0.0183]]), '

 28%|████████████                               | 20/71 [00:00<00:01, 35.25it/s]

1
{'1': tensor([[-0.0048, -0.0023, -0.0036,  ..., -0.0138, -0.0075, -0.0088]]), '2': tensor([[-0.0601, -0.0808, -0.0692,  ..., -0.0176, -0.0188, -0.0367]]), '3': tensor([[ 0.0136,  0.0146,  0.0126,  ..., -0.0088, -0.0123, -0.0097]]), '4': tensor([[0.0080, 0.0088, 0.0082,  ..., 0.0088, 0.0096, 0.0145]])}
defaultdict(<class 'list'>, {'1': [tensor([[-0.0048, -0.0023, -0.0036,  ..., -0.0138, -0.0075, -0.0088]])]})
2
{'1': tensor([[-0.0048, -0.0023, -0.0036,  ..., -0.0138, -0.0075, -0.0088]]), '2': tensor([[-0.0601, -0.0808, -0.0692,  ..., -0.0176, -0.0188, -0.0367]]), '3': tensor([[ 0.0136,  0.0146,  0.0126,  ..., -0.0088, -0.0123, -0.0097]]), '4': tensor([[0.0080, 0.0088, 0.0082,  ..., 0.0088, 0.0096, 0.0145]])}
defaultdict(<class 'list'>, {'1': [tensor([[-0.0048, -0.0023, -0.0036,  ..., -0.0138, -0.0075, -0.0088]])], '2': [tensor([[-0.0601, -0.0808, -0.0692,  ..., -0.0176, -0.0188, -0.0367]])]})
3
{'1': tensor([[-0.0048, -0.0023, -0.0036,  ..., -0.0138, -0.0075, -0.0088]]), '2': tensor([

 44%|██████████████████▊                        | 31/71 [00:00<00:00, 42.61it/s]

1
{'1': tensor([[ 0.0450,  0.0183,  0.0132,  ..., -0.0435, -0.0410, -0.0454]]), '2': tensor([[-0.0364, -0.0040, -0.0001,  ...,  0.0087, -0.0084,  0.0182]]), '3': tensor([[-0.0011,  0.0037,  0.0047,  ...,  0.0627,  0.0552,  0.0308]]), '4': tensor([[0.0094, 0.0060, 0.0032,  ..., 0.0450, 0.0535, 0.0452]])}
defaultdict(<class 'list'>, {'1': [tensor([[ 0.0450,  0.0183,  0.0132,  ..., -0.0435, -0.0410, -0.0454]])]})
2
{'1': tensor([[ 0.0450,  0.0183,  0.0132,  ..., -0.0435, -0.0410, -0.0454]]), '2': tensor([[-0.0364, -0.0040, -0.0001,  ...,  0.0087, -0.0084,  0.0182]]), '3': tensor([[-0.0011,  0.0037,  0.0047,  ...,  0.0627,  0.0552,  0.0308]]), '4': tensor([[0.0094, 0.0060, 0.0032,  ..., 0.0450, 0.0535, 0.0452]])}
defaultdict(<class 'list'>, {'1': [tensor([[ 0.0450,  0.0183,  0.0132,  ..., -0.0435, -0.0410, -0.0454]])], '2': [tensor([[-0.0364, -0.0040, -0.0001,  ...,  0.0087, -0.0084,  0.0182]])]})
3
{'1': tensor([[ 0.0450,  0.0183,  0.0132,  ..., -0.0435, -0.0410, -0.0454]]), '2': tensor([

 58%|████████████████████████▊                  | 41/71 [00:01<00:00, 41.25it/s]

1
{'1': tensor([[-0.0375, -0.0338, -0.0329,  ...,  0.0119,  0.0123,  0.0112]]), '2': tensor([[ 0.0022, -0.0172,  0.0007,  ...,  0.0045, -0.0008,  0.0051]]), '3': tensor([[-0.0252, -0.0329, -0.0262,  ...,  0.0022,  0.0026, -0.0010]]), '4': tensor([[-0.0226, -0.0276, -0.0272,  ..., -0.0157, -0.0094, -0.0081]])}
defaultdict(<class 'list'>, {'1': [tensor([[-0.0375, -0.0338, -0.0329,  ...,  0.0119,  0.0123,  0.0112]])]})
2
{'1': tensor([[-0.0375, -0.0338, -0.0329,  ...,  0.0119,  0.0123,  0.0112]]), '2': tensor([[ 0.0022, -0.0172,  0.0007,  ...,  0.0045, -0.0008,  0.0051]]), '3': tensor([[-0.0252, -0.0329, -0.0262,  ...,  0.0022,  0.0026, -0.0010]]), '4': tensor([[-0.0226, -0.0276, -0.0272,  ..., -0.0157, -0.0094, -0.0081]])}
defaultdict(<class 'list'>, {'1': [tensor([[-0.0375, -0.0338, -0.0329,  ...,  0.0119,  0.0123,  0.0112]])], '2': [tensor([[ 0.0022, -0.0172,  0.0007,  ...,  0.0045, -0.0008,  0.0051]])]})
3
{'1': tensor([[-0.0375, -0.0338, -0.0329,  ...,  0.0119,  0.0123,  0.0112]]), '

 72%|██████████████████████████████▉            | 51/71 [00:01<00:00, 43.52it/s]

{'1': tensor([[-0.0326, -0.0079, -0.0066,  ..., -0.0570, -0.0523, -0.0530]]), '2': tensor([[ 0.0469,  0.0203,  0.0162,  ..., -0.0453, -0.0488, -0.0690]]), '3': tensor([[ 0.0206,  0.0252,  0.0359,  ...,  0.0060, -0.0003, -0.0023]]), '4': tensor([[-0.0180, -0.0095, -0.0135,  ...,  0.0187,  0.0150,  0.0066]])}
defaultdict(<class 'list'>, {'1': [tensor([[-0.0326, -0.0079, -0.0066,  ..., -0.0570, -0.0523, -0.0530]])]})
2
{'1': tensor([[-0.0326, -0.0079, -0.0066,  ..., -0.0570, -0.0523, -0.0530]]), '2': tensor([[ 0.0469,  0.0203,  0.0162,  ..., -0.0453, -0.0488, -0.0690]]), '3': tensor([[ 0.0206,  0.0252,  0.0359,  ...,  0.0060, -0.0003, -0.0023]]), '4': tensor([[-0.0180, -0.0095, -0.0135,  ...,  0.0187,  0.0150,  0.0066]])}
defaultdict(<class 'list'>, {'1': [tensor([[-0.0326, -0.0079, -0.0066,  ..., -0.0570, -0.0523, -0.0530]])], '2': [tensor([[ 0.0469,  0.0203,  0.0162,  ..., -0.0453, -0.0488, -0.0690]])]})
3
{'1': tensor([[-0.0326, -0.0079, -0.0066,  ..., -0.0570, -0.0523, -0.0530]]), '2'

 90%|██████████████████████████████████████▊    | 64/71 [00:01<00:00, 52.24it/s]

1
{'1': tensor([[-0.0157, -0.0155, -0.0185,  ..., -0.0165, -0.0093, -0.0050]]), '2': tensor([[ 0.0016, -0.0012,  0.0010,  ...,  0.0040, -0.0038,  0.0052]]), '3': tensor([[0.0007, 0.0007, 0.0010,  ..., 0.0188, 0.0330, 0.0267]]), '4': tensor([[0.0109, 0.0115, 0.0106,  ..., 0.0507, 0.0678, 0.0811]])}
defaultdict(<class 'list'>, {'1': [tensor([[-0.0157, -0.0155, -0.0185,  ..., -0.0165, -0.0093, -0.0050]])]})
2
{'1': tensor([[-0.0157, -0.0155, -0.0185,  ..., -0.0165, -0.0093, -0.0050]]), '2': tensor([[ 0.0016, -0.0012,  0.0010,  ...,  0.0040, -0.0038,  0.0052]]), '3': tensor([[0.0007, 0.0007, 0.0010,  ..., 0.0188, 0.0330, 0.0267]]), '4': tensor([[0.0109, 0.0115, 0.0106,  ..., 0.0507, 0.0678, 0.0811]])}
defaultdict(<class 'list'>, {'1': [tensor([[-0.0157, -0.0155, -0.0185,  ..., -0.0165, -0.0093, -0.0050]])], '2': [tensor([[ 0.0016, -0.0012,  0.0010,  ...,  0.0040, -0.0038,  0.0052]])]})
3
{'1': tensor([[-0.0157, -0.0155, -0.0185,  ..., -0.0165, -0.0093, -0.0050]]), '2': tensor([[ 0.0016, -0

100%|███████████████████████████████████████████| 71/71 [00:01<00:00, 45.21it/s]

1
{'1': tensor([[-0.0236, -0.0147, -0.0141,  ..., -0.0751, -0.0712, -0.0740]]), '2': tensor([[ 0.0092, -0.0037, -0.0036,  ..., -0.0059, -0.0075, -0.0022]]), '3': tensor([[0.0068, 0.0057, 0.0075,  ..., 0.0041, 0.0073, 0.0019]]), '4': tensor([[-0.0034, -0.0024, -0.0020,  ...,  0.0070,  0.0076,  0.0004]])}
defaultdict(<class 'list'>, {'1': [tensor([[-0.0236, -0.0147, -0.0141,  ..., -0.0751, -0.0712, -0.0740]])]})
2
{'1': tensor([[-0.0236, -0.0147, -0.0141,  ..., -0.0751, -0.0712, -0.0740]]), '2': tensor([[ 0.0092, -0.0037, -0.0036,  ..., -0.0059, -0.0075, -0.0022]]), '3': tensor([[0.0068, 0.0057, 0.0075,  ..., 0.0041, 0.0073, 0.0019]]), '4': tensor([[-0.0034, -0.0024, -0.0020,  ...,  0.0070,  0.0076,  0.0004]])}
defaultdict(<class 'list'>, {'1': [tensor([[-0.0236, -0.0147, -0.0141,  ..., -0.0751, -0.0712, -0.0740]])], '2': [tensor([[ 0.0092, -0.0037, -0.0036,  ..., -0.0059, -0.0075, -0.0022]])]})
3
{'1': tensor([[-0.0236, -0.0147, -0.0141,  ..., -0.0751, -0.0712, -0.0740]]), '2': tensor([




In [252]:
results_wo = evaluate_tracks_chunks_with_overlap("separations/debug/sep_round_0",chunk_duration=4, overlap_duration=0 , orig_sr=22050, eps=1e-8)
results_wo.shape

100%|███████████████████████████████████████████| 71/71 [00:01<00:00, 66.52it/s]


(300, 4)

In [253]:
mean_results = results.mean()
print("Mean results:")
#print(mean_results)

# Access mean values by column name
mean_col1 = mean_results['1']
mean_col2 = mean_results['2']
mean_col3 = mean_results['3']
mean_col4 = mean_results['4']

print("\nMean values by column name:")
print("Column 1:", mean_col1)
print("Column 2:", mean_col2)
print("Column 3:", mean_col3)
print("Column 4:", mean_col4)

mean_of_means = mean_results.mean()

print("Mean of means:", mean_of_means)

Mean results:

Mean values by column name:
Column 1: 15.330938807115869
Column 2: 16.623296433211493
Column 3: 12.613785855646984
Column 4: 12.834234768795856
Mean of means: 14.35056396619255


In [140]:

mean_results = results_wo.mean()
print("Mean results:")
#print(mean_results)

# Access mean values by column name
mean_col1 = mean_results['1']
mean_col2 = mean_results['2']
mean_col3 = mean_results['3']
mean_col4 = mean_results['4']

print("\nMean values by column name:")
print("Column 1:", mean_col1)
print("Column 2:", mean_col2)
print("Column 3:", mean_col3)
print("Column 4:", mean_col4)

mean_of_means = mean_results.mean()

print("Mean of means:", mean_of_means)


Mean results:

Mean values by column name:
Column 1: 15.995791120204673
Column 2: 17.038332711095396
Column 3: 12.855836246324623
Column 4: 13.23683948485297
Mean of means: 14.781699890619416


In [120]:
file_length = 12
chunk_length = 4
overlap = 2

current_length = chunk_length
num_chunks = 1

while current_length + overlap <= file_length:
    num_chunks += 1
    current_length += overlap

total_files = 71

total_chunks = num_chunks * total_files


In [121]:
total_chunks

355

In [249]:
file_length = 12
chunk_length = 4

num_chunks_per_file = file_length // chunk_length
has_remainder = file_length % chunk_length > 0
total_files = 71
total_chunks = int(num_chunks_per_file * total_files)

if has_remainder:
    total_chunks += total_files


In [250]:
total_chunks

213