In [1]:
import numpy as np

In [2]:
import os
import torch
from torch.utils.data import DataLoader, random_split
from lightning.pytorch import Trainer

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Restrict this process to CUDA device index 0.
os.environ.update({'CUDA_VISIBLE_DEVICES': '0'})

In [4]:
from contrastive_model.contrastive_model import CoCola
from data.coco_chorales_contrastive_preprocessed import CocoChoralesContrastivePreprocessed
from data.moisesdb_contrastive_preprocessed import MoisesdbContrastivePreprocessed
from data.slakh2100_contrastive_preprocessed import Slakh2100ContrastivePreprocessed
from data.musdb_contrastive_preprocessed import MusdbContrastivePreprocessed
from contrastive_model import constants


In [27]:
# Checkpoints of the evaluated runs, keyed by run name.
# The trailing comment on each entry is the note recorded for that run.
CHECKPOINTS = {
    'bumbling-breeze-34': '/speech/dbwork/mul/spielwiese4/students/demancum/cocola/k5llcdm8/checkpoints/epoch=21-step=21406.ckpt',  # bs=512 chunk_dur=2s
    'good-wave-24': '/speech/dbwork/mul/spielwiese4/students/demancum/cocola/r71kxj7r/checkpoints/epoch=36-step=29119.ckpt',  # bs=256 chunk_dur=5s
    'golden-dew-25': '/speech/dbwork/mul/spielwiese4/students/demancum/cocola/shov44gs/checkpoints/epoch=24-step=19900.ckpt',  # bs=128 chunk_dur=10s
    'flowing-sound-26': '/speech/dbwork/mul/spielwiese4/students/demancum/cocola/zr304efn/checkpoints/epoch=23-step=17448.ckpt',  # bs=96 chunk_dur=15s
    'jumping-breeze-27': '/speech/dbwork/mul/spielwiese4/students/demancum/cocola/8jtiowyz/checkpoints/epoch=46-step=38963.ckpt',  # bs=64 chunk_dur=20s
    'frosty-silence-39': '/speech/dbwork/mul/spielwiese3/demancum/cocola/e3xubuzt/checkpoints/epoch=38-step=245622.ckpt',  # musdb 32bs test
    'astral-valley-21': '/speech/dbwork/mul/spielwiese3/demancum/cocola_hpss/ciflwfwc/checkpoints/epoch=59-step=377880.ckpt',  # more_negative_RAND_MASK_DOUBLE_CHANNEL
}

# Run to evaluate. Change this key instead of commenting paths in and out;
# an unknown name fails immediately with a KeyError.
RUN_NAME = 'astral-valley-21'
CHECKPOINT = CHECKPOINTS[RUN_NAME]

# Restore the model from the selected checkpoint file.
model = CoCola.load_from_checkpoint(CHECKPOINT)

# Trainer with default settings; the evaluation cells below call trainer.test on it.
trainer = Trainer()


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


## Setting embedding mode for the tests (for DOUBLE_CHANNEL_HARMONIC_PERCUSSIVE models only)

In [28]:
# Embedding mode applied to the model for the tests in this notebook.
# Options: constants.EmbeddingMode.HARMONIC, constants.EmbeddingMode.PERCUSSIVE
# or constants.EmbeddingMode.BOTH.
embedding_mode = constants.EmbeddingMode.BOTH
model.set_embedding_mode(embedding_mode)

## MOISESDB

In [33]:
# Open the MoisesDB contrastive dataset with preprocess=False.
dataset = MoisesdbContrastivePreprocessed(
    preprocess=False,
    root_dir='/disk1/demancum/moisesdb_contrastive',
)

# Seeded 80/10/10 random split; only the last 10% part is kept as the test set.
split_generator = torch.Generator().manual_seed(42)
_, _, test_dataset = random_split(
    dataset=dataset,
    lengths=[0.8, 0.1, 0.1],
    generator=split_generator,
)

# Unshuffled loader with a fixed batch size; the incomplete final batch is dropped.
loader_options = dict(
    batch_size=64,
    shuffle=False,
    drop_last=True,
    num_workers=os.cpu_count(),
    persistent_workers=True,
)
test_dataloader = DataLoader(test_dataset, **loader_options)


INFO:Found original dataset at /disk1/demancum/moisesdb_contrastive/moisesdb_v0.1.
INFO:Found preprocessed dataset at /disk1/demancum/moisesdb_contrastive/preprocessed_hpss.


In [34]:
# Run the test loop for the loaded model on the current test dataloader.
trainer.test(
    dataloaders=test_dataloader,
    model=model,
)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 32/32 [00:01<00:00, 19.81it/s]


[{'test_loss': 1.8591742515563965, 'test_accuracy': 0.5634765625}]

## Results MOISESDB

bumbling-breeze-34 bs=512 chunk_dur=2s test on moisesdb bs=512 [{'test_loss': 30.519054412841797, 'test_accuracy': 0.396484375}]

bumbling-breeze-34 bs=512 chunk_dur=2s test on moisesdb bs=64 [{'test_loss': 29.30805206298828, 'test_accuracy': 0.5985725308641975}]

bumbling-breeze-34 bs=512 chunk_dur=2s test on moisesdb bs=2 [{'test_loss': 25.239595413208008, 'test_accuracy': 0.8775077160493827}]

---

good-wave-24 bs=256 chunk_dur=5s test on moisesdb bs=256 [{'test_loss': 2.751087188720703, 'test_accuracy': 0.41650390625}]

good-wave-24 bs=256 chunk_dur=5s test on moisesdb bs=64 [{'test_loss': 1.7366595268249512, 'test_accuracy': 0.5849609375}]

good-wave-24 bs=256 chunk_dur=5s test on moisesdb bs=2 [{'test_loss': 0.1633053719997406, 'test_accuracy': 0.9415148609779481}]

---

golden-dew-25 bs=128 chunk_dur=10s test on moisesdb bs=128 [{'test_loss': 14.337109565734863, 'test_accuracy': 0.35379464285714285}]

golden-dew-25 bs=128 chunk_dur=10s test on moisesdb bs=64 [{'test_loss': 13.74421501159668, 'test_accuracy': 0.4654017857142857}]

golden-dew-25 bs=128 chunk_dur=10s test on moisesdb bs=2 [{'test_loss': 10.538426399230957, 'test_accuracy': 0.8828451882845187}]

---
flowing-sound-26 bs=96 chunk_dur=15s test on moisesdb bs=96 [{'test_loss': 3.2646923065185547, 'test_accuracy': 0.24702380952380953}]

flowing-sound-26 bs=96 chunk_dur=15s test on moisesdb bs=64 [{'test_loss': 2.8988101482391357, 'test_accuracy': 0.28267045454545453}]

flowing-sound-26 bs=96 chunk_dur=15s test on moisesdb bs=2 [{'test_loss': 0.29432547092437744, 'test_accuracy': 0.8728813559322034}]

---

jumping-breeze-27 bs=64 chunk_dur=20s test on moisesdb bs=64 [{'test_loss': 3.286912441253662, 'test_accuracy': 0.38671875}]

jumping-breeze-27 bs=64 chunk_dur=20s test on moisesdb bs=2 [{'test_loss': 0.5389121770858765, 'test_accuracy': 0.8936567164179104}]

---

frosty-silence-39 bs=32 chunk_dur=5s test on moisesdb bs=64 [{'test_loss': 1.6719969511032104, 'test_accuracy': 0.60546875}]

frosty-silence-39 bs=32 chunk_dur=5s test on moisesdb bs=2  [{'test_loss': 0.17246873676776886, 'test_accuracy': 0.9419942473633748}]


## Results MOISESDB HPSS
astral-valley-21 bs=32 chunk_dur=5s test on moisesdb bs=64 [{'test_loss': 1.8591742515563965, 'test_accuracy': 0.5634765625}]

astral-valley-21 bs=32 chunk_dur=5s test on moisesdb bs=2 [{'test_loss': 0.1839357316493988, 'test_accuracy': 0.9352828379674016}]




## slakh2100

In [36]:
# Test split of the Slakh2100 contrastive dataset (download=False, preprocess=False).
test_dataset = Slakh2100ContrastivePreprocessed(
    split="test",
    root_dir='/disk1/demancum/slakh2100_contrastive',
    download=False,
    preprocess=False,
)

# Unshuffled loader with a fixed batch size; the incomplete final batch is dropped.
loader_options = dict(
    batch_size=64,
    shuffle=False,
    drop_last=True,
    num_workers=os.cpu_count(),
    persistent_workers=True,
)
test_dataloader = DataLoader(test_dataset, **loader_options)

# Run the test loop for the loaded model on this loader.
trainer.test(
    dataloaders=test_dataloader,
    model=model,
)

INFO:Found original dataset split test at /disk1/demancum/slakh2100_contrastive/original/slakh2100_redux_16k/test.
INFO:Found preprocessed dataset split test at /disk1/demancum/slakh2100_contrastive/preprocessed_hpss/test.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 248/248 [00:12<00:00, 19.73it/s]


[{'test_loss': 1.6199898719787598, 'test_accuracy': 0.6010584677419355}]

## Results slakh2100

bumbling-breeze-34 bs=512 chunk_dur=2s test on slakh2100 bs=64 [{'test_loss': 2.6327898502349854, 'test_accuracy': 0.5836530694668821}]

bumbling-breeze-34 bs=512 chunk_dur=2s test on slakh2100 bs=2 [{'test_loss': 1.0304263830184937, 'test_accuracy': 0.9223624432104998}]

--- 

good-wave-24 bs=256 chunk_dur=5s test on slakh2100 bs=64 [{'test_loss': 2.032883405685425, 'test_accuracy': 0.5696194556451613}]

good-wave-24 bs=256 chunk_dur=5s test on slakh2100 bs=2 [{'test_loss': 0.24336093664169312, 'test_accuracy': 0.9187012333249435}]

---

golden-dew-25 bs=128 chunk_dur=10s test on slakh2100 bs=64 [{'test_loss': 1.9303451776504517, 'test_accuracy': 0.5871975806451613}]

golden-dew-25 bs=128 chunk_dur=10s test on slakh2100 bs=2 [{'test_loss': 0.18359966576099396, 'test_accuracy': 0.9335086401202103}]

---

flowing-sound-26 bs=96 chunk_dur=15s test on slakh2100 bs=64 [{'test_loss': 2.262129068374634, 'test_accuracy': 0.4759036144578313}]

flowing-sound-26 bs=96 chunk_dur=15s test on slakh2100 bs=2 [{'test_loss': 0.1931096464395523, 'test_accuracy': 0.9245883233532934}]

---
frosty-silence-39 bs=32 chunk_dur=5s test on slakh2100 bs=64  [{'test_loss': 1.6478705406188965, 'test_accuracy': 0.612273185483871}]

frosty-silence-39 bs=32 chunk_dur=5s test on slakh2100 bs=2  [{'test_loss': 0.15587395429611206, 'test_accuracy': 0.9336143971809716}]

## Results slakh2100 HPSS
astral-valley-21 bs=32 chunk_dur=5s test on slakh2100 bs=64 [{'test_loss': 1.6199898719787598, 'test_accuracy': 0.6010584677419355}]

astral-valley-21 bs=32 chunk_dur=5s test on slakh2100 bs=2 [{'test_loss': 0.13791394233703613, 'test_accuracy': 0.9413541404480242}]



## COCOCHORALES

In [39]:
# Test split of the CocoChorales contrastive dataset, with ensemble="*"
# (download=False, preprocess=False).
test_dataset = CocoChoralesContrastivePreprocessed(
    split="test",
    ensemble="*",
    root_dir='/disk1/demancum/coco_chorales_contrastive',
    download=False,
    preprocess=False,
)

# Unshuffled loader with a fixed batch size; the incomplete final batch is dropped.
loader_options = dict(
    batch_size=64,
    shuffle=False,
    drop_last=True,
    num_workers=os.cpu_count(),
    persistent_workers=True,
)
test_dataloader = DataLoader(test_dataset, **loader_options)


INFO:Found original dataset split test at /disk1/demancum/coco_chorales_contrastive/original/test.
INFO:Found preprocessed dataset split test at /disk1/demancum/coco_chorales_contrastive/preprocessed_hpss/test.


In [40]:
# Run the test loop for the loaded model on the current test dataloader.
trainer.test(
    dataloaders=test_dataloader,
    model=model,
)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 291/291 [00:14<00:00, 20.17it/s]


[{'test_loss': 0.22545965015888214, 'test_accuracy': 0.9272981099656358}]

## Results COCOCHORALES

bumbling-breeze-34 bs=512 chunk_dur=2s test on COCOCHORALES bs=64 [{'test_loss': 3.28816819190979, 'test_accuracy': 0.9072003182461104}]

bumbling-breeze-34 bs=512 chunk_dur=2s test on COCOCHORALES bs=2 [{'test_loss': 2.5330116748809814, 'test_accuracy': 0.9808440123729563}]

---

good-wave-24 bs=256 chunk_dur=5s test on COCOCHORALES bs=64 [{'test_loss': 0.24012035131454468, 'test_accuracy': 0.9311640893470791}]

good-wave-24 bs=256 chunk_dur=5s test on COCOCHORALES bs=2 [{'test_loss': 0.011269009672105312, 'test_accuracy': 0.997055043906618}]

---

golden-dew-25 bs=128 chunk_dur=10s test on COCOCHORALES bs=64 [{'test_loss': 1.371890902519226, 'test_accuracy': 0.9197303921568627}]

golden-dew-25 bs=128 chunk_dur=10s test on COCOCHORALES bs=2 [{'test_loss': 0.5721641182899475, 'test_accuracy': 0.9725274725274725}]

---
flowing-sound-26 bs=96 chunk_dur=15s test on COCOCHORALES bs=64 [{'test_loss': 0.1298881322145462, 'test_accuracy': 0.9505208333333334}]

flowing-sound-26 bs=96 chunk_dur=15s test on COCOCHORALES bs=2 [{'test_loss': 0.0018012290820479393, 'test_accuracy': 0.9992791234140715}]

---
frosty-silence-39 bs=32 chunk_dur=5s test on COCOCHORALES bs=64 [{'test_loss': 0.2683464288711548, 'test_accuracy': 0.9119415807560137}]

frosty-silence-39 bs=32 chunk_dur=5s test on COCOCHORALES bs=2 [{'test_loss': 0.010394576005637646, 'test_accuracy': 0.9968408652816448}]


## Results COCOCHORALES HPSS
astral-valley-21 bs=32 chunk_dur=5s test on COCOCHORALES bs=64 [{'test_loss': 0.22545965015888214, 'test_accuracy': 0.9272981099656358}]

astral-valley-21 bs=32 chunk_dur=5s test on COCOCHORALES bs=2 [{'test_loss': 0.0063800569623708725, 'test_accuracy': 0.9976440351252944}]



## MusDB


In [30]:
from contrastive_model.contrastive_model_data import hpss

# The MusDB "train" split serves as the test set here because it was not used for training.
test_dataset = MusdbContrastivePreprocessed(
    split="train",
    root_dir='/disk1/demancum/musdb_contrastive',
    download=False,
    # Preprocessing is requested here because this split was not prepared at training time.
    preprocess=True,
    chunk_duration=5,
    target_sample_rate=16000,
    generate_submixtures=True,
    transform=hpss,
    device="cuda",
)

# Unshuffled loader with a fixed batch size; the incomplete final batch is dropped.
loader_options = dict(
    batch_size=64,
    shuffle=False,
    drop_last=True,
    num_workers=os.cpu_count(),
    persistent_workers=True,
)
test_dataloader = DataLoader(test_dataset, **loader_options)

# Run the test loop for the loaded model on this loader.
trainer.test(
    dataloaders=test_dataloader,
    model=model,
)



INFO:Found original dataset split train at /disk1/demancum/musdb_contrastive/original/train.
INFO:Found preprocessed dataset split train at /disk1/demancum/musdb_contrastive/preprocessed_hpss/train.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 144/144 [00:07<00:00, 19.34it/s]


[{'test_loss': 1.8711408376693726, 'test_accuracy': 0.5638020833333333}]

## Results MusDB

bumbling-breeze-34 bs=512 chunk_dur=2s test on MusDB bs=64 [{'test_loss': 17.289331436157227, 'test_accuracy': 0.4112255586592179}]

bumbling-breeze-34 bs=512 chunk_dur=2s test on MusDB bs=2 [{'test_loss': 13.33316707611084, 'test_accuracy': 0.8486590038314177}]

---

good-wave-24 bs=256 chunk_dur=5s test on MusDB bs=64 [{'test_loss': 4.149162769317627, 'test_accuracy': 0.3428819444444444}]

good-wave-24 bs=256 chunk_dur=5s test on MusDB bs=2 [{'test_loss': 0.6049458980560303, 'test_accuracy': 0.8654470664646027}]

---

golden-dew-25 bs=128 chunk_dur=10s test on MusDB bs=64 [{'test_loss': 12.250128746032715, 'test_accuracy': 0.296875}]


golden-dew-25 bs=128 chunk_dur=10s test on MusDB bs=2 [{'test_loss': 6.9717888832092285, 'test_accuracy': 0.8437097466723916}]

---

flowing-sound-26 bs=96 chunk_dur=15s test on MusDB bs=64 [{'test_loss': 4.6844329833984375, 'test_accuracy': 0.2688802083333333}]

flowing-sound-26 bs=96 chunk_dur=15s test on MusDB bs=2 [{'test_loss': 0.8233780264854431, 'test_accuracy': 0.7988505747126438}]

---

frosty-silence-39 bs=32 chunk_dur=5s test on MusDB bs=64 [{'test_loss': 2.596252202987671, 'test_accuracy': 0.5073784722222222}]

frosty-silence-39 bs=32 chunk_dur=5s test on MusDB bs=2 [{'test_loss': 0.30304449796676636, 'test_accuracy': 0.9180558562459407}]

## Results MusDB HPSS
astral-valley-21 bs=32 chunk_dur=5s test on MusDB bs=64 [{'test_loss': 1.8711408376693726, 'test_accuracy': 0.5638020833333333}]

astral-valley-21 bs=32 chunk_dur=5s test on MusDB bs=2 [{'test_loss': 0.19757261872291565, 'test_accuracy': 0.9332106516562026}]
