In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from lightning_trainer import UnetDACLighting
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.loggers import WandbLogger, TensorBoardLogger

from audio_dataset import DictTorchPartedDataset, PinDictTorchPartedDataset

from unet_dac import UnetDAC
import lightning as L

In [2]:
from config import NUM_MICS, ANGLE_RES


L_v = 96
K = 256
# INPUT_LEN = 64
# VIRTUAL_BATCH_SIZE = 1
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = UnetDAC(L=L_v, K=K, M=NUM_MICS).to(device)

lr = 1e-3
train_bs = 64
validation_bs = train_bs
model_name = f"unet_doa_batch{train_bs}_lr{lr:.0e}_v2_r"

logger = TensorBoardLogger("tb_logs", name=model_name)

trainer = L.Trainer(max_epochs=100,
                    callbacks=[EarlyStopping(monitor="val_loss", mode="min", patience=3)],
                    default_root_dir=model_name,
                    log_every_n_steps=9,
                    logger=logger)

criterion = nn.CrossEntropyLoss()

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


# Training #

In [None]:
model_lighting = UnetDACLighting(model, criterion, lr)

In [3]:
train_dataset = PinDictTorchPartedDataset('data_batches', 'train06r076v2' , ['samples', 'target'], real_batch_size=64, virtual_batch_size=1, device=device)
validation_dataset = PinDictTorchPartedDataset('data_batches', 'validation06r076v2' , ['samples', 'target'], real_batch_size=64, virtual_batch_size=1, device=device)

train_dataloader = DataLoader(train_dataset, batch_size=train_bs, shuffle=True, num_workers=4, persistent_workers=True, prefetch_factor=16)
valiadtion_dataloader = DataLoader(validation_dataset, batch_size=validation_bs, shuffle=True, num_workers=4, persistent_workers=True, prefetch_factor=16)

In [4]:
# model_lighting = UnetDACLighting(model, criterion, lr)


# wandb_logger = WandbLogger(log_model="all", project='AudioDOA', name='bs=64,sig0.6 clean. 0.76 with reverb')

trainer.fit(model_lighting, train_dataloaders=train_dataloader, val_dataloaders=valiadtion_dataloader)
# trainer.test(model_lighting, dataloaders=test_dataloader)

Missing logger folder: tb_logs\unet_doa_batch64_lr1e-03_v2_r
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
c:\Users\agadi\miniconda3\envs\audio_env\lib\site-packages\lightning\pytorch\core\optimizer.py:257: Found unsupported keys in the lr scheduler dict: {'verbose', 'mode', 'factor', 'patience'}. HINT: remove them from the output of `configure_optimizers`.

  | Name    | Type             | Params
---------------------------------------------
0 | model   | UnetDAC          | 1.9 M 
1 | loss_fn | CrossEntropyLoss | 0     
---------------------------------------------
1.9 M     Trainable params
0         Non-trainable params
1.9 M     Total params
7.772     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

c:\Users\agadi\miniconda3\envs\audio_env\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:492: Your `val_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

# Testing #

In [3]:
model_lighting = UnetDACLighting.load_from_checkpoint('tb_logs/unet_doa_batch64_lr1e-03_v2_r/version_0/checkpoints/epoch=30-step=2914.ckpt', model=model, loss_fn=criterion)

In [10]:
test_dataset = PinDictTorchPartedDataset('data_batches', 'test10r0235revrad' , ['samples', 'ref_stft', 'target', 'mixed_signals', 'perceived_signals'], real_batch_size=60, virtual_batch_size=1, device=device)
test_dataloader = DataLoader(test_dataset, batch_size=3, shuffle=False)

In [12]:
trainer.test(model_lighting, dataloaders=test_dataloader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

[{'avg_mix_0_0_epoch': 0.23055291175842285,
  'avg_mix_0_1_epoch': inf,
  'avg_mix_1_0_epoch': -0.11127140372991562,
  'avg_mix_1_1_epoch': inf,
  'avg_sep_0_0_epoch': -4.078956127166748,
  'avg_sep_0_1_epoch': inf,
  'avg_sep_1_0_epoch': -5.046751499176025,
  'avg_sep_1_1_epoch': inf}]

In [4]:
# all_t_data = torch.load('data_batches/train06r076_42.pt')
# all_t_data_small = {k: v[:2] for k,v in all_t_data.items()}
# probs = model_lighting.model(all_t_data_small['samples'].cuda())
# print(probs.shape)
# print(probs.device)
# print(all_t_data_small['samples'].device)

# all_t_data_small['probs'] = probs.detach().cpu()
# torch.save(all_t_data_small, 'samples_test1605_v2.pt')
# print('\n'.join([f"{k}: {v.shape}" for k,v in all_t_data_small.items()]))

In [6]:
from metrics import SeparatedSource

for i, batch in enumerate(test_dataset):
    samples, ref_stft, target, mixed_signals, perceived_signals = batch
    samples = samples.to('cuda', dtype=torch.float)
    print(samples.shape)
    probs = model(samples)
    ref_spec = ref_stft.detach().cpu().numpy()
    samp_probs = probs.detach().cpu().numpy()
    sep_src = SeparatedSource(ref_spec[1:], samp_probs)
    sep_src.save(f"sep_{i}.wav")

torch.Size([14, 256, 1280])


ValueError: expected 4D input (got 3D input)