In [1]:
import torch
from torch import nn
import numpy as np
from tqdm import tqdm
from torch.utils.data import DataLoader
from helpers.plot import plot_fit
import matplotlib.pyplot as plt
from lightning_trainer import UnetDACLighting
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.loggers import WandbLogger, TensorBoardLogger

from audio_dataset import DictTorchPartedDataset, PinDictTorchPartedDataset
from typing import List
from trainer import AudioTrainer
import os
import pandas as pd

from unet_dac import UnetDAC
import lightning as L

In [2]:
from config import NUM_MICS, ANGLE_RES

# Model dimensions forwarded to UnetDAC as its `L` and `K` arguments.
# Presumably named `L_v` rather than `L` because `L` is already the alias
# for `lightning` in the imports cell.
# NOTE(review): the sample shape printed later in this notebook is
# (14, 256, 96), so K and L_v look like the last two sample dimensions —
# confirm against UnetDAC.
L_v = 96
K = 256
# INPUT_LEN = 64
# VIRTUAL_BATCH_SIZE = 1
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Build the network and move its parameters to the selected device.
# `model` is a notebook-level global reused by the training and inference cells.
model = UnetDAC(L=L_v, K=K, M=NUM_MICS).to(device)

In [3]:
# Hyper-parameters for the Lightning training run configured in this cell.
train_bs = 16
lr: float = 1e-3

# Run name, used in this cell as the TensorBoard logger name and as the
# Trainer's default_root_dir. It must be an f-string: without the `f`
# prefix the placeholders are kept literally and every run would log to
# the same "unet_doa_batch{train_bs}_lr{lr:.0e}" directory.
model_name = f"unet_doa_batch{train_bs}_lr{lr:.0e}"
# Training data: the 'train06r076' parts under 'data_batches', exposing the
# 'samples', 'ref_stft' and 'target' entries.
# NOTE(review): the exact meaning of real_batch_size / virtual_batch_size is
# defined in audio_dataset and is not visible here — confirm before changing.
train_dataset = PinDictTorchPartedDataset('data_batches', 'train06r076' , ['samples', 'ref_stft', 'target'], real_batch_size=64, virtual_batch_size=1, device=device)
# Shuffled loader with 4 persistent worker processes, each prefetching 16 batches.
train_dataloader = DataLoader(train_dataset, batch_size=train_bs, shuffle=True, num_workers=4, persistent_workers=True, prefetch_factor=16)
# Evaluation data: the 'test10tgtprc' parts, loaded in a fixed order.
test_dataset = PinDictTorchPartedDataset('data_batches', 'test10tgtprc' , ['samples', 'ref_stft', 'target'], real_batch_size=30, virtual_batch_size=1, device=device)
# No workers are configured here; the recorded output shows Lightning's
# "does not have many workers" warning for this loader.
test_dataloader = DataLoader(test_dataset, batch_size=30, shuffle=False)
criterion = nn.CrossEntropyLoss()
# Wrap the bare model, loss and learning rate in the project's LightningModule.
model_lighting = UnetDACLighting(model, criterion, lr, device=device)
# wandb_logger = WandbLogger(log_model="all", project='AudioDOA', name='bs=64,sig0.6 clean. 0.76 with reverb')
# TensorBoard event files go under tb_logs/<model_name>.
logger = TensorBoardLogger("tb_logs", name=model_name)

# Train for at most 100 epochs, stopping early once the monitored metric has
# not improved for 3 consecutive checks.
# NOTE(review): early stopping monitors "train_loss" even though a validation
# loader is passed to fit(); which metric names UnetDACLighting logs is not
# visible here — confirm that monitoring the training loss is intended.
trainer = L.Trainer(max_epochs=100,
                    callbacks=[EarlyStopping(monitor="train_loss", mode="min", patience=3)],
                    default_root_dir=model_name,
                    log_every_n_steps=9,
                    logger=logger)
# Runs the training loop; test_dataloader is used as the validation set.
trainer.fit(model_lighting, train_dataloaders=train_dataloader, val_dataloaders=test_dataloader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: tb_logs\unet_doa_batch64_lr1e-3
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type             | Params
---------------------------------------------
0 | model   | UnetDAC          | 1.9 M 
1 | loss_fn | CrossEntropyLoss | 0     
---------------------------------------------
1.9 M     Trainable params
0         Non-trainable params
1.9 M     Total params
7.772     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

c:\Users\agadi\miniconda3\envs\audio_env\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

In [11]:
# Inspect the first element of the first training item. Move it to the
# notebook's selected `device` instead of calling .cuda() unconditionally,
# so the cell also runs on the CPU fallback chosen when CUDA is unavailable.
i0 = train_dataset[0][0].to(device)
print(i0.shape)

torch.Size([14, 256, 96])


In [None]:
# Alternative training path using the project's AudioTrainer instead of Lightning.
# Note: this cell rebinds `train_dataset`, `train_dataloader`, `test_dataset`,
# `test_dataloader`, `criterion`, `lr` and `trainer`, which the Lightning cell
# also assigns — whichever cell ran last determines their values.
# dataset = DistributedTorchDataset('data_batches', 'train' , ['samples', 'ref_stft', 'target'])
train_dataset = DictTorchPartedDataset('data_batches', 'trainv2' , ['samples', 'ref_stft', 'target'], real_batch_size=64, virtual_batch_size=1, device=device)
train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)
test_dataset = DictTorchPartedDataset('data_batches', 'testv2' , ['samples', 'ref_stft', 'target'], real_batch_size=30, virtual_batch_size=1, device=device)
# NOTE(review): the test loader is shuffled; a fixed order is the usual choice
# for evaluation — confirm this is intended.
test_dataloader = DataLoader(test_dataset, batch_size=8, shuffle=True)
criterion = nn.CrossEntropyLoss()
lr: float = 1e-3
# Adam over the parameters of the shared notebook-level `model`.
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
trainer = AudioTrainer(model=model, loss_fn=criterion, optimizer=optimizer, device=device)
# The recorded output shows this run loading and saving 'checkpoints.pt',
# i.e. checkpoints='checkpoints' resolves to that file.
# NOTE(review): the exact semantics of early_stopping / print_every are
# defined in trainer.AudioTrainer and are not visible here.
fit_res = trainer.fit(train_dataloader, test_dataloader, num_epochs=100, checkpoints='checkpoints', early_stopping=5, print_every=10)
# fig, axes = plot_fit(fit_res)

*** Loading checkpoint file checkpoints.pt
*** best_loss=2.36 ewi=0
--- EPOCH 1/100 ---
train_batch (Avg. Loss 2.371): 100%|██████████| 8/8 [00:29<00:00,  3.70s/it]
test_batch (Avg. Loss 2.556): 100%|██████████| 4/4 [00:37<00:00,  9.34s/it]
train_batch (Avg. Loss 2.354): 100%|██████████| 8/8 [00:25<00:00,  3.24s/it]
test_batch (Avg. Loss 2.581): 100%|██████████| 4/4 [00:40<00:00, 10.17s/it]
*** Saved checkpoint checkpoints.pt at epoch 2
train_batch (Avg. Loss 2.357): 100%|██████████| 8/8 [00:28<00:00,  3.59s/it]
test_batch (Avg. Loss 2.561): 100%|██████████| 4/4 [00:35<00:00,  8.79s/it]
train_batch (Avg. Loss 2.351): 100%|██████████| 8/8 [00:27<00:00,  3.41s/it]
test_batch (Avg. Loss 2.558): 100%|██████████| 4/4 [00:32<00:00,  8.17s/it]
*** Saved checkpoint checkpoints.pt at epoch 4
train_batch (Avg. Loss 2.359): 100%|██████████| 8/8 [00:26<00:00,  3.31s/it]
test_batch (Avg. Loss 2.566): 100%|██████████| 4/4 [00:32<00:00,  8.08s/it]
train_batch (Avg. Loss 2.350): 100%|██████████| 8/8 [

In [2]:
# NUM_TRAIN_EPOCHS = 100
# lr: float = 2e-3
# epochs: int = 100
# early_stopping: int = 3
# mininbatch_size: int = 16

# train_statistics = []
# dataset = DistributedTorchDataset('data_batches', 'train' , ['samples', 'ref_stft', 'target'], virtual_batch_size=1)
# dataloader = DataLoader(dataset, batch_size=16, shuffle=True)
# criterion = nn.CrossEntropyLoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=lr)


In [11]:
import bsseval

class SeparatedSource:
	"""The part of the received sound attributed to one specific source
	direction (angle).

	Pairs the reference microphone's spectrogram with the model's output
	for a single angle, which is applied as a multiplicative mask on the
	spectrogram magnitude.
	"""
	def __init__(self, ref_spec, probs):
		"""Store the reference spectrogram and this angle's mask.

		Args:
			ref_spec: The ref mic's spectrogram, shape=(Bmw, t) per the
				original author's note — TODO confirm the exact layout.
			probs: The model's output for this angle, shape=(w, t).
		"""
		# The ref mic's spectrogram, shape=(Bmw, t)
		self.ref_spec = ref_spec
		# The model's output for this angle, shape=(w, t)
		self.probs = probs
	
	def energy(self):
		"""Return the mask-weighted energy: sum(probs * |ref_spec|^2)."""
		return torch.sum(self.probs * abs(self.ref_spec) ** 2)
	
	def spec(self):
		"""Return the separated spectrogram.

		The magnitude is the reference magnitude scaled by `probs`; the
		phase is taken unchanged from the reference spectrogram.
		"""
		mag = abs(self.ref_spec) * self.probs
		phase = torch.angle(self.ref_spec)
		return mag * torch.exp(1j * phase)
	
	def metrics(self):
		"""Returns SDR, ISR, SIR, SAR.

		NOTE(review): this method is unfinished — the `...` placeholders
		below must be replaced with the real STFT parameters and the
		array layout bsseval expects before it can run.
		"""
		sep_signal = torch.istft(self.spec(), ...)  # <-- Finish me
		# TODO: maybe use the reference time signal directly?
		ref_signal = torch.istft(self.ref_spec, ...)  # <-- Finish me
		
		# NOTE(review): the separated signal is passed as `references` and
		# the reference-mic signal as `estimates`; this looks swapped —
		# confirm when finishing this method.
		return bsseval.evaluate(
			references=sep_signal.reshape(...),  # <-- Finish me
			estimates=ref_signal.reshape(...),  # <-- Finish me
			# win=1*44100,
			# hop=1*44100,
			# mode='v4',
			# padding=True
		)
	
	@classmethod
	def speaker_angles(cls, sources, num_speakers=2):
		"""Returns the angle numbers where the speakers are (most likely)
		located, i.e. the indices of the highest-energy sources.

		Args:
			sources: Sequence of SeparatedSource, one per angle.
			num_speakers: How many angles to return (default 2).

		Returns:
			Tuple of int indices into `sources`, ordered by increasing
			energy (the strongest source is last). Holds fewer than
			`num_speakers` entries when fewer sources are given, and is
			empty for empty input.
		"""
		if not sources:
			return ()
		# Stack the per-angle scalar energies into one 1-D host tensor so
		# ranking works whatever device the sources live on.
		energies = torch.stack(
			[torch.as_tensor(src.energy()) for src in sources]
		).detach().cpu()
		count = min(num_speakers, len(sources))
		# topk yields indices in descending energy order; flip so the
		# strongest source comes last.
		strongest = torch.topk(energies, count).indices.flip(0)
		return tuple(int(idx) for idx in strongest)
	
	@classmethod
	def sample_metrics(cls, ref_spec, samp_probs):
		"""Returns the metrics for this sample's data & label.

		Args:
			ref_spec: Reference-microphone spectrogram of the sample.
			samp_probs: Model output, shape (angle_count, w, t).

		Returns:
			List with one `metrics()` result per detected speaker angle.
		"""
		# `samp_probs`'s shape is (angle_count, w, t).
		# `sources[i]` is the separated source coming from the direction
		# theta_i.
		sources = [cls(ref_spec, probs) for probs in samp_probs]
		
		speaker_angles = cls.speaker_angles(sources)
		# Index by the angle number itself.
		return [sources[angle].metrics() for angle in speaker_angles]

# Loop for each sample in the batch
# NOTE(review): `batch_ref_specs` and `batch_outputs` are not assigned in any
# visible cell, and the recorded run of this cell ended in a NameError for
# `batch_ref_specs`. Both must be bound before this loop; candidates visible
# elsewhere in this notebook are the 'ref_stft' and 'probs' entries of the
# example batch — confirm which tensors are intended.
for ref_spec, samp_probs in zip(batch_ref_specs, batch_outputs):
	print(SeparatedSource.sample_metrics(ref_spec, samp_probs))


NameError: name 'batch_ref_specs' is not defined

### Train loop - manual ###

In [6]:
# losses = []
# for epoch in range(epochs):
#     running_loss = 0.0
#     print(f'--- epoch {epoch + 1} ---')

#     # Train on minibatches
#     for i, minibatch in tqdm(enumerate(dataloader), desc=f'Epoch {epoch} trainig batches', total=len(dataloader)):
#         samples, ref_abs_square, target = minibatch
#         # print(f"hello I am a minibatch! my dimensions are:")
#         # print(f"samples.shape={samples.shape}\nref_stft.shape={ref_stft.shape}\ntarget.shape={target.shape}")
#         # Forward + backward + optimize
#         optimizer.zero_grad()
#         outputs = model(samples)
#         # TODO
#         # output_directions = torch.dot(outputs, ref_stft * ref_stft.T)
#         # output_angle = torch.argmax(output_directions, axis=1)
#         loss = criterion(outputs, target // ANGLE_RES)
#         loss.backward()
#         optimizer.step()

#         # Statistics
#         running_loss += loss.item() / VIRTUAL_BATCH_SIZE
#         losses.append({'epoch': epoch, 'batch': i, 'loss': loss.item()})
#         if i % 8 == 7:    # print every 8 mini-batches
#             print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss :.3f}')
#             # running_loss = 0.0

#     # Validation on minibatches

In [7]:
# loss_df = pd.DataFrame(losses)
# display(loss_df)
# print(f"Average loss: {loss_df.loss.mean()}")

In [8]:
# from transformers import Trainer, TrainingArguments
# from transformers.data.data_collator import DataCollator
# import torch.nn as nn
# import torch.optim as optim

# training_args = TrainingArguments(
#     output_dir='./results',          # output directory
#     num_train_epochs=100,              # total number of training epochs
#     per_device_train_batch_size=1,  # batch size per device during training
#     logging_dir='./logs',            # directory for storing logs
#     logging_steps=10,
#     learning_rate=0.001,             # learning rate
# )

# optimizer = optim.Adam(model.parameters(), lr=training_args.learning_rate)
# loss_fn = nn.CrossEntropyLoss()
# train_dataset = DictTorchPartedDataset('data_batches', 'train' , ['samples', 'ref_stft', 'target'], real_batch_size=64, virtual_batch_size=1, device=device)
# train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)
# test_dataset = DictTorchPartedDataset('data_batches', 'test' , ['samples', 'ref_stft', 'target'], real_batch_size=30, virtual_batch_size=1, device=device)
# test_dataloader = DataLoader(test_dataset, batch_size=8, shuffle=True)

# def collate_fn(features):
#     # Assuming features is a list of dictionaries with your custom keys
    
#     return {'data': [feature[0] for feature in features]}
    
    
# # Define the Hugging Face Trainer
# trainer = Trainer(
#     model=model,                         # the instantiated 🤗 Transformers model to be trained
#     args=training_args,                  # training arguments, defined above
#     train_dataset=train_dataset,            # training dataset
#     eval_dataset=test_dataset,                   # evaluation dataset
#     compute_metrics=None,                # any additional metrics you want to compute
#     optimizers=(optimizer, None),        # (optimizer, scheduler), scheduler is None here
#     data_collator=collate_fn                   # data collator, default collate_fn for torch DataLoader
# )

# trainer.train()

In [9]:
# Sanity check: CrossEntropyLoss gives the same value whether the target is
# a one-hot probability row or the equivalent class index.
criterion = nn.CrossEntropyLoss()

# A single 13-class row with a 1.0 at class 4, used as the logits.
demo_logits = torch.zeros(1, 13)
demo_logits[0, 4] = 1.0

# Target given as class probabilities (a one-hot row identical to the logits).
loss_a = criterion(demo_logits, demo_logits.clone())
# Target given as a class index.
loss_b = criterion(demo_logits, torch.tensor([4]))
print(f"loss_a={loss_a}, loss_b={loss_b}")

loss_a=1.689090371131897, loss_b=1.689090371131897


In [46]:
# Load one stored training part and keep only the first two entries of
# every tensor in it, as a small example batch.
d1 = torch.load('data_batches/trainv2_0.pt')
for k, v in d1.items():
    # Detach, move to host memory and copy, so each value is a standalone
    # CPU tensor. Unlike a round-trip through numpy, this also works when
    # the stored tensor is not on the CPU.
    # Reassigning existing keys while iterating is safe (the dict size
    # does not change).
    d1[k] = v[:2].detach().cpu().clone()

In [7]:
# Restore the trained weights. map_location lets a checkpoint written on a
# GPU machine load on whatever `device` this session selected.
saved_state = torch.load('checkpoints.pt', map_location=device)
model.load_state_dict(saved_state['model_state'])

# Run the example batch through the model.
d1 = torch.load('example_batch2.pt')
# Inference only: switch to eval mode so any mode-dependent layers behave
# deterministically, and skip autograd bookkeeping. The previous
# train/eval mode is restored afterwards so later training cells are not
# affected.
was_training = model.training
model.eval()
with torch.no_grad():
    # Use the selected `device` rather than an unconditional .cuda(), so
    # the cell also runs on the CPU fallback.
    outputs = model(d1['samples'].to(device))
model.train(was_training)

In [48]:
# Attach the model outputs to the example batch as a standalone host tensor
# (moved to the CPU, detached from the graph, and copied).
d1['probs'] = outputs.cpu().detach().clone()
print(d1.keys())
# Write the batch back to the file it was loaded from, now including 'probs'.
torch.save(d1, 'example_batch2.pt')


dict_keys(['samples', 'ref_stft', 'target', 'perceived_signals', 'doas', 'probs'])
