In [1]:
import torch
from tqdm import tqdm
import atomize
import hydra
from hydra import initialize_config_dir, compose
import os
import mdct
from omegaconf import OmegaConf
from pytorch_lightning.utilities.model_summary import summarize

# for debugging purposes: CUDA_LAUNCH_BLOCKING=1 asks CUDA to run kernels
# synchronously, so that an error surfaces at the call that triggered it
# (this slows execution down; remove it for real training runs)
%env CUDA_LAUNCH_BLOCKING=1

# Compose the hydra configuration from the project's conf directory.
# initialize_config_dir expects an absolute path, hence the abspath call on
# the notebook-relative location.
abs_config_dir = os.path.abspath("../scripts/conf")
with initialize_config_dir(config_dir=abs_config_dir, version_base=None):
    cfg = compose(config_name="config.yaml")

env: CUDA_LAUNCH_BLOCKING=1


In [2]:
# build the datapipe: get_data hands back the atomizer, the datapipe and the loader
atomizer, pipe, loader = atomize.get_data(cfg.data)
# the system is built from its own config section and needs the atomizer
system = atomize.get_system(cfg.system, atomizer)
# the trainer is instantiated by hydra from the `trainer` section of the config
trainer = hydra.utils.instantiate(cfg.trainer)

# summarize: print the module / parameter-count table of the system, two levels deep
print(summarize(system, max_depth=2))

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


  | Name                          | Type               | Params
---------------------------------------------------------------------
0 | model                         | AtomsLocator       | 51.9 M
1 | model.featurewise_encoding    | ModuleDict         | 918 K 
2 | model.joint_location_encoding | SinusoidalEmbedder | 540   
3 | model.masks                   | ParameterDict      | 1.8 K 
4 | model.mlp_pe                  | Mlp                | 1.0 M 
5 | model.encoder_blocks          | ModuleList         | 18.7 M
6 | model.norm                    | LayerNorm          | 720   
7 | model.decoder_blocks          | ModuleList         | 25.0 M
8 | model.predictors              | ModuleDict         | 6.3 M 
---------------------------------------------------------------------
51.9 M    Trainable params
0         Non-trainable params
51.9 M    Total params
207.718   Total estimated model params size (MB)


In [None]:
# Pull a single batch from the loader and run one training step by hand
# (batch index 0), outside of the trainer.
batch = next(iter(loader))

# NOTE: `output` is read again in a later cell (its 'pred' entry), so this cell
# has to be executed before that one.
output = system.training_step(batch, 0)


In [3]:
# Train the system on the loader; no validation loader is passed here.
trainer.fit(system, loader)

  rank_zero_warn("You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type         | Params
---------------------------------------
0 | model | AtomsLocator | 51.9 M
---------------------------------------
51.9 M    Trainable params
0         Non-trainable params
51.9 M    Total params
207.718   Total estimated model params size (MB)
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")


Training: 0it [00:00, ?it/s]

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mliutkus[0m. Use [1m`wandb login --relogin`[0m to force relogin


  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [6]:
# Take the 'pred' entry of the manual training-step output and pass it through
# uncollate (presumably one entry per batch item — TODO confirm); the first
# entry is printed for inspection.
sample = output['pred']
sample = atomize.data.uncollate(sample)
print(sample[0])
# Map the uncollated samples back through the atomizer and print the shape of
# the first result.
waveforms = atomizer.backward(sample) 
print(waveforms[0].shape)

{'velocities': tensor([249, 237, 173,  ..., 273, 277, 240]), 'masks': tensor([1., 1., 1.,  ..., 1., 1., 1.]), 'misc_max_mags': tensor([11.3024], dtype=torch.float64), 'misc_num_freqs': tensor([1024]), 'misc_num_chans': tensor([1]), 'misc_num_times': tensor([131]), 'freqs': tensor([736,  89,  75,  ...,  34, 676, 531]), 'chans': tensor([0, 0, 0,  ..., 0, 0, 0]), 'signs': tensor([0, 1, 1,  ..., 0, 1, 0]), 'times': tensor([255, 269, 103,  ..., 327, 171, 119])}
torch.Size([2, 365568])


In [12]:
import matplotlib.pyplot as plt
import IPython
# Inline audio player for the first entry of `waveforms`, moved to the CPU and
# converted to numpy; the playback rate is hard-coded to 44100 Hz.
IPython.display.Audio(waveforms[0].cpu().numpy(),rate=44100)


In [14]:
# Display the features declared by the atomizer.
atomizer.features

{'velocities': AtomFeature(cardinality=1024, is_location=False),
 'freqs': AtomFeature(cardinality=1024, is_location=True),
 'chans': AtomFeature(cardinality=2, is_location=False),
 'signs': AtomFeature(cardinality=2, is_location=False),
 'times': AtomFeature(cardinality=None, is_location=True)}

In [None]:
# Print the ten entries of the averaged profiler results with the largest
# total CUDA time.
# NOTE(review): `prof` is not created in any cell visible here — the profiling
# cell has to be restored (or run) before this one.
result = prof.key_averages().table(sort_by="cuda_time_total", row_limit=10)
print(result)

In [8]:
# Dump the profiler stacks for the "self_cuda_time_total" metric to a text file
# under /tmp (presumably requires the profiler to have recorded stacks — TODO confirm).
prof.export_stacks("/tmp/profiler_stacks.txt", "self_cuda_time_total")

In [9]:
# Display the profiler object itself (only its repr is shown).
prof

<torch.profiler.profiler.profile at 0x7f32a5aa7d90>

In [3]:
from pytorch_lightning.utilities.model_summary import summarize

# The device is pinned to the CPU; the automatic selection that used to follow
# ('cuda' if torch.cuda.is_available() and cfg.training.cuda else 'cpu') is
# disabled.
device = 'cpu'

# Build the model from the full config and move it to the chosen device.
# NOTE(review): `atomizer` must expose `AtomsLocatorMDCT` here — confirm which
# object the name is bound to when this cell runs.
model = atomizer.AtomsLocatorMDCT(cfg).to(device)

# Print the parameter-count table of the model, top-level modules only.
print(summarize(model, max_depth=1))


{'velocities': 1024, 'freqs': 1024, 'chans': 2, 'signs': 2, 'times': 131}
  | Name           | Type              | Params
-----------------------------------------------------
0 | abs_encoding   | ModuleDict        | 785 K 
1 | tf_spe         | PointSineEmbedder | 540   
2 | masks          | ParameterDict     | 1.8 K 
3 | mlp_pe         | Mlp               | 259 K 
4 | predictors     | ModuleDict        | 5.7 M 
5 | encoder_blocks | ModuleList        | 18.7 M
6 | norm           | LayerNorm         | 720   
7 | decoder_blocks | ModuleList        | 25.0 M
-----------------------------------------------------
50.5 M    Trainable params
0         Non-trainable params
50.5 M    Total params
201.945   Total estimated model params size (MB)


In [4]:
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint, Callback
import wandb
from omegaconf import OmegaConf
import os

class ConfWriterCallback(Callback):
    """Lightning callback that stores the run configuration next to the checkpoints.

    When training starts, the configuration is written as ``config.yaml`` into
    the directory used by the trainer's checkpoint callback, so that a model
    can later be rebuilt from the checkpoint folder alone.

    Args:
        cfg: the OmegaConf configuration to save.
    """

    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg

    def on_train_start(self, trainer, pl_module):
        """Write ``self.cfg`` to ``<checkpoint dir>/config.yaml`` and report the path."""
        ckpt_dir = trainer.checkpoint_callback.dirpath
        # The checkpoint directory is not guaranteed to exist yet at this point
        # (it may only be created when the first checkpoint is written), so
        # create it before saving.
        os.makedirs(ckpt_dir, exist_ok=True)
        conf_path = os.path.join(ckpt_dir, 'config.yaml')
        OmegaConf.save(config=self.cfg, f=conf_path)
        # A space was missing after "to", which glued the path to the message.
        print("Config saved to " + conf_path)

# Logging: a Weights & Biases logger for the "atomizer" project, with the whole
# configuration attached to the run as a plain container.
wandb_logger = WandbLogger(project="atomizer")
wandb_logger.experiment.config.update(OmegaConf.to_container(cfg))

# Callbacks: learning-rate logging at every step, a checkpoint every 1000
# training steps (plus the last one), and the config writer.
lr_monitor = LearningRateMonitor(logging_interval='step')
model_checkpoint = ModelCheckpoint(save_last=True, every_n_train_steps=1000)
conf_writer = ConfWriterCallback(cfg)

# Single-GPU, full-precision trainer with anomaly detection switched on and
# gradients accumulated over two batches.
trainer = Trainer(
    accelerator='gpu',
    devices=1,
    precision=32,
    amp_backend="native",
    accumulate_grad_batches=2,
    detect_anomaly=True,
    logger=wandb_logger,
    callbacks=[lr_monitor, model_checkpoint, conf_writer],
)

trainer.fit(model, loader)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mliutkus[0m. Use [1m`wandb login --relogin`[0m to force relogin


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  rank_zero_warn("You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type              | Params
-----------------------------------------------------
0 | abs_encoding   | ModuleDict        | 785 K 
1 | tf_spe         | PointSineEmbedder | 540   
2 | masks          | ParameterDict     | 1.8 K 
3 | mlp_pe         | Mlp               | 259 K 
4 | predictors     | ModuleDict        | 5.7 M 
5 | encoder_blocks | ModuleList        | 18.7 M
6 | norm           | LayerNorm         | 720   
7 | decoder_blocks | ModuleList        | 25.0 M
-----------------------------------------------------
50.5 M    Trainable params
0         Non-trainable params
50.5 M    Total params
201.945   Total estimated model params size (MB)


Config saved to/home/antoine/repositories/atomizer/notebooks/atomizer/1vfy450a/checkpoints/conf.yaml


Training: 0it [00:00, ?it/s]



RuntimeError: CUDA out of memory. Tried to allocate 22.00 MiB (GPU 0; 31.74 GiB total capacity; 13.12 GiB already allocated; 44.94 MiB free; 13.14 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [13]:
# Reload the configuration that ConfWriterCallback stored next to the
# checkpoints and rebuild a model from it.
ckpt_path = trainer.checkpoint_callback.dirpath
cfg = OmegaConf.load(os.path.join(ckpt_path, 'config.yaml'))
model = atomizer.AtomsLocatorMDCT(cfg)
# TODO: the trained weights still have to be loaded into `model`. The cell used
# to end on an unfinished `model.` expression, which is a SyntaxError and kept
# the whole cell from running.
ckpt_path

'/home/antoine/repositories/atomizer/notebooks/atomizer/1g1yvrn2/checkpoints'

In [None]:
from itertools import islice

from tqdm import tqdm

# Time the data loading alone: walk through the first 100 batches and discard them.
# islice stops after exactly 100 batches, whereas zip(loader, range(100)) fetched
# a 101st batch from the loader before noticing that the range was exhausted.
# Passing the total lets tqdm show a real progress bar instead of a bare counter.
# `b` is now the batch itself rather than a (batch, index) tuple.
for b in tqdm(islice(loader, 100), total=100):
    pass

In [None]:
import matplotlib.pyplot as plt
import IPython