In [1]:
import os
import sys
sys.path.insert(0, os.path.abspath('/home/bld56/gsoc/nemo/NeMo-opensource'))
import nemo
print(nemo)
import nemo.core as nemo_core
from nemo.core import adapter_mixins
from nemo.utils import exp_manager
import nemo.collections.asr as nemo_asr
import nemo
import json
from omegaconf import OmegaConf, open_dict
import torch
from pytorch_lightning import Trainer
from lightning.pytorch.loggers import WandbLogger
from torchmetrics.text import WordErrorRate
import warnings


<module 'nemo' from '/home/bld56/gsoc/nemo/NeMo-opensource/nemo/__init__.py'>


In [2]:
def load_and_configure_model(config_file_path):
    """Build the audio-visual CTC model from a YAML config plus CLI overrides.

    Args:
        config_file_path: Path of the YAML file read with ``OmegaConf.load``.

    Returns:
        tuple: ``(model, merged_conf)``. ``model`` is the
        ``AV_EncDecCTCModelBPE`` instance built from the merged config, with
        its training data already set up from ``model.cfg.train_ds``.
        ``merged_conf`` is the config the model was built from (file contents
        with the command-line overrides merged on top).
    """
    base_conf = OmegaConf.load(config_file_path)
    # NOTE(review): called without arguments, from_cli() takes its overrides
    # from sys.argv. Inside a notebook kernel sys.argv holds the kernel
    # launcher's arguments rather than user overrides -- confirm this is wanted.
    cli_overrides = OmegaConf.from_cli()
    merged_conf = OmegaConf.merge(base_conf, cli_overrides)
    # Struct mode: presumably enabled so that typos in later key accesses fail
    # loudly instead of silently creating new keys.
    OmegaConf.set_struct(merged_conf, True)
    model = nemo_asr.models.AV_EncDecCTCModelBPE(merged_conf)

    model.setup_training_data(model.cfg.train_ds)
    # Hand back the merged config, not the raw file contents, so that callers
    # (e.g. the adapter setup) act on the same settings the model was built with.
    return model, merged_conf

def manage_model_adapters(model, conf):
    """Freeze the whole model, then re-enable training for the configured parts.

    Which modules are trained depends on ``model.cfg``:

    * ``use_video_modality`` set: the audio/video projection layers, the AV
      encoder (and its layer), the modality embeddings and ``model.decoder``.
    * otherwise, ``use_pretrained_dec`` set: ``model.a_model.decoder``.
    * otherwise: ``model.decoder``.

    When ``conf.adapters.linear_adapter.keep`` is truthy, the adapter named
    ``conf.adapters.linear_adapter.name`` is the only adapter left enabled on
    ``model.a_model`` and is unfrozen.

    Args:
        model: Model exposing ``freeze()``, ``cfg``, ``a_model`` and the
            submodules listed above.
        conf: Config providing ``adapters.linear_adapter.keep`` and ``.name``.

    Returns:
        None. ``model`` is modified in place.
    """
    # Start from a fully frozen model; everything trainable is re-enabled below.
    model.freeze()

    # Adapter handling must run BEFORE the selected modules are unfrozen:
    # a_model.freeze() freezes the whole audio model, so running it afterwards
    # would re-freeze model.a_model.decoder in the pretrained-decoder case.
    adapter_cfg = conf.adapters.linear_adapter
    if adapter_cfg.keep:
        audio_model = model.a_model
        audio_model.freeze()
        # Disable every adapter, then switch on only the configured one.
        audio_model.set_enabled_adapters(enabled=False)
        audio_model.set_enabled_adapters(name=adapter_cfg.name, enabled=True)
        audio_model.unfreeze_enabled_adapters()

    # Pick the modules to train from the configuration.
    if model.cfg.use_video_modality:
        modules_to_train = [
            model.a_linear,
            model.v_linear,
            model.av_encoder,
            # Spelling matches the attribute name shown by model.summarize().
            model.av_enocder_layer,
            model.a_modal_embs,
            model.v_modal_embs,
            model.decoder,
        ]
    elif model.cfg.use_pretrained_dec:
        modules_to_train = [model.a_model.decoder]
    else:
        modules_to_train = [model.decoder]

    # Put the selected modules in training mode and enable their gradients.
    for module in modules_to_train:
        module.train()
        for param in module.parameters():
            param.requires_grad = True


def setup_trainer(max_epochs=100, devices=1, log_every_n_steps=5, check_val_every_n_epoch=1):
    """Create the Lightning ``Trainer`` used for this experiment.

    Args:
        max_epochs: Upper bound on training epochs (default 100).
        devices: Number of devices to train on (default 1).
        log_every_n_steps: Logging interval in steps (default 5).
        check_val_every_n_epoch: Validation interval in epochs (default 1).

    Returns:
        Trainer: Configured for ``'gpu'`` when CUDA is available, otherwise
        ``'cpu'``, with checkpointing and logging switched off.
    """
    accelerator = 'gpu' if torch.cuda.is_available() else 'cpu'
    return Trainer(
        devices=devices,
        accelerator=accelerator,
        max_epochs=max_epochs,
        # Checkpointing and logging stay off here; presumably because the
        # trainer is later handed to exp_manager, which sets up its own
        # checkpoint callback and loggers -- confirm before changing.
        enable_checkpointing=False,
        logger=False,
        log_every_n_steps=log_every_n_steps,
        check_val_every_n_epoch=check_val_every_n_epoch,
    )

In [3]:
def setup_exp_manager(trainer, model):
    """Attach NeMo's experiment manager to ``trainer`` and log hyperparameters.

    Args:
        trainer: Trainer passed to ``exp_manager.exp_manager``.
        model: Model whose ``cfg`` supplies ``exp_dir`` and the ``wandb``
            section (``run_name``, ``project``, ``log_model``,
            ``create_wandb_logger``).

    Returns:
        The log directory returned by ``exp_manager.exp_manager``.
    """
    # Remove any experiment version inherited from the environment; presumably
    # so exp_manager picks its own version for this run -- confirm.
    os.environ.pop('NEMO_EXPM_VERSION', None)

    wandb_cfg = model.cfg.wandb
    exp_config = exp_manager.ExpManagerConfig(
        exp_dir=model.cfg.exp_dir,
        name=f'{wandb_cfg.run_name}',
        # Checkpoint selection follows the lowest "val_u_wer".
        checkpoint_callback_params=exp_manager.CallbackParams(
            monitor="val_u_wer",
            mode="min",
            always_save_nemo=True,
            save_best_model=True,
        ),
        create_wandb_logger=wandb_cfg.create_wandb_logger,
        wandb_logger_kwargs=OmegaConf.create(
            {
                "project": f"{wandb_cfg.project}",
                "name": f"{wandb_cfg.run_name}",
                "log_model": wandb_cfg.log_model,
            }
        ),
    )

    exp_config = OmegaConf.structured(exp_config)
    logdir = exp_manager.exp_manager(trainer, exp_config)

    if wandb_cfg.create_wandb_logger:
        # Find the W&B logger by class name instead of assuming it sits at
        # index 1: the position depends on which other loggers are attached.
        # A name check is used rather than isinstance because this file imports
        # Trainer from `pytorch_lightning` but WandbLogger from
        # `lightning.pytorch`, which may be distinct classes.
        wandb_loggers = [
            logger
            for logger in trainer.loggers
            if any(cls.__name__ == "WandbLogger" for cls in type(logger).__mro__)
        ]
        if wandb_loggers:
            hparams = OmegaConf.to_container(model.cfg)
            for logger in wandb_loggers:
                logger.log_hyperparams(hparams)
        else:
            warnings.warn(
                "create_wandb_logger is set but no WandbLogger is attached to "
                "the trainer; hyperparameters were not logged."
            )
    return logdir


In [4]:
# Results collected per SNR value. The only writer visible in this notebook is
# the commented-out line `final_results[snr] = a[0]['val_u_wer']` further down.
final_results = {}

In [5]:
# SNR values to sweep. The full sweep runs from 1.0 down to 0.5 in steps of 0.05:
# snr_list = [1, 0.95, 0.9, 0.85, 0.8, 0.75, 0.7, 0.65, 0.6, 0.55, 0.5]
snr_list = [0.5]

# Loop-invariant, so it is defined once instead of on every iteration.
config_file_path = "/home/bld56/gsoc/nemo/NeMo-opensource/balu_codes/configs/c3_au_with_same_av_arch.yaml"

# Install the warning filters once, before any model/data work starts, rather
# than re-registering them at the end of every iteration.
warnings.filterwarnings("ignore", category=UserWarning, message="PySoundFile failed. Trying audioread instead.")
warnings.filterwarnings("ignore", category=FutureWarning, message="librosa.core.audio.__audioread_load\n\tDeprecated as of librosa version 0.10.0.\n\tIt will be removed in librosa version 1.0.")

for snr in snr_list:
    # NOTE(review): `snr` is not passed to anything below, so every iteration
    # builds the same experiment from the same config file. Confirm how the
    # SNR value is supposed to reach the config/model before widening snr_list.
    model, conf = load_and_configure_model(config_file_path)
    manage_model_adapters(model, conf)

    trainer = setup_trainer()
    model.set_trainer(trainer)
    logdir = setup_exp_manager(trainer, model)


[NeMo I 2024-08-02 11:46:40 mixins:172] Tokenizer SentencePieceTokenizer initialized with 356 tokens


[NeMo I 2024-08-02 11:46:42 collections:321] Dataset loaded with 22247 files totalling 61.80 hours
[NeMo I 2024-08-02 11:46:42 collections:323] 0 files were filtered totalling 0.00 hours
[NeMo I 2024-08-02 11:46:42 collections:321] Dataset loaded with 2447 files totalling 6.80 hours
[NeMo I 2024-08-02 11:46:42 collections:323] 0 files were filtered totalling 0.00 hours
[NeMo I 2024-08-02 11:46:42 collections:321] Dataset loaded with 2447 files totalling 6.80 hours
[NeMo I 2024-08-02 11:46:42 collections:323] 0 files were filtered totalling 0.00 hours
[NeMo I 2024-08-02 11:46:42 cloud:58] Found existing object /home/bld56/.cache/torch/NeMo/NeMo_2.0.0rc0/stt_en_conformer_ctc_large/afb212c5bcf904e326b5e5751e7c7465/stt_en_conformer_ctc_large.nemo.
[NeMo I 2024-08-02 11:46:42 cloud:64] Re-using file from: /home/bld56/.cache/torch/NeMo/NeMo_2.0.0rc0/stt_en_conformer_ctc_large/afb212c5bcf904e326b5e5751e7c7465/stt_en_conformer_ctc_large.nemo
[NeMo I 2024-08-02 11:46:42 common:815] Instantiatin

[NeMo W 2024-08-02 11:46:43 modelPT:176] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
    Train config : 
    manifest_filepath:
    - - /data2/nemo_asr/nemo_asr_set_3.0/bucket1/tarred_audio_manifest.json
    - - /data2/nemo_asr/nemo_asr_set_3.0/bucket2/tarred_audio_manifest.json
    - - /data2/nemo_asr/nemo_asr_set_3.0/bucket3/tarred_audio_manifest.json
    - - /data2/nemo_asr/nemo_asr_set_3.0/bucket4/tarred_audio_manifest.json
    - - /data2/nemo_asr/nemo_asr_set_3.0/bucket5/tarred_audio_manifest.json
    - - /data2/nemo_asr/nemo_asr_set_3.0/bucket6/tarred_audio_manifest.json
    - - /data2/nemo_asr/nemo_asr_set_3.0/bucket7/tarred_audio_manifest.json
    - - /data2/nemo_asr/nemo_asr_set_3.0/bucket8/tarred_audio_manifest.json
    sample_rate: 16000
    batch_size: 1
    shuffle: true
    num_workers: 4
    pin_memory: true
    use_start_end_token: false
    trim_

[NeMo I 2024-08-02 11:46:43 features:305] PADDING: 0
[NeMo I 2024-08-02 11:46:46 save_restore_connector:263] Model EncDecCTCModelBPE was successfully restored from /home/bld56/.cache/torch/NeMo/NeMo_2.0.0rc0/stt_en_conformer_ctc_large/afb212c5bcf904e326b5e5751e7c7465/stt_en_conformer_ctc_large.nemo.
[NeMo I 2024-08-02 11:46:48 collections:321] Dataset loaded with 22247 files totalling 61.80 hours
[NeMo I 2024-08-02 11:46:48 collections:323] 0 files were filtered totalling 0.00 hours
[NeMo I 2024-08-02 11:46:48 adapter_mixins:719] Setting adapter 'AV_v1' status : Enabled = False
[NeMo I 2024-08-02 11:46:48 adapter_mixins:734] Setting adapter 'AV_v1' status : Enabled = True
[NeMo I 2024-08-02 11:46:48 adapter_mixins:405] Froze module encoder.layers.0.conv.batch_norm: BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=False)
[NeMo I 2024-08-02 11:46:48 adapter_mixins:405] Froze module encoder.layers.1.conv.batch_norm: BatchNorm1d(512, eps=1e-05, momentum=0.1, affin

[NeMo W 2024-08-02 11:46:48 nemo_logging:349] /home/bld56/.miniconda3/envs/nemo/lib/python3.10/site-packages/lightning_fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /home/bld56/.miniconda3/envs/nemo/lib/python3.10/sit ...
    
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


[NeMo I 2024-08-02 11:46:48 exp_manager:396] Experiments will be logged at /tmp/bld56_dataset_v1/tmp/au_ndec_lman_ntok_NArch_0.5/2024-08-02_11-46-48
[NeMo I 2024-08-02 11:46:48 exp_manager:856] TensorboardLogger has been set up
[NeMo I 2024-08-02 11:46:48 exp_manager:871] WandBLogger has been set up


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mlakshmipathi-balaji[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [6]:
# Show the per-module summary table (name, type, parameter count, train/eval
# mode) to check which submodules ended up trainable after the adapter setup.
model.summarize()

   | Name             | Type                    | Params | Mode 
----------------------------------------------------------------------
0  | a_model          | EncDecCTCModelBPE       | 122 M  | eval 
1  | a_linear         | Linear                  | 262 K  | train
2  | v_linear         | Linear                  | 262 K  | train
3  | av_enocder_layer | TransformerEncoderLayer | 3.2 M  | train
4  | av_encoder       | TransformerEncoder      | 12.6 M | train
5  | a_modal_embs     | Embedding               | 512    | train
6  | v_modal_embs     | Embedding               | 512    | train
7  | a_pos_enc        | Embedding               | 5.1 M  | eval 
8  | v_pos_enc        | Embedding               | 5.1 M  | eval 
9  | decoder          | ConvASRDecoder          | 183 K  | train
10 | loss             | CTCLoss                 | 0      | eval 
11 | wer              | AV_WER                  | 0      | eval 
----------------------------------------------------------------------
17.7 M    Tra

In [7]:
# Run the validation loop before any training (the recorded output reports
# global_step 0.0) to get baseline metrics.
# NOTE(review): this passes test_dataloader(), while the commented-out cell
# below uses val_dataloader() -- confirm which split is intended here.
trainer.validate(model, model.test_dataloader())

[NeMo W 2024-08-02 11:47:06 nemo_logging:349] /home/bld56/.miniconda3/envs/nemo/lib/python3.10/site-packages/lightning_fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /home/bld56/.miniconda3/envs/nemo/lib/python3.10/sit ...
    
You are using a CUDA device ('NVIDIA L40S') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Validation: |          | 0/? [00:00<?, ?it/s]

      return F.conv2d(input, weight, bias, self.stride,
    


[{'global_step': 0.0,
  'val_l_wer': 1.984375,
  'val_u_wer': 2.1525423526763916,
  'val_acc': 0.0,
  'val_loss': 1231.60107421875,
  'val_wer': 2.1525423526763916}]

In [7]:
# a = trainer.validate(model, model.val_dataloader())
# final_results[snr] = a[0]['val_u_wer']

[NeMo W 2024-08-01 13:37:16 nemo_logging:349] /home/bld56/.miniconda3/envs/nemo/lib/python3.10/site-packages/lightning_fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /home/bld56/.miniconda3/envs/nemo/lib/python3.10/sit ...
    
You are using a CUDA device ('NVIDIA L40S') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Validation: |          | 0/? [00:00<?, ?it/s]

[NeMo W 2024-08-01 13:37:21 nemo_logging:349] /home/bld56/.miniconda3/envs/nemo/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...
    


In [6]:
# Start training. In the recorded run this stopped during sanity checking with
# "RuntimeError: blank must be in label range".
# NOTE(review): presumably the CTC blank index falls outside the decoder's
# output classes -- verify the decoder's class count against the tokenizer
# vocabulary before re-running.
trainer.fit(model)

[NeMo W 2024-08-01 13:40:08 nemo_logging:349] /home/bld56/.miniconda3/envs/nemo/lib/python3.10/site-packages/lightning_fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /home/bld56/.miniconda3/envs/nemo/lib/python3.10/sit ...
    
You are using a CUDA device ('NVIDIA L40S') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


[NeMo I 2024-08-01 13:40:09 modelPT:767] Optimizer config = AdamW (
    Parameter Group 0
        amsgrad: False
        betas: [0.9, 0.98]
        capturable: False
        differentiable: False
        eps: 1e-08
        foreach: None
        fused: None
        lr: 2.0
        maximize: False
        weight_decay: 0.001
    )
[NeMo I 2024-08-01 13:40:09 lr_scheduler:923] Scheduler "<nemo.core.optim.lr_scheduler.NoamAnnealing object at 0x7f4680f3dd20>" 
    will be used during training (effective maximum steps = 139100) - 
    Parameters : 
    (d_model: 512
    warmup_steps: 2000
    warmup_ratio: null
    min_lr: 1.0e-06
    max_steps: 139100
    )



  | Name    | Type              | Params | Mode
-----------------------------------------------------
0 | a_model | EncDecCTCModelBPE | 122 M  | eval
1 | decoder | ConvASRDecoder    | 183 K  | eval
2 | loss    | CTCLoss           | 0      | eval
3 | wer     | AV_WER            | 0      | eval
-----------------------------------------------------
1.2 M     Trainable params
121 M     Non-trainable params
122 M     Total params
491.530   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

      return F.conv2d(input, weight, bias, self.stride,
    


RuntimeError: blank must be in label range

In [None]:
# Display the per-module summary table again (this cell has no recorded output).
model.summarize()