In [26]:
# Libraries related to PyTorch
import torch
from torch import Tensor
import torchaudio 
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import WeightedRandomSampler,DataLoader

# Libraries related to PyTorch Lightning
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint

# Libraries related to hydra
import hydra
from hydra.utils import to_absolute_path
from omegaconf import DictConfig, OmegaConf

# custom packages
from dataset.speechcommands import SPEECHCOMMANDS_12C #for 12 classes KWS task
import models as Model

from dataloading_util import data_processing
from datetime import datetime

In [2]:
hydra.initialize("conf")

hydra.initialize()

In [3]:
cfg = hydra.compose("KWS_config", overrides=[])



# Loading pretrained weight

# Setting up dataset

In [5]:
cfg.data_root = to_absolute_path(cfg.data_root) # convert relative path to absolute path

batch_size = cfg.batch_size

testset = SPEECHCOMMANDS_12C(**cfg.dataset.test)

basename='speech_commands_test_set_v0.02.tar.gz'


Loading testing set: 0it [00:00, ?it/s]


In [6]:
    # for class weighting, rebalancing silence(10th class) and unknown(11th class) in training set
    class_weights = [1,1,1,1,1,1,1,1,1,1,4.6,1/17]
    sample_weights = [0] * len(trainset)
    #create a list as per length of trainset

    for idx, (data,rate,label,speaker_id, _) in enumerate(trainset):
        class_weight = class_weights[label]
        sample_weights[idx] = class_weight
    #apply sample_weights in each data base on their label class in class_weight
    #ref: https://www.youtube.com/watch?v=4JFVhJyTZ44&t=518s
    sampler = WeightedRandomSampler(sample_weights, num_samples=len(sample_weights),replacement=True)
            
    
    testloader = DataLoader(testset,   
                                  collate_fn=lambda x: data_processing(x),
                                            **cfg.dataloader.test)     

In [7]:
cfg.model.model_type

'BCResNet_nnAudio'

# Loading pretrained weight

In [29]:
cfg.model.args.input_dim = cfg.model.spec_args.n_mels *101 
train_setting=cfg.model.spec_args.trainable_mel
n_mel=cfg.model.spec_args.n_mels
stft = cfg.model.spec_args.trainable_STFT
net = getattr(Model, cfg.model.model_type)

net.load_from_checkpoint('lightning_logs/version_0/checkpoints/last.ckpt', cfg_model=cfg.model)

STFT kernels created, time used = 0.1838 seconds
STFT filter created, time used = 0.0037 seconds
Mel filter created, time used = 0.0037 seconds


BCResNet_nnAudio(
  (conv1): Conv2d(1, 16, kernel_size=(5, 5), stride=(2, 1), padding=(2, 2))
  (block1_1): TransitionBlock(
    (freq_dw_conv): Conv2d(8, 8, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0), groups=8, bias=False)
    (ssn): SubSpectralNorm(
      (bn): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (temp_dw_conv): Conv2d(8, 8, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1), groups=8, bias=False)
    (bn1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (bn2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (channel_drop): Dropout2d(p=0.5, inplace=False)
    (swish): SiLU()
    (conv1x1_1): Conv2d(16, 8, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (conv1x1_2): Conv2d(8, 8, kernel_size=(1, 1), stride=(1, 1), bias=False)
  )
  (block1_2): BroadcastedBlock(
    (freq_dw_conv): Conv2d(8, 8, kernel_size=(3, 1), stride=(1,

# Testing the performance

In [32]:
trainer = Trainer(**cfg.trainer)
trainer.test(net, testloader)
#added validloader, in order to reach validation_step



GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


AttributeError: 'property' object has no attribute 'items'