# Load architecture

### Testing ONNX export

In [None]:
%%capture 
!pip install -e ./

In [None]:
import os 
os.environ["CUDA_VISIBLE_DEVICES"]="0,1,3"

from wwv.Architecture.ResNet.model import ResNet
from wwv.Architecture.HTSwin.model import HTSwinTransformer
# from wwv.Architecture.DeepSpeech.model import DeepSpeech

import torch 
import torch.nn.functional as F 
# from wwv.architecture import Architecture
from wwv.eval import Metric
import statistics
from wwv.data import AudioDataModule
import wwv.config as cfg 

# from wwv.config import Config, FittingCfg, DataPathCfg, ResNetCfg
from wwv.meta import params as params 
from torchlibrosa.stft import Spectrogram, LogmelFilterBank, DFT
from torchlibrosa.augmentation import SpecAugmentation

from wwv.eval import Metric
from wwv.util import OnnxExporter
from wwv.routine import Routine
import bisect 
import torch 
from pytorch_lightning import Trainer
import pytorch_lightning as pl 
import torch.nn.functional as F 
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import EarlyStopping,ModelCheckpoint,LearningRateMonitor, ModelPruning

from torch.optim.lr_scheduler import ReduceLROnPlateau

# Query CUDA availability. The value is neither stored nor the last
# expression of the cell, so it is discarded.
torch.cuda.is_available()
# Instantiate the configuration objects provided by wwv.config.
cfg_fitting = cfg.Fitting()
cfg_signal = cfg.Signal()
cfg_feature = cfg.Feature()
# Two model configurations are built; cfg_model1 (HTSwin) is the one handed
# to Fitter at the end of this cell.
cfg_model1 = cfg.HTSwin() # cfg.ResNet()
cfg_model2 = cfg.ResNet()


class Fitter:
    """Train and test one model class using the project's configuration objects.

    An instance stores the model class together with its configuration.
    Calling the instance builds the data loaders, instantiates the model,
    wraps it in a ``Routine`` and runs ``Trainer.fit`` followed by
    ``Trainer.test``.
    """

    def __init__(self, model, cfg_model, cfg, data_path="/home/akinwilson/Code/HTS-Audio-Transformer") -> None:
        """Store the model class and derive the remaining configuration.

        Args:
            model: Model class (not an instance); instantiated in ``__call__``.
            cfg_model: Model configuration; ``model_name`` and ``model_dir``
                are read here, further attributes in ``__call__``.
            cfg: Object exposing the ``Fitting``, ``Signal``, ``Feature`` and
                ``DataPath`` factories.
            data_path: Root directory passed to ``cfg.DataPath``.
        """
        self.model, self.cfg_model = model, cfg_model
        self.cfg_fitting, self.cfg_signal, self.cfg_feature = cfg.Fitting(), cfg.Signal(), cfg.Feature()
        self.data_path = cfg.DataPath(
            data_path,
            cfg_model.model_name,
            cfg_model.model_dir,
        )

    def setup(self):
        """Build the data module and return ``(train, val, test)`` data loaders."""
        datamodule = AudioDataModule(
            self.data_path.root_data_dir,
            cfg_model=self.cfg_model,
            cfg_feature=self.cfg_feature,
            cfg_fitting=self.cfg_fitting,
        )
        return (
            datamodule.train_dataloader(),
            datamodule.val_dataloader(),
            datamodule.test_dataloader(),
        )

    def get_callbacks(self):
        """Return the Lightning callbacks: checkpointing, LR monitoring, early stopping.

        Checkpointing and early stopping both monitor ``val_loss`` in ``min``
        mode; only the single best checkpoint is kept.
        """
        monitor_lr = LearningRateMonitor(logging_interval='epoch')
        stopper = EarlyStopping(
            mode="min",
            monitor='val_loss',
            patience=self.cfg_fitting.es_patience,
        )
        best_checkpoint = ModelCheckpoint(
            monitor="val_loss",
            dirpath=self.data_path.model_dir,
            save_top_k=1,
            mode="min",
            filename='{epoch}-{val_loss:.2f}-{val_acc:.2f}-{val_ttr:.2f}-{val_ftr:.2f}',
        )
        return [best_checkpoint, monitor_lr, stopper]

    def _model_kwargs(self):
        """Return the constructor keyword arguments for the configured model."""
        model_cfg = self.cfg_model
        if model_cfg.model_name != "HSTAT":
            # Every other model only takes a block count and a fixed dropout.
            return {"num_blocks": model_cfg.num_blocks, "dropout": 0.2}
        return dict(
            spec_size=model_cfg.spec_size,
            patch_size=model_cfg.patch_size,
            in_chans=1,
            num_classes=model_cfg.num_classes,
            window_size=model_cfg.window_size,
            cfg_signal=self.cfg_signal,
            depths=model_cfg.depth,
            embed_dim=model_cfg.dim,
            patch_stride=model_cfg.stride,
            num_heads=model_cfg.num_head,
        )

    def __call__(self):
        """Fit the model on the train/val loaders, then evaluate on the test loader."""
        tb_logger = TensorBoardLogger(
            save_dir=self.data_path.model_dir,
            version=1,
            name="lightning_logs",
        )
        init_kwargs = self._model_kwargs()

        train_dl, val_dl, test_dl = self.setup()
        network = self.model(**init_kwargs)
        routine = Routine(network, self.cfg_fitting, self.cfg_model)

        trainer_options = dict(
            accelerator="gpu",
            devices=3,
            strategy='dp',
            sync_batchnorm=True,
            logger=tb_logger,
            default_root_dir=self.data_path.model_dir,
            callbacks=self.get_callbacks(),
            num_sanity_val_steps=2,
            resume_from_checkpoint=None,
            gradient_clip_val=1.0,
            fast_dev_run=False,
        )
        trainer = Trainer(**trainer_options)

        # To resume from a saved checkpoint, pass ckpt_path=<path to .ckpt> to fit().
        trainer.fit(routine, train_dataloaders=train_dl, val_dataloaders=val_dl)
        trainer.test(dataloaders=test_dl)



# Build a Fitter for the HTSwin transformer and immediately call it, which
# runs the fit and test stages.
Fitter(HTSwinTransformer, cfg_model1, cfg)()

In [8]:
import os 
os.environ["CUDA_VISIBLE_DEVICES"]="0,1,3"

from wwv.Architecture.ResNet.model import ResNet
from wwv.Architecture.HTSwin.model import HTSwinTransformer
# from wwv.Architecture.DeepSpeech.model import DeepSpeech

import torch 
import torch.nn.functional as F 
# from wwv.architecture import Architecture
from wwv.eval import Metric
import statistics
from wwv.data import AudioDataModule
import wwv.config as cfg 

# from wwv.config import Config, FittingCfg, DataPathCfg, ResNetCfg
from wwv.meta import params as params 
from torchlibrosa.stft import Spectrogram, LogmelFilterBank, DFT
from torchlibrosa.augmentation import SpecAugmentation

from wwv.eval import Metric
from wwv.util import OnnxExporter
from wwv.routine import Routine
import bisect 
import torch 
from pytorch_lightning import Trainer
import pytorch_lightning as pl 
import torch.nn.functional as F 
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import EarlyStopping,ModelCheckpoint,LearningRateMonitor, ModelPruning

from torch.optim.lr_scheduler import ReduceLROnPlateau

# Query CUDA availability; the value is not stored.
torch.cuda.is_available()
# Instantiate the configuration objects provided by wwv.config.
cfg_fitting = cfg.Fitting()
cfg_signal = cfg.Signal()
cfg_feature = cfg.Feature()
cfg_model = cfg.HTSwin() # cfg.ResNet()
cfg_model2 = cfg.ResNet()


# Resolve the data/model locations for the HTSwin configuration and build the
# data module from them.
data_path = cfg.DataPath("/home/akinwilson/Code/HTS-Audio-Transformer", cfg_model.model_name, cfg_model.model_dir)
data_module = AudioDataModule(data_path.root_data_dir,
                               cfg_model=cfg_model,
                               cfg_feature=cfg_feature,
                               cfg_fitting=cfg_fitting)


import copy 

# Pull a single batch from the test loader to discover the per-sample input
# shape.
test_loader = data_module.test_dataloader()
x = next(iter(test_loader))
# Drop the first dimension (presumably the batch dimension -- TODO confirm).
input_shape = tuple(x['x'].shape[1:])
# NOTE(review): input_shape is already a fresh tuple, so this deepcopy looks
# redundant.
input_shape = copy.deepcopy(input_shape)
# Last expression of the cell: displays the shape as the cell output.
input_shape

(1, 32000)

In [1]:
from wwv.eval import Metric
from wwv.util import OnnxExporter
from wwv.routine import Routine
import bisect 
import torch 
from pytorch_lightning import Trainer
import pytorch_lightning as pl 
import torch.nn.functional as F 
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import EarlyStopping,ModelCheckpoint,LearningRateMonitor, ModelPruning

from torch.optim.lr_scheduler import ReduceLROnPlateau


# NOTE(review): this cell uses cfg, cfg_model, cfg_feature, cfg_fitting and
# AudioDataModule without defining or importing them; the recorded output of
# this cell is "NameError: name 'cfg' is not defined". It presumably relies on
# an earlier cell having been run in the same kernel.
data_path = cfg.DataPath("/home/akinwilson/Code/HTS-Audio-Transformer", cfg_model.model_name, cfg_model.model_dir)
data_module = AudioDataModule(data_path.root_data_dir,
                               cfg_model=cfg_model,
                               cfg_feature=cfg_feature,
                               cfg_fitting=cfg_fitting)

# model = Architecture(cfg, training=True)
# model.extractor(torch.randn((1,48000))) # (torch.randn((1,48000)))
# `root` is assigned here but not referenced again in this cell.
root = "/home/akinwilson/Code/pytorch/dataset/keywords"
# model = Architecture(cfg, training=True)
# model.extractor(torch.randn((1,48000))) # (torch.randn((1,48000)))
# model = Architecture(cfg, True)
                
# Create the three data loaders from the data module.
train_loader =  data_module.train_dataloader()
val_loader =  data_module.val_dataloader()
test_loader =  data_module.test_dataloader()


# model.processing_layer[3](x)
def get_callbacks():
    """Return the Lightning callbacks used for training.

    The list holds, in order: a checkpoint callback keeping the single best
    ``val_loss`` checkpoint under ``data_path.model_dir``, a per-epoch
    learning-rate monitor, and early stopping on ``val_loss`` with patience
    ``cfg_fitting.es_patience``. ``data_path`` and ``cfg_fitting`` are read
    from module scope.
    """
    monitor_lr = LearningRateMonitor(logging_interval='epoch')
    stopper = EarlyStopping(
        mode="min",
        monitor='val_loss',
        patience=cfg_fitting.es_patience,
    )
    best_checkpoint = ModelCheckpoint(
        monitor="val_loss",
        dirpath=data_path.model_dir,
        save_top_k=1,
        mode="min",
        filename='{epoch}-{val_loss:.2f}-{val_acc:.2f}-{val_ttr:.2f}-{val_ftr:.2f}',
    )
    return [best_checkpoint, monitor_lr, stopper]

# TensorBoard logger writing under <model_dir>/lightning_logs with a fixed
# version number of 1.
logger = TensorBoardLogger(save_dir=data_path.model_dir, version=1, name="lightning_logs")




# model = HTSwinTransformer(
#     spec_size=cfg_model.spec_size,
#     patch_size=cfg_model.patch_size,
#     in_chans=1,
#     num_classes=cfg_model.num_classes,
#     window_size=cfg_model.window_size,
#     cfg_signal= cfg_signal, 
#     depths = cfg_model.depth,
#     embed_dim = cfg_model.dim,
#     patch_stride = cfg_model.stride,
#     num_heads= cfg_model.num_head
# )


# routine = Routine(model, cfg_fitting, cfg_model)
# trainer = Trainer(accelerator="gpu",
#                   devices=3,
#                   strategy='dp',
#                   sync_batchnorm = True,
#                   logger = logger, 
#                   default_root_dir=data_path.model_dir,
#                   callbacks=get_callbacks(),
#                   num_sanity_val_steps = 2,
#                   resume_from_checkpoint = None, 
#                   gradient_clip_val=1.0,
#                  fast_dev_run=False)


# # PATH  = "/home/akinwilson/Code/pytorch/output/model/ResNet/epoch=18-val_loss=0.15-val_acc=0.95-val_ttr=0.92-val_ftr=0.03.ckpt"                  
# trainer.fit(routine, train_dataloaders=train_loader, val_dataloaders=val_loader) # ,ckpt_path=PATH)

# trainer.test(dataloaders=test_loader)


# from wwv.util import OnnxExporter
# import torch.nn as nn
# model = trainer.model.module.module.model

# class Predictor(nn.Module):
#     def __init__(self, model):
#         super().__init__()
#         self.model = model

#     def forward(self, x):
#         logits =self.model(x)
#         pred = F.sigmoid(logits)
#         return pred 

# predictor = Predictor(model)
# OnnxExporter(model=predictor,
#              cfg=cfg,
#              input_shape=(1, 40, 75),
#              output_dir=data_path.model_dir, op_set=12)()

# # ####################################################################################################################
                                           
# ####################################################################################################################
# if isinstance(trainer.model, torch.nn.DataParallel):
#     print("test")
#     model = trainer.model
# ####################################################################################################################
# reload best 
# ####################################################################################################################
# automatically auto-loads the best weights from the previous run 
# ####################################################################################################################

  from .autonotebook import tqdm as notebook_tqdm
2022-11-05 05:10:19.332431: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-05 05:10:19.475326: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2022-11-05 05:10:20.033753: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2022-11-05 05:10:20.033803: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7

NameError: name 'cfg' is not defined

In [None]:
import sys
import os
import onnx
from onnx_tf.backend import prepare
import wwv.config as cfg 
import torch  
import tensorflow as tf 

# Query CUDA availability; the value is not stored.
torch.cuda.is_available()
# Instantiate the configuration objects provided by wwv.config.
cfg_fitting = cfg.Fitting()
cfg_feature = cfg.Feature()
cfg_resnet = cfg.ResNet()

# Input ONNX file and the output locations for the TensorFlow SavedModel, the
# TFLite model and the quantised TFLite model.
model_in_path = "/home/akinwilson/Code/pytorch/output/model/ResNet/model.onnx"
model_out_path = "/home/akinwilson/Code/pytorch/notebooks/tf"
out_lite_path = "/home/akinwilson/Code/pytorch/notebooks/tflite"
out_quant_lite_path = "/home/akinwilson/Code/pytorch/notebooks/tflite_quant"

onnx_model = onnx.load(model_in_path)  # load onnx model
tf_rep = prepare(onnx_model)  # prepare tf representation
# The export step is disabled, so nothing is written to model_out_path here.
# tf_rep.export_graph(model_out_path)  # export the model

In [None]:
import tensorflow as tf 
# Load the TensorFlow model stored at model_out_path.
# NOTE(review): model_out_path is defined in a different cell, and the export
# step there is commented out -- presumably the directory already exists from
# an earlier run; confirm.
model = tf.keras.models.load_model(model_out_path)
# model.predict()


In [None]:
# test_loader
from wwv.meta import params
from wwv.data import AudioDataModule
# Build a Config object from the project's params.
Cfg = cfg.Config(params)

# Resolve dataset/model locations from the Config object.
data_path = cfg.DataPath("/home/akinwilson/Code/pytorch/dataset/keywords", Cfg.model_name, Cfg.path['model_dir'])

# model = Architecture(cfg, training=True)
# model.extractor(torch.randn((1,48000))) # (torch.randn((1,48000)))
# model = Architecture(cfg, True)
# NOTE(review): other cells in this notebook pass the model configuration as
# `cfg_model=`; here it is passed as `cfg=` -- confirm which keyword
# AudioDataModule expects.
data_module = AudioDataModule(data_path.root_data_dir,
                              cfg=Cfg,
                              cfg_feature=cfg_feature,
                              cfg_fitting=cfg_fitting)

# Only the test loader is needed; it feeds the representative dataset below.
test_loader =  data_module.test_dataloader()

def get_torch_representative_dataset(test_loader):
    """Collect every batch of ``test_loader`` into one feature and one label tensor.

    Args:
        test_loader: Iterable of batches; each batch is a mapping holding an
            ``'x'`` tensor (features) and a ``'y'`` tensor (labels).

    Returns:
        Tuple ``(x, y)``. ``x`` is all ``'x'`` tensors joined with
        ``torch.vstack``. ``y`` holds all labels, in iteration order, as a
        column tensor of shape ``(-1, 1)``.
    """
    features = []
    labels = []
    for batch in test_loader:
        features.append(batch['x'])
        # Flatten each label batch before joining: torch.stack would require
        # every batch to have the same size and fails on a smaller final batch.
        labels.append(batch['y'].reshape(-1))

    x = torch.vstack(features)
    y = torch.cat(labels).view(-1, 1)
    return x, y
# representative_y

In [None]:
import tensorflow as tf


def torch_to_tf_dataset(x, y):
    """Turn a pair of torch tensors into a ``tf.data.Dataset`` of ``(x, y)`` slices.

    Both tensors are converted through NumPy to TensorFlow tensors and sliced
    along their first dimension. The sliced dataset is then concatenated with
    itself, so every sample appears twice in the result.
    """
    features, labels = (tf.convert_to_tensor(t.numpy()) for t in (x, y))
    sliced = tf.data.Dataset.from_tensor_slices((features, labels))
    return sliced.concatenate(sliced)

# Gather the whole test set into tensors, then wrap it as a tf.data.Dataset
# for use as the quantisation representative dataset.
x, y = get_torch_representative_dataset(test_loader)
representative_dataset = torch_to_tf_dataset(x,y)



def callable_generator_convertor(_gen):
    """Wrap an iterable of pairs in a zero-argument generator function.

    The returned callable, when invoked, iterates ``_gen`` and yields each
    element unpacked and re-packed as an ``(x, y)`` tuple.

    NOTE(review): when used as a TFLite ``representative_dataset`` the
    converter may expect only the model inputs to be yielded -- confirm that
    yielding the label as well is accepted.
    """
    def gen():
        yield from ((features, labels) for features, labels in _gen)

    return gen


# Wrap the dataset in a zero-argument callable that yields (x, y) pairs.
rep_ds = callable_generator_convertor(representative_dataset)

# Inspect the second sample. `rep_ds` is a function, so subscripting it
# (`rep_ds[1]`) raises TypeError; call it and skip one element of the
# generator it returns instead. Yields None if there are fewer than two samples.
from itertools import islice
next(islice(rep_ds(), 1, None), None)

In [None]:
from tensorflow import lite
import tensorflow as tf
import tensorflow_model_optimization as tfmot
from pathlib import Path 
import sys
import os
import onnx
from onnx_tf.backend import prepare

# Input ONNX file and the output locations for the TensorFlow SavedModel, the
# TFLite model and the quantised TFLite model.
model_in_path = "/home/akinwilson/Code/pytorch/output/model/ResNet/model.onnx"
model_out_path = "/home/akinwilson/Code/pytorch/notebooks/tf"
out_lite_path = "/home/akinwilson/Code/pytorch/notebooks/tflite"
out_quant_lite_path = "/home/akinwilson/Code/pytorch/notebooks/tflite_quant"

# The ONNX -> TensorFlow export is disabled here; TfliteConverter.__call__
# below performs the same three steps.
# onnx_model = onnx.load(model_in_path)  # load onnx model
# tf_rep = prepare(onnx_model)  # prepare tf representation
# tf_rep.export_graph(model_out_path)  # export the model



class TfliteConverter:
    '''
    Converts model in onnx format to TFLite.

    Calling an instance loads the ONNX model, exports it as a TensorFlow
    SavedModel, converts that to TFLite and, when ``quantise`` is true, also
    writes a quantised TFLite model calibrated on the test loader.
    '''
    def __init__(self, in_path, out_path, out_lite_path, out_lite_quant_path, test_loader, quantise=True):
        '''
        Args:
            in_path: Path of the ONNX model to load.
            out_path: Directory the TensorFlow SavedModel is exported to.
            out_lite_path: File the TFLite model is written to.
            out_lite_quant_path: File the quantised TFLite model is written to.
            test_loader: Iterable of batches (mappings with 'x' and 'y'
                tensors) used to build the representative dataset.
            quantise: Whether to also produce the quantised model.
        '''
        self.in_path = in_path
        self.out_path = out_path
        self.out_lite_path = out_lite_path
        self.out_lite_quant_path = out_lite_quant_path
        self.test_loader = test_loader
        self.quantise = quantise

    def get_torch_representative_dataset(self, test_loader):
        '''
        Collect every batch of ``test_loader`` into one feature tensor and one
        label column tensor of shape (-1, 1).
        '''
        features = []
        labels = []
        for batch in test_loader:
            features.append(batch['x'])
            # Flatten each label batch before joining: torch.stack would
            # require equally sized batches and fails on a smaller final batch.
            labels.append(batch['y'].reshape(-1))

        x = torch.vstack(features)
        y = torch.cat(labels).view(-1, 1)
        return x, y

    @staticmethod
    def callable_generator_convertor(_gen):
        '''
        Wrap an iterable of (x, y) pairs in a zero-argument generator function.
        '''
        def gen():
            for x, y in _gen:
                yield x, y
        return gen

    def torch_to_tf_dataset(self, x, y):
        '''
        Convert torch tensors to a tf.data.Dataset of (x, y) slices. The
        dataset is concatenated with itself, so each sample appears twice.
        '''
        tf_feats = tf.convert_to_tensor(x.numpy())
        tf_labels = tf.convert_to_tensor(y.numpy())

        dataset = tf.data.Dataset.from_tensor_slices((tf_feats, tf_labels))
        dataset = dataset.concatenate(dataset)
        return dataset

    def __call__(self):
        '''
        Run the conversion and write the TFLite file(s) to the paths given to
        the constructor. Returns None.
        '''
        # load from onnx and convert to tf
        onnx_model = onnx.load(self.in_path)  # load onnx model
        tf_rep = prepare(onnx_model)  # prepare tf representation
        tf_rep.export_graph(self.out_path)  # export the model

        # init convert
        converter = lite.TFLiteConverter.from_saved_model(self.out_path)
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        # convert model in memory
        tflite_model = converter.convert()
        # Save to the path given to this instance, not the module-level
        # variable of the same name.
        with open(self.out_lite_path, "wb") as file_handle:
            file_handle.write(tflite_model)

        if not self.quantise:
            return

        # convert torch test set into tf dataset for quantisation purposes
        x, y = self.get_torch_representative_dataset(self.test_loader)
        non_callable_tf_dataset = self.torch_to_tf_dataset(x, y)
        # NOTE(review): the generator yields (x, y) pairs; confirm the TFLite
        # converter accepts labels in the representative dataset.
        representative_dataset = TfliteConverter.callable_generator_convertor(non_callable_tf_dataset)

        # quantise the model, reading the SavedModel this instance exported
        converter = lite.TFLiteConverter.from_saved_model(self.out_path)
        converter.optimizations = [tf.lite.Optimize.DEFAULT]

        # Restrict to the int8 builtin op set; model input/output stay float32.
        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
        converter.inference_input_type = tf.float32
        converter.inference_output_type = tf.float32

        converter.representative_dataset = representative_dataset

        tflite_quant_model = converter.convert()
        with open(self.out_lite_quant_path, "wb") as file_handle:
            file_handle.write(tflite_quant_model)


# Script-level toggle for the quantised conversion below.
quantise = True 
# The plain (non-quantised) TFLite conversion is disabled here.
# converter = lite.TFLiteConverter.from_saved_model(model_out_path)
# converter.optimizations = [tf.lite.Optimize.DEFAULT]
# tflite_model = converter.convert()

# with open(out_lite_path, "wb") as file_handle: 
#     file_handle.write(tflite_model)

if quantise:
    # quantise the model 
    converter = lite.TFLiteConverter.from_saved_model(model_out_path)
    converter.optimizations = [tf.lite.Optimize.DEFAULT]

    # Restrict to the int8 builtin op set; model input/output stay float32.
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    converter.inference_input_type = tf.float32
    converter.inference_output_type = tf.float32

    # `callable_generator_convertor` and `representative_dataset` are not
    # defined in this cell; they come from an earlier cell of the notebook.
    converter.representative_dataset = callable_generator_convertor(representative_dataset)

    tflite_quant_model = converter.convert()
    # Write the quantised model bytes to disk.
    with open(out_quant_lite_path, "wb") as file_handle:

        file_handle.write(tflite_quant_model)

# open(join(model_folder, f'{model_name}_epoch_{result["epoch"]}.tflite'), "wb")
# Quantized TFLite Model


In [None]:
# Build a converter for the exported SavedModel, purely for inspection.
model_out_path = "/home/akinwilson/Code/pytorch/notebooks/tf"
converter = lite.TFLiteConverter.from_saved_model(model_out_path)


# Same quantisation settings as the conversion cell: default optimisations,
# int8 builtin op set, float32 input/output.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.float32
converter.inference_output_type = tf.float32


# Last expression of the cell: displays the converter's attributes.
converter.__dict__

In [None]:
from transformers import Wav2Vec2FeatureExtractor, SEWDForSequenceClassification
from datasets import load_dataset
import torch

# NOTE(review): the two lines that would define `dataset` are commented out,
# so the assignment below relies on `dataset` already existing in the kernel
# (it is not defined anywhere in the visible cells) -- confirm.
# dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
# dataset = dataset.sort("id")
sampling_rate = dataset.features["audio"].sampling_rate
class SEW(torch.nn.Module):
    """Wrap the pretrained SEW-D keyword-spotting checkpoint as a single module.

    The feature extractor and the classification model are both loaded from
    the ``anton-l/sew-d-mid-400k-ft-keyword-spotting`` checkpoint.
    ``torch.nn.Module`` is referenced through ``torch`` because this cell
    never imports ``nn``.
    """

    def __init__(self):
        super().__init__()
        self.feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("anton-l/sew-d-mid-400k-ft-keyword-spotting")
        self.model = SEWDForSequenceClassification.from_pretrained("anton-l/sew-d-mid-400k-ft-keyword-spotting")

    def forward(self, x):
        """Extract features from raw audio ``x`` and return the classification logits.

        The sampling rate is read from the module-level ``sampling_rate``
        variable when the method is called.
        """
        # Keyword is `sampling_rate` (the original `ampling_rate` was a typo).
        features = self.feature_extractor(x, sampling_rate=sampling_rate, return_tensors="pt")
        # Feed the extracted features, not the raw audio, to the model and
        # return only the logits tensor from its output.
        return self.model(**features).logits
# Bare expression in the middle of the cell: its value is discarded.
dataset[0]["audio"]["array"]
# Instantiate the wrapper (loads the pretrained extractor and model).
sew  =SEW()
# # audio file is decoded on the fly
inputs = dataset[0]["audio"]["array"]
# Inference only: run the forward pass without tracking gradients.
with torch.no_grad():
    logits = sew(torch.tensor(inputs))
print(logits)
# predicted_class_ids = torch.argmax(logits, dim=-1).item()
# predicted_label = model.config.id2label[predicted_class_ids]
# predicted_label