In [6]:
import torch
import dataclasses
from typing import Tuple
from copy import deepcopy

from kws.models import CRNN
from test_performance import test_preformance

In [7]:
@dataclasses.dataclass
class UncompressedConfig:
    """Settings of the uncompressed baseline setup.

    The compression experiments in this notebook start from a copy of an
    instance of this class and override one or more of its fields.
    """

    # The single keyword used throughout this notebook.
    keyword: str = 'sheila'

    # Batch / optimiser settings.
    batch_size: int = 128
    learning_rate: float = 3e-4
    weight_decay: float = 1e-5
    num_epochs: int = 20

    # Feature and network sizes (the fields the experiments shrink).
    n_mels: int = 40
    cnn_out_channels: int = 8
    kernel_size: Tuple[int, int] = (5, 20)
    stride: Tuple[int, int] = (2, 8)
    hidden_size: int = 32
    gru_num_layers: int = 2
    bidirectional: bool = False
    num_classes: int = 2

    sample_rate: int = 16_000

    # First CUDA device when one is available, otherwise the CPU.
    # The default is computed once, when the class body is executed.
    device: torch.device = (
        torch.device('cuda:0')
        if torch.cuda.is_available()
        else torch.device('cpu')
    )

    max_window_length: int = 16_000
    streaming_step_size: int = 1_600

In [8]:
# Reference configuration that every compression experiment below copies.
base_config = UncompressedConfig()

# Metrics of the uncompressed setup, measured on that same `base_config`
# object rather than on a second, separately constructed instance, so the
# baseline and the experiments are guaranteed to share one starting point.
baseline = test_preformance(base_config)

102it [00:22,  4.54it/s]


In [9]:
# Experiment: one GRU layer; every other field keeps its `base_config` value.
reduced_gru = dataclasses.replace(base_config, gru_num_layers=1)

reduced_gru_dict = test_preformance(reduced_gru)

102it [00:20,  4.86it/s]


In [10]:
# Experiment: 4 CNN output channels; every other field keeps its
# `base_config` value.
reduced_channels = dataclasses.replace(base_config, cnn_out_channels=4)

reduced_channels_dict = test_preformance(reduced_channels)

102it [00:21,  4.82it/s]


In [11]:
# Experiment: hidden size of 8; every other field keeps its `base_config`
# value.
conf = dataclasses.replace(base_config, hidden_size=8)

reduced_hidden_dict = test_preformance(conf)

102it [00:20,  5.03it/s]


In [12]:
# Experiment: 20 mel bins; every other field keeps its `base_config` value.
conf = dataclasses.replace(base_config, n_mels=20)

reduced_mels_dict = test_preformance(conf)

102it [00:20,  4.92it/s]


In [13]:
# Experiment: dynamic float16 quantization of a CRNN built from the unchanged
# base configuration.
# NOTE(review): no checkpoint is loaded into `model` in this cell -- confirm
# how `test_preformance` obtains the weights of a model it is handed.
model = CRNN(base_config)
quantized_model = torch.quantization.quantize_dynamic(
    model,
    dtype=torch.float16,
)

quantized_dict = test_preformance(base_config, model=quantized_model)

102it [00:20,  4.98it/s]


In [15]:
# Experiment: smaller hidden size, fewer CNN channels and fewer mel bins,
# combined with dynamic float16 quantization.
conf = dataclasses.replace(
    base_config,
    hidden_size=8,
    cnn_out_channels=4,
    n_mels=20,
)
model = CRNN(conf)
quantized_model = torch.quantization.quantize_dynamic(
    model,
    dtype=torch.float16,
)

reduced_all_dict = test_preformance(conf, model=quantized_model)

102it [00:22,  4.60it/s]


In [17]:
# Experiment: smaller hidden size, fewer CNN channels and fewer mel bins,
# without quantization and with the base number of GRU layers.
conf = deepcopy(base_config)
conf.hidden_size = 8
conf.cnn_out_channels = 4
conf.n_mels = 20

# Keep this result under its own name: `reduced_noq_dict` is assigned again by
# the later cell that also sets `gru_num_layers = 1`, which would otherwise
# discard these metrics before anything reads them.
reduced_noq_base_gru_dict = test_preformance(conf)

# The original name is still bound here so that code relying on it keeps
# working.
reduced_noq_dict = reduced_noq_base_gru_dict

102it [00:21,  4.66it/s]


In [19]:
# Experiment: smaller hidden size, fewer CNN channels, fewer mel bins and a
# single GRU layer, without quantization.
# NOTE(review): this assigns `reduced_noq_dict` a second time, replacing the
# value produced by the preceding cell -- confirm that is intended.
conf = dataclasses.replace(
    base_config,
    hidden_size=8,
    cnn_out_channels=4,
    n_mels=20,
    gru_num_layers=1,
)

reduced_noq_dict = test_preformance(conf)

102it [00:21,  4.66it/s]


In [22]:
# Experiment label -> metrics returned by `test_preformance` for that run.
comp_dict = {
    'reduced_gru': reduced_gru_dict,
    'reduced_channels': reduced_channels_dict,
    'reduced_hidden': reduced_hidden_dict,
    'reduced_mels': reduced_mels_dict,
    'quantized': quantized_dict,
    'reduced_all': reduced_all_dict,
    'reduced_noq': reduced_noq_dict,
}

# For every experiment, print each baseline metric as the ratio
# baseline / experiment; a value above 1 means the experiment's number is
# smaller than the baseline's.
for name, experiment_metrics in comp_dict.items():
    print(f'Experiment name: {name}')
    for key in baseline:
        ratio = baseline[key] / experiment_metrics[key]
        print('{} reduced by {:.6f}'.format(key, ratio))
    print('###\n###')

Experiment name: reduced_gru
memory reduced by 1.322877
au_fa_fr reduced by 1.017830
time reduced by 1.954158
MACs reduced by 1.200160
num_params reduced by 1.332581
###
###
Experiment name: reduced_channels
memory reduced by 1.369009
au_fa_fr reduced by 0.900967
time reduced by 1.410639
MACs reduced by 1.559516
num_params reduced by 1.404848
###
###
Experiment name: reduced_hidden
memory reduced by 4.022875
au_fa_fr reduced by 1.039945
time reduced by 2.587531
MACs reduced by 2.101900
num_params reduced by 5.042105
###
###
Experiment name: reduced_mels
memory reduced by 1.394914
au_fa_fr reduced by 1.040343
time reduced by 1.742663
MACs reduced by 1.662895
num_params reduced by 1.433727
###
###
Experiment name: quantized
memory reduced by 0.923332
au_fa_fr reduced by 1.647724
time reduced by 1.216756
MACs reduced by 2.731515
num_params reduced by 31.419554
###
###
Experiment name: reduced_all
memory reduced by 7.421449
au_fa_fr reduced by 0.757687
time reduced by 1.939248
MACs reduced

In [None]:
# Sketch of eager-mode static (post-training) quantization. This cell has not
# been executed yet; the steps marked TODO still have to be filled in.

# Work on a copy so the float `model` created by an earlier cell is not
# modified in place.
static_model = deepcopy(model)

# TODO: assign `static_model.qconfig` before preparing -- eager-mode `prepare`
# only attaches observers to modules that carry a qconfig.

# insert observers
torch.quantization.prepare(static_model, inplace=True)

# Calibrate the model and collect statistics
# TODO: run representative inputs through `static_model` here.

# convert to quantized version
torch.quantization.convert(static_model, inplace=True)