# Quantize GRU

In [1]:
from model import PerformanceRNN
import torch
from torch import nn
import distiller
from distiller.modules.gru import DistillerGRU as GRU
from distiller.modules.gru import convert_model_to_distiller_gru
from tqdm import tqdm
import numpy as np

### Loading the model and converting to our own implementation.

In [2]:
assert torch.cuda.is_available()
device = 'cuda:0'
sess_path = "save/ecomp_w500.sess"
state = torch.load(sess_path)
rnn_model = PerformanceRNN(**state['model_config']).to(device)
rnn_model.load_state_dict(state['model_state'])

In [3]:
rnn_model = convert_model_to_distiller_gru(rnn_model)

Check that man_model is on GRU.

In [None]:
next(man_model.parameters()).is_cuda

### Check that the conversion has succeeded:

In [None]:
rnn_model.eval()

In [None]:
man_model.eval()

Test to make sure that both the original and manual models can generate output.

In [None]:
model = quantizer.model.to(device)
model.eval()
batch_size = 1
init = torch.randn(batch_size, model.init_dim).to(device)
max_len = 1000
controls=None
greedy_ratio = 0.7
temperature = 1.0

import pdb

with torch.no_grad():
    #pdb.set_trace()
    outputs = model.generate(init, max_len,
                             controls=controls,
                             greedy=greedy_ratio,
                             temperature=temperature,
                             verbose=True)
    

outputs = outputs.cpu().numpy().T # [batch, steps]

Convert output to MIDI and save.

In [None]:
import utils
import os

output_dir = "quantized_output/"
os.makedirs(output_dir, exist_ok=True)

for i, output in enumerate(outputs):
    name = f'output-{i:03d}.mid'
    path = os.path.join(output_dir, name)
    n_notes = utils.event_indeces_to_midi_file(output, path)
    print(f'===> {path} ({n_notes} notes)')

### Defining the evaluation:

# Quantizing the model:

## Collect activation statistics:

The model uses activation statistics to determine how big the quantization range is. The bigger the range - the larger the round off error after quantization which leads to accuracy drop.  
Our goal is to minimize the range s.t. it contains the absolute most of our data.  
After that, we divide the range into chunks of equal size, according to the number of bits, and transform the data according to this scale factor.  
Read more on scale factor calculation [in our docs](https://nervanasystems.github.io/distiller/algo_quantization.html).

The class `QuantCalibrationStatsCollector` collects the statistics for defining the range $r = max - min$.  

Each forward pass, the collector records the values of inputs and outputs, for each layer:
- absolute over all batches min, max (stored in `min`, `max`)
- average over batches, per batch min, max (stored in `avg_min`, `avg_max`)
- mean
- std
- shape of output tensor  

All these values can be used to define the range of quantization, e.g. we can use the absolute `min`, `max` to define the range.

Check that `man_model` has the same weights as `rnn_model` (Warning: Running this will move the models to the CPU).

In [None]:
import numpy.testing as nptest

man_model_weights = man_model.cpu().output_fc.weight.detach().numpy()
rnn_model_weights = rnn_model.cpu().output_fc.weight.detach().numpy()
nptest.assert_array_almost_equal(man_model_weights, rnn_model_weights)

Check that `man_model` is on the GPU.

In [None]:
next(man_model.parameters()).is_cuda

In [4]:
# My version.
import os
from distiller.data_loggers import QuantCalibrationStatsCollector, collector_context

# Commented line is probably not necessary.
#man_model = torch.load('./manual.checkpoint.pth.tar')
distiller.utils.assign_layer_fq_names(rnn_model)
collector = QuantCalibrationStatsCollector(rnn_model,#,
                                           inplace_runtime_check=True)
                                           #disable_inplace_attrs=True) # I added this last argument.

# Random numbers.
batch_size = 64
max_len = 100


with collector_context(collector) as collector:
    init = torch.randn(batch_size, rnn_model.init_dim).to(device)
    output = rnn_model.generate(init, max_len)
    collector.save('performance_rnn_pretrained_stats.yaml')

RuntimeError: Inplace operation detected, meaning inputs stats are overridden by output stats. You can either disable this check or make sure no in-place operations occur. See QuantCalibrationStatsCollector class documentation for more info.

## Quantize Model:

In [None]:
from distiller.quantization import PostTrainLinearQuantizer, LinearQuantMode
from copy import deepcopy
# Define the quantizer
quantizer = PostTrainLinearQuantizer(
    deepcopy(rnn_model),
    model_activation_stats='performance_rnn_pretrained_stats.yaml')

# Quantizer magic:

quantizer.prepare_model()

In [None]:
quantizer.model

In [None]:
criterion = nn.CrossEntropyLoss()
def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)
    

def evaluate(model):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0.
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(eval_batch_size)
    with torch.no_grad():
        # The line below was fixed as per: https://github.com/pytorch/examples/issues/214
        for i in tqdm(range(0, data_source.size(0), sequence_len)):
            data, targets = get_batch(data_source, i)
            output, hidden = model(data, hidden)
            output_flat = output.view(-1, ntokens)
            total_loss += len(data) * criterion(output_flat, targets).item()
            hidden = repackage_hidden(hidden)
    return total_loss / len(data_source)

In [None]:
evaluate(quantizer.model.to(device), val_data)

## Evaluate the perplexity of the original and quantized models.

In [None]:
from data import Dataset
from sequence import EventSeq
import torch.functional as F

data_path = "dataset/processed/ecomp_piano"
dataset = Dataset(data_path, verbose=True)
dataset_size = len(dataset.samples)
assert dataset_size > 0

# Eventually need to put these in YAML file.
controls = None
teacher_forcing_ratio = 1.0
loss_function = nn.CrossEntropyLoss()
log_softmax = nn.LogSoftmax(dim=1)
nnl = nn.NLLLoss(reduction='sum')




window_size = 200
stride_size = 10
use_transposition = False
control_ratio = 1.0
event_dim = EventSeq.dim()
batch_size = 2

batch_gen = dataset.batches(batch_size, window_size, stride_size)

In [None]:
model = quantizer.model.to(device)
model.eval()

In [None]:
import numpy.testing as nptest
import pdb

acc_loss = 0
N = 0
num_iters = 100

for iteration, (events, controls) in enumerate(batch_gen):
    if iteration == num_iters:
        break
        
    if use_transposition:
        offset = np.random.choice(np.arange(-6, 6))
        events, controls = utils.transposition(events, controls, offset)

    events = torch.LongTensor(events).to(device)
    assert events.shape[0] == window_size

    if np.random.random() < control_ratio:
        controls = torch.FloatTensor(controls).to(device)
        assert controls.shape[0] == window_size
    else:
        controls = None

    init = torch.randn(batch_size, model.init_dim).to(device)
    outputs = model.generate(init, window_size, events=events[:-1], controls=controls,
                             teacher_forcing_ratio=teacher_forcing_ratio, output_type='logit')
    

    assert outputs.shape[:2] == events.shape[:2]

    
    loss1 = loss_function(outputs.view(-1, event_dim), events.view(-1))
    pred = log_softmax(outputs.view(-1, event_dim))
    n = pred.shape[0]
    loss2 = nnl(pred, events.view(-1))
    acc_loss += loss2
    N += n
    
    #pdb.set_trace()
   
    # Check to make sure we're calculating the correct loss.
    nptest.assert_array_almost_equal(loss1.cpu().detach().numpy(), loss2.cpu().detach().numpy() / n)



acc_cross_entropy_loss = acc_loss / N
perplexity = acc_cross_entropy_loss.exp()
perplexity

# Collect Quant Stats Part II

In [None]:
from data import Dataset
from sequence import EventSeq
import torch.functional as F

data_path = "dataset/processed/ecomp_piano"
dataset = Dataset(data_path, verbose=True)
dataset_size = len(dataset.samples)
assert dataset_size > 0

# Eventually need to put these in YAML file.
controls = None
teacher_forcing_ratio = 1.0
loss_function = nn.CrossEntropyLoss()
log_softmax = nn.LogSoftmax(dim=1)
nnl = nn.NLLLoss(reduction='sum')




window_size = 200
stride_size = 10
use_transposition = False
control_ratio = 1.0
event_dim = EventSeq.dim()
batch_size = 2

batch_gen = dataset.batches(batch_size, window_size, stride_size)

In [None]:
model = quantizer.model.to(device)
model.eval()

In [None]:
# My version.
import os
from distiller.data_loggers import QuantCalibrationStatsCollector, collector_context

# Commented line is probably not necessary.
#man_model = torch.load('./manual.checkpoint.pth.tar')
distiller.utils.assign_layer_fq_names(rnn_model)
collector = QuantCalibrationStatsCollector(rnn_model,#,
                                           inplace_runtime_check=True)
                                           #disable_inplace_attrs=True) # I added this last argument.

# Random numbers.
batch_size = 64
max_len = 100

if not os.path.isfile('performance_rnn_pretrained_stats.yaml'):
    with collector_context(collector) as collector:
        init = torch.randn(batch_size, rnn_model.init_dim).to(device)
        output = rnn_model.generate(init, max_len)
        collector.save('performance_rnn_pretrained_stats.yaml')

In [None]:
import numpy.testing as nptest
import pdb
import os
from distiller.data_loggers import QuantCalibrationStatsCollector, collector_context

acc_loss = 0
N = 0
num_iters = 100

# Collect stats.
distiller.utils.assign_layer_fq_names(rnn_model)
collector = QuantCalibrationStatsCollector(rnn_model,#,
                                           inplace_runtime_check=True)

if not os.path.isfile('performance_rnn_pretrained_stats.yaml'):
    with collector_context(collector) as collector:

        
        for iteration, (events, controls) in enumerate(batch_gen):
            print(iteration)

            if iteration == num_iters:
                break

            if use_transposition:
                offset = np.random.choice(np.arange(-6, 6))
                events, controls = utils.transposition(events, controls, offset)

            events = torch.LongTensor(events).to(device)
            assert events.shape[0] == window_size

            if np.random.random() < control_ratio:
                controls = torch.FloatTensor(controls).to(device)
                assert controls.shape[0] == window_size
            else:
                controls = None

            init = torch.randn(batch_size, model.init_dim).to(device)
            outputs = model.generate(init, window_size, events=events[:-1], controls=controls,
                                     teacher_forcing_ratio=teacher_forcing_ratio, output_type='logit')


            assert outputs.shape[:2] == events.shape[:2]



        # Save stats.
        collector.save('performance_rnn_pretrained_stats.yaml')

In [None]:
not os.path.isfile('performance_rnn_pretrained_stats.yaml')
