# Initialize the accelerator

### Remember to install the following dependencies:

In [1]:
# System package: HDF5 development files; -y answers the install prompt automatically.
! apt-get install libhdf5-dev -y
# Python package installed through pip3.
# NOTE(review): a later cell imports `h5py`; presumably it is pulled in as a dependency of this package — confirm.
! pip3 install versioned-hdf5

In [2]:
from finn_examples import models
# Show every attribute of `models` whose name mentions "radioml".
print([name for name in dir(models) if "radioml" in name])

['_radioml_io_shape_dict', 'vgg10_w4a4_radioml']


In [3]:
# Note: the RadioML example is only available on the ZCU104 at the moment
# Create the accelerator object; all later cells query and run it through `accel`.
accel = models.vgg10_w4a4_radioml()

In [4]:
# Report the shape/datatype pair the accelerator expects on each side.
# "%s" already applies str() to its argument, so the values are passed as-is.
print("Expected input shape and datatype: %s %s" % (accel.ishape_normal(), accel.idt()))
print("Expected output shape and datatype: %s %s" % (accel.oshape_normal(), accel.odt()))

Expected input shape and datatype: (1, 1024, 1, 2) DataType.INT8
Expected output shape and datatype: (1, 1) DataType.UINT8


# Load RadioML 2018 dataset

Please note that you will have to manually download the RadioML 2018 dataset and set the `dataset_dir` variable to the directory that contains the `GOLD_XYZ_OSC.0001_1024.hdf5` file.

In [5]:
import numpy as np
import math
import pickle
import os
import h5py

# Directory holding the manually downloaded RadioML 2018 dataset; edit this to match your setup.
dataset_dir = "/home/xilinx/datasets/radioml_2018"
# Echo the configured location so it is visible in the cell output.
print(dataset_dir)

/home/xilinx/datasets/radioml_2018


In [6]:
# Open the dataset read-only. `data_h5` stays a handle into the file so the
# (large) frame data is only read when it is indexed later on.
h5_file = h5py.File(dataset_dir + "/GOLD_XYZ_OSC.0001_1024.hdf5",'r')
data_h5 = h5_file['X']
label_mod = np.argmax(h5_file['Y'], axis=1) # one-hot rows -> class index
label_snr = h5_file['Z'][:,0]

# Build the list of test-set indices (indices only, no frame data is loaded).
# Frames are addressed in consecutive runs of 4096 per (modulation, SNR) pair,
# with 26 SNR steps per modulation.
np.random.seed(2018)
split = int(np.ceil(0.1 * 4096)) # 90%/10% split, same for every subclass
test_indices = []
for mod in range(24): # all modulations (0 to 23)
    for snr_idx in range(26): # all SNRs (0 to 25 = -20dB to +30dB)
        start_idx = 4096 * (26*mod + snr_idx)
        indices_subclass = list(range(start_idx, start_idx+4096))

        # Every subclass is shuffled, including the ones not selected below,
        # so the random stream (and therefore the split) does not depend on
        # which SNRs are picked for testing.
        # NOTE(review): presumably this has to mirror the split used at training time — confirm.
        np.random.shuffle(indices_subclass)
        train_indices_subclass = indices_subclass[split:]
        val_indices_subclass = indices_subclass[:split]

        if snr_idx >= 25: # select which SNRs to test on
            test_indices += val_indices_subclass

# Keep the indices in increasing order for the batched reads done later.
test_indices.sort()

# note: labels given in the "classes.txt" file are not in the correct order (https://github.com/radioML/dataset/issues/25)
mod_classes = [
    'OOK', '4ASK', '8ASK', 'BPSK', 'QPSK', '8PSK', '16PSK', '32PSK',
    '16APSK', '32APSK', '64APSK', '128APSK', '16QAM', '32QAM', '64QAM', '128QAM', '256QAM',
    'AM-SSB-WC', 'AM-SSB-SC', 'AM-DSB-WC', 'AM-DSB-SC', 'FM', 'GMSK', 'OQPSK',
]
snr_classes = np.arange(-20., 32., 2) # -20dB to 30dB

In [7]:
# Sanity check: dimensions of the frame data and both label arrays,
# followed by the number of frames selected for the test set.
print(data_h5.shape)
print(label_mod.shape)
print(label_snr.shape)
print(len(test_indices))

(2555904, 1024, 2)
(2555904,)
(2555904,)
9840


# Inspect a single frame

In [8]:
%matplotlib inline
from matplotlib import pyplot as plt

# Inspect a frame
mod = 12 # 0 to 23
snr_idx = 25 # 0 to 25 = -20dB to +30dB
sample = 123 # 0 to 4095
#-----------------------#
idx = 26*4096*mod + 4096*snr_idx + sample
data, mod, snr = data_h5[idx], label_mod[idx], label_snr[idx]
plt.figure()
plt.plot(data)
print("Modulation: %s, SNR: %.1f dB" % (mod_classes[mod], snr))

Modulation: 16QAM, SNR: 30.0 dB


# Input quantization
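Quantize input data on-the-fly in software before feeding it to the accelerator. Use the uniform quantization range on which the model was trained: values in [-2.0, 2.0] are mapped linearly onto the int8 range, and anything outside that range is clipped.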

In [9]:
def quantize(data, quant_min=-2.0, quant_max=2.0):
    """Uniformly quantize floating-point samples to int8.

    The interval [quant_min, quant_max] is mapped linearly onto 256 steps and
    shifted so that quant_min lands on -128. Values that fall outside the
    interval (and quant_max itself, which would land on +128) are clipped to
    the int8 limits.

    Args:
        data: array-like of real-valued samples (any shape).
        quant_min: lower end of the quantization range (default -2.0).
        quant_max: upper end of the quantization range (default 2.0).

    Returns:
        np.ndarray of dtype int8 with the same shape as `data`.

    Raises:
        ValueError: if quant_max is not strictly greater than quant_min.
    """
    if not quant_max > quant_min:
        raise ValueError("quant_max must be greater than quant_min")
    # Accept lists/tuples as well as arrays; a no-op for ndarray input.
    data = np.asarray(data)
    quant_range = quant_max - quant_min
    # Normalize to [0, 1] over the quantization range ...
    data_quant = (data - quant_min) / quant_range
    # ... scale to 256 levels and re-center around zero.
    data_quant = np.round(data_quant * 256) - 128
    # Clip before the cast so out-of-range values saturate instead of wrapping.
    data_quant = np.clip(data_quant, -128, 127)
    return data_quant.astype(np.int8)

# Classify a single frame

In [10]:
# Quantize the frame held in `data`, then give it the input shape the accelerator reports.
accel_in = quantize(data)
accel_in = accel_in.reshape(accel.ishape_normal())
# "%s" applies str() itself, so shape and dtype are passed directly.
print("Input buffer shape is %s and datatype is %s" % (accel_in.shape, accel_in.dtype))

Input buffer shape is (1, 1024, 1, 2) and datatype is int8


In [11]:
# Run the quantized frame through the accelerator and keep the result for the next cell.
accel_out = accel.execute(accel_in)

In [12]:
print("Result: " + str(accel_out))
# The result is a one-element 2-D array. Pull the element out explicitly:
# calling int() directly on an array with ndim > 0 is deprecated in recent
# NumPy releases.
predicted_class = int(np.asarray(accel_out).item())
print("Top-1 class predicted by the accelerator: " + mod_classes[predicted_class])

Result: [[12.]]
Top-1 class predicted by the accelerator: 16QAM


In [13]:
%%timeit
# Time one accelerator call on the single-frame input buffer built above.
accel_out = accel.execute(accel_in)

1000 loops, best of 3: 822 µs per loop


# Validate accuracy on entire test set

In [14]:
# Switch the accelerator to batched operation for the test-set evaluation.
batch_size = 1024
accel.batch_size = batch_size
# Name the layout on each line; previously all three lines carried the same
# label and could not be told apart in the output.
print("Accelerator packed buffer shapes are %s for input, %s for output" % (str(accel.ishape_packed()), str(accel.oshape_packed())))
print("Accelerator folded buffer shapes are %s for input, %s for output" % (str(accel.ishape_folded()), str(accel.oshape_folded())))
print("Accelerator normal buffer shapes are %s for input, %s for output" % (str(accel.ishape_normal()), str(accel.oshape_normal())))

Accelerator buffer shapes are (1024, 1024, 1, 1, 2) for input, (1024, 1, 1) for output
Accelerator buffer shapes are (1024, 1024, 1, 1, 2) for input, (1024, 1, 1) for output
Accelerator buffer shapes are (1024, 1024, 1, 2) for input, (1024, 1) for output


In [15]:
# Run the whole test set through the accelerator in batches and count
# correct (ok) and incorrect (nok) top-1 predictions.
#
# The cell is written to be safely re-runnable: `batch_size` is never
# modified (the final, shorter batch uses its own local count) and the
# per-batch arrays use their own names so that `data`, `mod` and `snr` from
# the single-frame cells are not overwritten.
ok = 0
nok = 0
total = len(test_indices)
# Start from the full batch size even if a previous run of this cell left
# the accelerator configured for a shorter final batch.
accel.batch_size = batch_size
for i_batch in range(math.ceil(total/batch_size)):
    i_frame = i_batch*batch_size
    batch_indices = test_indices[i_frame:i_frame+batch_size]
    n_frames = len(batch_indices)
    if n_frames != batch_size:
        # Final partial batch: shrink the accelerator batch to what is left.
        accel.batch_size = n_frames
    batch_data, batch_mod = data_h5[batch_indices], label_mod[batch_indices]

    ibuf = quantize(batch_data).reshape(accel.ishape_normal())
    obuf = accel.execute(ibuf)

    # One predicted class index per frame.
    pred = obuf.reshape(n_frames).astype(int)

    n_ok = int(np.count_nonzero(pred == batch_mod))
    ok += n_ok
    nok += n_frames - n_ok

    print("batch %d : total OK %d NOK %d" % (i_batch, ok, nok))

batch 0 : total OK 1018 NOK 6
batch 1 : total OK 2041 NOK 7
batch 2 : total OK 3059 NOK 13
batch 3 : total OK 4082 NOK 14
batch 4 : total OK 4948 NOK 172
batch 5 : total OK 5682 NOK 462
batch 6 : total OK 6314 NOK 854
batch 7 : total OK 7039 NOK 1153
batch 8 : total OK 8024 NOK 1192
batch 9 : total OK 8648 NOK 1192


In [16]:
# Share of correctly classified test frames, in percent
# (`ok` and `total` come from the evaluation loop above).
acc = 100.0 * ok / (total)
print("Overall top-1 accuracy: {}%".format(acc))

Overall top-1 accuracy: 87.88617886178862%


## More benchmarking

In [17]:
# Go back to the full batch size: the evaluation loop above shrinks the
# accelerator batch for its final partial batch.
accel.batch_size = 1024
# Last expression of the cell, so the returned metrics are displayed as the cell output.
accel.throughput_test()

{'DRAM_in_bandwidth[Mb/s]': 473.18806940706867,
 'DRAM_out_bandwidth[Mb/s]': 0.23104886201517025,
 'batch_size': 1024,
 'copy_input_data_to_device[ms]': 2.1643638610839844,
 'copy_output_data_from_device[ms]': 0.08821487426757812,
 'fclk[mhz]': 249.9975,
 'fold_input[ms]': 0.1418590545654297,
 'pack_input[ms]': 0.110626220703125,
 'runtime[ms]': 4.431962966918945,
 'throughput[images/s]': 231048.86201517025,
 'unfold_output[ms]': 0.08678436279296875,
 'unpack_output[ms]': 0.6284713745117188}