# Initialize the accelerator

In [1]:
# remember to install the following dependencies
#! apt-get install libhdf5-dev -y
#! pip install versioned-hdf5

In [2]:
from finn_examples import models
# Show every name exposed by the models module that mentions "radioml".
radioml_names = [name for name in dir(models) if "radioml" in name]
print(radioml_names)

['_radioml_io_shape_dict', 'vgg10_w4a3_radioml']


In [3]:
# Build the accelerator object used by every later cell. It is called without
# arguments here, so the library's default platform selection applies.
accel = models.vgg10_w4a3_radioml()
# Some systems might require a manual platform setting, e.g.:
#accel = models.vgg10_w4a3_radioml("ZCU102")

  round(freq_high / q0, 5)))


In [4]:
# Query the accelerator for the shape and datatype of its first input/output.
in_shape, in_dtype = accel.ishape_normal(0), accel.idt(0)
out_shape, out_dtype = accel.oshape_normal(0), accel.odt(0)
print("Expected input shape and datatype: %s %s" % (str(in_shape), str(in_dtype)))
print("Expected output shape and datatype: %s %s" % (str(out_shape), str(out_dtype)))

Expected input shape and datatype: (1, 1024, 1, 2) DataType.INT8
Expected output shape and datatype: (1, 1) DataType.UINT8


# Load RadioML 2018 dataset

In [5]:
import numpy as np
import math
import pickle
import os
import h5py

# Directory holding the RadioML 2018 HDF5 file. Defaults to the original
# hard-coded mount point; set the RADIOML_DATASET_DIR environment variable to
# point the notebook at a different location without editing this cell.
dataset_dir = os.environ.get("RADIOML_DATASET_DIR", "/mnt/radioml_2018")
print(dataset_dir)

/mnt/radioml_2018


In [6]:
# Open the dataset file read-only. The file handle is kept open because
# data_h5 below is indexed again by later cells.
h5_file = h5py.File(dataset_dir + "/GOLD_XYZ_OSC.0001_1024.hdf5",'r')
# 'X' holds the frames; it is only referenced here, not sliced.
data_h5 = h5_file['X']
# 'Y' holds the modulation label per frame; argmax turns it into a class index.
label_mod = np.argmax(h5_file['Y'], axis=1) # comes in one-hot encoding
# 'Z' holds the SNR label per frame; take its first (index 0) column.
label_snr = h5_file['Z'][:,0]

# assemble list of test set indices
# do not pre-load large dataset into memory
# Fixed seed so the shuffles below (and therefore the split) are reproducible.
np.random.seed(2018)
test_indices = []
for mod in range(0, 24): #all modulations (0 to 23)
    for snr_idx in range(0, 26): #all SNRs (0 to 25 = -20dB to +30dB)
        # The index arithmetic assumes 4096 consecutive frames per
        # (modulation, SNR) pair, ordered by modulation first and SNR second.
        start_idx = 26*4096*mod + 4096*snr_idx
        indices_subclass = list(range(start_idx, start_idx+4096))

        # ceil(0.1 * 4096) = 410 frames per subclass go to the held-out part.
        split = int(np.ceil(0.1 * 4096)) #90%/10% split
        np.random.shuffle(indices_subclass)
        # The first `split` shuffled indices form the held-out part; the
        # training part is computed but not used in this cell.
        train_indices_subclass, val_indices_subclass = indices_subclass[split:], indices_subclass[:split]

        # With the threshold at 0 this is true for every SNR index, so all
        # SNRs are tested; raise the threshold to restrict to higher SNRs.
        if snr_idx >= 0: #select which SNRs to test on
            test_indices.extend(val_indices_subclass)

# Sort ascending; later cells index data_h5 with slices of this list
# (presumably h5py needs increasing index order for that - confirm).
test_indices = sorted(test_indices)

# note: labels given in the "classes.txt" file are not in the correct order (https://github.com/radioML/dataset/issues/25)
# Position in this list == modulation class index used by label_mod.
mod_classes = ['OOK','4ASK','8ASK','BPSK','QPSK','8PSK','16PSK','32PSK',
'16APSK','32APSK','64APSK','128APSK','16QAM','32QAM','64QAM','128QAM','256QAM',
'AM-SSB-WC','AM-SSB-SC','AM-DSB-WC','AM-DSB-SC','FM','GMSK','OQPSK']
# 26 values in 2 dB steps (the stop value 32 is exclusive).
snr_classes = np.arange(-20., 32., 2) # -20dB to 30dB

In [7]:
print(data_h5.shape)
print(label_mod.shape)
print(label_snr.shape)
print(len(test_indices))

(2555904, 1024, 2)
(2555904,)
(2555904,)
255840


# Inspect a single frame

In [8]:
from matplotlib import pyplot as plt

# Pick one frame by its (modulation, SNR step, sample) coordinates
mod = 12 # 0 to 23
snr_idx = 25 # 0 to 25 = -20dB to +30dB
sample = 123 # 0 to 4095
#-----------------------#
# Flat index: 4096 frames per SNR step, 26 SNR steps per modulation
idx = (mod*26 + snr_idx)*4096 + sample
# Fetch the frame together with the labels stored for it
data = data_h5[idx]
mod = label_mod[idx]
snr = label_snr[idx]
plt.figure()
plt.plot(data)
print("Modulation: %s, SNR: %.1f dB" % (mod_classes[mod], snr))

Modulation: 16QAM, SNR: 30.0 dB


# Input quantization
Quantize input data on-the-fly in software before feeding it to the accelerator. Use the uniform quantization range on which the model was trained.

In [9]:
def quantize(data, quant_min=-1.7981509, quant_max=1.7840475):
    """Uniformly quantize floating-point samples to signed 8-bit integers.

    The interval [quant_min, quant_max] is divided into 256 steps:
    ``quant_min`` maps to -128 and values at or above the top of the range
    saturate at 127. Values outside the interval are clipped.

    Parameters
    ----------
    data : array_like
        Samples to quantize; any shape. The shape is preserved.
    quant_min, quant_max : float, optional
        Lower and upper bound of the quantization range. The defaults are
        the range this notebook uses for the accelerator input.

    Returns
    -------
    numpy.ndarray
        Array of dtype ``int8`` with the same shape as ``data``.

    Raises
    ------
    ValueError
        If ``quant_max`` is not strictly greater than ``quant_min``.
    """
    if not quant_max > quant_min:
        raise ValueError("quant_max must be greater than quant_min")
    quant_range = quant_max - quant_min
    # Normalize to [0, 1] over the quantization range.
    data_quant = (np.asarray(data) - quant_min) / quant_range
    # Scale to 256 levels and shift so the range is centred on zero.
    data_quant = np.round(data_quant * 256) - 128
    # Saturate before the cast so out-of-range values cannot wrap around.
    data_quant = np.clip(data_quant, -128, 127)
    return data_quant.astype(np.int8)

# Classify a single frame

In [10]:
# Quantize the inspected frame, then bring it into the accelerator's input shape.
frame_quant = quantize(data)
accel_in = frame_quant.reshape(accel.ishape_normal(0))
print("Input buffer shape is %s and datatype is %s" % (str(accel_in.shape), str(accel_in.dtype)))

Input buffer shape is (1, 1024, 1, 2) and datatype is int8


In [11]:
# Run a single inference on the quantized frame.
accel_out = accel.execute(accel_in)
# No software post-processing is applied: the next cell uses the output
# directly as the index of the predicted class.

In [12]:
print("Result: " + str(accel_out))
# accel_out is a 2-D single-element array. Extract the element explicitly:
# calling int() directly on an array with ndim > 0 is deprecated in recent
# NumPy releases. .item() still raises if there is more than one element.
pred_class = int(np.asarray(accel_out).item())
print("Top-1 class predicted by the accelerator: " + mod_classes[pred_class])

Result: [[12.]]
Top-1 class predicted by the accelerator: 16QAM


In [13]:
%%timeit
# Time a single-frame inference call on the already prepared input buffer.
accel_out = accel.execute(accel_in)

1000 loops, best of 3: 1.01 ms per loop


# Validate accuracy on entire test set

In [14]:
# Switch the accelerator to batched operation and report the buffer shapes
# in its packed, folded and normal layouts (in that order).
batch_size = 1024
accel.batch_size = batch_size
shape_report = "Accelerator buffer shapes are %s for input, %s for output"
shape_pairs = (
    (accel.ishape_packed(0), accel.oshape_packed(0)),
    (accel.ishape_folded(0), accel.oshape_folded(0)),
    (accel.ishape_normal(0), accel.oshape_normal(0)),
)
for in_shape, out_shape in shape_pairs:
    print(shape_report % (str(in_shape), str(out_shape)) )

Accelerator buffer shapes are (1024, 1024, 1, 2, 1) for input, (1024, 1, 1) for output
Accelerator buffer shapes are (1024, 1024, 1, 2, 1) for input, (1024, 1, 1) for output
Accelerator buffer shapes are (1024, 1024, 1, 2) for input, (1024, 1) for output


In [15]:
# Run the whole test set through the accelerator batch by batch and keep
# running counts of correct (ok) and incorrect (nok) top-1 predictions.
ok = 0
nok = 0
total = len(test_indices)
# (Re-)apply the nominal batch size so this cell can be re-run: a previous
# pass leaves the accelerator configured for the shorter final batch.
accel.batch_size = batch_size
for i_batch in range(math.ceil(total/batch_size)):
    i_frame = i_batch*batch_size
    batch_indices = test_indices[i_frame:i_frame+batch_size]
    # Only the final batch can be shorter than batch_size. Track its length
    # in a local instead of overwriting the notebook-wide batch_size.
    cur_batch_size = len(batch_indices)
    if cur_batch_size != batch_size:
        accel.batch_size = cur_batch_size
    data, mod, snr = data_h5[batch_indices], label_mod[batch_indices], label_snr[batch_indices]

    # Quantize in software, then reshape to the accelerator's input layout.
    ibuf = quantize(data).reshape(accel.ishape_normal(0))
    obuf = accel.execute(ibuf)

    # One predicted class index per frame in the batch.
    pred = obuf.reshape(cur_batch_size).astype(int)

    ok += np.equal(pred, mod).sum().item()
    nok += np.not_equal(pred, mod).sum().item()

    print("batch %d : total OK %d NOK %d" % (i_batch, ok, nok))

batch 0 : total OK 5 NOK 1019
batch 1 : total OK 51 NOK 1997
batch 2 : total OK 520 NOK 2552
batch 3 : total OK 1370 NOK 2726
batch 4 : total OK 2391 NOK 2729
batch 5 : total OK 3415 NOK 2729
batch 6 : total OK 4439 NOK 2729
batch 7 : total OK 5463 NOK 2729
batch 8 : total OK 6487 NOK 2729
batch 9 : total OK 7511 NOK 2729
batch 10 : total OK 7931 NOK 3333
batch 11 : total OK 7934 NOK 4354
batch 12 : total OK 7993 NOK 5319
batch 13 : total OK 8360 NOK 5976
batch 14 : total OK 9056 NOK 6304
batch 15 : total OK 9940 NOK 6444
batch 16 : total OK 10957 NOK 6451
batch 17 : total OK 11977 NOK 6455
batch 18 : total OK 13000 NOK 6456
batch 19 : total OK 14020 NOK 6460
batch 20 : total OK 14864 NOK 6640
batch 21 : total OK 14923 NOK 7605
batch 22 : total OK 15078 NOK 8474
batch 23 : total OK 15374 NOK 9202
batch 24 : total OK 16120 NOK 9480
batch 25 : total OK 17031 NOK 9593
batch 26 : total OK 18028 NOK 9620
batch 27 : total OK 19051 NOK 9621
batch 28 : total OK 20068 NOK 9628
batch 29 : total 

batch 225 : total OK 132074 NOK 99350
batch 226 : total OK 133098 NOK 99350
batch 227 : total OK 134122 NOK 99350
batch 228 : total OK 135146 NOK 99350
batch 229 : total OK 135170 NOK 100350
batch 230 : total OK 135173 NOK 101371
batch 231 : total OK 135266 NOK 102302
batch 232 : total OK 136058 NOK 102534
batch 233 : total OK 137082 NOK 102534
batch 234 : total OK 138106 NOK 102534
batch 235 : total OK 139130 NOK 102534
batch 236 : total OK 140154 NOK 102534
batch 237 : total OK 141178 NOK 102534
batch 238 : total OK 142202 NOK 102534
batch 239 : total OK 142647 NOK 103113
batch 240 : total OK 142647 NOK 104137
batch 241 : total OK 142649 NOK 105159
batch 242 : total OK 142742 NOK 106090
batch 243 : total OK 143513 NOK 106343
batch 244 : total OK 144535 NOK 106345
batch 245 : total OK 145559 NOK 106345
batch 246 : total OK 146583 NOK 106345
batch 247 : total OK 147607 NOK 106345
batch 248 : total OK 148631 NOK 106345
batch 249 : total OK 149495 NOK 106345


In [16]:
# Percentage of test frames whose top-1 prediction matched the label.
acc = 100.0 * ok / total
accuracy_line = "Overall top-1 accuracy: {}%".format(acc)
print(accuracy_line)

Overall top-1 accuracy: 58.43300500312696%


## More benchmarking

In [18]:
# Restore the full batch size (the validation loop may have left the
# accelerator configured for a shorter final batch) before benchmarking.
accel.batch_size = 1024
# Last expression of the cell: its return value is shown as the cell output.
accel.throughput_test()

{'DRAM_in_bandwidth[Mb/s]': 64.74523228253237,
 'DRAM_out_bandwidth[Mb/s]': 0.03161388295045526,
 'batch_size': 1024,
 'copy_input_data_to_device[ms]': 2.189159393310547,
 'copy_output_data_from_device[ms]': 0.08916854858398438,
 'fclk[mhz]': 187.498125,
 'fold_input[ms]': 0.1010894775390625,
 'pack_input[ms]': 0.1556873321533203,
 'runtime[ms]': 32.39083290100098,
 'throughput[images/s]': 31613.88295045526,
 'unfold_output[ms]': 0.08726119995117188,
 'unpack_output[ms]': 0.6263256072998047}