In [1]:
import os
import sys
# Put the parent directory of the working directory on the import path
# (presumably so that the project-local `deepcom` package can be imported).
module_path = os.path.abspath(os.pardir)
already_on_path = module_path in sys.path
if not already_on_path:
    sys.path.append(module_path)

In [2]:
import  multiprocessing  as mp
import time
import numpy as np
import commpy as cp
import tensorflow as tf
from commpy.channelcoding import Trellis


from deepcom.model import NRSCDecoder           # Neural Decoder Model
from deepcom.metrics import BER, BLER           # metrics to benchmark Neural Decoder Model
from deepcom.utils import corrupt_signal        # simulate a AWGN Channel

from deepcom.dataset import create_bsc_dataset  # Create synthetic dataset
from deepcom.dataset import data_genenerator    # data loader for Tensorflow

import  matplotlib.pyplot  as plt

In [3]:
# ---------------------------------------------------------------------
# Dataset sizes
# ---------------------------------------------------------------------
BLOCK_LEN = 100                                     # message bits per block
NUM_TRAINING_DATA, NUM_TESTING_DATA = 120000, 4000  # number of sequences

# ---------------------------------------------------------------------
# Network architecture (passed to NRSCDecoder)
# ---------------------------------------------------------------------
NUM_LAYERS, NUM_HIDDEN_UNITS = 2, 400

# ---------------------------------------------------------------------
# Training hyper-parameters
# ---------------------------------------------------------------------
# Batch size is bounded by GPU memory and should divide the number of
# training/testing sequences evenly.
BATCH_SIZE = 800
LEARNING_RATE = 1e-3
DROPOUT_RATE = 0.5

# ---------------------------------------------------------------------
# Convolutional code definition
# ---------------------------------------------------------------------
CONSTRAINT_LEN = 3      # num of shifts in the convolutional encoder
TRACE_BACK_DEPTH = 15   # traceback depth (`tb_depth`) given to the Viterbi decoder

M = np.array([CONSTRAINT_LEN - 1])   # encoder memory: constraint length minus one
G = np.array([[0o7, 0o5]])           # generator polynomials in octal notation

# Trellis with feedback polynomial 0o7, shared by dataset creation and
# Viterbi decoding.
trellis = Trellis(M, G, feedback=0o7)

In [4]:
# Crossover probability used for every dataset in this notebook.
# Ref: Communication Algo via Deep Learning (page 5, last paragraph)
Error_Prob = 0.08

# Training and testing sets are generated with different seeds.
print('Creating training data....')
X_train, Y_train = create_bsc_dataset(
    NUM_TRAINING_DATA, BLOCK_LEN, trellis,
    error_prob=Error_Prob,
    seed=2018)

print('Creating testing data....')
X_test, Y_test = create_bsc_dataset(
    NUM_TESTING_DATA, BLOCK_LEN, trellis,
    error_prob=Error_Prob,
    seed=1111)

num_train_sequences = len(X_train)
num_test_sequences = len(X_test)
print('Number of training sequences {}'.format(num_train_sequences))
print('Number of testing sequences {}'.format(num_test_sequences))

Creating training data....
Creating testing data....
Number of training sequences 120000
Number of testing sequences 4000


## Estimate Neural Decoder Optimal Performance 

* Since we know in advance that **Viterbi decoding is the optimal solution for Block Error Rate** (BLER), we can compute the global minimum (the optimal solution) directly.

In [5]:
def run_viterbi(message_bits, noisy_bits):
    """Hard-decision Viterbi-decode one block and count its bit errors.

    Uses the notebook-level `trellis`, `TRACE_BACK_DEPTH` and `M`.

    Args:
        message_bits: array of the original message bits of one block.
        noisy_bits: array of the received coded bits of the same block.

    Returns:
        The Hamming distance between `message_bits` and the decoded bits.
    """
    decoded_bits = cp.channelcoding.viterbi_decode(
        coded_bits=noisy_bits.astype(float),
        trellis=trellis,
        tb_depth=TRACE_BACK_DEPTH,
        decoding_type='hard')

    # `M` is a one-element array. Index it explicitly: calling int() on an
    # array with ndim > 0 is deprecated since NumPy 1.25.
    num_tail_bits = int(M[0])

    # Drop the last `num_tail_bits` decoded bits (presumably the tail bits
    # added by the encoder) before comparing against the message.
    num_bit_errors_per_message = cp.utilities.hamming_dist(
        message_bits.astype(int),
        decoded_bits[:-num_tail_bits].astype(int))
    return num_bit_errors_per_message

# One row per test block: original message bits and received coded bits.
ORIGNAL_BITS = Y_test.reshape((-1, BLOCK_LEN))
# NOTE(review): 2 * BLOCK_LEN + 4 presumably means two coded bits per message
# bit plus four tail bits -- confirm against create_bsc_dataset. This must run
# on the un-truncated X_test, i.e. before the training cell slices it.
NOISY_SIGNALS = X_test.reshape((-1, 2 * BLOCK_LEN + 4))

error_prob = Error_Prob
print('Estimating Optimal Neural Decoder Performance at Error Probability = %.2f ' % error_prob)

# The context manager always releases the worker processes. Errors are no
# longer swallowed, so a failed baseline is visible instead of leaving
# `vi_ber` / `vi_bler` undefined.
with mp.Pool(processes=mp.cpu_count()) as pool:
    t0 = time.time()
    viterbi_hamm_dists = pool.starmap(
        func=run_viterbi,
        iterable=list(zip(ORIGNAL_BITS, NOISY_SIGNALS)))
    t1 = time.time()

# Compute BER and BLER
# np.prod replaces np.product, which was removed in NumPy 2.0.
vi_ber = sum(viterbi_hamm_dists) / np.prod(np.shape(Y_test))
vi_bler = np.count_nonzero(viterbi_hamm_dists) / len(Y_test)
print("[BER] = %.3f [BLER] = %.3f -- %3.3f s" %
        (vi_ber, vi_bler, t1 - t0))

Estimating Optimal Neural Decoder Performance at Error Probability = 0.08 
[BER] = 0.038 [BLER] = 0.728 -- 15.040 s


## Training Pipeline for Neural Decoder

In [6]:
# Start from a clean Keras graph so re-running this cell does not pile up layers.
tf.keras.backend.clear_session()

# Neural decoder: variable-length sequences with 2 features per time step.
inputs = tf.keras.Input(shape=(None, 2))
decoder_kwargs = dict(
    is_training=True,
    num_layers=NUM_LAYERS,
    hidden_units=NUM_HIDDEN_UNITS,
    dropout=DROPOUT_RATE)
outputs = NRSCDecoder(inputs, **decoder_kwargs)
model = tf.keras.Model(inputs, outputs)

# SGD with Nesterov momentum; BER / BLER are tracked as metrics.
optimizer = tf.keras.optimizers.SGD(
    lr=LEARNING_RATE, momentum=0.9, nesterov=True)
model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=[BER, BLER])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, None, 2)           0         
_________________________________________________________________
bidirectional (Bidirectional (None, None, 800)         967200    
_________________________________________________________________
batch_normalization (BatchNo (None, None, 800)         3200      
_________________________________________________________________
bidirectional_1 (Bidirection (None, None, 800)         2882400   
_________________________________________________________________
batch_normalization_1 (Batch (None, None, 800)         3200      
_________________________________________________________________
time_distributed (TimeDistri (None, None, 1)           801       
Total params: 3,856,801
Trainable params: 3,853,601
Non-trainable params: 3,200
______________________________________________________________

In [7]:
def scheduler(epoch):
    """Piecewise-constant learning-rate schedule.

    Returns `LEARNING_RATE` while `epoch` is below 10, `LEARNING_RATE / 100`
    while it is below 15, and `LEARNING_RATE / 1000` afterwards.
    """
    if epoch < 10:
        return LEARNING_RATE
    if epoch < 15:
        return LEARNING_RATE/100.
    return LEARNING_RATE / 1000.

# Keras callback that sets the learning rate from `scheduler`; verbose=1 makes
# it log the rate it applies.
lr_scheduler = tf.keras.callbacks.LearningRateScheduler(scheduler, verbose=1)

In [None]:
# Keep only the first BLOCK_LEN time steps of every sequence.
# Note: this rebinds X_train / X_test, and later cells use the truncated arrays.
X_train = X_train[:, :BLOCK_LEN, :]
X_test = X_test[:, :BLOCK_LEN, :]

# Data loaders: shuffle the training stream, keep the test stream in order.
train_set = data_genenerator(X_train, Y_train, BATCH_SIZE, shuffle=True)
test_set = data_genenerator(X_test, Y_test, BATCH_SIZE, shuffle=False)

# Checkpoint only when the monitored validation BLER improves.
backup = tf.keras.callbacks.ModelCheckpoint(
    filepath='BiGRU_BSC.hdf5',
    monitor='val_BLER',
    save_best_only=True)

# Number of full batches per pass over each split.
train_steps = len(X_train) // BATCH_SIZE
validation_steps = len(X_test) // BATCH_SIZE

history = model.fit(
    train_set.make_one_shot_iterator(),
    epochs=20,
    steps_per_epoch=train_steps,
    validation_data=test_set.make_one_shot_iterator(),
    validation_steps=validation_steps,
    callbacks=[backup, lr_scheduler])

Epoch 1/20

Epoch 00001: LearningRateScheduler reducing learning rate to 0.001.
Epoch 2/20

Epoch 00002: LearningRateScheduler reducing learning rate to 0.001.
Epoch 3/20

Epoch 00003: LearningRateScheduler reducing learning rate to 0.001.

In [None]:
# Loss curves recorded by model.fit, one value per completed epoch.
train_loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(train_loss) + 1)

# Training vs. validation loss over the epochs.
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(epochs, train_loss, 'r--')
ax.plot(epochs, val_loss, 'b-')
ax.legend(['Training Loss', 'Test Loss'])
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.grid()
plt.show()

## Evaluate Decoder

In [None]:
# Reload the checkpointed model. The custom BER / BLER metrics must be
# supplied so Keras can deserialize the compiled model.
custom_metrics = {'BER': BER, 'BLER': BLER}
model = tf.keras.models.load_model(
    'BiGRU_BSC.hdf5',
    custom_objects=custom_metrics)

In [None]:
# Round the model outputs to hard 0/1 decisions. X_test must be the version
# truncated to BLOCK_LEN time steps by the training cell.
predictions = model.predict(X_test, batch_size=BATCH_SIZE).round()

# Bit errors per test sequence.
hamming_dists = [cp.utilities.hamming_dist(
        x.astype(int),
        y.astype(int)
    ) for x, y in zip(predictions, Y_test)]

# BER: erroneous bits over all bits. BLER: sequences with at least one error
# over all sequences. np.prod replaces np.product (removed in NumPy 2.0).
print('BER: %.4f' % (np.sum(hamming_dists) / np.prod(np.shape(Y_test))))
print('BLER: %.4f'% (np.count_nonzero(hamming_dists) / len(Y_test)))