In [1]:
import os
import sys
# Put the parent of the current working directory on the import path (once),
# so that packages living one level above this notebook can be imported.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [2]:
import  multiprocessing  as mp
import time
import numpy as np
import commpy as cp
import tensorflow as tf
from commpy.channelcoding import Trellis


from deepcom.model import NRSCDecoder           # Neural Decoder Model
from deepcom.metrics import BER, BLER           # metrics to benchmark Neural Decoder Model
from deepcom.utils import corrupt_signal        # simulate a AWGN Channel

from deepcom.dataset import create_dataset      # Create synthetic dataset
from deepcom.dataset import data_genenerator    # data loader for Tensorflow

import  matplotlib.pyplot  as plt

In [3]:
# ----------------
# Dataset sizes
# ----------------
BLOCK_LEN = 100               # message bits per block
NUM_TRAINING_DATA = 120000    # number of training sequences
NUM_TESTING_DATA = 20000      # number of testing sequences

# ----------------------
# Network architecture
# ----------------------
NUM_LAYERS = 2
NUM_HIDDEN_UNITS = 400

# ------------------------------
# Hyper-parameters for training
# ------------------------------
# Steps per epoch are computed as `num_data // BATCH_SIZE`, so pick a batch
# size that divides the dataset sizes evenly (and that fits on the GPU).
BATCH_SIZE = 200
LEARNING_RATE = 4e-4
# Forwarded as `dropout=` to NRSCDecoder.
# NOTE(review): confirm whether this is a drop or a keep probability.
DROPOUT_RATE = 0.7

# ---------------------------------------------
# Convolutional encoder / Viterbi decoder setup
# ---------------------------------------------
CONSTRAINT_LEN = 3
# Passed as `tb_depth` to commpy's viterbi_decode. 15 == 5 * CONSTRAINT_LEN,
# which looks like the usual rule of thumb -- TODO confirm against commpy docs.
TRACE_BACK_DEPTH = 15

# Encoder memory: one entry, equal to CONSTRAINT_LEN - 1.
M = np.array([CONSTRAINT_LEN - 1])
# Generator matrix, written as octal literals (7 and 5).
G = np.array([[0o7, 0o5]])
trellis = Trellis(M, G, feedback=0o7)

In [4]:
# Ref: Communication Algo via Deep Learning (page 5, last paragraph)
SNR_train = 1.0

# Both splits are generated with the same worker count, so query it once.
n_workers = mp.cpu_count()

# -----------------------------------------------------------------
# Create the synthetic datasets. Both splits use SNR_train; they
# differ only in size and random seed.
# -----------------------------------------------------------------
print('Creating training data....')
X_train, Y_train = create_dataset(
    NUM_TRAINING_DATA, BLOCK_LEN, trellis,
    snr=SNR_train, seed=2018, num_cpus=n_workers)

print('Creating testing data....')
X_test, Y_test = create_dataset(
    NUM_TESTING_DATA, BLOCK_LEN, trellis,
    snr=SNR_train, seed=1111, num_cpus=n_workers)

print('Number of training sequences {}'.format(len(X_train)))
print('Number of testing sequences {}'.format(len(X_test)))

Creating training data....
Creating testing data....
Number of training sequences 120000
Number of testing sequences 20000


## Estimate Neural Decoder Optimal Performance 

* Since we know in advance that the **Viterbi decoder is the optimal solution for Block Error Rate** (BLER), we can compute the global minimum (the optimal solution) and use it as the benchmark for the neural decoder.

In [5]:
def run_viterbi(message_bits, noisy_bits):
    """Viterbi-decode one noisy codeword and count its bit errors.

    Relies on the notebook-level globals ``trellis``, ``TRACE_BACK_DEPTH``
    and ``M``.

    Args:
        message_bits: array of the original message bits for this block.
        noisy_bits: array of received values for the encoded block; it is
            passed unchanged to commpy's ``viterbi_decode`` as ``coded_bits``.

    Returns:
        The Hamming distance between ``message_bits`` and the decoded bits
        after the trailing ``M`` bits have been dropped.
    """
    # Viterbi Decoder on Conv. Code
    decoded_bits = cp.channelcoding.viterbi_decode(
        coded_bits=noisy_bits,
        trellis=trellis,
        tb_depth=TRACE_BACK_DEPTH,
        decoding_type='unquantized')

    # `M` is a one-element array. Extract the element explicitly: calling
    # int() on an array with ndim > 0 is deprecated since NumPy 1.25.
    num_tail_bits = int(np.ravel(M)[0])
    # Use an explicit end index rather than `[:-num_tail_bits]`, which would
    # yield an empty array if the encoder memory were 0.
    num_kept_bits = len(decoded_bits) - num_tail_bits

    num_bit_errors_per_message = cp.utilities.hamming_dist(
        message_bits.astype(int),
        decoded_bits[:num_kept_bits].astype(int))
    return num_bit_errors_per_message

# One row per test block: the message bits and the flattened noisy signal.
# NOTE(review): the extra 4 values per row are presumably the channel symbols
# of the M = 2 termination bits at 2 outputs per bit -- confirm.
ORIGNAL_BITS = Y_test.reshape((-1, BLOCK_LEN))
NOISY_SIGNALS= X_test.reshape((-1, 2 * BLOCK_LEN + 4))

snr = SNR_train
print('Estimating Optimal Neural Decoder Performance at SNR = %.2f ' % snr)

# The context manager always tears the worker pool down, even when decoding
# fails. Errors are deliberately NOT swallowed: a failure here should surface
# with its full traceback instead of leaving a cell that looks successful.
with mp.Pool(processes=mp.cpu_count()) as pool:
    t0 = time.time()
    viterbi_hamm_dists = pool.starmap(
        func=run_viterbi,
        iterable=list(zip(ORIGNAL_BITS, NOISY_SIGNALS)))
    t1 = time.time()

# Compute BER and BLER
# BER : total bit errors / total number of message bits.
# BLER: fraction of blocks containing at least one bit error.
# np.prod replaces np.product, a deprecated alias removed in NumPy 2.0.
vi_ber = sum(viterbi_hamm_dists) / np.prod(np.shape(Y_test))
vi_bler = np.count_nonzero(viterbi_hamm_dists) / len(Y_test)
print("[BER] = %.3f [BLER] = %.3f -- %3.3f s" %
        (vi_ber, vi_bler, t1 - t0))

Estimating Optimal Neural Decoder Performance at SNR = 1.00 
[BER] = 0.046 [BLER] = 0.764 -- 84.196 s


## Training Pipeline for Neural Decoder

In [6]:
# Start from a clean Keras graph/session so re-running this cell does not
# pile up layers from a previous run.
tf.keras.backend.clear_session()

# Input: sequences of arbitrary length with 2 values per time step.
inputs = tf.keras.Input(shape=(None, 2))

# Neural decoder settings, taken from the configuration cell.
decoder_config = dict(
    is_training=True,
    num_layers=NUM_LAYERS,
    hidden_units=NUM_HIDDEN_UNITS,
    dropout=DROPOUT_RATE,
)
outputs = NRSCDecoder(inputs, **decoder_config)

model = tf.keras.Model(inputs, outputs)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, None, 2)           0         
_________________________________________________________________
bidirectional (Bidirectional (None, None, 800)         967200    
_________________________________________________________________
batch_normalization (BatchNo (None, None, 800)         3200      
_________________________________________________________________
bidirectional_1 (Bidirection (None, None, 800)         2882400   
_________________________________________________________________
batch_normalization_1 (Batch (None, None, 800)         3200      
_________________________________________________________________
time_distributed (TimeDistri (None, None, 1)           801       
Total params: 3,856,801
Trainable params: 3,853,601
Non-trainable params: 3,200
______________________________________________________________

In [7]:
# Keep only the first BLOCK_LEN time steps of every noisy sequence.
# NOTE: this overwrites X_train / X_test in place, so the Viterbi benchmark
# cell (which reshapes X_test to 2 * BLOCK_LEN + 4 values per block) can no
# longer be re-run after this cell without regenerating the data.
X_train, X_test = (split[:, :BLOCK_LEN, :] for split in (X_train, X_test))

# tf.data pipelines: shuffle the training data only.
train_set = data_genenerator(X_train, Y_train, BATCH_SIZE, shuffle=True)
test_set = data_genenerator(X_test, Y_test, BATCH_SIZE, shuffle=False)

# Halt training once `val_loss` has gone 10 epochs without improving.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0.0, patience=10, verbose=0, mode='auto')

# Checkpoint to 'BiGRU.hdf5', keeping only the best model by `val_BLER`.
backup = tf.keras.callbacks.ModelCheckpoint(
    filepath='BiGRU.hdf5', monitor='val_BLER', save_best_only=True)

# Learning rate scheduler
def scheduler(epoch):
  """Return the learning rate for the given epoch index (step decay).

  The base rate is the notebook-level ``LEARNING_RATE``. It is divided by
  10 for epochs 11-15, by 100 for 16-20, by 1000 for 21-25 and by 10000
  for every epoch after 25; epochs up to and including 10 use the base rate.
  """
  # Checked from the latest stage backwards, so the first threshold that the
  # epoch exceeds selects the divisor.
  decay_stages = ((25, 10000.0), (20, 1000.0), (15, 100.0), (10, 10.0))
  for last_epoch_before_stage, divisor in decay_stages:
      if epoch > last_epoch_before_stage:
          return LEARNING_RATE / divisor
  return LEARNING_RATE
  
# Apply the step-decay schedule at the start of every epoch (verbose=1 logs it).
change_lr = tf.keras.callbacks.LearningRateScheduler(scheduler, verbose=1)

model.compile(
    optimizer=tf.keras.optimizers.Adam(LEARNING_RATE),
    loss='binary_crossentropy',
    metrics=[BER, BLER])

# Whole batches only: any remainder of the dataset is not visited in an epoch.
train_steps = len(X_train) // BATCH_SIZE
val_steps = len(X_test) // BATCH_SIZE

history = model.fit(
    train_set.make_one_shot_iterator(),
    steps_per_epoch=train_steps,
    validation_data=test_set.make_one_shot_iterator(),
    validation_steps=val_steps,
    callbacks=[early_stopping, backup, change_lr],
    epochs=20)

Epoch 1/20

Epoch 00001: LearningRateScheduler reducing learning rate to 0.0004.
Epoch 2/20

Epoch 00002: LearningRateScheduler reducing learning rate to 0.0004.
Epoch 3/20

Epoch 00003: LearningRateScheduler reducing learning rate to 0.0004.
Epoch 4/20

Epoch 00004: LearningRateScheduler reducing learning rate to 0.0004.
Epoch 5/20

Epoch 00005: LearningRateScheduler reducing learning rate to 0.0004.
Epoch 6/20

Epoch 00006: LearningRateScheduler reducing learning rate to 0.0004.
Epoch 7/20

Epoch 00007: LearningRateScheduler reducing learning rate to 0.0004.
Epoch 8/20

Epoch 00008: LearningRateScheduler reducing learning rate to 0.0004.
Epoch 9/20

Epoch 00009: LearningRateScheduler reducing learning rate to 0.0004.
Epoch 10/20

Epoch 00010: LearningRateScheduler reducing learning rate to 0.0004.
Epoch 11/20

Epoch 00011: LearningRateScheduler reducing learning rate to 0.0004.
Epoch 12/20

Epoch 00012: LearningRateScheduler reducing learning rate to 4e-05.
Epoch 13/20

Epoch 00013: 

KeyboardInterrupt: 

In [8]:
# 1-based epoch numbers, one per recorded training-loss value.
loss_history = history.history
epochs = range(1, len(loss_history['loss']) + 1)

# Training vs. test loss per epoch.
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(epochs, loss_history['loss'], 'r--')
ax.plot(epochs, loss_history['val_loss'], 'b-')
ax.legend(['Training Loss', 'Test Loss'])
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.grid()
plt.show()

NameError: name 'history' is not defined

## Evaluate Decoder

In [9]:
# Restore the checkpoint written by the ModelCheckpoint callback. The custom
# BER / BLER metrics must be supplied so Keras can rebuild the compiled model.
model = tf.keras.models.load_model(
    'BiGRU.hdf5',
    custom_objects={'BER': BER, 'BLER': BLER})

In [10]:
# Hard decisions: round the model's per-bit outputs to 0 / 1.
predictions = model.predict(X_test, batch_size=BATCH_SIZE).round()

# Number of bit errors in each test block.
hamming_dists = [
    cp.utilities.hamming_dist(x.astype(int), y.astype(int))
    for x, y in zip(predictions, Y_test)]

# BER : total bit errors / total number of message bits.
# BLER: fraction of blocks containing at least one bit error.
# np.prod replaces np.product, a deprecated alias removed in NumPy 2.0.
num_message_bits = np.prod(np.shape(Y_test))
print('BER: %.4f' % (np.sum(hamming_dists) / num_message_bits))
print('BLER: %.4f'% (np.count_nonzero(hamming_dists) / len(Y_test)))

BER: 0.0497
BLER: 0.8920
