In [1]:
import numpy as np
from optic.models.devices import mzm, photodiode
from optic.models.channels import linearFiberChannel
from optic.comm.sources import bitSource
from optic.comm.modulation import modulateGray
from optic.comm.metrics import bert
from optic.dsp.core import firFilter, pulseShape, upsample, pnorm, anorm
from optic.utils import parameters, dBm2W
from scipy.special import erfc

import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, Model



In [2]:
import tensorflow as tf
from tensorflow.keras import layers, models, initializers


def build_dpd_model():
    # should i change the first dim to None as per gemeni did? - cuz i feel like the next layer would not slide accross so id need to do the windowing manually as a preproc step
    # id say for now since it works/makes sense dont try to fix it, do the preprocessing manually and dont assume the below filter slides accross automatically.
    # update: apparently it does slide through, you just change the batch size (inference) to 1 not N/101, for now what you have just makes sense so play around with that later.
    # the "1" dimension is for features, it can be 2 for say an I/Q signal - but apparently here they made two seperate nets for I and Q so ud still use 1
    # I think the reason why batch size is mandatory to have in CNNs is cuz usually you'd pass an infintely long signal (or too long) unlike a typical dataset.
    # so almost always you'd wanna apply batching to reduce memory footprint.
    # but that's different from the "timestep" element which is the first dimension here (the 101 i chose, but can be anything .. maybe even 500 - play around w/ it.)
    # i mean since ill be applying windowing manually so i should get the same ooutput regardless.
    # lets now stick to what i understand - signal of length N -> reshape to N,1 -> apply a sliding window so it's (N-101, 101, 1) -> pass to the model
    # the thing to try for later is .... set the input shape to (None,1), and pass the input as (1, N, 1) and get your CNN to slide accross automatically for you
    # both should yield the same result - but my QS is why would you need to do batching in the first place and why is it not necessary to do for e.g. in regular NNs?
    # thats just purely an API design choice - nothing too crazy here.

    inputs = layers.Input(shape=(101,1))


    # QS here, why is your filer 3dimensional for a 1D operation?
    # 1D or 2D in CNNs refer to the sliding dimension, in 1D -> it's a single one way, in 2D, it slides in the X and Y directions
    # but that doesn't mean that your input array cant be multi-dimensional, in that case your filter would need to have a shape to basically fit on it.

    # so if your X input is (T,2), your filter would be F,2 as well, so there's weight parameters in the second dimension as well.
    # now what about the third dimension (e.g. here it's 101,1,1) - that's your filter count, sometimes you may need to capture the corellation to multiple features at once
    # so you'd use multiple filters for that.
    kernel_init_A = np.zeros((101, 1, 1)) 
    kernel_init_A[50, 0, 0] = 1.0
    sec_a = layers.Conv1D(filters=1, kernel_size=101, padding='valid', use_bias=False,
                            kernel_initializer=initializers.Constant(kernel_init_A))(inputs) # note the choice of padding matters here, 'same' adds padding so out dim is (101,1)



    x = layers.Dense(12, activation=layers.LeakyReLU(negative_slope=0.1))(sec_a)
    x = layers.Dense(8, activation=layers.LeakyReLU(negative_slope=0.1))(x)
    x = layers.Dense(8, activation=layers.LeakyReLU(negative_slope=0.1))(x)
    nonlinear_out = layers.Dense(1, activation='linear')(x) # Final sum to 1 neuron

    sec_b = layers.Add()([sec_a, nonlinear_out])




    outputs = sec_b

    return models.Model(inputs, outputs)

In [3]:
import numpy as np

def create_sliding_windows(data, window_size):
    """
    Converts a 1D array into a 3D windowed dataset with the same output length.
    """
    data = np.asarray(data)
    
    # Pad the beginning of the data with zeros 
    # (window_size - 1) pads ensures the first window contains the first element
    padding_size = window_size - 1
    padded_data = np.pad(data, (padding_size, 0), mode='constant', constant_values=0)
    
    # Now the number of windows will equal len(data)
    num_windows = len(padded_data) - window_size + 1
    
    # Efficient window creation
    windows = [padded_data[i : i + window_size] for i in range(num_windows)]
    
    # Convert to (Samples, Window_Size, Features)
    X = np.array(windows)
    return X[..., np.newaxis]



In [7]:
# simulation parameters
SpS = 16  # samples per symbol
M = 2  # order of the modulation format
Rs = 10e9  # Symbol rate
Fs = SpS * Rs  # Signal sampling frequency (samples/second)
Pi_dBm = 3  # laser optical power at the input of the MZM in dBm
Pi = dBm2W(Pi_dBm)  # convert from dBm to W

# Bit source parameters
paramBits = parameters()
paramBits.nBits = 100000  # number of bits to be generated
paramBits.mode = 'random' # mode of the bit source 
paramBits.seed = 123      # seed for the random number generator

# pulse shaping parameters
paramPulse = parameters()
paramPulse.pulseType = 'nrz'  # pulse shape type
paramPulse.SpS = SpS     # samples per symbol  

# MZM parameters
paramMZM = parameters()
paramMZM.Vpi = 2
paramMZM.Vb = -paramMZM.Vpi / 2

# linear fiber optical channel parameters
paramCh = parameters()
paramCh.L = 100        # total link distance [km]
paramCh.alpha = 0.2    # fiber loss parameter [dB/km]
paramCh.D = 16         # fiber dispersion parameter [ps/nm/km]
paramCh.Fc = 193.1e12  # central optical frequency [Hz]
paramCh.Fs = Fs

# photodiode parameters
paramPD = parameters()
paramPD.ideal = False
paramPD.B = Rs
paramPD.Fs = Fs
paramPD.seed = 456  # seed for the random number generator


# DPD Models:
#dpd_model_copy = build_dpd_model()
dpd_model = build_dpd_model()
dpd_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4), loss='mse')


BER_list = []
Q_list = []

for i in range(9):
    ## Starting Simulation

    # generate pseudo-random bit sequence
    bitsTx = bitSource(paramBits)

    # generate 2-PAM modulated symbol sequence
    symbTx = modulateGray(bitsTx, M, "pam")



    ds = tf.data.Dataset.from_tensor_slices(create_sliding_windows(symbTx, 101)).shuffle(10000).batch(4096).prefetch(tf.data.AUTOTUNE)
    symbTx_dpd = dpd_model.predict(ds).flatten()
    
        
    # upsampling
    symbolsUp = upsample(symbTx_dpd, SpS)

    # pulse shaping
    pulse = pulseShape(paramPulse)
    sigTx = firFilter(pulse, symbolsUp)
    sigTx = anorm(sigTx) # normalize to 1 Vpp

    # optical modulation
    Ai = np.sqrt(Pi)  # ideal cw laser constant envelope
    sigTxo = mzm(Ai, sigTx, paramMZM)

    # linear fiber channel model
    sigCh = linearFiberChannel(sigTxo, paramCh)

    # noisy PD (thermal noise + shot noise + bandwidth limit)
    I_Rx = photodiode(sigCh, paramPD)

    # capture samples in the middle of signaling intervals
    I_Rx = I_Rx[0::SpS]

    ds = tf.data.Dataset.from_tensor_slices((create_sliding_windows(I_Rx, 101), create_sliding_windows(symbTx_dpd, 101))).shuffle(10000).batch(4096).prefetch(tf.data.AUTOTUNE)
    dpd_model.fit(ds, epochs=10, verbose=1)

    # PERFORMANCE METRICS
    BER, Q = bert(I_Rx, bitsTx) # BER and Q-factor
    print(f"Q-factor = {Q:.2f} ")
    print(f"BER = {BER:.2e}")

    BER_list.append(BER)
    Q_list.append(Q)

[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
Epoch 1/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - loss: 0.5475
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 0.5352
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 0.5235
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 0.5126
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.5025
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.4935
Epoch 7/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.4855
Epoch 8/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 0.4787
Epoch 9/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 0.4730
Epoch 10/10
[1m25/25[0m 

In [5]:
symbTx_dpd

array([-3.159619  ,  0.8715137 , -2.8771253 , ..., -1.7784505 ,
       -0.15786298,  2.7018905 ], dtype=float32)

you must be also looking into Q-factor values not just BER. BER on itself isnt enough.
UPDATE THE PAPER USES SNR INSTEAD