### Import data from files

In [None]:
import h5py
import pandas as pd

# Read both I/Q datasets fully into memory inside a context manager so the
# HDF5 handle is closed even when a dataset is missing or a read fails.
# `[:]` copies the data into NumPy arrays, so they stay valid after the close.
with h5py.File("data.hdf5", "r") as IQ_data_file:
    raw_IQ_train = IQ_data_file["train"][:]
    raw_IQ_test = IQ_data_file["test"][:]

print("Raw I/Q training data has shape:", raw_IQ_train.shape)
print("Raw I/Q testing data has shape:", raw_IQ_test.shape)

# Column 1 of the CSV is taken as the label; column 0 is not used here.
training_labels = pd.read_csv("train_labels.csv").to_numpy()[:,1]

print("Training label data has shape:", training_labels.shape)
df = pd.DataFrame(training_labels, columns=["Modulation Category"])
df.head(10)

### Map training label data to integers and create new label vectors

In [None]:
import numpy as np

# Number of equal-length pieces each signal is split into by the partitioning cell.
N_PARTITIONS = 4
labels = ["FM","OQPSK","BPSK","8PSK","AM-SSB-SC","4ASK","16PSK","AM-DSB-SC","QPSK","OOK"]

# Two-way lookup between label strings and their position in `labels`.
label_to_int = {label: i for i, label in enumerate(labels)}
int_to_label = {i: label for i, label in enumerate(labels)}


def convert_label_to_int(l):
    """Return the integer class index of label string `l`.

    Raises KeyError when `l` is not one of the entries in `labels`.
    """
    return label_to_int[l]


# Create N_PARTITIONS duplicates since signal data is also being partitioned.
# Each label is mapped once and then repeated, so the copies of one label are
# consecutive (a, a, a, a, b, b, b, b, ...).
y_train = np.repeat(np.array([convert_label_to_int(l) for l in training_labels]), N_PARTITIONS)

print("Label-to-integer mappings:", label_to_int)
print("Expanded training label data has shape:", y_train.shape)
for i in range(len(labels)):
    # np.count_nonzero counts matches in C instead of iterating in Python.
    print("Number of examples with label " + int_to_label[i] + " is:",  np.count_nonzero(y_train==i))
df = pd.DataFrame(y_train, columns=["Modulation Category"])
df.head(10)

### Partition signals to create smaller signals as training data

In [None]:
# Channel 0 is treated as the in-phase (I) component and channel 1 as the
# quadrature (Q) component of each signal.
I_train = raw_IQ_train[:,:,0]
Q_train = raw_IQ_train[:,:,1]
I_test = raw_IQ_test[:,:,0]
Q_test = raw_IQ_test[:,:,1]
N_TRAIN = raw_IQ_train.shape[0]
N_TEST = raw_IQ_test.shape[0]
N_SAMPLES = raw_IQ_train.shape[1] // N_PARTITIONS
N_CHANNELS = raw_IQ_train.shape[2]

# Fail early with a clear message instead of silently dropping trailing samples.
if raw_IQ_train.shape[1] % N_PARTITIONS != 0:
    raise ValueError("Signal length " + str(raw_IQ_train.shape[1]) +
                     " is not divisible by N_PARTITIONS=" + str(N_PARTITIONS))


def partition_complex(I, Q, n_partitions, n_samples):
    """Combine I/Q into complex64 signals and split each into equal pieces.

    Parameters
    ----------
    I, Q : 2-D arrays of shape (n_signals, signal_length)
    n_partitions : number of pieces per signal
    n_samples : number of samples per piece

    Returns
    -------
    complex64 array of shape (n_signals * n_partitions, n_samples) in which
    rows n*n_partitions .. n*n_partitions + n_partitions - 1 hold the
    consecutive pieces of signal n.
    """
    used = n_partitions * n_samples
    signals = (I[:, :used] + 1j*Q[:, :used]).astype(np.complex64)
    # A row-major reshape keeps the pieces of one signal in adjacent rows.
    return signals.reshape(I.shape[0] * n_partitions, n_samples)


if N_PARTITIONS == 1:
    complex_train = I_train + 1j*Q_train
    complex_test = I_test + 1j*Q_test
else:
    # Train and test are partitioned independently, so every test row is
    # filled regardless of how N_TEST compares to N_TRAIN.
    complex_train = partition_complex(I_train, Q_train, N_PARTITIONS, N_SAMPLES)
    complex_test = partition_complex(I_test, Q_test, N_PARTITIONS, N_SAMPLES)

    print("Expanded complex-valued training data has shape:", complex_train.shape)
    print("Expanded complex-valued testing data has shape:", complex_test.shape)

    # Verify construction: stitching the pieces of every signal back together
    # must reproduce the original complex signal. This works for any
    # partition count, not only a hard-coded set.
    reconstructed = complex_train.reshape(N_TRAIN, N_PARTITIONS * N_SAMPLES)
    if not np.array_equal(reconstructed, I_train + 1j*Q_train):
        raise ValueError("Construction failed")

df = pd.DataFrame(complex_train)
df.head(10)

### Create feature tensors

In [None]:
N_TRAIN_EX = complex_train.shape[0]
N_TEST_EX = complex_test.shape[0]
# Per-sample features: I, Q, phase, amplitude.
N_FEATURES = 4


def build_feature_tensor(complex_signals):
    """Return a float64 tensor of shape (n_examples, n_samples, N_FEATURES).

    The last axis holds, in order: the real part (I), the imaginary part (Q),
    the phase arctan2(Q, I) and the amplitude sqrt(Q**2 + I**2) of every
    sample in `complex_signals`.
    """
    I = np.real(complex_signals)
    Q = np.imag(complex_signals)
    features = np.zeros(complex_signals.shape + (N_FEATURES,))
    features[..., 0] = I
    features[..., 1] = Q
    features[..., 2] = np.arctan2(Q, I)
    features[..., 3] = np.sqrt(Q**2 + I**2)
    return features


# Each dataset is built by its own call. Telling train from test by comparing
# dataset lengths would leave X_test all zeros whenever both have the same length.
X_train = build_feature_tensor(complex_train)
X_test = build_feature_tensor(complex_test)

train_shape = X_train.shape
test_shape = X_test.shape

print("Training feature tensor has shape:", X_train.shape)
print("Testing feature tensor has shape:", X_test.shape)

### Define convolutional neural network architecture

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv1D, MaxPooling1D, Activation, Flatten, BatchNormalization

def cnn(INPUT_SIZE, N_CLASSES, n_conv_blocks=5, n_filters=64, kernel_size=3):
    """Build an uncompiled 1-D convolutional classifier.

    The network is `n_conv_blocks` repetitions of
    Conv1D -> SELU -> BatchNormalization -> MaxPooling1D(2), followed by
    Flatten, Dense(128, selu), Dropout(0.25), Dense(64, selu) and a softmax
    layer with `N_CLASSES` units.

    Parameters
    ----------
    INPUT_SIZE : sequence (n_samples, n_features); n_samples may be None
    N_CLASSES : number of output classes
    n_conv_blocks : number of conv/pool blocks (default 5)
    n_filters : filters per Conv1D layer (default 64)
    kernel_size : Conv1D kernel width (default 3)

    Returns
    -------
    The Sequential model; the caller is responsible for compiling it.

    Raises
    ------
    ValueError
        If `n_conv_blocks` is below 1, or the input is too short to leave at
        least one time step after all conv/pool blocks.
    """
    if n_conv_blocks < 1:
        raise ValueError("n_conv_blocks must be at least 1")

    # Each block shrinks the time axis to (length - kernel_size + 1) // 2
    # (unpadded convolution, then pooling by 2). Check this up front so a
    # too-small partition fails with a readable message.
    remaining_steps = INPUT_SIZE[0]
    if remaining_steps is not None:
        for block in range(n_conv_blocks):
            remaining_steps = (remaining_steps - kernel_size + 1) // 2
            if remaining_steps < 1:
                raise ValueError("Input length " + str(INPUT_SIZE[0]) +
                                 " is too short for " + str(n_conv_blocks) +
                                 " conv/pool blocks (collapses at block " + str(block + 1) + ")")

    model = Sequential()
    for block in range(n_conv_blocks):
        if block == 0:
            # Only the first layer of a Sequential model needs the input shape.
            model.add(Conv1D(n_filters, kernel_size, input_shape=INPUT_SIZE))
        else:
            model.add(Conv1D(n_filters, kernel_size))
        model.add(Activation("selu"))
        model.add(BatchNormalization())
        model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(128, activation="selu"))
    model.add(Dropout(0.25))
    model.add(Dense(64, activation="selu"))
    model.add(Dense(N_CLASSES, activation="softmax"))
    return model
    

### Train CNN model

In [None]:
from sklearn.model_selection import train_test_split
from keras.optimizers import Adam

import os

BOOTSTRAP = True
N_CLASSES = 10
N_EPOCHS = 30
BATCH_SIZE = 32
# Seed for drawing the bootstrap samples so the same resamples are used on every run.
BOOTSTRAP_SEED = 0

if BOOTSTRAP:
    N_BOOTSTRAPS = 10
    SAMPLE_SIZE = int(0.9*N_TRAIN_EX)
    # One row of indices per bootstrap, drawn with replacement from [0, N_TRAIN_EX).
    bootstrap_rng = np.random.default_rng(BOOTSTRAP_SEED)
    bootstrap_idxs = bootstrap_rng.integers(low=0, high=N_TRAIN_EX, size=(N_BOOTSTRAPS, SAMPLE_SIZE))
else:
    N_BOOTSTRAPS = 1
model_names = ["cnn"]
models = []
model_shapes = []
name = model_names[0]

# model.save does not create missing parent directories.
os.makedirs("./models", exist_ok=True)

for j in range(N_BOOTSTRAPS):
    if BOOTSTRAP:
        X_train_subset = X_train[bootstrap_idxs[j]]
        y_train_subset = y_train[bootstrap_idxs[j]]
    else:
        X_train_subset = X_train
        y_train_subset = y_train

    # NOTE(review): a bootstrap sample is drawn with replacement, so the same
    # example can land in both the training and the validation split below;
    # validation accuracy is therefore likely optimistic.
    X, X_val, y, y_val = train_test_split(X_train_subset, y_train_subset,
                                          test_size=0.05, shuffle=True, random_state=0)

    INPUT_SIZE = X.shape[1:]
    model_shapes.append(INPUT_SIZE)
    y_1H = keras.utils.to_categorical(y, num_classes=N_CLASSES)
    y_val_1H = keras.utils.to_categorical(y_val, num_classes=N_CLASSES)

    model = cnn(INPUT_SIZE, N_CLASSES)

    model.compile(loss="categorical_crossentropy", optimizer=Adam(learning_rate=0.0025), metrics=["accuracy"])
    model.fit(X, y_1H, epochs=N_EPOCHS, batch_size=BATCH_SIZE, validation_data=(X_val, y_val_1H))
    # NOTE(review): newer Keras releases may reject the ".hdf5" extension;
    # confirm against the installed version before changing the file name.
    model.save("./models/" + name + "_" + str(j) + ".hdf5")
    models.append(model)
    # summary() prints the table itself and returns None, so it is not wrapped in print().
    model.summary()


### Create ensemble of CNN models

In [None]:
from keras.models import Model
from keras.layers import average, Input

def ensemble_model(models, model_inputs):
    """Build a model that averages the outputs of `models`.

    Parameters
    ----------
    models : list of callable models; model k is applied to model_inputs[k]
    model_inputs : list of input tensors, one per model

    Returns
    -------
    A Model named "cnn-ensemble" whose inputs are `model_inputs` and whose
    output is the element-wise average of the individual model outputs
    (or the single model's output when only one model is given).

    Raises
    ------
    ValueError
        If `models` is empty or its length differs from `model_inputs`.
    """
    if len(models) == 0:
        raise ValueError("ensemble_model needs at least one model")
    if len(models) != len(model_inputs):
        raise ValueError("Got " + str(len(models)) + " models but " +
                         str(len(model_inputs)) + " inputs; they must match one-to-one")

    outputs = [model(model_input) for model, model_input in zip(models, model_inputs)]
    # Averaging one tensor is the identity; skipping the merge layer also avoids
    # depending on whether the installed Keras accepts a single-element list.
    avg = outputs[0] if len(outputs) == 1 else average(outputs)
    ensemble = Model(inputs=model_inputs, outputs=avg, name="cnn-ensemble")
    return ensemble

# One Input per trained model, named "<model name>_<index>" so each ensemble
# input has a distinct name.
repeated_model_names = np.repeat(model_names, len(models))
model_inputs = [Input(shape=model_shapes[i], name=repeated_model_names[i]+ "_" + str(i)) for i in range(len(repeated_model_names))]
ensemble = ensemble_model(models, model_inputs)
# summary() prints the table itself and returns None, so it is not wrapped in print().
ensemble.summary()
ensemble.save("./models/cnn-ensemble.hdf5")


### Make predictions on test data by aggregating predictions on partitions of original signal

In [None]:
model = ensemble
# The ensemble has one input per member model; every member sees the same test tensor.
preds = model.predict([X_test]*len(models))

# Rows n*N_PARTITIONS .. n*N_PARTITIONS + N_PARTITIONS - 1 of `preds` belong to
# test signal n, so a row-major reshape groups the partitions of one signal.
partition_probs = preds.reshape(N_TEST, N_PARTITIONS, preds.shape[1])

# Treat the partitions as independent and combine them by summing log
# probabilities. This ranks classes like the product of probabilities but
# cannot underflow to zero, and it leaves `preds` unmodified. Probabilities
# are floored at the smallest positive normal float to avoid log(0).
smallest_prob = np.finfo(partition_probs.dtype).tiny
log_scores = np.log(np.maximum(partition_probs, smallest_prob)).sum(axis=1)
y_preds = np.argmax(log_scores, axis=1)

# The header is written as the first data row, hence header=False below.
csv_data = [["Id", "Category"]]
csv_data.extend([str(i), int_to_label[int(y_preds[i])]] for i in range(y_preds.shape[0]))
csv_df = pd.DataFrame(csv_data)
csv_df.to_csv("cnn-ensemble.txt", index=False, sep=",", header=False)