In [1]:
import numpy as np
import pandas as pd
import h5py 
import torch
import torch.nn as nn
import gc
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from tqdm import tqdm


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


Using device: cuda


In [3]:
classes = ['OOK', '4ASK', '8ASK', 'BPSK', 'QPSK', '8PSK', '16PSK', '32PSK',
                           '16APSK', '32APSK', '64APSK', '128APSK', '16QAM', '32QAM', '64QAM',
                           '128QAM', '256QAM', 'AM-SSB-WC', 'AM-SSB-SC', 'AM-DSB-WC', 'AM-DSB-SC',
                           'FM', 'GMSK', 'OQPSK']

digital_modulation = ['OOK', '4ASK', '8ASK', 'BPSK', 'QPSK', '8PSK', '16PSK', '32PSK', '16APSK',
                              '32APSK', '64APSK', '128APSK', '16QAM', '32QAM', '64QAM', '128QAM', '256QAM']

digital_modulation_id = [classes.index(cls) for cls in digital_modulation]

N_SNR = 10 

In [4]:
def get_data(ids, labels):
    # Define dataset constants
    SAMPLES_PER_SNR = 4096
    NUM_SNR_LEVELS = 26
    TOTAL_SAMPLES_PER_CLASS = SAMPLES_PER_SNR * NUM_SNR_LEVELS  # 4096*26=106496

    X_data = []
    y_data = []

    with h5py.File("/home/lipplopp/Documents/research/notebook/notebook_1/dataset/radioml2018/versions/2/GOLD_XYZ_OSC.0001_1024.hdf5", "r") as file:
        for class_id in tqdm(ids):
            # Calculate slice indices
            start_idx = TOTAL_SAMPLES_PER_CLASS * class_id
            end_idx = TOTAL_SAMPLES_PER_CLASS * (class_id + 1)
            
            # Load data
            X_slice = file['X'][start_idx:end_idx]  # Shape: (106496, 1024, 2)
            y_slice = file['Y'][start_idx:end_idx]  # Shape: (106496, 24)
            
            # Process labels
            y_labels = y_slice.argmax(axis=1)  # Convert one-hot to class indices
            
            X_data.append(X_slice)
            y_data.append(y_labels)

    # Combine all selected classes
    X_data = np.concatenate(X_data, axis=0)
    y_data = np.concatenate(y_data, axis=0)

    return X_data, y_data

In [6]:
# Define your modulation classes (24 total in RadioML 2018)
selected_classes = [0, 1, 2]  # Example: BPSK, QPSK, 8PSK
classes_labels = ["BPSK", "QPSK", "8PSK"]

# Load data
X, y = get_data(ids=selected_classes, labels=classes_labels)

# Verify shapes
print(f"Input shape: {X.shape}")  # Should be (319488, 1024, 2) for 3 classes
print(f"Labels shape: {y.shape}")  # Should be (319488,)

100%|███████| 3/3 [00:06<00:00,  2.16s/it]


Input shape: (319488, 1024, 2)
Labels shape: (319488,)


In [26]:
# Convert class indices to one-hot encoded DataFrame
Y_fsk = pd.get_dummies(pd.DataFrame(y, columns=['class'])).astype(int)

# Verify the number of columns (should be 3 for classes 0,1,2)
print("Columns before renaming:", Y_fsk.columns.tolist())  # Should see ['class_0', 'class_1', 'class_2']

# # Rename columns to match actual classes (OOK, 4ASK, 8ASK)
Y_fsk.columns = ['16APSK', '32APSK', '64APSK']

# # Verify final shape
# print("Final shape:", Y_fsk.shape)

Columns before renaming: ['class']


ValueError: Length mismatch: Expected axis has 1 elements, new values have 3 elements