In [62]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelBinarizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import seaborn as sn
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt
from mlxtend.plotting import plot_decision_regions
from scipy.stats import skew, kurtosis
from scipy.signal import welch
from scipy.signal import hilbert

import json 
import pandas as pd
import numpy as np
import os
import glob


### *variables*

In [208]:
# Number of statistical features produced for every window by extractFeatures().
num_features = 19

# Location of the decimated CSV capture(s). This may be a single .csv file or a
# directory that contains several .csv files.
data_dir = '/Volumes/DRIVE 128GB/iqSamples_Ruko_F11_Pro.csv'
if os.path.isfile(data_dir):
    # Joining a *file* path with "*.csv" can never match anything, so use the
    # file itself instead of globbing "inside" it.
    drone_csv_files = [data_dir]
else:
    # sorted() keeps the file order (and therefore the sample order) reproducible.
    drone_csv_files = sorted(glob.glob(os.path.join(data_dir, "*.csv")))
print(drone_csv_files)

chunk_size = 10_000 # how many rows to hold in memory at a time
every_n_rows = 1000 # keep one CSV row out of every `every_n_rows`

# Number of IQ samples per window (window_size=1 is 1 sample, i,q).
# Other values that have been tried: 1024, 2048, 8192, 16_384, 32_768
window_size = 4096

# Distance, in IQ samples, between the starts of two consecutive windows.
# step == window_size // 2 gives 50% overlap; the current value overlaps more.
# Other values that have been tried: 1024, 2048, 4096, 8192
step = 512

# Switches for skipping the (slow) training of either model.
skipSVC = False
skipRF = True

[]


### *functions*

In [111]:
# other 
def getLabelFromFilename(filename):
    """Derive the drone label from a capture's file name.

    The name is lower-cased and searched for a series of known substrings;
    the label belonging to the first substring that is found is returned.

    Parameters
    ----------
    filename : str
        File name or path of the capture.

    Returns
    -------
    str
        The matching label, or "unkown_label" when no substring matches.
    """
    lowered = filename.lower()

    # Order matters: earlier entries take precedence over later ones.
    substring_to_label = (
        ("phantom", "Phantom"),
        ("ruko_f11_pro", "Ruko_F11_Pro"),
        ("ruko_f11_base", "Ruko_F11_base"),
        ("mavic_air_2_", "Mavic_Air_2"),
        ("mavic_air_2s", "Mavic_Air_2S"),
        ("deerc", "DeerC_DE2"),
        ("mini_se", "Mini_SE"),
        ("holystone_hs100", "Holystone_HS100"),
        ("none", "None"),
    )

    for needle, drone_label in substring_to_label:
        if needle in lowered:
            return drone_label

    return "unkown_label"



def load_iq(f, chunk_size, every_n_rows):
    """Read a (potentially huge) IQ CSV file and keep every n-th row.

    The file is streamed in chunks so that files with 156M lines or more can
    be processed without loading them into memory at once. To reduce
    computation time only one row out of every `every_n_rows` is kept.

    The stride is counted over the whole file, not per chunk, so the kept
    rows are evenly spaced even when `chunk_size` is not a multiple of
    `every_n_rows`.

    Parameters
    ----------
    f : str or file-like
        CSV source accepted by `pandas.read_csv`; must have 'i' and 'q' columns.
    chunk_size : int
        Number of CSV rows to hold in memory at a time.
    every_n_rows : int
        Decimation factor: rows 0, n, 2n, ... of the file are kept.

    Returns
    -------
    numpy.ndarray
        Array of shape (N, 2) holding the kept (i, q) pairs. An array of
        shape (0, 2) is returned when the reader yields no chunks.
    """
    kept_chunks = []
    rows_seen = 0  # rows consumed by all previous chunks

    for chunk in pd.read_csv(f, chunksize=chunk_size):
        # Position inside this chunk of the next row whose file-wide index
        # is a multiple of every_n_rows.
        first = (-rows_seen) % every_n_rows
        kept_chunks.append(chunk.iloc[first::every_n_rows, :])
        rows_seen += len(chunk)

    if not kept_chunks:
        return np.empty((0, 2))

    # Concatenate once at the end; concatenating inside the loop copies the
    # accumulated frame on every iteration.
    result = pd.concat(kept_chunks, ignore_index=True)
    return result[['i', 'q']].to_numpy()



# metrics helper functions
def checkForFilename(base_name):
    """Return a ".txt" file name based on `base_name` that does not exist yet.

    "<base_name>.txt" is tried first; if it already exists, "<base_name>_1.txt",
    "<base_name>_2.txt", ... are tried in turn until a free name is found.

    Parameters
    ----------
    base_name : str
        File name (optionally with a directory) without extension.

    Returns
    -------
    str
        The first candidate name for which `os.path.exists` is False.
    """
    suffix = ".txt"
    candidate = base_name + suffix
    attempt = 0
    while os.path.exists(candidate):
        attempt += 1
        candidate = f"{base_name}_{attempt}{suffix}"
    return candidate



def saveMetricsToFile(base_name, perc_accuracy, model):
    """Write the metrics of a training run to a new text file.

    The file name is obtained from checkForFilename(base_name), so an
    existing report is never overwritten.

    Besides its arguments this function reads the notebook-level variables
    `window_size`, `step`, `every_n_rows`, `chunk_size`, `cr` (classification
    report) and `cm` (confusion matrix); they must be defined before it is
    called.

    Parameters
    ----------
    base_name : str
        File name without extension for the report.
    perc_accuracy : float
        Accuracy in percent.
    model : object
        Name (or any printable description) of the evaluated model.
    """
    filename = checkForFilename(base_name)
    with open(filename, "w") as f:
        # The newline keeps "Model" and "Accuracy" on separate lines.
        f.write(f"Model: {model}\n")
        f.write(f"Accuracy: {perc_accuracy:.2f}%\n\n")
        f.write("The important vars:\n")
        f.write(f"window_size: {window_size}\n")
        f.write(f"step: {step}\n")
        f.write(f"every n rows: {every_n_rows}\n")
        f.write(f"chunk size: {chunk_size}\n\n")
        f.write("Classification Report:\n")
        f.write(f"{cr}\n\n")
        f.write("Confusion Matrix:\n")
        f.write(f"{cm}\n")





In [None]:
# for numpy conversion 

def extractFeatures(iq_window):
    """Compute the 19 statistical features of one window of samples.

    Parameters
    ----------
    iq_window : array-like
        1-D window of samples. Both complex IQ samples and real-valued
        samples are accepted; real integer input is promoted to float64 so
        that taking the magnitude cannot overflow (abs(-32768) does not fit
        into int16).

    Returns
    -------
    numpy.ndarray
        Vector of length 19, in this order: amplitude mean, std, skewness,
        kurtosis, max, min, 25th/50th/75th percentile; PSD mean, std, max,
        min; spectral centroid, bandwidth, flatness; envelope mean, envelope
        std; standard deviation of the instantaneous frequency.
    """
    iq_window = np.asarray(iq_window)
    is_complex = np.iscomplexobj(iq_window)
    if not is_complex:
        iq_window = iq_window.astype(np.float64)

    # Magnitude is used by most features below, so compute it once.
    magnitude = np.abs(iq_window)

    # FFT-based PSD; nperseg is capped at the window length (scipy would do
    # the same, but with a warning).
    f, Pxx = welch(iq_window, nperseg=min(1024, iq_window.shape[-1]))
    # Spectral centroid / bandwidth / flatness:
    spectral_centroid = np.sum(f*Pxx)/np.sum(Pxx)
    spectral_bandwidth = np.sqrt(np.sum(Pxx*(f-spectral_centroid)**2)/np.sum(Pxx))
    spectral_flatness = np.exp(np.mean(np.log(Pxx+1e-12)))/np.mean(Pxx+1e-12)

    # Envelope statistics (instantaneous amplitude). scipy's hilbert() only
    # accepts real input; for complex samples the magnitude is used directly.
    envelope = magnitude if is_complex else np.abs(hilbert(iq_window))
    env_mean = np.mean(envelope)
    env_std  = np.std(envelope)

    # Instantaneous frequency variance (phase derivative). The phase is
    # unwrapped first so that the +/-pi wrap-around of np.angle does not show
    # up as artificial 2*pi jumps in the derivative.
    phase = np.unwrap(np.angle(iq_window))
    inst_freq = np.diff(phase)
    freq_std = np.std(inst_freq)

    sample = [
        np.mean(magnitude),                # mean amplitude
        np.std(magnitude),                 # amplitude std
        skew(magnitude),                   # skewness
        kurtosis(magnitude),               # kurtosis
        np.max(magnitude),                 # max
        np.min(magnitude),                 # min
        np.percentile(magnitude, 25),
        np.percentile(magnitude, 50),
        np.percentile(magnitude, 75),
        # np.corrcoef(np.real(iq_window), np.imag(iq_window))[0,1],  # I/Q correlation
        np.mean(Pxx), np.std(Pxx), np.max(Pxx), np.min(Pxx),      # PSD stats
        spectral_centroid, spectral_bandwidth, spectral_flatness, # spectral
        env_mean, env_std, freq_std                                  # envelope / freq
    ]

    return np.array(sample)




def runConversion(meta_file):
    """Load the samples of one SigMF recording into a numpy array.

    Parameters
    ----------
    meta_file : str
        Path of the recording's ".sigmf-meta" file. The sample file is
        expected next to it, with the ".sigmf-data" extension.

    Returns
    -------
    numpy.ndarray or None
        The raw content of the data file, read with the dtype derived from
        the "core:datatype" entry of the metadata (np.int16 when the datatype
        is not in the table below). None is returned when the metadata file
        is not valid JSON.
    """
    meta_suffix = ".sigmf-meta"
    if meta_file.endswith(meta_suffix):
        # Only swap the extension; a plain replace("meta", "data") would also
        # rewrite any other "meta" occurring in the directory or file name.
        data_file = meta_file[:-len(meta_suffix)] + ".sigmf-data"
    else:
        data_file = meta_file.replace("meta", "data")
    label = getLabelFromFilename(meta_file)

    print(f"Label: {label}")
    # Computed outside the f-string: nesting the same quote type inside an
    # f-string is a syntax error before Python 3.12.
    recording_name = meta_file.replace(".sigmf-meta", "")
    print(f"Working on: \n{recording_name}")

    try:
        with open(meta_file, 'r') as f:
            meta = json.load(f)
    except json.JSONDecodeError as err:
        # Report the actual parse error (position and reason), not the class.
        print(f"Error: {err}")
        return None

    # The complex integer types ("ci*") are mapped to plain integer dtypes,
    # so for those the returned array holds one element per I or Q value.
    dtype_map = {
        "ri8_le":  np.int8,
        "ri16_le": np.int16,
        "ri32_le": np.int32,
        "rf32_le": np.float32,
        "cf32_le": np.complex64,
        "ci8_le":  np.int8,
        "ci16_le": np.int16,
        "ci32_le": np.int32,
    }
    dtype = dtype_map.get(meta["global"]["core:datatype"], np.int16)

    iq = np.fromfile(data_file, dtype=dtype)

    print(f"Converted {len(iq):,} samples to numpy array")
    print(f"After conversion, one IQ sample: {iq[:2]}")

    return iq




def readInBinDir(dir):
    """Build the feature matrix for every SigMF recording in a directory.

    Each "*.sigmf-meta" file in `dir` is loaded with runConversion(), cut into
    consecutive, non-overlapping windows of `window_size` IQ samples, and each
    window is reduced to `num_features` values by extractFeatures().

    Parameters
    ----------
    dir : str
        Directory that holds the recordings.

    Returns
    -------
    numpy.ndarray
        Array of shape (total_windows, num_features); the rows of the files
        follow each other in sorted file-name order. The shape is
        (0, num_features) when no window could be extracted.
    """
    # sorted() makes the row order independent of the file system's listing order.
    drone_bin_files = sorted(glob.glob(os.path.join(dir, "*.sigmf-meta")))
    iq_files = []

    # first convert the files into numpy arrays
    for file in drone_bin_files:
        print("--------------------------------------------")
        iq_samples = runConversion(file)
        if iq_samples is None:
            # runConversion() already reported why this file could not be read.
            continue
        iq_files.append(iq_samples)

    # then create a numpy array of the statistical features from the files
    # the number of IQ samples to consider for each derived sample is the window_size
    # i.e. a window_size of 12 means 12 IQ samples will be used in one derived sample
    per_file_features = []
    for iq in iq_files:
        # A complex array stores one IQ sample per element; any other array is
        # treated as interleaved I, Q values, i.e. two elements per IQ sample.
        values_per_sample = 1 if np.iscomplexobj(iq) else 2
        values_per_window = window_size * values_per_sample
        n_samps_in_file = iq.size // values_per_window

        print("--------------------------------------------")
        drvd_samps = np.empty((n_samps_in_file, num_features))
        for j in range(n_samps_in_file):
            # Window j starts j whole windows into the file.
            start = j * values_per_window
            end = start + values_per_window
            drvd_samps[j, :] = extractFeatures(iq[start:end])
        print(f"Shape of samples: {drvd_samps.shape}")
        # print(f"One Sample:\n{drvd_samps[:1]}")

        print(f"Rows in file:{len(drvd_samps)}")
        per_file_features.append(drvd_samps)

    # Stack once all per-file blocks are known; this also works when the
    # files yield different numbers of windows.
    if per_file_features:
        derived_samples = np.vstack(per_file_features)
    else:
        derived_samples = np.empty((0, num_features))

    print("--------------------------------------------")
    print(f"Shape of ALL samples: {derived_samples.shape}")
    print(f"One ALL Sample:\n{derived_samples[:1]}")

    return derived_samples

## Loading Dataset

### *Reading in files*

In [209]:
# Directory that is searched for "*.sigmf-meta" recordings by readInBinDir().
# NOTE(review): this rebinds `data_dir`, which the variables cell set to a
# different path — confirm that overwriting it here is intended.
data_dir = '/Volumes/DRIVE 128GB/'
# Convert every recording found in that directory and extract its features.
iq_complex = readInBinDir(data_dir)


--------------------------------------------
Label: Mavic_Air_2
Working on: 
/Volumes/DRIVE 128GB/2025-10-15-20-59-04_mavic_air_2_5735MHz_chan_0
Converted 31,250,000 samples to numpy array
After conversion, one IQ sample: [-2357  8992]
--------------------------------------------
Label: Holystone_HS100
Working on: 
/Volumes/DRIVE 128GB/2025-10-16-22-16-49_holystone_hs100_500ms_5300MHz_chan_0
Converted 31,250,000 samples to numpy array
After conversion, one IQ sample: [-630 -945]
--------------------------------------------
Label: DeerC_DE2
Working on: 
/Volumes/DRIVE 128GB/2025-10-16-23-27-01_deerc_de22_remote_500ms_2455MHz_chan_0
Converted 31,250,000 samples to numpy array
After conversion, one IQ sample: [ 0 -5]
--------------------------------------------
Shape of samples: (3814, 19)
Rows in file:3814
--------------------------------------------
Shape of samples: (3814, 19)
Rows in file:3814
--------------------------------------------
Shape of samples: (3814, 19)
Rows in file:3814


In [None]:
segments = []
labels = []

# Cut the decimated IQ samples of every CSV file into overlapping windows.
# Each window holds `window_size` (i, q) pairs, flattened to one feature row,
# and is labelled with the drone derived from the file name.
for file in drone_csv_files:
    print("File: ", file)
    label = getLabelFromFilename(file)
    iq = load_iq(file, chunk_size, every_n_rows)  # shape (N, 2)
    # "+ 1" so that the last full window is included as well: a file with
    # exactly window_size rows must still yield one window.
    for i in range(0, len(iq) - window_size + 1, step):
        window = iq[i:i+window_size].flatten()
        segments.append(window)
        labels.append(label)

X = np.array(segments)
y = np.array(labels)

### *Displaying things*

In [36]:
# Show shape and content of the feature rows and of their labels,
# one item per output line.
print(
    "Segments:",
    f"Shape:  {X.shape}",
    X,
    "-------------------------------------------------------------------",
    "Labels:",
    f"Shape:  {y.shape}",
    y,
    sep="\n",
)

Segments:
Shape:  (0,)
[]
-------------------------------------------------------------------
Labels:
Shape:  (0,)
[]


In [37]:
# print("labels: ")
# print(np.unique(y))
# print()

# Show one example window for every label. The window is looked up through
# the label array so that the printed sample really belongs to the printed
# label (zipping X with the unique labels would merely pair the first few
# rows of X with the sorted label names).
for label in np.unique(y):
    sample = X[np.flatnonzero(y == label)[0]]  # first window with this label
    print("X feature:\n", sample)
    print("IQ samples in X feature: ", sample.size // 2)
    print("y label: ", label)
    print('------------------------------------------------------')

## Random Forest Classifier

### *Training, fitting & predicting*

**TODO:** I need to figure out how to use the random forest classifier for this.

In [None]:
# Labeling 

# NOTE(review): `airlines` is not defined in the visible part of this
# notebook — confirm whether this cell is a leftover from another analysis.
labelBinary = LabelBinarizer()

# Binarize each categorical column in place, re-fitting the binarizer per
# column. According to the original notes the encoding is:
#   Gender     == 1 if Male
#   Customer   == 1 if Loyal
#   TravelType == 1 if Personal
#   Class      == 1 if Business
for column in ("Gender", "Customer", "TravelType", "Class"):
    airlines[column] = labelBinary.fit_transform(airlines[column])

In [None]:
# Fit

# Hold out 20% of the windows so that the metrics are measured on data the
# model has not seen. The fixed seed makes split and forest reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, np.ravel(y), test_size=0.2, random_state=42
)

rfModel = RandomForestClassifier(random_state=42)
rfModel.fit(X_train, y_train)

# Predictions on the held-out windows, evaluated in the metrics cell.
y_pred = rfModel.predict(X_test)


### *Metrics*

In [None]:
# Overall accuracy of the predictions on the held-out set.
accuracy = accuracy_score(y_test, y_pred)
perc_accuracy = accuracy * 100
print(f"Accuracy: {perc_accuracy:.2f}%")
print()
print("The important vars: ")
print("window_size: ", window_size)
print("step: ", step)
print("every n rows: ", every_n_rows)
print("chunk size: ", chunk_size)
print()

# saveMetricsToFile() reads `cr` and `cm` from the notebook namespace, so
# both have to be computed before it is called.
cr = classification_report(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
print(cr)
print(cm)

# saveMetricsToFile() takes the model name as its third argument.
# NOTE(review): file name and model name say SVC although this cell sits in
# the Random Forest section — confirm which model produced `y_pred`.
saveMetricsToFile('svc_metrics', perc_accuracy, "SVC")

In [None]:
# Create a heatmap of the confusion matrix

# The rows/columns of `cm` follow the sorted labels that occur in y_test or
# y_pred, so the tick labels are derived from exactly those arrays. Labels
# taken from the full data set could be shifted against the matrix when a
# class is missing from the evaluated samples.
class_names = np.unique(np.concatenate([np.ravel(y_test), np.ravel(y_pred)]))

fig, ax = plt.subplots(figsize=(7, 7))
sn.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)

# Labels and title
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title('Confusion Matrix Heatmap for SVC Predictions')

plt.show()
