In [4]:
# imports

import pandas as pd
import numpy as np
import os
from tqdm import tqdm
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    classification_report,
    precision_recall_fscore_support,
)

In [3]:
def load_data_to_df(file : str) -> pd.DataFrame:
    """
    Load data from file to a pandas dataframe
    and compute the error metrics
    
    Args:
    file : str : file path
    
    Returns:
    df : pd.DataFrame : dataframe with the data
    """
    

    data = np.fromfile(file, dtype=np.float32)

    data = data.reshape(data.shape[0]//2, 2)

    df = pd.DataFrame(data, columns=["Z_I", "Z_Q"])

    df["R_I"] = np.where(df["Z_I"] > 0, 1, -1)

    df["R_phase"] = np.where(df["R_I"] > 0, 0, np.pi)

    df["Z_mag"] = np.sqrt(df["Z_I"]**2 + df["Z_Q"]**2)

    df["Z_phase"] = np.arctan2(df["Z_Q"], df["Z_I"])

    df["magnitude_error"] = 1 - df["Z_mag"]

    df["E_I"] = df["R_I"] - df["Z_I"]

    df["E_mag"] = np.sqrt(df["E_I"] ** 2 + df["Z_Q"] ** 2)

    df["E_phase"] = np.arctan2(df["Z_Q"], df["E_I"])

    df["phase_error"] = (df["R_phase"] - df["Z_phase"])

    return df


def create_packets(df : pd.DataFrame, packet_size : int) -> pd.DataFrame:
    """

    Create packets of data from the dataframe
    
    Args:
    df : pd.DataFrame : dataframe with the data
    packet_size : int : size of the packets
    
    Returns:
    df_packets : pd.DataFrame : dataframe with the packets
    """

    df_packets = df.groupby(df.index // packet_size).mean()
    df_packets["IQ_offset"] = df.groupby(df.index // packet_size)[["Z_I", "Z_Q"]].apply(
        lambda x: np.mean(np.sqrt(x.Z_I ** 2 + x.Z_Q ** 2))
    )
    df_packets["error_vector_magnitude"] = df.groupby(df.index // packet_size)[
        "magnitude_error"
    ].apply(lambda x: np.sqrt(np.mean(x**2)))
    df_packets["magnitude_error_percentage"] = df.groupby(df.index // packet_size)[["E_mag", "Z_mag"]].apply(lambda x: np.sqrt(np.mean((np.diff(x))**2)))
    return df_packets


def create_packets_file(files : list[str], 
                        packet_size : int, 
                        protocol,
                        return_df : bool = False) -> pd.DataFrame:
    """
    Create packets of data from the file
    
    Args:
    file : str : file path
    packet_size : int : size of the packets
    return_df : bool : return the dataframe
    
    Returns:
    df_packets : pd.DataFrame : dataframe with the packets
    """

    df_packets = pd.DataFrame()
    for i, file in enumerate(files):
        if not file.endswith("iq"):
            continue
        data = load_data_to_df(f"data\\{file}")
        df_packets_current = create_packets(data, packet_size)
        df_packets_current["label"] = pd.Series(
            [i for _ in range(df_packets_current.shape[0])], dtype=np.uint8
        )
        df_packets = pd.concat([df_packets, df_packets_current])
    df_packets.to_parquet(f"data\\packets_{packet_size}_{protocol}.parquet", index=False)
    if return_df:
        return df_packets

## Create packets for each communication protocol

In [None]:

packet_sizes = {
    "zigbee": 127 * 8,
    "bluetooth": 258 * 8,
    "wifi": 2304 * 8,
}
files = os.listdir("data")
for key, value in tqdm(packet_sizes.items()):
    print(key,value)
    df = create_packets_file(files, value, key, False)

FileNotFoundError: [WinError 3] Het systeem kan het opgegeven pad niet vinden: 'data'

## Binary classification models

In [None]:
PROTOCOL = "zigbee"
df_packets = pd.read_parquet(f"data\\packets_{PROTOCOL}_{packet_sizes[PROTOCOL]}.parquet")
while df_packets.label.max() > 5:
    df_packets["label"] = df_packets["label"] // 2
X = df_packets[["Z_I", "Z_Q"]]
f1s = []
for i in range(6):
    model = svm.LinearSVC(class_weight="balanced")
    y_current = np.where(df_packets["label"] == i, 1, 0)
    X_train, X_test, y_train, y_test = train_test_split(X, y_current, test_size=0.2)

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    f1s.append(precision_recall_fscore_support(y_test, y_pred, average="binary")[2])
    print(precision_recall_fscore_support(y_test, y_pred, average="binary"))

print(f1s)

## Multi classification model

In [None]:
model = svm.LinearSVC(class_weight="balanced")
df_packets = pd.read_parquet(
    f"data\\packets_{PROTOCOL}_{packet_sizes[PROTOCOL]}.parquet"
)
while df_packets.label.max() > 5:
    df_packets["label"] = df_packets["label"] // 2


X = df_packets[["Z_I", "Z_Q"]]

y = df_packets["label"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(accuracy_score(y_test, y_pred))
print(precision_recall_fscore_support(y_test, y_pred, average="macro"))

Unnamed: 0,Z_I,Z_Q,R_I,R_phase,Z_mag,Z_phase,magnitude_error,E_I,E_mag,E_phase,phase_error,IQ_offset,error_vector_magnitude,magnitude_error_percentage,label
0,0.175127,0.000477,0.125984,1.372901,0.713927,0.320129,0.286073,-0.049143,0.580526,0.315043,1.052772,0.666011,0.677709,0.958391,0
1,-0.025455,0.010428,-0.019685,1.601718,1.187768,0.717486,-0.187768,0.00577,0.224387,0.243741,0.884231,0.847188,0.244779,0.968432,0
2,0.02199,-0.007388,0.015748,1.546059,1.170826,-0.080688,-0.170826,-0.006242,0.209448,-0.372432,1.626748,0.835188,0.231141,0.966953,0
3,-0.019371,-0.014152,-0.01378,1.592441,1.114924,-0.263023,-0.114924,0.005592,0.167616,-0.644827,1.855465,0.795184,0.186511,0.95519,0
4,-9.1e-05,-0.000261,0.009843,1.555336,1.12206,0.24386,-0.12206,0.009934,0.172834,-0.171475,1.311476,0.800602,0.194443,0.956731,0
