In [1]:
import ast

import numpy as np
import pandas as pd


In [2]:
aps_cols = ["ap_from_ap_max_rssi", "ap_from_ap_mean_rssi"]

In [3]:
def resolve_nav_from_aps(row):
    # count how many values in "ap_from_ap_mean_rssi" > "nav"
    aps_mean = np.array(ast.literal_eval(row[(aps_cols[1], "org")]))
    nav_value = pd.to_numeric(row[("nav", "_")], errors="coerce")
    nav_percent = np.mean(aps_mean > nav_value)
    return nav_percent


def resolve_in_PD_ED_from_aps(row):
    aps_max = np.array(ast.literal_eval(row[(aps_cols[0], "org")]))
    in_PD_ED = sum((aps_max >= row[("pd", "_")]) & (aps_max <= row[("ed", "_")])) / len(
        aps_max
    )
    return in_PD_ED


def resolve_larger_ED_from_aps(row):
    aps_max = np.array(ast.literal_eval(row[(aps_cols[0], "org")]))
    larger_ED = sum(aps_max > row[("ed", "_")]) / len(aps_max)
    return larger_ED


In [4]:
df_2ap = pd.read_csv("./df_2ap_1.csv", header=[0, 1], index_col=None)
len(df_2ap)

392

In [5]:
df_2ap[(aps_cols[1], "nav_percent")] = df_2ap.apply(resolve_nav_from_aps, axis=1)
df_2ap[(aps_cols[0], "in_pd_ed")] = df_2ap.apply(resolve_in_PD_ED_from_aps, axis=1)
df_2ap[(aps_cols[0], "larger_ed")] = df_2ap.apply(resolve_larger_ED_from_aps, axis=1)

In [6]:
cols_to_check = [
    (aps_cols[1], "nav_percent"),
    (aps_cols[0], "in_pd_ed"),
    (aps_cols[0], "larger_ed"),
]
df_2ap[cols_to_check].head(10)

Unnamed: 0_level_0,ap_from_ap_mean_rssi,ap_from_ap_max_rssi,ap_from_ap_max_rssi
Unnamed: 0_level_1,nav_percent,in_pd_ed,larger_ed
0,0.0,0.0,0.0
1,0.0,0.0,0.0
2,0.0,0.0,0.0
3,0.0,0.0,0.0
4,0.0,0.0,0.0
5,0.0,0.061224,0.0
6,0.0,0.0,0.0
7,0.0,0.061224,0.0
8,0.0,0.0,0.0
9,0.0,0.04,0.0


In [7]:
len(df_2ap.columns)

243

In [8]:
# (*rssi, !org)
# (!*rssi, _)

# 提取与 "*rssi, !org" 相关的列 (一级索引包含 'rssi' 且二级索引不等于 'org')
rssi_not_org_cols = [
    col
    for col in df_2ap.columns
    if "rssi" in col[0] and col[1] != "org" and col[1] != "_"
]

# 提取与 "!*rssi, _" 相关的列 (一级索引不包含 'rssi' 且二级索引等于 '_')
not_rssi_underscore_cols = [
    col for col in df_2ap.columns if "rssi" not in col[0] and col[1] == "_"
]

# 创建新的 DataFrame 包含提取出的列
df_rssi_not_org = df_2ap[rssi_not_org_cols]
df_not_rssi_underscore = df_2ap[not_rssi_underscore_cols]

In [9]:
cols_to_use = [
    # ("test_id", "_"),
    # ("test_dur", "_"),
    ("loc_id", "_"),
    ("protocol", "_"),
    # ("pkt_len", "_"),
    ("bss_id", "_"),
    # ("ap_name", "_"),
    ("ap_mac", "_"),
    # ("ap_id", "_"),
    ("pd", "_"),
    ("ed", "_"),
    ("nav", "_"),
    ("eirp", "_"),
    ("sta_mac", "_"),
    # ("sta_id", "_"),
    ("seq_time", "_"),
]

catagory_cols = [
    ("loc_id", "_"),
    ("protocol", "_"),
    ("bss_id", "_"),
    ("ap_mac", "_"),
    ("sta_mac", "_"),
]

In [10]:
df_2ap = pd.concat([df_not_rssi_underscore[cols_to_use], df_rssi_not_org], axis=1)

# # deal with catagory columns
# for col in catagory_cols:
#     df_2ap[col] = df_2ap[col].astype("category")

In [11]:
df_dummies = pd.get_dummies(df_2ap[catagory_cols])
new_cols = [(col, "_") for col in df_dummies.columns]
# new col names (org_name, _)
df_dummies.columns = pd.MultiIndex.from_tuples(new_cols)

In [12]:
df_2ap = pd.concat([df_2ap, df_dummies], axis=1)
df_2ap.drop(columns=catagory_cols, inplace=True)

In [13]:
df_2ap.columns

MultiIndex([(                            'pd',      '_'),
            (                            'ed',      '_'),
            (                           'nav',      '_'),
            (                          'eirp',      '_'),
            (                      'seq_time',      '_'),
            (      'sta_to_ap_0_max_ant_rssi', 'length'),
            (      'sta_to_ap_0_max_ant_rssi',    'max'),
            (      'sta_to_ap_0_max_ant_rssi',    'min'),
            (      'sta_to_ap_0_max_ant_rssi', 'median'),
            (      'sta_to_ap_0_max_ant_rssi',  'range'),
            ...
            (          '('loc_id', '_')_loc1',      '_'),
            (          '('loc_id', '_')_loc2',      '_'),
            (         '('protocol', '_')_tcp',      '_'),
            (         '('protocol', '_')_udp',      '_'),
            ('('ap_mac', '_')_6c14-6ef5-9510',      '_'),
            ('('ap_mac', '_')_8c68-3a11-e370',      '_'),
            (         '('sta_mac', '_')_471f',      '_')

In [14]:
# save
df_2ap.to_csv("./df_2ap_final.csv", index=False)