In [17]:
from pathlib import Path

import pandas as pd
import xgboost as xgb

from modeling_old import clear_column_names, regressor

In [18]:
def load_data(path="./df_2ap_final.csv"):
    """Read a two-level-header CSV and split it into features and target.

    The file is parsed with its first two rows as the column header and no
    index column. The ``("throughput", "_")`` column is returned as the
    target; every other column goes into the feature frame, whose column
    labels are replaced by whatever ``clear_column_names`` returns for it.

    Args:
        path: Location of the CSV file to read.

    Returns:
        A ``(X, y)`` tuple: the feature DataFrame and the target column.
    """
    target_key = ("throughput", "_")
    frame = pd.read_csv(path, header=[0, 1], index_col=None)

    features = frame.drop(columns=[target_key])
    target = frame[target_key]

    # Swap the two-level labels for the names produced by the project helper.
    features.columns = clear_column_names(features)

    return features, target

In [19]:
# The training split comes from load_data's default path; the second file is
# the held-out split. Display both feature-matrix shapes as a sanity check.
X, y = load_data()
X_test, y_test = load_data(path="./df_1_2ap_test_final.csv")
(X.shape, X_test.shape)

((392, 227), (80, 218))

In [20]:
def align_train_test(X_train, X_test):
    """Return a copy of ``X_test`` whose columns match ``X_train`` exactly.

    Columns present in ``X_train`` but absent from ``X_test`` are reported on
    stdout and added filled with ``False``; columns that exist only in
    ``X_test`` are dropped; the result follows ``X_train``'s column order.
    The caller's ``X_test`` is left untouched.

    Args:
        X_train: DataFrame whose columns define the target layout.
        X_test: DataFrame to conform to that layout.

    Returns:
        A new DataFrame with the columns of ``X_train``, in the same order.
    """
    missing_cols = set(X_train.columns) - set(X_test.columns)
    print(f"Missing columns in test set: {missing_cols}")
    # A single reindex adds the absent columns, drops the extra ones and
    # reorders in one step, without inserting columns into the caller's frame.
    # NOTE(review): False as the fill presumably means "indicator not set"
    # for one-hot style columns — confirm it is right for any numeric feature.
    return X_test.reindex(columns=X_train.columns, fill_value=False)


# Conform the held-out features to the training column layout, then display
# both shapes to check that the column counts now agree.
X_test = align_train_test(X_train=X, X_test=X_test)
(X.shape, X_test.shape)

Missing columns in test set: {'mcs_nss_4_1', 'mcs_nss_0_0', 'mcs_nss_5_2', 'mcs_nss_3_2', 'mcs_nss_0_1', 'mcs_nss_6_1', 'mcs_nss_7_1', 'mcs_nss_4_2', 'mcs_nss_5_1'}


((392, 227), (80, 227))

In [21]:
# Inspect the target column loaded from the held-out file. Every row shown in
# the output is NaN, so presumably this split ships without labels and cannot
# be scored in this notebook — TODO confirm.
y_test

0    NaN
1    NaN
2    NaN
3    NaN
4    NaN
      ..
75   NaN
76   NaN
77   NaN
78   NaN
79   NaN
Name: (throughput, _), Length: 80, dtype: float64

In [22]:
# Run the project's `regressor` helper (imported from modeling_old) on the
# training data with the "xgboost" option. Its internals are not visible in
# this notebook; the output shows feature importances, train/test MSE lists
# and SHAP values.
# NOTE(review): the printed test MSEs are several times larger than the train
# MSEs — looks like overfitting; confirm before relying on the final model.
regressor(X, y, "xgboost")


Feature Importances:
                                        Feature  Importance
193                 ap_from_ap_mean_rssi_median    0.203347
2    sta_to_ap_0_max_ant_rssi_in_nav_ed_percent    0.129069
7               sta_to_ap_0_max_ant_rssi_median    0.093934
43   sta_to_ap_1_max_ant_rssi_in_nav_ed_percent    0.086850
51                sta_to_ap_1_max_ant_rssi_mean    0.078177
..                                          ...         ...
135              sta_from_ap_1_max_ant_rssi_var    0.000000
134             sta_from_ap_1_max_ant_rssi_mean    0.000000
133              sta_from_ap_1_max_ant_rssi_iqr    0.000000
132            sta_from_ap_1_max_ant_rssi_range    0.000000
70                sta_to_ap_1_mean_ant_rssi_iqr    0.000000

[227 rows x 2 columns]
{'mse': {'train': [31.97971136952882, 40.38638852857926, 42.77703684815657, 31.931505912149493, 37.37553824119215], 'test': [265.01134361508224, 188.84563038657015, 132.14596780303182, 293.99242033921456, 336.90929868709685]}, 'r2': {

{'shap_values':                                           Feature  SHAP Value  SHAP Sign
 60   sta_to_ap_1_max_ant_rssi_approximate_entropy   -0.585431       -1.0
 2      sta_to_ap_0_max_ant_rssi_in_nav_ed_percent    0.501562        1.0
 10                  sta_to_ap_0_max_ant_rssi_mean    0.490537        1.0
 43     sta_to_ap_1_max_ant_rssi_in_nav_ed_percent   -0.397119       -1.0
 215                                  mcs_nss_11_2    0.396497        1.0
 ..                                            ...         ...        ...
 127      sta_from_ap_1_max_ant_rssi_la_ed_percent    0.000000        0.0
 123  sta_from_ap_0_mean_ant_rssi_grouping_entropy    0.000000        0.0
 154               sta_from_ap_1_mean_ant_rssi_iqr    0.000000        0.0
 152            sta_from_ap_1_mean_ant_rssi_median    0.000000        0.0
 169             ap_from_ap_max_rssi_la_ed_percent    0.000000        0.0
 
 [227 rows x 3 columns],
 'feature_importances':                                         Featur

In [23]:
# Fit the final model on the full training frame and predict the held-out
# frame. `xgb` is imported in the notebook's top import cell so that every
# dependency is visible in one place.
model = xgb.XGBRegressor(
    objective="reg:squarederror",
    n_estimators=100,
    max_depth=3,
    learning_rate=0.1,
    random_state=42,  # fixed seed so repeated runs give the same fit
)

model.fit(X, y)
y_test_pred = model.predict(X_test)

In [24]:
# save predictions
# to_csv does not create missing folders, so make sure the output directory
# exists before writing (a fresh checkout may not have ./results yet).
pred_path = Path("./results/y_1_2ap_test_pred.csv")
pred_path.parent.mkdir(parents=True, exist_ok=True)

# Wrap the raw predictions in a named Series so the CSV gets a "throughput"
# header; the index is not written.
y_test_pred = pd.Series(y_test_pred, name="throughput")
y_test_pred.to_csv(pred_path, index=False)