In [4]:
import numpy as np
import pandas as pd
from sklearn.discriminant_analysis import (
    LinearDiscriminantAnalysis,
    QuadraticDiscriminantAnalysis,
)
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.feature_selection import (
    RFE,
    SelectFromModel,
    SelectKBest,
    SequentialFeatureSelector,
    f_classif,
    mutual_info_classif,
)
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from utils import get_data, get_param_combinations, get_params_json, save_results

In [2]:
# Fetch the dataset through the project helper in utils.
# NOTE(review): presumably X is a 2-D feature array and y the class labels —
# confirm against utils.get_data.
X, y = get_data()

# Debugging aid: uncomment the two lines below to run the rest of the notebook
# on a small slice (first 50 rows, first 10 columns) instead of the full data.
# X = X[:50, :10]
# y = y[:50]

# Hold out 20% of the samples for testing. Stratifying on y keeps the class
# proportions the same in both partitions; the fixed seed makes the split
# repeatable across kernel restarts.
split_options = {"test_size": 0.2, "stratify": y, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [3]:
# Base estimator whose feature importances drive the elimination.
# Alternative that was tried: GradientBoostingClassifier(random_state=42).
model = RandomForestClassifier(random_state=42)

# Recursive feature elimination down to a single surviving feature, fitted on
# the training partition only. fit() returns the fitted selector itself, so
# construction and fitting can be chained.
selector = RFE(estimator=model, n_features_to_select=1).fit(X_train, y_train)

# Show the per-feature rank array followed by the boolean selection mask,
# one after the other on separate lines.
print(selector.ranking_, selector.support_, sep="\n")

[ 11  60   9  24  38  28  43  75   7  16 190 357 384 158 412 358 484 344
 260 296  48  31 281 367 442 275 225 146 142 144 286 417 360 497 161 120
  46 394 188  93 439 111 429 332  76  67 333 343 385 431  52 376 294  39
 459 387 476  40 415 136  90 407 486 346 218 390 366  68 460 256 169 496
 249 124 119 150 389 401  30 462 129 271 299 156 139 221 269 101 311 457
 105 245 334 482 183 411 368 132 180 216   2   3   4   5   6   1 103 270
  32  14 316 237  92   8 449 186 210  99 436 310  63 227  83  66 261 481
 100 409 438 107 489  53 354  29 240 226 248 371 151  25 477 464 178 195
 336 117 181 356 140 471 283 340  96 135 267  98 171  45  71 416 313  37
 395 341 211 182 325 396 468 230 164 372 443 405 223  41  55  26 217 465
 263 450  35 455 208 492 258 365 369 280  81 116 430 172 363 469 303 479
 108  87 187 274  78 207 302 179 138 262  59 342 452 157 392  22  18  70
  73 102 490 420  94 214 317 282 279 131 381 253 137 498 143 422 461 235
  79 122 500 273 185 448 322  13 424  77 288 485  4

In [9]:
# Persist the RFE outcome as a two-column CSV, one row per feature in the
# selector's own order. No index column is written, so the row position is the
# only link back to the feature.
rfe_table = pd.DataFrame(
    {
        "Ranking": selector.ranking_,
        "Support": selector.support_,
    }
)
rfe_table.to_csv("../results/rfe_rf.csv", index=False)