# Feature Selection and Feature Importance
$$
{\text\ Importance\ for\ feature_i} = {\text\ best\ cost\ using\ features_{-i}} / {\text\ best\ cost\ using\ all\ features}
$$

In [2]:
# feature importance
import numpy as np
import ConformaQuantile as CQ

quantile = 0.8
n_samples = 10000
n_X1 = 20
n_X2 = 2
n_X3 = 2
interval_length = 100
np.random.seed(0)

X1 = abs(np.random.normal(6.4, 10, (n_samples, n_X1)))
X2 = abs(np.random.normal(0.4, 1, (n_samples, n_X2)))
X3 = abs(np.random.normal(0.9, 1, (n_samples, n_X3)))

coefficients = abs(np.random.normal(10, 400, n_X1 + n_X2))
X = np.hstack((X1, X2, X3))
noise = np.random.normal(0, 1, n_samples)

X_true = X[:, :(n_X1 + n_X2)]
X_observed = np.hstack((X1, X3))
Y = np.dot(X_true, coefficients)


train_ratio = 0.6
validation_ratio = 0.2
test_ratio = 0.2
models = ['linear', 'quantile', 'lasso', 'ridge', 'random_forest', 'glm', 'neural_network']
Y0 = Y + noise
loss = {}
for model in models:
    loss_unadjusted, loss_adjusted = CQ.perform_regression_analysis(X_observed, Y0, train_ratio, test_ratio, validation_ratio, quantile, model_type=model)
    # 将结果存储在字典中
    loss[model] = {'loss_unadjusted': loss_unadjusted, 'loss_adjusted': loss_adjusted}
min_loss_model = min(loss, key=lambda x: loss[x]['loss_adjusted'])
min_loss = loss[min_loss_model]


linear loss unadjusted 6972.893055621992 loss_adjusted 125.59176725989416
quantile loss unadjusted 6883.976768322941 loss_adjusted 126.35026095161547
lasso loss unadjusted 6972.873227794408 loss_adjusted 125.58936046320991
ridge loss unadjusted 6972.8803913867605 loss_adjusted 125.59206856755274
random_forest loss unadjusted 6125.013594244923 loss_adjusted 1645.7233999817074
glm loss unadjusted 6972.893055621939 loss_adjusted 125.59176725989492
neural_network loss unadjusted 47644.04603620853 loss_adjusted 3552.023744321883


In [28]:
# feature importance

n_features_observed = X_observed.shape[1]
loss_modified = {}
ratios = []
for i in range(n_features_observed):
    X_observed_deleted_i = np.delete(X_observed, i, axis=1)
    print(X_observed_deleted_i.shape[1])
    for model in models:
        loss_unadjusted, loss_adjusted = CQ.perform_regression_analysis(X_observed_deleted_i,
                                                                         Y0, train_ratio, test_ratio, 
                                                                         validation_ratio, quantile, 
                                                                         model_type=model)
    # 将结果存储在字典中
        loss_modified[model] = {'loss_unadjusted': loss_unadjusted, 'loss_adjusted': loss_adjusted}
    min_loss_model = min(loss_modified, key=lambda x: loss[x]['loss_adjusted'])
    min_loss_deleted = loss_modified[min_loss_model]
    ratio =  min_loss_deleted['loss_adjusted']/ min_loss['loss_adjusted']
    ratios.append(ratio)


linear loss unadjusted 6887.182953043456 loss_adjusted 673.0959679762929
quantile loss unadjusted 6389.962687579106 loss_adjusted 674.1611963174726
lasso loss unadjusted 6887.167592837898 loss_adjusted 673.0977768786927
ridge loss unadjusted 6887.170790270856 loss_adjusted 673.0957768228136
random_forest loss unadjusted 6125.010578275807 loss_adjusted 1677.0601830002163
glm loss unadjusted 6887.182953043476 loss_adjusted 673.0959679762938
neural_network loss unadjusted 48160.63727621098 loss_adjusted 3574.3315251766903

linear loss unadjusted 6782.764707821611 loss_adjusted 959.8351474112939
quantile loss unadjusted 6102.304501866081 loss_adjusted 959.1899743736889
lasso loss unadjusted 6782.753228477607 loss_adjusted 959.8350105276054
ridge loss unadjusted 6782.752917936259 loss_adjusted 959.8344331607536
random_forest loss unadjusted 6095.493266928759 loss_adjusted 1730.3239174747243
glm loss unadjusted 6782.764707821617 loss_adjusted 959.8351474112864
neural_network loss unadjusted



quantile loss unadjusted 6649.608706267647 loss_adjusted 441.50475284070416
lasso loss unadjusted 6976.851453641525 loss_adjusted 439.3863652868455
ridge loss unadjusted 6976.853800257321 loss_adjusted 439.3845988341793
random_forest loss unadjusted 6139.00779476659 loss_adjusted 1640.659081152404
glm loss unadjusted 6976.866669888767 loss_adjusted 439.38533191518405
neural_network loss unadjusted 47857.68954079838 loss_adjusted 3560.2947624475514

linear loss unadjusted 6931.030609637786 loss_adjusted 522.403983103342
quantile loss unadjusted 6553.955256936324 loss_adjusted 523.6083561301587
lasso loss unadjusted 6931.011671564154 loss_adjusted 522.4049009570582
ridge loss unadjusted 6931.018038264665 loss_adjusted 522.4039051116872
random_forest loss unadjusted 6133.588562952742 loss_adjusted 1640.1473647853593
glm loss unadjusted 6931.030609637756 loss_adjusted 522.4039831033422
neural_network loss unadjusted 47961.209657460975 loss_adjusted 3564.4383936975514

linear loss unadjuste



quantile loss unadjusted 6877.503674931597 loss_adjusted 129.1197915865584
lasso loss unadjusted 6971.845694806625 loss_adjusted 128.9102508295954
ridge loss unadjusted 6971.8516090636895 loss_adjusted 128.90748940475297
random_forest loss unadjusted 6132.194805347104 loss_adjusted 1631.7091278516361
glm loss unadjusted 6971.864359351846 loss_adjusted 128.90691002987464
neural_network loss unadjusted 48036.63898745365 loss_adjusted 3567.622076739191

linear loss unadjusted 6977.895358485986 loss_adjusted 455.41075781980066
quantile loss unadjusted 6640.673200576966 loss_adjusted 456.70205903107706
lasso loss unadjusted 6977.881739032168 loss_adjusted 455.40751514132603
ridge loss unadjusted 6977.882665108964 loss_adjusted 455.4098645488537
random_forest loss unadjusted 6129.500525148082 loss_adjusted 1634.475624090607
glm loss unadjusted 6977.895358485989 loss_adjusted 455.4107578198008
neural_network loss unadjusted 48255.04570052738 loss_adjusted 3577.5861954891907

linear loss unadj



quantile loss unadjusted 6245.475665845618 loss_adjusted 841.2854956344378
lasso loss unadjusted 6849.726191846395 loss_adjusted 841.9073502861532
ridge loss unadjusted 6849.729342135334 loss_adjusted 841.9055412627931
random_forest loss unadjusted 6131.356014334839 loss_adjusted 1724.420755720687
glm loss unadjusted 6849.741602285741 loss_adjusted 841.905430362815
neural_network loss unadjusted 47946.37875953618 loss_adjusted 3564.5710126766908

linear loss unadjusted 6960.379814493178 loss_adjusted 230.85776845063933
quantile loss unadjusted 6782.754908519702 loss_adjusted 230.74817714359145
lasso loss unadjusted 6960.370288018952 loss_adjusted 230.8571400919007
ridge loss unadjusted 6960.367208906233 loss_adjusted 230.8580705259122
random_forest loss unadjusted 6124.196682978555 loss_adjusted 1635.6662748410097
glm loss unadjusted 6960.37981449315 loss_adjusted 230.85776845063998
neural_network loss unadjusted 48154.99173630619 loss_adjusted 3572.9275126766906

linear loss unadjuste

In [29]:
print(ratios)

[5.35951273576132, 7.642645897609925, 3.4985954516072173, 4.15962704985739, 1.0034454575962064, 6.935469184989804, 1.026442449855124, 3.6261631834229533, 1.5287862469935993, 10.220702065946673, 11.374873508165878, 6.70365186335017, 1.8381902673955242, 10.928926079089486, 1.1012311721770893, 16.228531501242642, 8.979077084183656, 2.978688168065473, 2.5355457731429816, 6.60804544407734, 0.9998776229224621, 0.999995818945186]
