# Model 3 Permutation Importance

In [1]:
# Packages: standard library plus scikit-learn's permutation importance routine
import sys
import os
from sklearn.inspection import permutation_importance
# Add the parent directory (relative to the current working directory) to
# sys.path so that the project module imported below can be resolved.
# This must run before the star import.
sys.path.append(os.path.abspath('..'))
# Import module
# NOTE(review): this star import is the only visible source of the helpers used
# later in the notebook (load_data, get_column_names_and_mapping,
# prepare_matrices) and, presumably, of `joblib` and `pd`, which the notebook
# uses but never imports itself. Consider importing those names explicitly.
from XGBoost_Functions import *
# Model name: passed to get_column_names_and_mapping and used to build the paths
# of the saved estimator and of the permutation importance output file
model_name = 'include_previous_rating_model_3'


## Standard Model Code

In [2]:
# Load the data
# `load_data` is not defined in this notebook; it presumably comes from the star
# import of XGBoost_Functions (TODO confirm). The returned object is later passed
# as the first argument to prepare_matrices.
df = load_data()

In [3]:
# Look up the column configuration registered for this model name:
# numeric feature columns, categorical feature columns, target column and a
# custom mapping (all four are forwarded unchanged to prepare_matrices).
(
    numeric_feature_columns,
    cat_feature_columns,
    target_column,
    custom_mapping,
) = get_column_names_and_mapping(model_name)

In [4]:
# Build the model matrices from the loaded data.
# prepare_matrices returns five values; the 1st and 3rd are discarded here and
# only the 2nd (X_test_scaled), the 4th (y_test) and the feature names are kept
# for the permutation importance computation.
_, X_test_scaled, _, y_test, feature_names = prepare_matrices(
    df,
    numeric_feature_columns,
    cat_feature_columns,
    target_column,
    custom_mapping,
)

feature names: 
['num__EBIT' 'num__common_plus_preferred_stock' 'num__workingCapital'
 'num__Ratio_A' 'num__Ratio_B' 'num__Ratio_C' 'num__Ratio_D'
 'num__Ratio_E' 'num__Positiv' 'num__Negativ' 'num__Strong' 'num__Weak'
 'num__Active' 'num__Passive' 'num__Ovrst' 'num__Undrst'
 'num__cashAndCashEquivalents' 'num__shortTermInvestments'
 'num__cashAndShortTermInvestments' 'num__netReceivables'
 'num__inventory_balance_sheet' 'num__otherCurrentAssets'
 'num__totalCurrentAssets' 'num__propertyPlantEquipmentNet'
 'num__goodwill' 'num__intangibleAssets'
 'num__goodwillAndIntangibleAssets' 'num__longTermInvestments'
 'num__taxAssets' 'num__otherNonCurrentAssets'
 'num__totalNonCurrentAssets' 'num__otherAssets' 'num__totalAssets'
 'num__accountPayables' 'num__shortTermDebt' 'num__taxPayables'
 'num__deferredRevenue' 'num__otherCurrentLiabilities'
 'num__totalCurrentLiabilities' 'num__longTermDebt'
 'num__deferredRevenueNonCurrent' 'num__deferredTaxLiabilitiesNonCurrent'
 'num__otherNonCurrentLia

## Load Trained Model

In [5]:
# Load trained model
# Path of the saved best estimator for this model name, relative to the current
# working directory. os.path.join replaces manual '/' concatenation.
model_path = os.path.join(
    '../../../../Output/Modelling/XGBoost',
    model_name,
    model_name + '_best_estimator.pkl',
)
# NOTE(security): joblib.load unpickles the file and can execute arbitrary code
# while doing so; only load artefacts that were produced by this project.
# NOTE(review): `joblib` is not imported in this notebook; it presumably arrives
# through the star import from XGBoost_Functions.
model = joblib.load(model_path)
# Show the estimator's repr as the cell output
model

## Permutation Importance

In [6]:
# Perform permutation importance
# Every column of X_test_scaled is shuffled n_repeats times and the change in the
# estimator's default score is recorded. random_state fixes the shuffles so the
# result is reproducible; n_jobs=-1 runs the repeats on all available cores.
n_repeats = 1000
perm_result = permutation_importance(
    model,
    X_test_scaled,
    y_test,
    n_repeats=n_repeats,
    random_state=222,
    n_jobs=-1,
)

# Put column name, mean and std in a dataframe
# The raw sklearn result keeps its own name (perm_result) instead of being
# overwritten, so `result` is only ever a DataFrame and the per-repeat
# importances stay available for inspection.
result = pd.DataFrame({
    'feature': feature_names,
    'mean': perm_result.importances_mean,
    'std': perm_result.importances_std,
})

# Output to disk
# The file is written next to the saved estimator for this model name.
output_path = os.path.join(
    '../../../../Output/Modelling/XGBoost',
    model_name,
    model_name + '_permutation_importance.parquet',
)
result.to_parquet(output_path, index=False)

result

Unnamed: 0,feature,mean,std
0,num__EBIT,0.000000,0.000000
1,num__common_plus_preferred_stock,0.000000,0.000000
2,num__workingCapital,0.000000,0.000000
3,num__Ratio_A,0.000000,0.000000
4,num__Ratio_B,0.000000,0.000000
...,...,...,...
158,cat__rating_on_previous_fixed_quarter_date_BBB,0.257352,0.010267
159,cat__rating_on_previous_fixed_quarter_date_C,0.000900,0.000098
160,cat__rating_on_previous_fixed_quarter_date_CC,0.000000,0.000000
161,cat__rating_on_previous_fixed_quarter_date_CCC,0.025477,0.002348


In [8]:
# Order the importance table by mean importance, smallest first
# (ascending is the pandas default, spelled out here for the reader).
result.sort_values(by="mean", ascending=True)

Unnamed: 0,feature,mean,std
0,num__EBIT,0.000000,0.000000
97,num__sellingGeneralAndAdministrativeExpenses,0.000000,0.000000
98,num__otherExpenses,0.000000,0.000000
99,num__operatingExpenses,0.000000,0.000000
100,num__costAndExpenses,0.000000,0.000000
...,...,...,...
154,cat__rating_on_previous_fixed_quarter_date_AA,0.036817,0.001890
153,cat__rating_on_previous_fixed_quarter_date_A,0.047979,0.004233
156,cat__rating_on_previous_fixed_quarter_date_B,0.080826,0.004940
158,cat__rating_on_previous_fixed_quarter_date_BBB,0.257352,0.010267
