# Model 4: Permutation Importance

In [1]:
# Packages
from Logistic_Regression_Functions import *
import json
from sklearn.inspection import permutation_importance

## Standard Model Code

In [2]:
# Read the modelling dataset through the project helper
df = load_data()

# Read the feature/target column definitions stored in the JSON file
with open('feature_columns.json', 'r') as columns_file:
    column_data = json.loads(columns_file.read())

In [3]:
# Numeric features: the tabular columns followed by the NLP columns
numeric_feature_columns = [
    *column_data['tabular_feature_columns'],
    *column_data['nlp_feature_columns'],
]
# Categorical features
cat_feature_columns = column_data['cat_feature_columns']
# Target column
target_column = column_data['target_column']
# Map each rating label to its position in the listed order ('AAA' -> 0 ... 'D' -> 9)
custom_mapping = {
    grade: rank
    for rank, grade in enumerate(('AAA', 'AA', 'A', 'BBB', 'BB', 'B', 'CCC', "CC", "C", "D"))
}

In [4]:
# Build the model matrices. Only the second and fourth returned values are kept;
# the first and third are discarded (presumably the train-side counterparts — confirm
# against prepare_matrices).
_, X_test_scaled, _, y_test = prepare_matrices(
    df,
    numeric_feature_columns,
    cat_feature_columns,
    target_column,
    custom_mapping,
    change=False,
)


## Load Trained Model

In [5]:
# Location of the persisted best estimator for rating model 4
model_path = '../../../Output/Modelling/Logistic Regression/rating_model_4/rating_model_4_best_estimator.pkl'
# Load the trained model from disk
model = joblib.load(model_path)
# Display the loaded estimator as the cell output
model

## Permutation Importance

In [6]:
# Perform permutation importance on the test matrices.
# The raw sklearn result gets its own name so that `result` is only ever bound
# to the final DataFrame (previously the same name held two different types).
perm_result = permutation_importance(model, X_test_scaled, y_test, n_repeats=5, random_state=222, n_jobs=-1)

# Check lengths
print('shape X: ', X_test_scaled.shape)
print('num + cat: ', len(numeric_feature_columns + cat_feature_columns))
print('importances: ', len(perm_result.importances_mean))
print('stds: ', len(perm_result.importances_std))

# Expanded feature column names: every expanded categorical column is labelled
# with the name of its parent feature.
# NOTE(review): the repetition counts (10 and 11) are hard-coded and the
# rating-then-Sector order is assumed to match the column order produced by
# prepare_matrices — confirm if the categories change.
expanded_cat_cols = ['rating_on_previous_fixed_quarter_date'] * 10 + ['Sector'] * 11
feature_labels = numeric_feature_columns + expanded_cat_cols
print('with expanded')
print(len(feature_labels))

print('importances mean')
print(perm_result.importances_mean)

print('imp std')
print(perm_result.importances_std)

# Fail with an explicit message if the labels do not line up with the
# importances, instead of relying on pandas' generic length error.
if len(feature_labels) != len(perm_result.importances_mean):
    raise ValueError(
        f'Feature label count ({len(feature_labels)}) does not match the number of '
        f'permutation importances ({len(perm_result.importances_mean)}); '
        'update the expanded categorical column counts.'
    )

# Put column name, mean and std in a dataframe
result = pd.DataFrame({
    'feature': feature_labels,
    'mean': perm_result.importances_mean,
    'std': perm_result.importances_std,
})

# Output to disk
result.to_parquet('../../../Output/Modelling/Logistic Regression/rating_model_4/rating_model_4_permutation_importance.parquet', index=False)

result

shape X:  (1127, 142)
num + cat:  123
importances:  142
stds:  142
with expanded
142
importances mean
[ 0.00000000e+00  0.00000000e+00 -5.32386868e-04  5.32386868e-04
  5.32386868e-04 -3.54924579e-04 -8.87311446e-04  5.32386868e-04
  0.00000000e+00  0.00000000e+00  0.00000000e+00 -5.32386868e-04
  3.54924579e-04  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  1.77462289e-04  3.54924579e-04 -7.09849157e-04
  0.00000000e+00  3.54924579e-04 -8.87311446e-04  3.54924579e-04
  0.00000000e+00  3.54924579e-04  0.00000000e+00  3.54924579e-04
  0.00000000e+00  0.00000000e+00 -3.54924579e-04  1.77462289e-04
  0.00000000e+00  0.00000000e+00  0.00000000e+00 -3.54924579e-04
  8.87311446e-04  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00 -8.87311446e-04 -5.32386868e-04  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  1.77462289e-04  1.77462289e-04
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.0

Unnamed: 0,feature,mean,std
0,cashAndCashEquivalents,0.000000,0.000000
1,shortTermInvestments,0.000000,0.000000
2,cashAndShortTermInvestments,-0.000532,0.000435
3,netReceivables,0.000532,0.000435
4,inventory_balance_sheet,0.000532,0.000435
...,...,...,...
137,Sector,0.000000,0.000000
138,Sector,0.000000,0.000000
139,Sector,0.000000,0.000000
140,Sector,0.000000,0.000000
