# Model 3 Permutation Importance

In [1]:
# Packages
import sys
import os
from sklearn.inspection import permutation_importance
# Add the parent directory to sys.path so the local module imported below can be found
sys.path.append(os.path.abspath('..'))
# Import module
# NOTE(review): later cells use `pd`, `joblib`, `load_data`, `get_column_names_and_mapping`
# and `prepare_matrices` without importing them explicitly; presumably they all come
# into scope through this star import. Confirm against Logistic_Regression_Functions.
from Logistic_Regression_Functions import *
# Model name: passed to get_column_names_and_mapping, used to build the model and
# output file paths, and checked when choosing the expanded categorical column names
model_name = 'exclude_previous_rating_model_3'

## Standard Model Code

In [2]:
# Load the data
# NOTE(review): load_data is not defined in this notebook; presumably it is provided
# by the star import from Logistic_Regression_Functions. Confirm.
df = load_data()

In [3]:
# Look up the numeric/categorical feature columns, the target column and the
# custom mapping configured for this model name
(
    numeric_feature_columns,
    cat_feature_columns,
    target_column,
    custom_mapping,
) = get_column_names_and_mapping(model_name)

In [4]:
# Build the model matrices. Only the 2nd and 4th returned values are kept
# (bound to X_test_scaled and y_test); the other two are discarded.
_, X_test_scaled, _, y_test = prepare_matrices(
    df,
    numeric_feature_columns,
    cat_feature_columns,
    target_column,
    custom_mapping,
)


preprocessor
['num__EBIT' 'num__common_plus_preferred_stock' 'num__workingCapital' ...
 'cat__Sector_Materials' 'cat__Sector_Real Estate' 'cat__Sector_Utilities']


## Load Trained Model

In [5]:
# Load the saved best estimator for this model from the output folder.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load artefacts produced by a trusted run.
model = joblib.load(
    '../../../../Output/Modelling/Logistic Regression/'
    + model_name
    + '/'
    + model_name
    + '_best_estimator.pkl'
)
model

## Permutation Importance

In [9]:
# Perform permutation importance with the loaded model on X_test_scaled / y_test.
# random_state is fixed so the shuffles are reproducible; n_repeats=1000 makes this
# the expensive step of the notebook.
perm_importance = permutation_importance(model, X_test_scaled, y_test, n_repeats=1000, random_state=222, n_jobs=-1)

# Check lengths
print('shape X: ', X_test_scaled.shape)
print('num + cat: ', len(numeric_feature_columns + cat_feature_columns))
print('importances: ', len(perm_importance.importances_mean))
print('stds: ', len(perm_importance.importances_std))

# Expanded (one-hot) categorical column names, defined once and combined per model.
# NOTE(review): these are hard-coded and must match the column order of X_test_scaled
# (numeric columns first, then categorical ones, as in the preprocessor output printed
# by prepare_matrices). Verify if the preprocessing changes.
previous_rating_cols = [
    'cat__rating_on_previous_fixed_quarter_date_A',
    'cat__rating_on_previous_fixed_quarter_date_AA',
    'cat__rating_on_previous_fixed_quarter_date_AAA',
    'cat__rating_on_previous_fixed_quarter_date_B',
    'cat__rating_on_previous_fixed_quarter_date_BB',
    'cat__rating_on_previous_fixed_quarter_date_BBB',
    'cat__rating_on_previous_fixed_quarter_date_C',
    'cat__rating_on_previous_fixed_quarter_date_CC',
    'cat__rating_on_previous_fixed_quarter_date_CCC',
    'cat__rating_on_previous_fixed_quarter_date_D',
]
sector_cols = [
    'cat__Sector_Communication Services',
    'cat__Sector_Consumer Discretionary',
    'cat__Sector_Consumer Staples',
    'cat__Sector_Energy',
    'cat__Sector_Financials',
    'cat__Sector_Health Care',
    'cat__Sector_Industrials',
    'cat__Sector_Information Technology',
    'cat__Sector_Materials',
    'cat__Sector_Real Estate',
    'cat__Sector_Utilities',
]

if 'include_previous_rating' in model_name:
    # Version for include previous rating: previous-rating dummies, then sector dummies
    expanded_cat_cols = previous_rating_cols + sector_cols
elif 'exclude_previous_rating' in model_name:
    # Version for exclude previous rating: the previous rating is not a feature,
    # so only the sector dummies remain
    expanded_cat_cols = sector_cols
else:
    # Fail loudly instead of hitting a NameError on expanded_cat_cols further down
    raise ValueError(
        "model_name must contain 'include_previous_rating' or "
        "'exclude_previous_rating', got: " + repr(model_name)
    )

feature_names = numeric_feature_columns + expanded_cat_cols

print('with expanded')
print(len(feature_names))

print('importances mean')
print(perm_importance.importances_mean)

print('imp std')
print(perm_importance.importances_std)

# Guard against mislabelled features: there must be exactly one name per importance
n_importances = len(perm_importance.importances_mean)
if len(feature_names) != n_importances:
    raise ValueError(
        'Feature name count (' + str(len(feature_names)) + ') does not match the '
        'number of permutation importances (' + str(n_importances) + '); '
        'check the expanded categorical column names for ' + model_name
    )

# Put column name, mean and std in a dataframe
result = pd.DataFrame({'feature': feature_names, 'mean': perm_importance.importances_mean, 'std': perm_importance.importances_std})

# Output to disk
result.to_parquet('../../../../Output/Modelling/Logistic Regression/' + model_name + '/' + model_name + '_permutation_importance.parquet', index=False)

result