In [1]:
import keras
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score
from cvf_da_model import encode_categorical_features, CATEGORICAL_FEATURES, NUMERICAL_FEATURES
from data_preprocessing import process_data_for_training
from sklearn.utils import shuffle

2023-08-08 12:17:27.527138: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Compute CVF-DA Model's Permutation Importance

In [12]:
# Restore the trained CVF-DA network from its saved .keras archive.
cvf_da_model = keras.models.load_model(
    'out/models/cvf_da_20230808_113839.keras'
)

In [None]:
# Build the evaluation frame: run the training preprocessing on the test CSV,
# then encode its categorical columns.
# NOTE(review): a fresh LabelEncoder is passed here; confirm that
# encode_categorical_features reproduces the encoding the model was trained with.
prepared_data = encode_categorical_features(
    process_data_for_training('test_data/recommender_testing_data2.csv'),
    LabelEncoder(),
)

# Model input layout: one array per categorical feature, followed by a single
# array holding all numerical columns together.
prepared_data_input = [prepared_data[name].values for name in CATEGORICAL_FEATURES]
prepared_data_input.append(prepared_data[NUMERICAL_FEATURES].values)

test_data/recommender_testing_data2.csv - Data import complete

test_data/recommender_testing_data2.csv - Diagnostic consultation initiated

test_data/recommender_testing_data2.csv - Vehicle state data derived



In [None]:
# Baseline: run the model on the unshuffled inputs and take, per row, the index
# of the highest-scoring output as the predicted class.
original_predictions = cvf_da_model.predict(prepared_data_input)
original_class_predictions = np.argmax(original_predictions, axis=1)

# NOTE(review): both arguments below are the same predicted classes, so this
# baseline F1 is 1.0 by construction -- it measures self-agreement, not accuracy.
# TODO: confirm whether ground-truth labels were intended as the reference here.
original_f1 = f1_score(
    original_class_predictions,
    original_class_predictions,
    average='weighted',
)

original_predictions

In [16]:
# Permutation importance: for each feature, shuffle that one column, re-run the
# model, and record how much the weighted F1 drops relative to `original_f1`.
# NOTE(review): the reference labels are the model's own predictions on the
# unshuffled data, so the score measures how much predictions change, not
# accuracy against ground truth. A shuffled F1 of 1.0 means shuffling that
# feature did not change any predicted class.

# For storing the drop in performance for each feature
importance_scores = {}

# Reference classes are loop-invariant, so compute them once up front.
reference_class_predictions = np.argmax(original_predictions, axis=1)

# Seeded generator so the permutations (and therefore the scores) are
# reproducible across runs; each feature draws a different permutation from
# the same stream.
permutation_rng = np.random.RandomState(42)

# Compute Permutation Importance for each feature
for feature in CATEGORICAL_FEATURES + NUMERICAL_FEATURES:
    # Work on a copy so only the current feature is permuted.
    shuffled_data = prepared_data.copy()
    shuffled_data[feature] = shuffle(shuffled_data[feature].values, random_state=permutation_rng)

    # Same input layout as the baseline: one array per categorical feature,
    # then one array with all numerical columns.
    shuffled_data_input = [shuffled_data[f].values for f in CATEGORICAL_FEATURES] + \
                          [shuffled_data[NUMERICAL_FEATURES].values]

    # Get predictions on shuffled data
    shuffled_predictions = cvf_da_model.predict(shuffled_data_input)
    shuffled_class_predictions = np.argmax(shuffled_predictions, axis=1)

    # Compute drop in performance using F1 score with weighted average.
    # f1_score takes (y_true, y_pred): the reference goes first because the
    # weighted average is weighted by the support of the first argument.
    shuffled_f1 = f1_score(reference_class_predictions, shuffled_class_predictions, average='weighted')
    drop_in_f1 = original_f1 - shuffled_f1
    print(f"Feature: {feature}, Shuffled F1: {shuffled_f1}")
    importance_scores[feature] = drop_in_f1

Feature: model, Shuffled F1: 1.0
Feature: modelyear, Shuffled F1: 1.0
Feature: driver, Shuffled F1: 1.0
Feature: plant, Shuffled F1: 1.0
Feature: engine, Shuffled F1: 1.0
Feature: transmission, Shuffled F1: 1.0
Feature: module, Shuffled F1: 1.0
Feature: dtcbase, Shuffled F1: 1.0
Feature: faulttype, Shuffled F1: 1.0
Feature: dtcfull, Shuffled F1: 1.0
Feature: year, Shuffled F1: 1.0
Feature: month, Shuffled F1: 1.0
Feature: dayOfWeek, Shuffled F1: 1.0
Feature: weekOfYear, Shuffled F1: 1.0
Feature: season, Shuffled F1: 1.0
Feature: i_original_vfg_code, Shuffled F1: 1.0
Feature: softwarepartnumber, Shuffled F1: 1.0
Feature: hardwarepartnumber, Shuffled F1: 1.0
Feature: i_p_css_code, Shuffled F1: 1.0
Feature: i_original_ccc_code, Shuffled F1: 1.0
Feature: i_original_function_code, Shuffled F1: 1.0
Feature: i_original_vrt_code, Shuffled F1: 1.0
Feature: i_current_vfg_code, Shuffled F1: 1.0
Feature: i_current_function_code, Shuffled F1: 1.0
Feature: i_current_vrt_code, Shuffled F1: 1.0
Featur

In [18]:
# Report features ranked from largest to smallest F1 drop (ties keep their
# insertion order, since the sort is stable).
sorted_importances = list(importance_scores.items())
sorted_importances.sort(key=lambda pair: pair[1], reverse=True)
for feature, score in sorted_importances:
    print(f"{feature}: {score}")

model: 0.0
modelyear: 0.0
driver: 0.0
plant: 0.0
engine: 0.0
transmission: 0.0
module: 0.0
dtcbase: 0.0
faulttype: 0.0
dtcfull: 0.0
year: 0.0
month: 0.0
dayOfWeek: 0.0
weekOfYear: 0.0
season: 0.0
i_original_vfg_code: 0.0
softwarepartnumber: 0.0
hardwarepartnumber: 0.0
i_p_css_code: 0.0
i_original_ccc_code: 0.0
i_original_function_code: 0.0
i_original_vrt_code: 0.0
i_current_vfg_code: 0.0
i_current_function_code: 0.0
i_current_vrt_code: 0.0
i_cpsc_code: 0.0
i_cpsc_vfg_code: 0.0
i_css_code: 0.0
v_transmission_code: 0.0
v_drive_code: 0.0
v_engine_code: 0.0
ic_repair_dealer_id: 0.0
ic_eng_part_number: 0.0
ic_serv_part_number: 0.0
ic_part_suffix: 0.0
ic_part_base: 0.0
ic_part_prefix: 0.0
ic_causal_part_id: 0.0
ic_repair_country_code: 0.0
elapsedTimeSec: 0.0
timeSinceLastActivitySec: 0.0
odomiles: 0.0
vehicleAgeAtSession: 0.0
daysSinceWarrantyStart: 0.0
i_mileage: 0.0
i_time_in_service: 0.0
i_months_in_service: 0.0
