# Load the test data

In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_selection import VarianceThreshold
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

# Read the Cleveland heart-disease table, then separate the label from the predictors
df = pd.read_csv("Heart Disease Cleveland.csv")
y = df['target']
X = df.drop(columns='target')

# Feature selector with a variance cut-off of 0.5. It is the first pipeline step,
# so it sees the raw (unscaled) feature values.
variance_threshold = VarianceThreshold(threshold=0.5)  # tune the threshold as needed

# Two-stage preprocessing: variance-based feature selection, then min-max scaling
preprocessing_steps = [
    ('variance_threshold', variance_threshold),
    ('scaler', MinMaxScaler()),
]
preprocessor = Pipeline(steps=preprocessing_steps)

# Last expression of the cell: show the pipeline's repr
preprocessor


In [2]:
# Apply the pipeline to the features
# NOTE(review): the selector and scaler are fitted on all rows before the
# train/test split, so test rows contribute to the fitted min/max. Left as-is
# because the saved model loaded later in this notebook was presumably trained
# on features prepared the same way — confirm against the training notebook.
X_transformed = preprocessor.fit_transform(X)

# Boolean mask over the original columns, read from the fitted selector step
support_mask = preprocessor.named_steps['variance_threshold'].get_support()
selected_features = X.columns[support_mask]

# Convert the transformed features back to a DataFrame; reusing X's index keeps
# the rows label-aligned with y for the stratified split below
X_transformed_df = pd.DataFrame(
    X_transformed,
    columns=selected_features,
    index=X.index,
)

# Split the data into train and test sets (80/20, stratified on the label,
# fixed seed so the same rows land in the test set on every run)
X_train, X_test, y_train, y_test = train_test_split(
    X_transformed_df, y,
    train_size=0.8,
    stratify=y,
    random_state=42
)

# Information on feature selection
n_features_before = X.shape[1]
n_features_after = X_transformed_df.shape[1]
# Taken from the mask rather than a set difference, so the dropped columns are
# always listed in their original column order (set ordering varies per run)
features_removed = X.columns[~support_mask].tolist()

print(f"Number of features before: {n_features_before}")
print(f"Number of features after: {n_features_after}")
print(f"Features removed: {features_removed}")

Number of features before: 13
Number of features after: 7
Features removed: ['exang', 'fbs', 'restecg', 'sex', 'thal', 'slope']


In [3]:
# Preview the first rows of the held-out test features (selected and min-max scaled)
X_test.head()

Unnamed: 0,age,cp,trestbps,chol,thalach,oldpeak,ca
179,0.583333,0.0,0.528302,0.342466,0.312977,0.096774,0.25
197,0.791667,0.0,0.292453,0.292237,0.70229,0.032258,0.5
285,0.354167,0.0,0.433962,0.422374,0.374046,0.290323,0.5
194,0.645833,0.666667,0.433962,0.134703,0.641221,0.483871,0.0
188,0.4375,0.666667,0.433962,0.244292,0.70229,0.096774,0.25


# Load the model

In [4]:
from qiskit_machine_learning.algorithms import QSVC

# Location of the serialized classifier, relative to the notebook's working directory
# NOTE(review): loading a saved model deserializes the file's contents — only
# load model files from a trusted source.
model_path = 'model/qsvc_linear.model'
qsvc_model = QSVC.load(model_path)

In [5]:
# Display the loaded model's repr as the cell output
qsvc_model

# QSVC Explainability

In [None]:
# Column labels of the test features as a plain list, used to label the SHAP plots
feature_names = list(X_test.columns)

In [7]:
import shap

In [8]:
# Kernel SHAP explainer wrapped around the classifier's predict function,
# with the test feature frame passed as the background data.
# NOTE(review): every background row is evaluated through the model for each
# explanation, which may be slow for a quantum-kernel classifier — confirm
# the runtime is acceptable.
predict_fn = qsvc_model.predict
background_data = X_test
qsvc_explainer = shap.KernelExplainer(predict_fn, background_data)


In [None]:
# Load SHAP's JavaScript bundle into the notebook; the interactive force plot
# below is not rendered without it.
shap.initjs()

# Explain one test row (positional row 4); the double brackets keep it a
# one-row DataFrame, which is the 2-D input shape shap_values expects here
sample = X_test.iloc[[4]]
shap_values = qsvc_explainer.shap_values(sample)

# Force plot for that row: base value, per-feature contributions, feature values
shap.force_plot(qsvc_explainer.expected_value,
                shap_values[0],
                sample.iloc[0],
                feature_names=feature_names)

  0%|          | 0/1 [00:00<?, ?it/s]

In [None]:
# Get SHAP values for a single sample (positional row 4, kept as a one-row DataFrame)
sample = X_test.iloc[[4]]
sample_shap_values = qsvc_explainer.shap_values(sample)

# Bundle the row's attributions into an Explanation so the public plotting API
# can be used instead of the private `shap.plots._waterfall` module
sample_explanation = shap.Explanation(
    values=sample_shap_values[0],
    base_values=qsvc_explainer.expected_value,
    data=sample.iloc[0].values,  # plain array of the row's feature values
    feature_names=feature_names,
)

# Create the waterfall plot
shap.plots.waterfall(sample_explanation)