# SVC - Decision Function Analysis

The code below analyses the decision function produced by the classical SVC component of the QSVC.

In [1]:
# Load the autoreload extension and enable mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

### Imports

In [None]:
# imports
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

import os

# import data class
from utilities.dataset_utils import DiabetesData

from qiskit_machine_learning.algorithms import QSVC

# explainability imports
from shap import KernelExplainer
from shap import summary_plot
from shap import force_plot

# qiskit imports
# simulator
from qiskit_aer import AerSimulator
from qiskit_aer.primitives import SamplerV2 as Sampler

In [3]:
# Seed NumPy's global RNG so the np.random.choice draws made later in this
# notebook are reproducible on a fresh Restart & Run All.
np.random.seed(42)  # reproducibility

Dataset loading - change the code here to analyze a different dataset

**N.B.** works only for binary classification datasets

In [4]:
# path to diabetes.csv, resolved three directories above the current working
# directory (so the result depends on where the kernel was started)
path = os.path.join(os.getcwd(), '..', '..', '..', 'utilities', 'diabetes.csv')
# load dataset class
dataset = DiabetesData(path)

In [5]:
# path to the saved QSVC model file (note: this rebinds `path`, which the
# previous cell used for the CSV location)
path = os.path.join(os.getcwd(), '..', '..', '..', 'models', 'qml-simulator', 'qsvc_best_params.model')

# restore the QSVC instance from the saved model file
qsvc = QSVC.load(path)

In [6]:
# Column labels for the eight input features. They are used later as the
# DataFrame column names for X_test and as the labels on the SHAP plots, so
# the order must match the column order of the feature arrays
# (presumably the CSV column order -- confirm against DiabetesData).
feature_names = [
    "Pregnancies", "Glucose",
    "BloodPressure", "SkinThickness",
    "Insulin", "BMI",
    "DiabetesPedigreeFunction", "Age",
]

In [7]:
# get data: train/test features and labels from the dataset helper
# NOTE(review): later cells slice and fancy-index X_test positionally, which
# presumes it is a NumPy array -- confirm what preprocess_data_ranged returns.
X_train, X_test, y_train, y_test = dataset.preprocess_data_ranged()

In [8]:
# setup backend simulator
backend = AerSimulator()
# use the statevector method and allow as many parallel threads as the
# machine reports CPUs
backend.set_options(max_parallel_threads=os.cpu_count(), method='statevector')

In [9]:
# sampler built on top of the configured simulator backend
# NOTE(review): `sampler` is not referenced by any later cell visible in this
# notebook; the loaded `qsvc` is used as-is -- confirm whether this is needed.
sampler = Sampler.from_backend(backend)

In [None]:
# Evaluate the QSVC decision function over the test set in small batches so
# progress can be reported between (potentially slow) kernel evaluations.
samples = len(X_test)
batch_size = 10
batch_outputs = []

for start in range(0, samples, batch_size):
    # clamp the final batch to the end of the test set
    end = min(start + batch_size, samples)
    batch_outputs.append(qsvc.decision_function(X_test[start:end]))

    # progress bookkeeping, only used for the log line below
    remaining_samples = samples - end
    progress = (end / samples) * 100
    print(f"Processed {end}/{samples} samples ({progress:.2f}%). Remaining: {remaining_samples}")

# stitch the per-batch outputs back into one array aligned with X_test
decisions = np.concatenate(batch_outputs)

Processed 10/231 samples (4.33%). Remaining: 221
Processed 20/231 samples (8.66%). Remaining: 211
Processed 30/231 samples (12.99%). Remaining: 201
Processed 40/231 samples (17.32%). Remaining: 191
Processed 50/231 samples (21.65%). Remaining: 181
Processed 60/231 samples (25.97%). Remaining: 171
Processed 70/231 samples (30.30%). Remaining: 161
Processed 80/231 samples (34.63%). Remaining: 151
Processed 90/231 samples (38.96%). Remaining: 141
Processed 100/231 samples (43.29%). Remaining: 131


In [None]:
# One row per test sample: the feature columns plus the decision score,
# built in a single expression.
df = pd.DataFrame(X_test, columns=feature_names).assign(decision=decisions)

In [None]:
# Correlate only the feature columns with the decision score. The columns are
# selected explicitly so this cell stays idempotent: a later cell adds a
# 'target' column to `df`, and a plain df.corr() would then put a 'target'
# row into this "feature" heatmap whenever the cell is re-run.
corr = df[feature_names + ['decision']].corr()

plt.figure(figsize=(10, 6))
# Display only correlation values of features with the decision function
sns.heatmap(corr[['decision']].drop('decision'), annot=True, cmap="coolwarm", fmt=".2f")
plt.title("Feature Correlation with QSVC Decision Function")
plt.show()

When the features are examined in isolation (above), each has a positive correlation with the decision function. This is in line with the domain of the Pima Indians Diabetes dataset: an increase in any of these values increases the risk of that specific patient being diabetic (higher decision function outputs).

In [None]:
# Attach the ground-truth labels by position. Column assignment aligns on the
# index, so if y_test were a pandas Series that kept its original (shuffled)
# index, a direct assignment would misalign labels or produce NaNs against
# df's 0..n-1 index. np.asarray is a no-op when y_test is already an array.
df['target'] = np.asarray(y_test)

In [None]:
# Distribution of decision scores, with the classification threshold at 0
# marked so the balance of positive and negative outputs is visible.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(decisions, kde=True, bins=30, color='skyblue', ax=ax)
ax.axvline(0, color='red', linestyle='--', label='Decision Threshold (0)')
ax.set_xlabel("Decision Function Output")
ax.set_ylabel("Frequency")
ax.set_title("Histogram and Density Plot of QSVC Decision Function Output")
ax.legend()
plt.show()

In [None]:
# Split the test rows by ground-truth label.
positive_samples = df.loc[df['target'] == 1]
negative_samples = df.loc[df['target'] == 0]

# Count decision scores on each side of the threshold: strictly positive
# scores count as positive, everything else (including exactly 0) as negative.
positive_decision_count = int(np.count_nonzero(decisions > 0))
negative_decision_count = len(decisions) - positive_decision_count

print(f"Positive Samples: {positive_samples.shape[0]}")
print(f"Positive Decision Count: {positive_decision_count}")
print(f"Negative Samples: {negative_samples.shape[0]}")
print(f"Negative Decision Count: {negative_decision_count}")

In [None]:
# Decision score and ground-truth label per test sample.
# The labels are passed as a plain array so the frame gets a positional
# 0..n-1 index: later cells use this frame's index to fancy-index X_test, which
# only works with positions. If y_test were a Series carrying its original
# (shuffled) index, the frame would inherit those labels instead.
df_results = pd.DataFrame({
    'decision': decisions,
    'target': np.asarray(y_test),
})

In [None]:
# Misclassified samples: the sign of the decision score disagrees with the
# label. A score of exactly 0 is treated as a negative prediction, matching
# the counting cell (decision > 0 -> positive, otherwise negative); with a
# strict `< 0` a positive-labelled sample scoring exactly 0 would be missed.
wrong_preds = df_results[
    ((df_results['decision'] > 0) & (df_results['target'] == 0)) |
    ((df_results['decision'] <= 0) & (df_results['target'] == 1))
]

In [None]:
# Feature rows of the misclassified samples.
# assumes wrong_preds.index holds positional row numbers into X_test -- confirm
X_wrong = X_test[wrong_preds.index]
# last expression of the cell: shown as the cell output
X_wrong.shape

In [None]:
# Ground-truth labels of the misclassified samples, selected with the same
# index as X_wrong.
y_wrong = y_test[wrong_preds.index]
# last expression of the cell: shown as the cell output
y_wrong.shape

In [None]:
# Half-width of the band around the decision threshold (0) inside which a
# prediction is treated as "uncertain".
UNCERTAINTY_MARGIN = 0.5

# Samples whose decision score lies strictly inside (-margin, +margin).
uncertain_preds = df_results[df_results['decision'].abs() < UNCERTAINTY_MARGIN]
X_uncertain = X_test[uncertain_preds.index]
y_uncertain = y_test[uncertain_preds.index]

# Only the last expression of a cell is displayed, so show both shapes
# together rather than leaving a bare `X_uncertain.shape` mid-cell where its
# value is silently discarded.
X_uncertain.shape, y_uncertain.shape

In [None]:
# Background set for the SHAP KernelExplainer: up to 100 distinct training
# rows drawn without replacement. The size is clamped to the number of
# available rows because np.random.choice(..., replace=False) raises when asked
# for more samples than the population holds.
background_size = min(100, X_train.shape[0])
background = X_train[np.random.choice(X_train.shape[0], background_size, replace=False)]

In [None]:
# Model-agnostic SHAP explainer over the raw QSVC decision scores (identity
# link), using `background` as the reference data set.
explainer = KernelExplainer(qsvc.decision_function, background, link="identity")

In [None]:
# Number of rows available in each subset of interest.
n_wrong, n_uncertain = X_wrong.shape[0], X_uncertain.shape[0]

# Explain at most 50 rows per subset to bound the SHAP run time.
wrong_sample_size = n_wrong if n_wrong < 50 else 50
uncertain_sample_size = n_uncertain if n_uncertain < 50 else 50

In [None]:
# SHAP explanation of the misclassified test samples.
if wrong_sample_size == 0:
    # With no misclassified samples there is nothing to pass to the explainer,
    # and indexing row 0 for the force plot below would fail.
    print("No misclassified samples to explain - skipping SHAP analysis.")
else:
    # random subset (without replacement) of the misclassified rows
    idxs = np.random.choice(n_wrong, wrong_sample_size, replace=False)
    X_wrong_sample = X_wrong[idxs]

    # shap values for wrongly classified samples
    shap_values_wrong = explainer.shap_values(X_wrong_sample, nsamples=100)

    # mean absolute contribution of each feature across the sampled rows
    summary_plot(
        shap_values_wrong,
        X_wrong_sample,
        feature_names=feature_names,
        plot_type="bar"
    )

    # per-feature push away from the expected value for the first sampled row
    force_plot(
        explainer.expected_value,
        shap_values_wrong[0],
        X_wrong_sample[0],
        feature_names=feature_names,
        matplotlib=True
    )

In [None]:
# SHAP explanation of the uncertain (near-threshold) test samples.
if uncertain_sample_size == 0:
    # With no uncertain samples there is nothing to pass to the explainer,
    # and indexing row 0 for the force plot below would fail.
    print("No uncertain samples to explain - skipping SHAP analysis.")
else:
    # random subset (without replacement) of the uncertain rows
    idxs = np.random.choice(n_uncertain, uncertain_sample_size, replace=False)
    X_uncertain_sample = X_uncertain[idxs]

    # shap values for uncertain (near-threshold) samples
    shap_values_uncertain = explainer.shap_values(X_uncertain_sample, nsamples=100)

    # mean absolute contribution of each feature across the sampled rows
    summary_plot(
        shap_values_uncertain,
        X_uncertain_sample,
        feature_names=feature_names,
        plot_type="bar"
    )

    # per-feature push away from the expected value for the first sampled row
    force_plot(
        explainer.expected_value,
        shap_values_uncertain[0],
        X_uncertain_sample[0],
        feature_names=feature_names,
        matplotlib=True
    )