### In this Notebook 
* We Downloaded data set from TUDataset (using from torch_geometric.datasets import TUDataset ) 
* Extracted graph features (structural features) using NetworkX python package.
  
Train the SVM using precomputed kernel   
* Calculated Quantum Kernel using 'FidelityQuantumKernel' from 'qiskit_machine_learning.kernels'. 
* Provided Quantum Kernel/Features to train SVC from Quantum Kernel   

Train SVM using Quantum Kernel as a callable function   
* We set up Quantum Kernel function as a callable function 
* SVM is trained and tested using Quantum Kernel as a callable functionfunction

## Import Packages

In [42]:
#print(classifier.draw(output='text'))

from qiskit_machine_learning.utils import algorithm_globals
from qiskit_machine_learning.kernels import FidelityQuantumKernel
from qiskit_machine_learning.state_fidelities import ComputeUncompute
from qiskit.primitives import StatevectorSampler as Sampler
from qiskit.quantum_info import Statevector, state_fidelity
from qiskit.circuit.library import zz_feature_map

from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score

#from qiskit.primitives import Sampler
from torch_geometric.datasets import TUDataset
from torch_geometric import utils
import networkx as nx
import matplotlib.pyplot as plt
import math
import numpy as np

from typing import List, Tuple, Optional, Dict, Any
import os, math, random
from collections import Counter, defaultdict


### Configuration Parameters

In [43]:
# number of qubits is equal to the number of features
num_qubits = 7

# number of steps performed during the training procedure
tau = 100

# regularization parameter
C = 1000

In [44]:
# Configuration Dictionary
CONFIG = {
    "datasets": ["AIDS", "PROTEINS", "NCI1", "PTC_MR", "MUTAG"],
    "max_graphs_per_dataset": 200,     # lower for speed
    "random_seed": 42,

    # QURI Ego-QW (minimal, on ego subgraphs)
    "quri": {
        "radius": 1,
        "max_nodes": 10,
        "n_samples": 12,
        "times": [0.5, 1.0],
        "trotter_steps": 4,
        "gamma": 1.0,
        "label_weight": 0.5,
        "init_mode": "uniform",
        "use_quri_circuits": True,
        "max_qubits_for_circuit": 10
    },

    # CTQW Full-Graph (pure classical) -- NO trotter_steps
    "ctqw": {
        "times": [0.5, 1.0],
        "gamma": 1.0,
        "use_label_potential": True,
        "hamiltonian": "adjacency"  # or "laplacian"
    },

    # Observables for QURI ego features
    "observables": ["proj_i", "adjacency", "adj_pow_2", "degree"],

    # Baselines
    "use_shortest_path_baseline": True,
    "use_wl_baseline": True,
    "wl_iterations": 3,

    # CV (fast)
    "cv_folds": 5,
    "inner_cv_folds": 2,
    "svm_param_grid": {"C": [1.0, 10.0], "gamma": ["scale", 0.1]},
    "use_class_weight_balanced": True
}

In [45]:
def calc_norm (numbers: list):
    squared_numbers = []
    total=0
    for num in numbers:
        squared_numbers.append(num ** 2)
        total=total+num ** 2
    
    return list(map(lambda item: math.sqrt(item / total), squared_numbers))

In [47]:
# Evaluation Function
def evaluate_features_nested_cv(X: np.ndarray, y: List[int]):
    y = np.array(y)
    skf_outer = StratifiedKFold(n_splits=CONFIG["cv_folds"], shuffle=True, random_state=CONFIG["random_seed"])
    accs, f1s = [], []
    for tr, te in skf_outer.split(X, y):
        
        Xtr, Xte = X[tr], X[te]; ytr, yte = y[tr], y[te]
        scaler = StandardScaler().fit(Xtr)
        Xtr = scaler.transform(Xtr); Xte = scaler.transform(Xte)
        clf = GridSearchCV(
            SVC(kernel="rbf", class_weight="balanced" if CONFIG["use_class_weight_balanced"] else None),
            CONFIG["svm_param_grid"],
            cv=CONFIG["inner_cv_folds"],
            scoring="f1_weighted",
            n_jobs=-1
        )
        clf.fit(Xtr, ytr)
        yp = clf.predict(Xte)
        accs.append(accuracy_score(yte, yp))
        f1s.append(f1_score(yte, yp, average="weighted"))
        print("iteration")
    return np.array(accs), np.array(f1s)

## Prepare the Dataset

In [48]:
ds = TUDataset(root='./data/', name='PROTEINS')
    
algorithm_globals.random_seed = 12345    
feature_map = zz_feature_map(feature_dimension=num_qubits, reps=1)

features=[]
labels=[]
G_with_edge_Attri=[]
kernel_matrix_new=[]
for ds_i in range(len(ds)):

    graph_val=[]
    kernal_in_vect=[]
    G = utils.to_networkx(ds[ds_i])
    G_with_Node_Attri = {}

    
    # Betweenness Centrality: Measures how often a node lies on the shortest path between other nodes
    betweenness_centrality = nx.betweenness_centrality(G)
    total_value=0
    for node, value in betweenness_centrality.items():
        G_with_Node_Attri[node] = [round(value,6)]
        total_value=total_value+value
    average_val= total_value/len(G.nodes)
    graph_val.append(average_val)
    
    
    # Closeness Centrality: Measures how close a node is to all other nodes
    closeness_centrality = nx.closeness_centrality(G)
    total_value=0
    for node, value in closeness_centrality.items():
        G_with_Node_Attri[node].append(round(value,6))
        total_value=total_value+value
    average_val= total_value/len(G.nodes)
    graph_val.append(average_val)
    
    # Calculate the clustering coefficient for each node
    node_clustering_coefficients = nx.clustering(G)
    total_value=0
    for node, value in node_clustering_coefficients.items():
        G_with_Node_Attri[node].append(round(value,6))
        total_value=total_value+value
    average_val= total_value/len(G.nodes)
    graph_val.append(average_val)
    
    # Calculate the eigenvector centrality
    node_eigenvector_centrality = nx.eigenvector_centrality(G, max_iter=50000)
    for node, value in node_eigenvector_centrality.items():
        G_with_Node_Attri[node].append(round(value,6))
        total_value=total_value+value
    average_val= total_value/len(G.nodes)
    graph_val.append(average_val)
    
    # Calculate the load centrality
    total_value=0
    node_load_centrality = nx.load_centrality(G)
    for node, value in node_load_centrality.items():
        G_with_Node_Attri[node].append(round(value,6))
        total_value=total_value+value
    average_val= total_value/len(G.nodes)
    graph_val.append(average_val)

    # Calculate the harmonic centrality
    total_value=0
    node_harmonic_centrality=nx.harmonic_centrality(G)
    max_value = max(node_harmonic_centrality.values())
    for node, value in node_harmonic_centrality.items():
        G_with_Node_Attri[node].append(round((value / max_value),6))
        total_value=total_value+value
    average_val= total_value/len(G.nodes)
    graph_val.append(average_val)

    total_value=0
    for edge,v in G.edges.items():
        val=state_fidelity(Statevector(calc_norm(G_with_Node_Attri[edge[0]])),Statevector(calc_norm(G_with_Node_Attri[edge[1]])))
        G.edges[edge[0], edge[1]]['weight']=val        
        total_value=total_value+val
    average_val= total_value/len(G.edges)
    graph_val.append(average_val)

    # Graph Number, Features of graph, Label
    G_with_edge_Attri.append([ds_i,graph_val, (ds[ds_i].y).item()])

    new_features = [row[1]for row in G_with_edge_Attri]
    new_labels = [row[2]for row in G_with_edge_Attri]

print("classical features calculated")

classical features calculated


### Calculated Quantum Kernel using 'FidelityQuantumKernel'

In [None]:
sampler = Sampler()
fidelity = ComputeUncompute(sampler=sampler)
feature_map = zz_feature_map(feature_dimension=7, reps=1)
comp_kernel = FidelityQuantumKernel(fidelity=fidelity, feature_map=feature_map)
print("Start kernel_matrix")
kernel_matrix = comp_kernel.evaluate(x_vec=new_features)
print("End computing kernel_matrix")

Start kernel_matrix


### Train the SVM using precomputed kernel: Provided Quantum Kernel/Features to train SVC from Quantum Kernel

In [None]:
# SVM using precomputed kernelfor PROTEINS

acc_q_proteins, f1_q_proteins = evaluate_features_nested_cv(kernel_matrix, train_labels)
print(f"Score Classifier using Fidelity QK for (PROTEINS): Acc {acc_q_proteins.mean():.4f} ± {acc_q_proteins.std():.4f} | F1 {f1_q_proteins.mean():.4f} ± {f1_q_proteins.std():.4f}")

## Split Train and Test dataset

In [None]:
from sklearn.model_selection import train_test_split
train_features, test_features, train_labels, test_labels = train_test_split(
    new_features, new_labels, train_size=0.9, shuffle=False
)

In [23]:
file_path = "train_labels.txt"

with open(file_path, 'w') as file:
    for item in train_labels:
        file.write(f"{item}\n")

## Set Up the Quantum Kernel used as callable function

In [25]:
#num_qubits is dimension = 7
c_feature_map = zz_feature_map(feature_dimension=num_qubits, reps=2, entanglement="linear")
sampler = Sampler()
fidelity = ComputeUncompute(sampler=sampler)
callable_kernel = FidelityQuantumKernel(fidelity=fidelity, feature_map=c_feature_map)

## Train SVM using Quantum Kernel as a callable function

In [26]:
callable_kernel_svc = SVC(kernel=callable_kernel.evaluate)
print("Start svc.fit")
callable_kernel_svc.fit(train_features, train_labels)
print("Start svc.score")
adhoc_score_callable_function = callable_kernel_svc.score(test_features, test_labels)
print(f"Callable kernel classification test score: {adhoc_score_callable_function}")
# Last run with 7 feature, Callable kernel classification test score: 0.3991031390134529, 3.5 hours reaining time with 16 GB CPU

Start svc.fit
Start svc.score
Callable kernel classification test score: 0.3991031390134529


In [24]:
# To Save and Load Model
import joblib

    # Save the model
joblib.dump(adhoc_svc, 'adhoc_svc_new.joblib')

    # Load the model
#adhoc_svc = joblib.load('svc_model.joblib')

    # Use the loaded model


['svc_model_pre_t.joblib']