In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

from dataset_build.clf_dtSet_hugo import build_purity_classification_dataset

In [36]:
# Number of measurement shots passed to the dataset builder.
N_SHOTS = 1000

# Build the purity-classification dataset: the full frame, the feature
# frame X and the label series y.
df_purity, X, y = build_purity_classification_dataset(
    n_shots=N_SHOTS,
    n_states_total=5000,
    mixed_proportion=0.3  # proportion of mixed states
)

# Quick sanity summary of what was generated.
print(f"Dataset shape: {df_purity.shape}")
print(f"Labels distribution:\n{y.value_counts()}")
print(f"\nDataset columns:\n{df_purity.columns.tolist()}")
# Fixed: the header used the invalid escape "\X" (printed a literal backslash
# and triggers an invalid-escape warning); it now matches the "\n" headers above.
print(f"\nX columns (à revoir):\n{X.columns.tolist()}")

df_purity.head()

Dataset shape: (5000, 14)
Labels distribution:
label_purity
1    3500
0    1500
Name: count, dtype: int64

Dataset columns:
['X_mean', 'Y_mean', 'Z_mean', 'X_real', 'Y_real', 'Z_real', 'theta_ideal', 'phi_ideal', 'X_ideal', 'Y_ideal', 'Z_ideal', 'bloch_radius_real', 'is_pure', 'label_purity']
\X columns (à revoir):
['X_mean', 'Y_mean', 'Z_mean', 'bloch_radius_real']


Unnamed: 0,X_mean,Y_mean,Z_mean,X_real,Y_real,Z_real,theta_ideal,phi_ideal,X_ideal,Y_ideal,Z_ideal,bloch_radius_real,is_pure,label_purity
0,-0.528,0.606,0.592,-0.485723,0.645427,0.589489,0.940371,2.215935,-0.485723,0.645427,0.589489,1.0,True,1
1,-0.324,0.182,0.926,-0.294519,0.164106,0.94145,0.343891,2.633239,-0.294519,0.164106,0.94145,1.0,True,1
2,-0.296,-0.2,-0.422,-0.350134,-0.216279,-0.389233,2.479597,3.501139,-0.575386,-0.216279,-0.788767,0.566456,False,0
3,0.146,-0.146,-0.246,0.173851,-0.098473,-0.236238,2.68213,5.762306,0.384655,-0.220688,-0.896291,0.309402,False,0
4,-0.644,-0.588,0.404,-0.667956,-0.616694,0.416561,1.141137,3.887109,-0.667956,-0.616694,0.416561,1.0,True,1


In [37]:
def compute_bloch_fidelity_kernel(R_A, R_B=None):
    """
    Compute the kernel matrix of pairwise quantum fidelities between Bloch
    vectors, using vectorized NumPy operations only (no Python loops).

    For two Bloch vectors r and s the entry is
        F(r, s) = 0.5 * (1 + r . s + sqrt((1 - |r|^2) * (1 - |s|^2)))

    Arguments:
        R_A : array-like (N, 3) - Bloch vectors of the first set (e.g. train).
              A single 1-D vector is treated as one row.
        R_B : array-like (M, 3) - Bloch vectors of the second set (e.g. test).
              If None, the symmetric matrix R_A vs R_A is computed.

    Returns:
        K : ndarray (N, M) - fidelity matrix, K[i, j] = F(R_A[i], R_B[j]).
    """
    # Accept lists / nested sequences and single vectors, not only 2-D ndarrays.
    R_A = np.atleast_2d(np.asarray(R_A))
    # Without a second set, compare the first set against itself.
    R_B = R_A if R_B is None else np.atleast_2d(np.asarray(R_B))

    # 1. Dot product of every pair (r . s): (N, 3) @ (3, M) -> (N, M)
    dot_product_matrix = np.dot(R_A, R_B.T)

    # 2. Squared norms |r|^2 and |s|^2 (sum of squared components per row)
    r_sq_A = np.sum(R_A**2, axis=1)  # shape (N,)
    r_sq_B = np.sum(R_B**2, axis=1)  # shape (M,)

    # 3. Per-vector factor sqrt(1 - |r|^2).
    # The clip to 0 matters: noisy estimates can have |r|^2 > 1, which would
    # otherwise put a negative number under the square root and produce NaNs.
    factor_A = np.sqrt(np.clip(1 - r_sq_A, 0, None))
    factor_B = np.sqrt(np.clip(1 - r_sq_B, 0, None))

    # Outer product gives the (N, M) matrix sqrt((1 - |r|^2) * (1 - |s|^2))
    det_term_matrix = np.outer(factor_A, factor_B)

    # 4. Vectorized fidelity formula
    K = 0.5 * (1 + dot_product_matrix + det_term_matrix)

    return K

In [38]:
# 1. Pull the measured Bloch-vector components (X_mean, Y_mean, Z_mean)
#    and the purity labels out of `df_purity` as NumPy arrays.
cols_bloch = ['X_mean', 'Y_mean', 'Z_mean']
bloch_vectors = df_purity[cols_bloch].to_numpy()   # array of shape (N_samples, 3)
labels = df_purity['label_purity'].to_numpy()      # target labels for the SVM

# 2. Train / test split, done directly on the vectors.
# NOTE(review): the label counts printed above are imbalanced (3500 vs 1500);
# consider passing stratify=labels — left unchanged here to keep results identical.
X_train_vec, X_test_vec, y_train, y_test = train_test_split(
    bloch_vectors,
    labels,
    test_size=0.2,
    random_state=42,
)

In [39]:
# 3. Kernel matrices (cheap to compute because the kernel function is vectorized)

# Train kernel: train vs train, symmetric, shape (N_train, N_train)
print("Calcul du Kernel Train...")
K_train = compute_bloch_fidelity_kernel(X_train_vec)

# Test kernel: test vs train, rectangular, shape (N_test, N_train).
# This is what the SVM needs to predict: similarity of each test sample
# to every training sample.
print("Calcul du Kernel Test...")
K_test = compute_bloch_fidelity_kernel(X_test_vec, X_train_vec)

print(f"Shape K_train: {K_train.shape}")
print(f"Shape K_test: {K_test.shape}")

# Fit an SVC on the precomputed train kernel, then predict from the test kernel.
svc = SVC(kernel="precomputed", C=1.0).fit(K_train, y_train)
y_pred = svc.predict(K_test)

# Evaluation on the held-out split.
acc = accuracy_score(y_test, y_pred)
print(f"Test accuracy: {acc:.4f}\n")

print("Classification report :", classification_report(y_test, y_pred), sep="\n")

print("Matrice de confusion :", confusion_matrix(y_test, y_pred), sep="\n")

Calcul du Kernel Train...


Calcul du Kernel Test...
Shape K_train: (4000, 4000)
Shape K_test: (1000, 4000)
Test accuracy: 0.9650

Classification report :
              precision    recall  f1-score   support

           0       1.00      0.89      0.94       311
           1       0.95      1.00      0.98       689

    accuracy                           0.96      1000
   macro avg       0.98      0.94      0.96      1000
weighted avg       0.97      0.96      0.96      1000

Matrice de confusion :
[[276  35]
 [  0 689]]


Training SVC models