In [5]:
import numpy as np
import scipy.io
from scipy.linalg import eigh
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler

from qiskit import QuantumCircuit
from qiskit.quantum_info import Statevector

# 1) Load QM7, extract 23 eigenvalues per Coulomb matrix => 23 features

def load_qm7_eigenvalues(matfile='qm7.mat'):
    """
    Loads QM7 from `matfile` and for each 23x23 Coulomb matrix
    computes the 23 sorted eigenvalues. Returns X_eig (N,23) and Y (N,).
    """
    data = scipy.io.loadmat(matfile)
    C = data['X']          # shape (N,23,23)
    Y = data['T'].ravel()  # shape (N,)
    N = C.shape[0]

    X_list = []
    for i in range(N):
        M = 0.5*(C[i] + C[i].T)    # symmetrize
        e_vals = eigh(M, eigvals_only=True)
        X_list.append(np.sort(e_vals))
    X_eig = np.array(X_list)      # (N,23)
    return X_eig, Y


# 2) Amplitude‐encoding helper 

def amplitude_encoding_circuit(vec, num_qubits):
    dim = 2**num_qubits
    qc = QuantumCircuit(num_qubits)
    padded = np.zeros(dim, dtype=complex)
    padded[:len(vec)] = vec
    norm = np.linalg.norm(padded)
    if norm < 1e-9:
        padded[0] = 1.0
        norm = 1.0
    padded /= norm
    qc.initialize(padded, range(num_qubits))
    return qc

def build_statevectors(X, num_qubits):
    N = X.shape[0]
    dim = 2**num_qubits
    sv = np.zeros((N, dim), dtype=complex)
    for i in range(N):
        qc = amplitude_encoding_circuit(X[i], num_qubits)
        sv[i] = Statevector.from_instruction(qc).data
    return sv

def compute_kernel_statevector(sv):
    N = sv.shape[0]
    K = np.zeros((N, N))
    for i in range(N):
        for j in range(i, N):
            ov = np.vdot(sv[i], sv[j])
            K[i,j] = abs(ov)**2
            K[j,i] = K[i,j]
    return K


# 3) Main: QSVR on 23-dimensional eigenvalues

def main_qsvr_on_eigenvalues(
    matfile='qm7.mat',
    subset_size=600,
    test_size=0.2,
    num_qubits=5,       # minimum 5 to encode 23 dims
    random_seed=42
):
    """
    1) Loads dataset, flatten => 529 features
    2) Subsamples
    3) Scales features with StandardScaler
    4) Scales targets (subtract mean, divide std)
    5) Train/test split
    6) Amplitude encode -> direct statevector kernel
    7) Fits SVR
    8) inverts target transform, measures final MAE, R^2
    """

    # 1) Loads eigenvalues
    X_all, Y_all = load_qm7_eigenvalues(matfile)
    Ntotal = len(X_all)
    print(f"Loaded {Ntotal} molecules with 23 eigenvalue features.")

    # 2) Subsamples
    np.random.seed(random_seed)
    idxs = np.random.choice(Ntotal, subset_size, replace=False)
    X_sub, Y_sub = X_all[idxs], Y_all[idxs]

    # 3) Scales features & targets
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X_sub)
    Y_mean, Y_std = Y_sub.mean(), Y_sub.std()
    Y_scaled = (Y_sub - Y_mean)/Y_std

    # 4) Train/test split
    X_train, X_test, Y_train, Y_test = train_test_split(
        X_scaled, Y_scaled,
        test_size=test_size, random_state=random_seed
    )
    print(f"Train={X_train.shape[0]}, Test={X_test.shape[0]}. Using {num_qubits} qubits.")

    # 5) Builds kernels via amplitude‐encoded statevectors
    print("Building train statevectors...")
    sv_train = build_statevectors(X_train, num_qubits)
    print("Computing train kernel...")
    K_train = compute_kernel_statevector(sv_train)

    # 6) Fits SVR (precomputed)
    svr = SVR(kernel='precomputed', C=1e4, gamma=1e-3, epsilon=0.01)
    svr.fit(K_train, Y_train)

    # 7) Tests kernel
    print("Building test statevectors...")
    sv_test = build_statevectors(X_test, num_qubits)
    K_test = np.array([[abs(np.vdot(sv_test[i], sv_train[j]))**2
                        for j in range(len(sv_train))]
                       for i in range(len(sv_test))])

    # 8) Predicts & inverts scaling
    Y_pred_scaled = svr.predict(K_test)
    Y_pred = Y_pred_scaled * Y_std + Y_mean
    Y_true = Y_test  * Y_std + Y_mean

    # 9) Evaluates
    print("\nQSVR on 23-D eigenvalues (amp-encoded):")
    print(f" MAE: {mean_absolute_error(Y_true, Y_pred):.3f}")
    print(f"  R2: {r2_score(Y_true, Y_pred):.3f}")

if __name__ == "__main__":
    main_qsvr_on_eigenvalues(
        matfile='qm7.mat',
        subset_size=7165,
        test_size=0.2,
        num_qubits=5,
        random_seed=42
    )


Loaded 7165 molecules with 23 eigenvalue features.
Train=5732, Test=1433. Using 5 qubits.
Building train statevectors...
Computing train kernel...
Building test statevectors...

QSVR on 23-D eigenvalues (amp-encoded):
 MAE: 24.046
  R2: 0.967


In [3]:
# “best” support-vector indices for each representation

import numpy as np
import scipy.io
from scipy.linalg import eigh
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler

from qiskit import QuantumCircuit
from qiskit.quantum_info import Statevector

BEST_IDS_23 = [
    305, 286, 145, 285, 284, 148, 281, 280, 279, 152,
    143, 277, 276, 157, 158, 275, 161, 274, 273, 270,
    166, 154, 269, 287, 139, 112, 309, 306, 115, 117,
    121, 122, 301, 125, 140, 299, 297, 129, 130, 131,
    292, 290, 135, 137, 138, 298, 268, 169, 263, 249,
    204, 246, 206, 207, 245, 244, 243, 213, 202, 214,
    241, 217, 239, 235, 221, 232, 231, 224, 230, 242,
    251, 252, 198, 262, 173, 175, 176, 177, 178, 179,
    180, 261, 260, 259, 185, 186, 187, 188, 189, 257,
    193, 194, 196, 256, 111, 311, 227, 107,  38,  39,
    40, 41, 361, 45, 48, 49, 366, 50, 52, 109,
    54, 56, 354, 61, 352, 351, 51, 64, 368, 373,
    397,   5,   6, 390,   9, 384, 383, 15, 29, 382,
    379, 21, 377, 375, 374, 25, 26, 27, 18, 350,
    53, 349, 85, 87, 88, 66, 91, 92, 93, 95,
    96, 325, 321, 101, 103, 105, 106, 331,  83,  89,
    334, 348, 345,  82,  70,  74, 344, 341,  71,  79,
    336, 303, 102,  34, 132,  46, 191, 160, 389, 392,
    86, 199, 391, 313, 182, 174, 155, 381, 209, 362,
    304,  14, 386, 317,  31, 226, 367,  72, 358, 378,
    219,  73,   0, 393, 104, 201, 267,   7, 237,   2,
    387, 134, 114, 324,  37, 222,  81, 288, 123, 310,
    372, 212,   1,  65, 332, 225, 156, 228,  17, 172,
    283, 167, 124,  62,  80, 380, 144, 162, 234, 320,
    216, 371, 388, 147, 338, 218, 238,  12,  20,  67,
    370, 150, 343, 210, 159, 319, 340, 247, 183,  43,
    363, 240, 289, 385, 127, 314, 168, 295,  24, 335,
    369, 220, 116, 236, 328, 339, 133, 398, 399, 357,
    100,   4, 322, 315, 360, 163,  23, 356, 265, 266,
    192, 396,  13, 151,  33,  16, 253,  84, 141, 171
]

#1) Load QM7 eigenvalues
def load_qm7_eigenvalues(matfile='qm7.mat'):
    data = scipy.io.loadmat(matfile)
    C = data['X']          # shape (N,23,23)
    Y = data['T'].ravel()  # shape (N,)
    N = C.shape[0]

    X_list = []
    for i in range(N):
        M = 0.5*(C[i] + C[i].T)    
        e_vals = eigh(M, eigvals_only=True)
        X_list.append(np.sort(e_vals))
    return np.array(X_list), Y

#) Amplitude‐encoding helpers
def amplitude_encoding_circuit(vec, num_qubits):
    dim = 2**num_qubits
    qc = QuantumCircuit(num_qubits)
    padded = np.zeros(dim, dtype=complex)
    padded[:len(vec)] = vec
    norm = np.linalg.norm(padded)
    if norm < 1e-9:
        padded[0] = 1.0
        norm = 1.0
    padded /= norm
    qc.initialize(padded, range(num_qubits))
    return qc

def build_statevectors(X, num_qubits):
    N = X.shape[0]
    dim = 2**num_qubits
    sv = np.zeros((N, dim), dtype=complex)
    for i in range(N):
        qc = amplitude_encoding_circuit(X[i], num_qubits)
        sv[i] = Statevector.from_instruction(qc).data
    return sv

def compute_kernel_statevector(sv):
    N = sv.shape[0]
    K = np.zeros((N, N))
    for i in range(N):
        for j in range(i, N):
            ov = np.vdot(sv[i], sv[j])
            K[i,j] = abs(ov)**2
            K[j,i] = K[i,j]
    return K

#3) Main: QSVR on top 300 by BEST_IDS_23 
def main_qsvr_best300(
    matfile='qm7.mat',
    subset_size=300,
    test_size=0.2,
    num_qubits=5,
    random_seed=42
):
    # 1) Load eigenvalues
    X_all, Y_all = load_qm7_eigenvalues(matfile)
    print(f"Loaded {len(X_all)} molecules with 23-dim eigenvalues.")

    # 2) Take top 300 best IDs
    idxs = BEST_IDS_23[:subset_size]
    X_sub, Y_sub = X_all[idxs], Y_all[idxs]
    print(f"Subsampled top {subset_size} best molecules via BEST_IDS_23")

    # 3) Scale
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X_sub)
    Y_mean, Y_std = Y_sub.mean(), Y_sub.std()
    Y_scaled = (Y_sub - Y_mean)/Y_std

    # 4) Split
    X_train, X_test, Y_train, Y_test = train_test_split(
        X_scaled, Y_scaled,
        test_size=test_size, random_state=random_seed
    )
    print(f"Train={len(X_train)}, Test={len(X_test)}, Qubits={num_qubits}")

    # 5) Kernel
    sv_train = build_statevectors(X_train, num_qubits)
    K_train = compute_kernel_statevector(sv_train)

    # 6) SVR
    svr = SVR(kernel='precomputed', C=1e4, gamma=1e-3, epsilon=0.01)
    svr.fit(K_train, Y_train)

    # 7) Test kernel
    sv_test = build_statevectors(X_test, num_qubits)
    K_test = np.array([[abs(np.vdot(sv_test[i], sv_train[j]))**2
                        for j in range(len(sv_train))]
                       for i in range(len(sv_test))])

    # 8) Predict & invert
    Y_pred_s = svr.predict(K_test)
    Y_pred   = Y_pred_s * Y_std + Y_mean
    Y_true   = Y_test  * Y_std + Y_mean

    # 9) Evaluate
    print("\nQSVR on top 300 eigenvalues (amp-encoded):")
    print(f"  MAE = {mean_absolute_error(Y_true, Y_pred):.3f}")
    print(f"  R²  = {r2_score(Y_true, Y_pred):.3f}\n")

if __name__ == "__main__":
    main_qsvr_best300(
        matfile="qm7.mat",
        subset_size=300, #works up to 300
        test_size=0.2,
        num_qubits=5,
        random_seed=42
    )

Loaded 7165 molecules with 23-dim eigenvalues.
Subsampled top 300 best molecules via BEST_IDS_23
Train=240, Test=60, Qubits=5

QSVR on top 300 eigenvalues (amp-encoded):
  MAE = 38.836
  R²  = 0.958

