## N-Qubit Boltzmann Machine

We use an n-qubit system, where the goal is to learn the parameters of the Ising Hamiltonian:

$H = \sum_{i} \sum_{j}\sum_{k} \sum_{k'} w_{ij}^{kk'} \sigma_i^k \otimes \sigma_j^{k'} + \sum_{i}\sum_{k} h^k_i \sigma_{i}^k$

where $w_{ij}^{kk'}$ is the coupling strength, $h_i^k$ are the local fields, and $\sigma_i^k$ are the Pauli operators. For the Pauli matrices, the indices $k=1,2,3$ correspond to $x,y,z$ and the index $k=0$ to the identity. For two qubits, the explicit Hamiltonian is written as:

$H = \sum_{k} \sum_{k'} w_{01}^{kk'} \sigma_0^k \otimes \sigma_1^{k'} + \sum_{k} h^k_0 \sigma_0^k \otimes \sigma_1^0 + \sum_{k'} h^{k'}_1 \sigma_0^0 \otimes \sigma_1^{k'}$

A state vector follows the numbering convention $|s_0, \dots, s_{n-1}\rangle$.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.transforms as mtransforms
from numba import njit
from itertools import product
from scipy.linalg import kron

In [2]:
def generate_interaction_matrices(num_qubits):
    """
    Build the dense matrix of every Pauli string on `num_qubits` qubits.

    Every tuple in {I, X, Y, Z}^num_qubits is visited in lexicographic order
    (leftmost factor varies slowest) and its single-qubit Pauli matrices are
    combined with the Kronecker product, leftmost factor first.

    Parameters
    ----------
    num_qubits : int
        Number of qubits.

    Returns
    -------
    interaction_matrices : np.ndarray
        complex128 array of shape (4**num_qubits, 2**num_qubits, 2**num_qubits).
    interaction_labels : list of tuple of str
        Labels such as ('I', 'X', 'Z'), in the same order as the matrices.
    """
    pauI = np.array([[1, 0], [0, 1]], np.complex128)
    pauX = np.array([[0, 1], [1, 0]], np.complex128)
    pauY = np.array([[0, -1j], [1j, 0]], np.complex128)
    pauZ = np.array([[1, 0], [0, -1]], np.complex128)
    pau = (pauI, pauX, pauY, pauZ)
    pauli_labels = ('I', 'X', 'Y', 'Z')

    interaction_matrices = []   # one tensor product per Pauli string
    interaction_labels = []     # matching tuple of Pauli letters

    for matrix_index_tuple in product(range(4), repeat=num_qubits):
        # Start from the 1x1 identity: kron(1, A) == A, so the result is
        # unchanged for num_qubits >= 1 and the empty string (num_qubits == 0)
        # no longer fails on indexing a missing first factor.
        interaction_matrix = np.ones((1, 1), dtype=np.complex128)
        for index in matrix_index_tuple:
            # np.kron is used instead of scipy.linalg.kron, which SciPy has
            # deprecated in favour of the NumPy function.
            interaction_matrix = np.kron(interaction_matrix, pau[index])
        interaction_matrices.append(interaction_matrix)
        interaction_labels.append(tuple(pauli_labels[i] for i in matrix_index_tuple))

    # A single 3-D array (rather than a list) is what the numba routines expect.
    interaction_matrices = np.array(interaction_matrices, dtype=np.complex128)
    return interaction_matrices, interaction_labels


`interaction_matrices` is an array containing all interaction matrices. For a 3-qubit system the entries are ordered as follows, with the qubits numbered 1 to 3 from left to right (the same numbering the weight symbols use):  
(I, I, I) -> $- \log (Z)$  
(I, I, X) -> $\sigma^x_3$  
(I, I, Y) -> $\sigma^y_3$  
(I, I, Z) -> $\sigma^z_3$  
(I, X, I) -> $\sigma^x_2$  
(I, X, X) -> $\sigma^{xx}_{23}$  
(I, X, Y) -> $\sigma^{xy}_{23}$  
(I, X, Z) -> $\sigma^{xz}_{23}$  
(I, Y, I) -> $\sigma^{y}_2$  
(I, Y, X) -> $\sigma^{yx}_{23}$  
(I, Y, Y) -> $\sigma^{yy}_{23}$  
(I, Y, Z) -> $\sigma^{yz}_{23}$  
...  

The interaction matrices are stored along the first axis of one array, in the lexicographic order of their Pauli labels shown above.
The weight vector `w[i]` must therefore follow the same ordering. For an N-qubit system it is hard to keep track of the weights by their lexicographic index, so we assign each weight a readable symbol instead:

In [3]:
def generate_weights_symbols(num_qubits):
    """
    Map every Pauli-string label to a readable weight symbol.

    The keys are the tuples of {I, X, Y, Z}^num_qubits in lexicographic order.
    Each value lists the non-identity factors as "<Pauli>_<position>" (positions
    counted from 1, left to right) joined by commas, e.g. ('X', 'I', 'Z') maps
    to "X_1,Z_3". The all-identity string maps to "log(Z) = 0 ".
    """
    pauli_matrices = ['I', 'X', 'Y', 'Z']
    weights_symbols = {}
    for interaction in product(pauli_matrices, repeat=num_qubits):
        parts = []
        for position, label in enumerate(interaction, start=1):
            if label != 'I':
                parts.append(f"{label}_{position}")
        symbol = ",".join(parts)
        if not symbol:
            # No non-identity factor: this is the constant (normalisation) term.
            symbol = "log(Z) = 0 "
        weights_symbols[interaction] = symbol
    return weights_symbols


In [9]:
def print_matrix(matrix):
    """
    Print a complex matrix in a compact, row-per-line form.

    Each row is printed as "[e1 e2 ... ]". Parts whose magnitude is below
    1e-10 are treated as zero, so an entry is shown as "0", a pure real
    number, a pure imaginary number, or "<real><sign><imag>j". A blank line
    is printed after the last row.

    Parameters
    ----------
    matrix : iterable of iterables
        Rows of entries exposing `.real` and `.imag` (e.g. a 2-D numpy array).
    """
    for row in matrix:
        print('[', end='')
        for elem in row:
            real = elem.real
            imag = elem.imag
            if abs(real) < 1e-10 and abs(imag) < 1e-10:
                print("0", end=' ')
            elif abs(real) < 1e-10:
                print(f"{imag:.0f}j", end=' ')
            elif abs(imag) < 1e-10:
                print(f"{real:.0f}", end=' ')
            else:
                # The '+' flag makes the format emit the sign itself, so a
                # negative imaginary part prints as "1-1.00j", not "1+-1.00j".
                print(f"{real:.0f}{imag:+.2f}j", end=' ')
        print(']')
    print()

def test_weights(interaction_matrices, interaction_labels, weights_symbols, weights_values):
    """
    Print each interaction with the weight assigned to it.

    The matrices and labels are walked in lockstep. For every pair the label
    is looked up in `weights_symbols`, the resulting symbol is looked up in
    `weights_values`, and the label, symbol, value and matrix are printed.
    """
    for matrix, tag in zip(interaction_matrices, interaction_labels):
        print(f"Interaction: {tag}")
        symbol = weights_symbols[tag]        # label tuple -> readable symbol
        value = weights_values[symbol]       # symbol -> numeric weight
        print(f"Weight symbol: {symbol}")
        print(f"Weight value: {value}")
        print("Interaction matrix: ")
        print_matrix(matrix)
        print()


For a 3-qubit system there will be 4^3 = 64 total interactions. We set the 3-body interactions to zero.

In [10]:
# Build all 3-qubit interaction matrices together with their labels and symbols
num_qubits = 3
interaction_matrices, interaction_labels = generate_interaction_matrices(num_qubits)
weights_symbols = generate_weights_symbols(num_qubits)

weights_values = {
    # constant term
    "log(Z) = 0 ": 1.0,
    # one-body terms (local fields)
    "X_1": 0.5,
    "Y_1": 0.6,
    "Z_1": 0.7,
    "X_2": 0.8,
    "Y_2": 0.9,
    "Z_2": 1.1,
    "X_3": 1.2,
    "Y_3": 1.3,
    "Z_3": 1.4,
    # two-body terms, qubits 1 and 2
    "X_1,X_2": 0.15,
    "X_1,Y_2": 0.16,
    "X_1,Z_2": 0.17,
    "Y_1,X_2": 0.18,
    "Y_1,Y_2": 0.19,
    "Y_1,Z_2": 0.20,
    "Z_1,X_2": 0.21,
    "Z_1,Y_2": 0.22,
    "Z_1,Z_2": 0.23,
    # two-body terms, qubits 1 and 3
    "X_1,X_3": 0.24,
    "X_1,Y_3": 0.25,
    "X_1,Z_3": 0.26,
    "Y_1,X_3": 0.27,
    "Y_1,Y_3": 0.28,
    "Y_1,Z_3": 0.29,
    "Z_1,X_3": 0.30,
    "Z_1,Y_3": 0.31,
    "Z_1,Z_3": 0.32,
    # two-body terms, qubits 2 and 3
    "X_2,X_3": 0.33,
    "X_2,Y_3": 0.34,
    "X_2,Z_3": 0.35,
    "Y_2,X_3": 0.36,
    "Y_2,Y_3": 0.37,
    "Y_2,Z_3": 0.38,
    "Z_2,X_3": 0.39,
    "Z_2,Y_3": 0.40,
    "Z_2,Z_3": 0.41,
}

pauli_matrices = ['I', 'X', 'Y', 'Z']
# Three-body terms: every combination without an identity factor gets weight 0.
# Index 0 is the identity, so the loops start at 1.
for i in range(1, 4):
    for j in range(1, 4):
        for k in range(1, 4):
            three_body_symbol = f"{pauli_matrices[i]}_1,{pauli_matrices[j]}_2,{pauli_matrices[k]}_3"
            weights_values[three_body_symbol] = 0.0

# Print every interaction with its symbol, value and matrix
test_weights(interaction_matrices, interaction_labels, weights_symbols, weights_values)

Interaction: ('I', 'I', 'I')
Weight symbol: log(Z) = 0 
Weight value: 1.0
Interaction matrix: 
[1 0 0 0 0 0 0 0 ]
[0 1 0 0 0 0 0 0 ]
[0 0 1 0 0 0 0 0 ]
[0 0 0 1 0 0 0 0 ]
[0 0 0 0 1 0 0 0 ]
[0 0 0 0 0 1 0 0 ]
[0 0 0 0 0 0 1 0 ]
[0 0 0 0 0 0 0 1 ]


Interaction: ('I', 'I', 'X')
Weight symbol: X_3
Weight value: 1.2
Interaction matrix: 
[0 1 0 0 0 0 0 0 ]
[1 0 0 0 0 0 0 0 ]
[0 0 0 1 0 0 0 0 ]
[0 0 1 0 0 0 0 0 ]
[0 0 0 0 0 1 0 0 ]
[0 0 0 0 1 0 0 0 ]
[0 0 0 0 0 0 0 1 ]
[0 0 0 0 0 0 1 0 ]


Interaction: ('I', 'I', 'Y')
Weight symbol: Y_3
Weight value: 1.3
Interaction matrix: 
[0 -1j 0 0 0 0 0 0 ]
[1j 0 0 0 0 0 0 0 ]
[0 0 0 -1j 0 0 0 0 ]
[0 0 1j 0 0 0 0 0 ]
[0 0 0 0 0 -1j 0 0 ]
[0 0 0 0 1j 0 0 0 ]
[0 0 0 0 0 0 0 -1j ]
[0 0 0 0 0 0 1j 0 ]


Interaction: ('I', 'I', 'Z')
Weight symbol: Z_3
Weight value: 1.4
Interaction matrix: 
[1 0 0 0 0 0 0 0 ]
[0 -1 0 0 0 0 0 0 ]
[0 0 1 0 0 0 0 0 ]
[0 0 0 -1 0 0 0 0 ]
[0 0 0 0 1 0 0 0 ]
[0 0 0 0 0 -1 0 0 ]
[0 0 0 0 0 0 1 0 ]
[0 0 0 0 0 0 0 -1 ]


Interaction:

Let's test if this works as intended:

In [None]:
def generate_interaction_matrix(interaction):
    """
    Return the Kronecker product of the Pauli matrices named in `interaction`.

    `interaction` is a non-empty sequence of the letters 'I', 'X', 'Y', 'Z';
    the factors are multiplied left to right, so ('X', 'Z') gives X (x) Z.
    """
    pau = {
        'I': np.array([[1, 0], [0, 1]], np.complex128),
        'X': np.array([[0, 1], [1, 0]], np.complex128),
        'Y': np.array([[0, -1j], [1j, 0]], np.complex128),
        'Z': np.array([[1, 0], [0, -1]], np.complex128),
    }

    # Resolve every label first, then fold the factors together.
    factors = [pau[label] for label in interaction]
    tensor_product = factors[0]
    for factor in factors[1:]:
        tensor_product = np.kron(tensor_product, factor)
    return tensor_product


In [None]:
# Check generate_interaction_matrix: build every 3-qubit interaction one label at
# a time and print it next to the weight assigned to it.
num_qubits = 3
pauli_matrices = ['I', 'X', 'Y', 'Z']   # defined here so the cell does not rely on an earlier one
interactions = list(product(pauli_matrices, repeat=num_qubits))
weights_symbols = generate_weights_symbols(num_qubits)
weights_values = {
    "log(Z) = 0 ": 1.0,
    "X_1": 0.5,
    "Y_1": 0.6,
    "Z_1": 0.7,
    "X_2": 0.8,
    "Y_2": 0.9,
    "Z_2": 1.1,
    "X_3": 1.2,
    "Y_3": 1.3,
    "Z_3": 1.4,
    "X_1,X_2": 0.15,
    "X_1,Y_2": 0.16,
    "X_1,Z_2": 0.17,
    "Y_1,X_2": 0.18,
    "Y_1,Y_2": 0.19,
    "Y_1,Z_2": 0.20,
    "Z_1,X_2": 0.21,
    "Z_1,Y_2": 0.22,
    "Z_1,Z_2": 0.23,
    "X_1,X_3": 0.24,
    "X_1,Y_3": 0.25,
    "X_1,Z_3": 0.26,
    "Y_1,X_3": 0.27,
    "Y_1,Y_3": 0.28,
    "Y_1,Z_3": 0.29,
    "Z_1,X_3": 0.30,
    "Z_1,Y_3": 0.31,
    "Z_1,Z_3": 0.32,
    "X_2,X_3": 0.33,
    "X_2,Y_3": 0.34,
    "X_2,Z_3": 0.35,
    "Y_2,X_3": 0.36,
    "Y_2,Y_3": 0.37,
    "Y_2,Z_3": 0.38,
    "Z_2,X_3": 0.39,
    "Z_2,Y_3": 0.40,
    "Z_2,Z_3": 0.41,
}

# Set 3-body interactions to 0
for i in range(4):
    for j in range(4):
        for k in range(4):
            if i != 0 and j != 0 and k != 0:
                weights_values[f"{pauli_matrices[i]}_1,{pauli_matrices[j]}_2,{pauli_matrices[k]}_3"] = 0.0

# Build the matrices with the function under test. A separate name is used so
# the array produced by generate_interaction_matrices is not overwritten.
single_interaction_matrices = [generate_interaction_matrix(interaction) for interaction in interactions]

# Run the test_weights function. It takes the matrices AND their labels; the
# label tuples in `interactions` are exactly the keys of weights_symbols.
test_weights(single_interaction_matrices, interactions, weights_symbols, weights_values)


In [None]:
@njit
def expmat(A):
    """
    Matrix exponential of `A`, computed from an eigendecomposition.

    `A` is first replaced by its Hermitian part (A + A^H) / 2; the result is
    sum_k exp(lambda_k) |v_k><v_k| over the eigenpairs returned by
    np.linalg.eigh. The output is a complex128 array.
    """
    hermitian_part = 0.5 * (A + np.transpose(np.conjugate(A)))
    evals, evecs = np.linalg.eigh(hermitian_part)
    dim = len(evals)
    result = np.zeros((dim, dim), np.complex128)
    for k in range(dim):
        v = evecs[:, k]                                        # k-th eigenvector (column)
        result += np.exp(evals[k]) * np.outer(v, v.conj())     # exp(lambda_k) |v_k><v_k|
    return result

@njit
def logmat(A):
    """
    Natural matrix logarithm of `A`, computed from an eigendecomposition.

    `A` is first replaced by its Hermitian part (A + A^H) / 2; the result is
    sum_k log(lambda_k) |v_k><v_k| over the eigenpairs returned by
    np.linalg.eigh. np.log is applied to the eigenvalues as they come, so
    they are presumably expected to be strictly positive.
    """
    hermitian_part = 0.5 * (A + np.transpose(np.conjugate(A)))
    evals, evecs = np.linalg.eigh(hermitian_part)
    dim = len(evals)
    result = np.zeros((dim, dim), np.complex128)
    for k in range(dim):
        v = evecs[:, k]                                        # k-th eigenvector (column)
        result += np.log(evals[k]) * np.outer(v, v.conj())     # log(lambda_k) |v_k><v_k|
    return result

In [None]:
@njit
def hamiltonian_n_qubits(w, interaction_matrices, num_qubits):
    """
    Assemble the Hamiltonian H = sum_i w[i] * interaction_matrices[i].

    The result is a complex128 matrix of size 2**num_qubits; one term is
    added for every entry of `w`.
    """
    dim = 2**num_qubits
    H = np.zeros((dim, dim), dtype=np.complex128)
    num_terms = len(w)
    for term in range(num_terms):
        H += w[term] * interaction_matrices[term]
    return H


@njit
def rho_model(w, interaction_matrices, num_qubits):
    """
    Density matrix rho = exp(H) / Tr[exp(H)] of the model with weights `w`.

    H is built by hamiltonian_n_qubits and exponentiated by expmat (exact
    diagonalization); dividing by the real part of the trace gives Tr[rho] = 1.
    """
    H = hamiltonian_n_qubits(w, interaction_matrices, num_qubits)
    unnormalized = expmat(H)
    partition_function = np.real(np.trace(unnormalized))   # Z
    return unnormalized / partition_function

@njit
def observables(rho, interaction_matrices, num_qubits=None):
    """
    Computes the expectation values Tr[rho * M_i] of the interaction matrices.

    Parameters
    ----------
    rho : 2-D array
        Density matrix.
    interaction_matrices : 3-D array
        Stack of observables; one expectation value is returned per entry
        along the first axis.
    num_qubits : optional
        Unused. Accepted so that call sites passing the same argument list
        as rho_model / hamiltonian_n_qubits keep working.

    Returns
    -------
    obs : 1-D complex128 array
        Real part of each trace (the imaginary part is discarded).
    """
    num_observables = len(interaction_matrices)
    obs = np.zeros(num_observables, dtype=np.complex128)
    rho_contig = np.ascontiguousarray(rho)          # contiguous operands for np.dot under numba

    for i in range(num_observables):
        # The contiguous copy has to be taken per matrix, inside the loop;
        # taking it before the loop read a variable that did not exist yet.
        interaction_matrix_contig = np.ascontiguousarray(interaction_matrices[i])
        obs[i] = np.real(np.trace(np.dot(rho_contig, interaction_matrix_contig)))
    return obs

@njit
def KL_divergence(eta,rho):
    """
    Quantum relative entropy Tr[eta (log eta - log rho)] between the target
    density matrix `eta` and the model density matrix `rho` (real part).
    """
    log_difference = logmat(eta) - logmat(rho)
    return np.real(np.trace(eta @ log_difference))

@njit
def QM_likelihood(eta,rho):
    """
    Returns -Tr[eta log rho] (real part) for the target density matrix `eta`
    and the model density matrix `rho`.
    """
    log_rho = logmat(rho)
    overlap = np.trace(eta @ log_rho)
    return -np.real(overlap)

In [None]:
###  PLOTTING FUNCTIONS
#----------------------------------------------------------------------
def _convergence_panel(fig, position, its, values, ylabel, color, tag, trans, size):
    """Draw one log-scale scatter panel of `values` against `its` and return its axes."""
    ax = fig.add_subplot(1, 3, position)
    ax.scatter(its, values, s=size, marker='o', color=color)
    ax.set_xlabel("Iterations", fontsize=20)
    ax.set_ylabel(ylabel, fontsize=20)
    ax.set_yscale('log')
    ax.text(0, 1.0, tag, transform=ax.transAxes + trans, fontsize='large',
            fontweight='bold', va='bottom', fontfamily='sans-serif')
    return ax


def plot(it, Wmax, kl, lk, title='Convergence Plots', size=10, lk_ylim=None):
    """
    Plot three convergence curves side by side on logarithmic y-axes.

    Parameters
    ----------
    it : int
        Number of iterations performed; only the first `it` entries of each
        series are drawn, against iteration numbers 1..it.
    Wmax, kl, lk : sequences
        Series for panel A (labelled (Delta w)_max), panel B (KL divergence)
        and panel C (log likelihood).
    title : str
        Figure title.
    size : float
        Marker size passed to scatter.
    lk_ylim : (low, high), optional
        y-limits for the likelihood panel. By default the axis is autoscaled;
        the limits used to be fixed to values from one particular run, which
        puts the data of any other run out of view.
    """
    fig = plt.figure(figsize=(25, 6))
    fig.suptitle(title, fontsize=30, y=1)
    # Small offset that places the panel tags just above the top-left corner of each axes
    trans = mtransforms.ScaledTranslation(-20/72, 7/72, fig.dpi_scale_trans)
    its = np.arange(1, it + 1, 1)

    _convergence_panel(fig, 1, its, Wmax[:it], r"$ (\Delta w)_{max}$", "Coral", 'A.)', trans, size)
    _convergence_panel(fig, 2, its, kl[:it], "KL Divergence", "ForestGreen", 'B.)', trans, size)
    # The third panel is tagged 'C.)' (it previously repeated 'B.)').
    ax3 = _convergence_panel(fig, 3, its, lk[:it], "Log Likelihood", "HotPink", 'C.)', trans, size)
    if lk_ylim is not None:
        ax3.set_ylim(lk_ylim)

In [None]:
@njit
def generate_random_parameter_matrix(random_seed, num_qubits):
    """
    Draw a reproducible random weight vector of length 4**num_qubits.

    The generator is seeded with `random_seed`, the entries are drawn with
    np.random.rand, and the first entry (the all-identity term) is set to 0.
    """
    np.random.seed(random_seed)
    w = np.random.rand(4**num_qubits)   # one weight per Pauli string
    w[0] = 0
    return w

@njit
def quantum_boltzmann_machine(interaction_matrices, learning_rate, maxiter, tolerance, random_seed, w_eta=None, eta=None):
    """
    Train the model to fit the target distribution eta.

    The weights are updated by w += learning_rate * (clamped - model
    statistics) until the largest difference between the two sets of
    expectation values drops to `tolerance` or `maxiter` iterations are done.

    Parameters
    ----------
    interaction_matrices : 3-D array
        Stack of interaction matrices; the number of qubits is inferred from
        the size of one matrix (2**num_qubits).
    learning_rate : float
        Step size of the update.
    maxiter : int
        Maximum number of iterations.
    tolerance : float
        Convergence threshold on max |obs_model - obs_clamped|.
    random_seed : int
        The initial weights are drawn with seed `random_seed + 1`.
    w_eta : 1-D array, optional
        Oracle weights; when given, the target density matrix is computed
        from them and `eta` is ignored.
    eta : 2-D array, optional
        Target density matrix, used when `w_eta` is not given.

    Returns
    -------
    w : learned weights
    lk_list, kl_list : arrays of length maxiter
        Likelihood and KL divergence per iteration (first `it` entries filled).
    Wmax_list : array of length maxiter
        max |w - w_eta| per iteration when `w_eta` is given, otherwise the
        largest weight change of that iteration.
    it : number of iterations performed

    Raises
    ------
    ValueError
        If neither `w_eta` nor `eta` is provided.
    """
    if w_eta is None and eta is None:
        raise ValueError("either w_eta or eta must be provided")

    # Infer the number of qubits from the matrix dimension (dim == 2**num_qubits)
    # instead of reading a global that is not a parameter of this function.
    dim = interaction_matrices.shape[1]
    num_qubits = 0
    while 2**num_qubits < dim:
        num_qubits += 1

    # Target density matrix: exact diagonalization of the oracle weights if they
    # are provided, otherwise the density matrix passed in. A separate name is
    # used so the variable has a single type in either case.
    if w_eta is not None:
        target = rho_model(w_eta, interaction_matrices, num_qubits)
    else:
        target = eta
    obs_clamped = observables(target, interaction_matrices)            # clamped QM statistics

    w = generate_random_parameter_matrix(random_seed + 1, num_qubits)  # initial weights

    it = 0
    diff = np.inf
    Wmax_list, lk_list, kl_list = np.zeros(maxiter), np.zeros(maxiter), np.zeros(maxiter)

    while diff > tolerance and it < maxiter:
        rho = rho_model(w, interaction_matrices, num_qubits)           # free QM statistics
        obs_model = observables(rho, interaction_matrices)
        step = learning_rate * np.real(obs_clamped - obs_model)
        w += step                                                      # update weights

        diff = np.max(np.abs(obs_model - obs_clamped))                 # clamped vs. model statistics

        if w_eta is not None:
            Wmax_list[it] = np.max(np.abs(w - w_eta))                  # distance to the oracle weights
        else:
            Wmax_list[it] = np.max(np.abs(step))                       # no oracle: size of this update

        # Both metrics are evaluated with rho from before this iteration's update
        lk_list[it] = QM_likelihood(target, rho)
        kl_list[it] = KL_divergence(target, rho)
        it += 1

    return w, lk_list, kl_list, Wmax_list, it


# Results 2-qubit system

In [None]:
def generate_w(wx1x2, wy1y2, w1z2z, hx1, hx2, hy1, hy2, hz1, hz2):
    """
    Arrange 2-qubit fields and diagonal couplings in a 4x4 weight matrix.

    Row index is the Pauli on qubit 1 and column index the Pauli on qubit 2
    (0 = I, 1 = X, 2 = Y, 3 = Z): the first row holds the fields on qubit 2,
    the first column the fields on qubit 1, and the diagonal the XX, YY and
    ZZ couplings. All other entries are 0.
    """
    identity_row = [0, hx2, hy2, hz2]
    x_row = [hx1, wx1x2, 0, 0]
    y_row = [hy1, 0, wy1y2, 0]
    z_row = [hz1, 0, 0, w1z2z]
    return np.array([identity_row, x_row, y_row, z_row])

#interaction parameters
wx1x2 = 0
wy1y2 = 0
wz1z2 = 2
hx1   = 0
hx2   = 0
hy1   = 0
hy2   = 0
hz1   = 1
hz2   = 1
    
w = generate_w(wx1x2 ,wy1y2, wz1z2, hx1, hx2, hy1, hy2, hz1, hz2)

#learning parameters
learning_rate  = 0.6      #  learning rate 
maxiter        = 2**20    #  iterations
tolerance      = 1e-14    #  tolerance
random_seed    = 555

#get interaction matrices
num_qubits     = 2
interaction_matrices  = generate_interaction_matrices(num_qubits)     

In [None]:
w, w_eta, KL, Wmax, it = learn_w(interaction_matrices, learning_rate, maxiter, tolerance, random_seed, num_qubits)
plot(it, KL, Wmax, title=f'Convergence Plots for {num_qubits} qubits', size=10)
plt.savefig('Figures/N Qubit QBM/convergence', bbox_inches='tight')

In [None]:
np.set_printoptions(formatter={'float_kind':"{:.6f}".format}) # print floats with 6 decimal places
# Alternative formatter for complex entries (disabled):
#np.set_printoptions(formatter={'complex_kind': lambda x: "{:.2f}+{:.2f}j".format(x.real, x.imag)})

# Element-wise absolute difference between the weights w and w_eta
print(np.abs(w-w_eta))

# Results 3-qubit system

In [None]:
#learning parameters
learning_rate = 0.1    #  learning rate 
maxiter  = 200         #  iterations
tolerance = 1e-10      #  tolerance
random_seed = 555
num_qubits = 6

interactions     = generate_interaction_matrices(num_qubits)     #  generate interaction matrices

In [None]:
interaction_matrices     = generate_interaction_matrices(6)     #  generate interaction matrices]
print(interaction_matrices.shape)

In [None]:
w, w_eta, KL, Wmax, it = learn_w(interactions, learning_rate, maxiter, tolerance, random_seed, num_qubits)
plot(it, KL, Wmax, title='Convergence Plots', size=10)
plt.savefig('Figures/N Qubit QBM/convergence', bbox_inches='tight')
print(Wmax)

In [None]:
np.set_printoptions(formatter={'float_kind':"{:.6f}".format}) # print floats with 6 decimal places
# Alternative formatter for complex entries (disabled):
#np.set_printoptions(formatter={'complex_kind': lambda x: "{:.2f}+{:.2f}j".format(x.real, x.imag)})

# Element-wise absolute difference between the weights w and w_eta
print(np.abs(w-w_eta))

# Results 4-Qubit system

In [None]:
#learning parameters
learning_rate       = 0.5      #  learning rate 
maxiter  = 2**16    #  iterations
tolerance      = 1e-10    #  tolerance
random_seed = 555
num_qubits = 4

interactions     = generate_interaction_matrices(num_qubits)     #  generate interaction matrices

In [None]:
w, w_eta, KL, Wmax, it = learn_w(interaction_matrices, learning_rate, maxiter, tolerance, eta_seed, num_qubits)
plot(it, KL, Wmax, title='Convergence Plots', size=10)
plt.savefig('Figures/N Qubit QBM/convergence', bbox_inches='tight')

In [None]:
np.set_printoptions(formatter={'float_kind':"{:.6f}".format}) # print floats with 6 decimal places
# Alternative formatter for complex entries (disabled):
#np.set_printoptions(formatter={'complex_kind': lambda x: "{:.2f}+{:.2f}j".format(x.real, x.imag)})

# Element-wise absolute difference between the weights w and w_eta
print(np.abs(w-w_eta))

# Test ground