## 2-Qubit Boltzmann Machine

We use a 2-qubit system, where the goal is to learn the parameters of the Ising Hamiltonian:

$H = \sum_{k} \sum_{k'} w_{ij}^{kk'} \sigma_0^k \otimes \sigma_1^{k'} + h^k_0 \sigma_0^k + h^{k'}_1 \sigma_1^{k'} $

where $w_{ij}^{kk'}$ is the coupling strength, $h_i$ are the local fields, and $\sigma_i^k$ are the Pauli operators. For the Pauli matrices, indexes $k=1,2,3$ correspond to $x,y,z$, index $k=0$ to the identity. The explicit Hamiltonian is written as:

$H = \sum_{k} \sum_{k'} w_{ij}^{kk'} \sigma_0^k \otimes \sigma_1^{k'} + h^k_0 \sigma_0^k \otimes \sigma_1^0 + h^{k'}_1 \sigma_0^0 \otimes \sigma_1^{k'}$


The parameter matrix "w" is written in the Z-basis. It will have all zeros in column 2 and row 2, except for the diagonal term "h". The row index corresponds to spin 0 and the column to spin 1.

* $w[0,k] = h_1^k$ is the external field on spin $1$; $k = 1,2,3$
* $w[k,0] = h_0^k$ is the external field on spin $0$; $k = 1,2,3$
* $w[0,0] = -\log Z$

For the external field, $h_0$ and $h_1$ both have a zero in its y-component ($h_0[1] = h_1[1] = 0$)

A vector state follows the numbering convention $|s_0, \dots, s_n>$

In [7]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.transforms as mtransforms
from numba import njit

In [8]:
@njit
def expmat(A):
    """
    Computes the exponential of a given matrix `A'.
    """
    A = 0.5 * (A + np.transpose(np.conjugate(A)))
    evals, evecs = np.linalg.eigh(A)
    N = len(evals)
    res = np.zeros((N,N),np.complex64)
    for i in range(N):
        eigenvector = evecs[:,i]
        projector = np.outer(eigenvector,eigenvector.conj())
        res += np.exp(evals[i]) * projector
    return res

@njit
def logmat(A):
    """
    Computes the natural logarithm of a given matrix `A`.
    """
    A = 0.5 * (A + np.transpose(np.conjugate(A)))
    evals, evecs = np.linalg.eigh(A)
    N = len(evals)
    res = np.zeros((N,N),np.complex64)
    for i in range(N):
        eigenvector = evecs[:,i]
        projector = np.outer(eigenvector,eigenvector)
        res += np.log(evals[i]) * projector
    return res

def generate_interaction_matrices():
    """
    Constructs the tensor product of Pauli matrices for each spin and 
    uses them to build the interaction matrices between the two spins.
    """
    pauI = np.array([[1,0],[0,1]],np.complex64)
    pauX = np.array([[0,1],[1,0]],np.complex64)
    pauY = np.array([[0,-1j],[1j,0]],np.complex64)
    pauZ = np.array([[1,0],[0,-1]],np.complex64)
    pau = np.array([pauI, pauX, pauY, pauZ], dtype=np.complex64)

    interactions = np.zeros((4, 4, 4, 4), dtype=np.complex64)
    for k in range(4):
        for kprime in range(4):
            interactions[k, kprime] = np.kron(pau[k], pau[kprime])
    return interactions


@njit
def hamiltonian2spins(w,interactions):
    """
    Calculates the Hamiltonian matrix of a two-spin system 
    """
    H = np.zeros((4,4),np.complex64)
    for k in range(4):
        for kprime in range(4):
            H += w[k,kprime] * interactions[k,kprime]
    return H

@njit
def rho_model(w,interactions):
    """
    Computes the density matrix of a two-spin system using the Hamiltonian
    and the interaction matrices, and then normalizes it. Exact Diagonalization.
    """
    H      = hamiltonian2spins(w,interactions)        #  get hamiltonian matrix
    rho    = expmat(H)                                #  defintion of rho
    Z      = np.real(np.trace(rho))                   #  get Z
    rho   /= Z                                        #  normalize such that Tr[rho] = 1
    return rho

@njit
def observables(rho, interactions):
    """
    Computes the expectation values of the interaction matrices [observables]
    """
    obs = np.zeros((4,4))
    for k in range(4):
        for kprime in range(4):
            obs[k,kprime] = np.real(np.trace(rho@interactions[k,kprime]))
    return obs

@njit
def KL_divergence(eta,rho):
    """
    Calculates the KL divergence between the model and target distribution.
    """
    return np.real(np.trace(eta@(logmat(eta)-logmat(rho))))

@njit
def QM_likelihood(eta,rho):
    """
    Calculates the quantum Likelihood of the distribution
    """
    return -np.real(np.trace(eta @ logmat(rho)))

# @njit
def check_density_matrices(eta, rho):
    """
    Checks if the density matrices `eta` and `rho` are positive definite 
    and that their trace is equal to 1. If not, return an error message and 
    the violating density matrix.
    """
    # Check if eta and rho are positive definite
    eta_eigenvalues = np.linalg.eigvals(eta)
    rho_eigenvalues = np.linalg.eigvals(rho)
    
    if not np.all(eta_eigenvalues > 0):
        return "Error: Density matrix eta is not positive definite", eta
    
    if not np.all(rho_eigenvalues > 0):
        return "Error: Density matrix rho is not positive definite", rho
    
    # Check if the trace of eta and rho is equal to 1
    eta_trace = np.real(np.trace(eta))
    rho_trace = np.real(np.trace(rho))
    
    if not np.isclose(eta_trace, 1):
        return f"Error: Trace of density matrix eta is not equal to 1, but {eta_trace}", eta
    
    if not np.isclose(rho_trace, 1):
        return f"Error: Trace of density matrix rho is not equal to 1, but {rho_trace}", rho
    
    # If eta and rho passed all checks, return None
    return None, None

# @njit
def check_kl_likelihood(eta, rho):
    """
    Checks if the KL divergence or likelihood becomes negative, and if so,
    returns an error message and the violating eta and rho.
    """
    kl_divergence = KL_divergence(eta, rho)
    likelihood = QM_likelihood(eta, rho)

    if kl_divergence < 0:
        return f"Error: KL divergence is negative: {kl_divergence}", eta, rho

    if likelihood < 0:
        return f"Error: Likelihood is negative: {likelihood}", eta, rho

    return None, None, None


In [9]:
###  PLOTTING FUNCTIONS
#----------------------------------------------------------------------
def plot_scatter(ax, x, y, xlabel, ylabel, color, size=10):
    '''Creates a scatter subplot'''
    ax.scatter(x, y, s=size, marker='o', color=color)
    ax.set_xlabel(xlabel, fontsize=20)
    ax.set_ylabel(ylabel, fontsize=20)
    ax.set_yscale('log')

def plot(it, KL, Wmax, title='Convergence Plots', size=10):
    fig = plt.figure(figsize=(25, 6));                           #  make plots
    fig.suptitle(title, fontsize=30, y = 1)
    trans = mtransforms.ScaledTranslation(-20/72, 7/72, fig.dpi_scale_trans)
    its= np.arange(1,it+1,1)

    ax1 = fig.add_subplot(1, 2, 1)
    plot_scatter(ax1, its, Wmax[:it], "Iterations", r"$ (\Delta w)_{max}$", "Coral", size)
    ax1.text(0, 1.0, 'A.)', transform=ax1.transAxes + trans, fontsize='large',fontweight ='bold', va='bottom', fontfamily='sans-serif')
   
    ax2 = fig.add_subplot(1, 2, 2)
    ax2.scatter(its, KL[:it], s=size, marker='o', color="ForestGreen")
    ax2.set_xlabel("Iterations", fontsize=20)
    ax2.set_ylabel("KL Divergence", fontsize=20)
    ax2.text(0, 1.0, 'B.)', transform=ax2.transAxes + trans, fontsize='large',fontweight ='bold', va='bottom', fontfamily='sans-serif')

In [10]:
@njit
def generate_obs(w, interactions):
    rho  = rho_model(w, interactions)         # compute density matrix using ED
    obs  = observables(rho, interactions)     # get QM statistics  
    return rho, obs

@njit
def generate_random_parameter_matrix(seed):
    np.random.seed(seed)              # set random seed for reproducibility
    w = np.random.randn(4, 4)         # get normal distributed weights
    w[0, 0] = 0
    return w

# @njit
def quantum_boltzmann_machine(interactions, lr, maxiter, tol, random_seed, w_eta=None, eta=None):
    """
    Train the model to fit the target distribution eta
    """
    if w_eta is not None:
        eta = rho_model(w_eta, interactions)               # compute density matrix using ED if w_eta is provide
    obs_clamped  = observables(eta, interactions)          # get QM clamped statistics  
    
    w    = generate_random_parameter_matrix(random_seed + 1)  # initialize random parameters
    rho  = rho_model(w, interactions)                      # compute density matrix using ED
    obs  = observables(rho, interactions)                  # get QM statistics  
    
    error_message, error_matrix = check_density_matrices(eta, rho)
    if error_message is not None:
        print(error_message)
        return error_matrix
    error_message, error_eta, error_rho = check_kl_likelihood(eta, rho)
    if error_message is not None:
        print(error_message)
        return error_eta, error_rho
    
    np.set_printoptions(precision=2, suppress=True)
    
    it = 0                                                 #  initialize gradient ascent loop
    diff = np.max(np.abs(obs-obs_clamped))
    Wmax_list = np.zeros(maxiter)                          #  initialize values to store
    lk_list   = np.zeros(maxiter)
    kl_list   = np.zeros(maxiter)                          #  keibler lubach divergence


    while (diff > tol and it < maxiter): 
        rho  = rho_model(w, interactions)                   # compute density matrix using ED

        error_message, error_matrix = check_density_matrices(eta, rho)
        if error_message is not None:
            print(error_message)
            return error_matrix
        error_message, error_eta, error_rho = check_kl_likelihood(eta, rho)
        if error_message is not None:
            print(error_message)
            return error_eta, error_rho

        obs  = observables(rho, interactions)               # get QM statistics  
        w   += lr * (obs_clamped -  obs)                    #  update parameters
        
        diff = np.max(np.abs(obs - obs_clamped))            #  evaluate differences in clamped and model statistics   
        if w_eta is not None:
            Wmax = np.max(np.abs(w-w_eta))
            Wmax_list[it] = Wmax                                #  store values
        
        lk_list[it]  = QM_likelihood(eta,rho)
        kl_list[it]  = KL_divergence(eta,rho) 
        it += 1 
    return w, rho,lk_list, kl_list, Wmax_list, it

In [11]:
#learning parameters
lr       = 0.4      #  learning rate 
maxiter  = 10    #  iterations
tol      = 1e-10    #  tolerance
random_seed = 555

interactions     = generate_interaction_matrices()     #  generate interaction matrices
w_eta = generate_random_parameter_matrix(random_seed)

In [12]:
# w, rho, lk, kl, Wmax, it = quantum_boltzmann_machine(interactions, lr, maxiter, tol, random_seed, w_eta=w_eta, eta=None)

# plot(it, lk, Wmax, title='Convergence Plots', size=10)
# print(lk)

Error: Likelihood is negative: -0.04864981770515442
1.0
1.0
[0.82 0.17 0.   0.  ]
[0.73 0.26 0.01 0.  ]
[[  1.87+0.j  -1.06+0.j   5.62+0.j  -4.95+0.j]
 [ -1.06+0.j   1.15+0.j  -2.32+0.j   2.29+0.j]
 [  5.62+0.j  -2.32+0.j  21.07+0.j -17.63+0.j]
 [ -4.95+0.j   2.29+0.j -17.63+0.j  15.3 +0.j]]
[[ 0.49+0.j   -0.15-0.07j  0.17-0.15j  0.04+0.07j]
 [-0.15+0.07j  0.09+0.j   -0.08+0.04j -0.05-0.04j]
 [ 0.17+0.15j -0.08-0.04j  0.32+0.j    0.12+0.09j]
 [ 0.04-0.07j -0.05+0.04j  0.12-0.09j  0.1 +0.j  ]]


In [13]:
fig = plt.figure(figsize=(25, 6));                           #  make plots

diff = np.diff(kl)
its= np.arange(1,it,1)
its2= np.arange(1,it+1,1)
ax1 = fig.add_subplot(1, 2, 1)
ax1.scatter(its2[:10], kl[:10], s=3, marker='o', color="ForestGreen")

ax2 = fig.add_subplot(1, 2, 2)
ax2.scatter(its[1000:], diff[1000:], s=3, marker='o', color="ForestGreen")

# plt.savefig('Figures/2 Qubit QBM/seed555lr04', bbox_inches='tight')


NameError: name 'kl' is not defined

<Figure size 2500x600 with 0 Axes>

## Error analysis Eta and rho remain positive definite and trace 1 for the negative KL values, so what's the problem?

In [16]:
eta, rho = quantum_boltzmann_machine(interactions, lr, maxiter, tol, random_seed, w_eta=w_eta, eta=None)
print(np.real(np.trace(eta)))
print(np.real(np.trace(rho)))

print(np.real(np.linalg.eigvals(eta)))
print(np.real(np.linalg.eigvals(rho)))

QM_likelihood(eta,rho)


Error: Likelihood is negative: -0.04864981770515442
1.0
1.0
[0.82 0.17 0.   0.  ]
[0.73 0.26 0.01 0.  ]


-0.04864981770515442

### Problem in logmat expmat?

In [17]:
print((expmat(logmat(rho))))
print(rho)

[[  1.87+0.j  -1.06+0.j   5.62+0.j  -4.95+0.j]
 [ -1.06+0.j   1.15+0.j  -2.32+0.j   2.29+0.j]
 [  5.62+0.j  -2.32+0.j  21.07+0.j -17.63+0.j]
 [ -4.95+0.j   2.29+0.j -17.63+0.j  15.3 +0.j]]
[[ 0.49+0.j   -0.15-0.07j  0.17-0.15j  0.04+0.07j]
 [-0.15+0.07j  0.09+0.j   -0.08+0.04j -0.05-0.04j]
 [ 0.17+0.15j -0.08-0.04j  0.32+0.j    0.12+0.09j]
 [ 0.04-0.07j -0.05+0.04j  0.12-0.09j  0.1 +0.j  ]]


In [None]:
np.set_printoptions(formatter={'float_kind':"{:.6f}".format}) # print matrices in 2 decimals
#np.set_printoptions(formatter={'complex_kind': lambda x: "{:.2f}+{:.2f}j".format(x.real, x.imag)})

print(np.abs(w-w_eta))

In [None]:
def generate_w(wx1x2, wy1y2, w1z2z, hx1, hx2, hy1, hy2, hz1, hz2):
    return np.array([
        [0,  hx2, hy2, hz2],
        [hx1, wx1x2, 0,  0],
        [hy1, 0,  wy1y2, 0],
        [hz1, 0,  0,  w1z2z]
    ])

#interaction parameters
wx1x2 = 0
wy1y2 = 0
wz1z2 = 2
hx1   = 0
hx2   = 0
hy1   = 0
hy2   = 0
hz1   = 1
hz2   = 1
    
w = generate_w(wx1x2 ,wy1y2, wz1z2, hx1, hx2, hy1, hy2, hz1, hz2)