# Comparing the performance of optimizers

In [1]:
import pennylane as qml
import numpy as np
from qiskit import IBMQ
import itertools
import matplotlib.pyplot as plt
import pickle
import scipy

## Hardware-friendly circuit

In [2]:
# Number of wires (qubits) used for every device and by the circuit's rotation loop.
n_wires = 5

In [3]:
# Shot budgets for the sampling-based simulators.
n_shots_list = [10, 100, 1000]

# One finite-shot device per budget, followed by a device created without
# explicit shots/analytic arguments. Later cells address that last entry as
# index -1 (e.g. when computing g_exact).
devs = [
    qml.device("default.qubit", wires=n_wires, shots=n_shots, analytic=False)
    for n_shots in n_shots_list
] + [qml.device("default.qubit", wires=n_wires)]

In [4]:
# Display the device list to check the wire and shot settings of each entry.
devs

[<DefaultQubit device (wires=5, shots=10) at 0x7f5cedd52750>,
 <DefaultQubit device (wires=5, shots=100) at 0x7f5cedce7090>,
 <DefaultQubit device (wires=5, shots=1000) at 0x7f5c8cf84250>,
 <DefaultQubit device (wires=5, shots=1000) at 0x7f5c8cfe59d0>]

In [5]:
def layers_circ(weights):
    """Circuit with one RX rotation per wire followed by a fixed CNOT pattern.

    Args:
        weights: indexable collection of rotation angles; entries
            0 .. n_wires - 1 are read, one per wire.

    Returns:
        The ``qml.expval`` measurement of ``PauliZ`` on wire 1.
    """
    for wire in range(n_wires):
        qml.RX(weights[wire], wires=wire)

    # Wire pairs handed to CNOT, applied in exactly this order.
    for first, second in ((0, 1), (2, 1), (3, 1), (4, 3)):
        qml.CNOT(wires=[first, second])

    return qml.expval(qml.PauliZ(1))

In [6]:
# Bind the circuit to every device: one QNode per entry of devs, in the same order.
layers = [qml.QNode(layers_circ, device) for device in devs]

In [7]:
# Seeded initial parameters for a single layer, flattened to a 1-D vector.
# The wire count comes from n_wires so the weights always match the devices
# and the rotation loop in layers_circ.
seed = 2
weights = qml.init.basic_entangler_layers_uniform(n_layers=1, n_wires=n_wires, seed=seed).flatten()
weights

tensor([2.73943676, 0.16289932, 3.4536312 , 2.73521126, 2.6412488 ], requires_grad=True)

In [8]:
# One qml.grad function (with respect to argument 0) per QNode, in device order.
grads = [qml.grad(qnode, argnum=0) for qnode in layers]

In [9]:
# Evaluate the circuit at the initial weights on every device for comparison.
[qnode(weights) for qnode in layers]

[-1.0, -0.8, -0.794, -0.7938055593697134]

In [10]:
# Reference gradient taken from the last device's gradient function,
# rounded to 7 decimal places.
g_exact = np.round(grads[-1](weights), 7)
g_exact

array([-0.3376347,  0.1304665,  0.2560632, -0.3416029,  0.       ])

## Calculating the Hessian

In [11]:
# Shared settings for the shift-based gradient/Hessian helpers and the optimizers.
s = 0.5 * np.pi  # shift added to / subtracted from a parameter
denom = 4 * np.sin(s) ** 2  # divisor for the off-diagonal Hessian entries
shift = np.eye(len(weights))  # row i is the unit vector selecting parameter i
LAMBDA = 0.2 # regularization parameter for the Hessian
lr_gds = 0.15  # learning rate passed to the optimizer runs
lr_newton = 0.15  # scale used inside the Hessian regularizers
# Alternative starting point, kept for reference:
#weights[0] = 1.8
#weights[1] = 2.2
# NOTE: the two assignments below overwrite `weights` in place, so values
# computed from `weights` in earlier cells refer to the previous entries.
weights[0] = 0.1
weights[1] = 0.15
ARGS = 2  # default number of leading parameters the optimizers update



def is_pos_def(x):
    """Return whether every eigenvalue of the square matrix ``x`` is strictly positive."""
    eigenvalues = np.linalg.eigvals(x)
    return (eigenvalues > 0).all()


# First method
def regularize_hess(hess, lr):
    """Add LAMBDA to the diagonal of ``hess`` and scale the result by 1 / lr_newton.

    ``lr`` is accepted but not used by this variant.
    NOTE: later definitions of ``regularize_hess`` in this cell replace this one.
    """
    shifted = hess + LAMBDA * np.eye(len(hess))
    return (1 / lr_newton) * shifted

def regularize_diag_hess(hess, lr):
    """Add LAMBDA to every entry of the diagonal Hessian and scale by 1 / lr_newton.

    ``lr`` is accepted but not used by this variant.
    NOTE: later definitions of ``regularize_diag_hess`` in this cell replace this one.
    """
    shifted = hess + LAMBDA
    return (1 / lr_newton) * shifted

# Second method
def regularize_hess(hess, lr):
    """Use the scaled Hessian only if it stays positive definite after subtracting LAMBDA * I.

    Returns ``hess / lr_newton`` when ``hess - LAMBDA * I`` passes ``is_pos_def``;
    otherwise returns the identity scaled by ``1 / lr``.
    NOTE: later definitions of ``regularize_hess`` in this cell replace this one.
    """
    identity = np.eye(len(hess))
    if not is_pos_def(hess - LAMBDA * identity):
        return (1 / lr) * np.eye(len(hess))
    return (1 / lr_newton) * hess

def regularize_diag_hess(hess, lr):
    """Use the scaled diagonal Hessian only if every entry exceeds LAMBDA.

    Returns ``hess / lr_newton`` when all entries of ``hess - LAMBDA`` are
    positive; otherwise returns a vector of ones scaled by ``1 / lr``.
    NOTE: later definitions of ``regularize_diag_hess`` in this cell replace this one.
    """
    if not np.all(hess - LAMBDA > 0):
        return (1 / lr) * np.ones(len(hess))
    return (1 / lr_newton) * hess

# Third method
def regularize_hess(hess, lr):
    """Regularize with the matrix square root of ``hess @ hess`` plus LAMBDA * I.

    The result is ``(sqrtm(hess @ hess) + LAMBDA * I) / lr_newton``.
    ``lr`` is accepted but not used by this variant.
    NOTE: a later definition of ``regularize_hess`` in this cell replaces this one.
    """
    # A bare ``import scipy`` does not guarantee that the ``scipy.linalg``
    # submodule is loaded, so import it explicitly before use.
    import scipy.linalg

    abs_hess = scipy.linalg.sqrtm(hess @ hess)
    return (1 / lr_newton) * (abs_hess + LAMBDA * np.eye(len(hess)))

def regularize_diag_hess(hess, lr):
    """Take the element-wise absolute value of ``hess``, add LAMBDA, scale by 1 / lr_newton.

    ``lr`` is accepted but not used by this variant.
    NOTE: a later definition of ``regularize_diag_hess`` in this cell replaces this one.
    """
    magnitude = np.abs(hess)
    return (1 / lr_newton) * (magnitude + LAMBDA)

# Fourth method
def regularize_hess(hess, lr):
    """Raise every eigenvalue of ``hess`` to at least LAMBDA and rebuild the matrix.

    The matrix is eigen-decomposed, each eigenvalue is replaced by the larger of
    itself and LAMBDA, and the result is recomposed and scaled by 1 / lr_newton.
    ``lr`` is accepted but not used by this variant.
    This is the last definition of ``regularize_hess`` in the cell, so it is the
    one in effect once the cell has run.
    """
    eigenvalues, eigenvectors = np.linalg.eig(hess)
    floor = LAMBDA * np.ones(len(hess))
    clipped = np.maximum(eigenvalues, floor)
    scaled_vectors = (1 / lr_newton) * eigenvectors
    return scaled_vectors @ np.diag(clipped) @ np.linalg.inv(eigenvectors)

def regularize_diag_hess(hess, lr):
    """Raise every entry of the diagonal Hessian to at least LAMBDA, scaled by 1 / lr_newton.

    ``lr`` is accepted but not used by this variant.
    This is the last definition of ``regularize_diag_hess`` in the cell, so it is
    the one in effect once the cell has run.
    """
    floor = LAMBDA * np.ones(len(hess))
    return (1 / lr_newton) * np.maximum(hess, floor)



def hess_gen_results(func, weights, args=None):
    """Collect every function evaluation needed for the full Hessian.

    Args:
        func: callable evaluated at shifted copies of ``weights``.
        weights: parameter vector.
        args: number of leading parameters to differentiate; a falsy value
            (``None`` or 0) means all of them.

    Returns:
        dict with two kinds of keys:
        * ``(a, b)`` with ``a < b`` -> ``(f_pp, f_mp, f_pm, f_mm)``, the values
          of ``func`` with parameters ``a`` and ``b`` shifted by ``+s/+s``,
          ``-s/+s``, ``+s/-s`` and ``-s/-s`` respectively;
        * ``(k, k)`` -> ``(f_plus, f_minus, f_unshifted)`` for parameter ``k``
          shifted by ``+pi/2``, ``-pi/2`` and not at all.
    """
    n_params = args if args else len(weights)
    evaluations = {}

    for a, b in itertools.combinations(range(n_params), r=2):
        same_sign = s * (shift[a] + shift[b])
        opposite_sign = s * (shift[a] - shift[b])

        # Evaluation order is kept as pp, pm, mp, mm.
        f_pp = func(weights + same_sign)
        f_pm = func(weights + opposite_sign)
        f_mp = func(weights - opposite_sign)
        f_mm = func(weights - same_sign)
        evaluations[(a, b)] = (f_pp, f_mp, f_pm, f_mm)

    unshifted = func(weights)
    half_pi = 0.5 * np.pi

    for k in range(n_params):
        step = half_pi * shift[k]
        evaluations[(k, k)] = (func(weights + step), func(weights - step), unshifted)

    return evaluations


def hess_diag_gen_results(func, weights, args=None):
    """Collect the function evaluations needed for the Hessian diagonal.

    Args:
        func: callable evaluated at shifted copies of ``weights``.
        weights: parameter vector.
        args: number of leading parameters to differentiate; a falsy value
            (``None`` or 0) means all of them.

    Returns:
        dict mapping ``(k, k)`` to ``(f_plus, f_minus, f_unshifted)``, where
        parameter ``k`` is shifted by ``+pi/2`` and ``-pi/2``.
    """
    n_params = args if args else len(weights)
    half_pi = 0.5 * np.pi

    # The unshifted value is evaluated once and shared by every entry.
    unshifted = func(weights)

    evaluations = {}
    for k in range(n_params):
        step = half_pi * shift[k]
        evaluations[(k, k)] = (func(weights + step), func(weights - step), unshifted)

    return evaluations


def grad_gen_results(func, weights, args=None):
    """Collect the pair of shifted evaluations needed for each gradient entry.

    Args:
        func: callable evaluated at shifted copies of ``weights``.
        weights: parameter vector.
        args: number of leading parameters to differentiate; a falsy value
            (``None`` or 0) means all of them.

    Returns:
        dict mapping parameter index ``k`` to ``(f_plus, f_minus)``, the values
        of ``func`` with parameter ``k`` shifted by ``+pi/2`` and ``-pi/2``.
    """
    n_params = args if args else len(weights)
    half_pi = 0.5 * np.pi

    evaluations = {}
    for k in range(n_params):
        step = half_pi * shift[k]
        evaluations[k] = (func(weights + step), func(weights - step))

    return evaluations


def get_hess_diag(func, weights, args=None):
    """Compute the Hessian diagonal and the gradient from one set of evaluations.

    Args:
        func: callable to differentiate.
        weights: parameter vector.
        args: number of leading parameters to differentiate; a falsy value
            (``None`` or 0) means all of them.

    Returns:
        Tuple ``(hess, results, grad)``: the 1-D array of diagonal Hessian
        entries, the raw evaluations from ``hess_diag_gen_results``, and the
        1-D gradient array.
    """
    n_params = args if args else len(weights)
    results = hess_diag_gen_results(func, weights, n_params)

    hess = np.zeros(n_params)
    grad = np.zeros(n_params)

    for k in range(n_params):
        f_plus, f_minus, f_unshifted = results[(k, k)]
        hess[k] = (f_plus + f_minus - 2 * f_unshifted) / 2
        grad[k] = (f_plus - f_minus) / 2

    return hess, results, grad


def get_grad(func, weights, args=None):
    """Compute the gradient as half the difference of the +pi/2 and -pi/2 evaluations.

    Args:
        func: callable to differentiate.
        weights: parameter vector.
        args: number of leading parameters to differentiate; a falsy value
            (``None`` or 0) means all of them.

    Returns:
        Tuple ``(results, grad)``: the raw evaluations from
        ``grad_gen_results`` and the 1-D gradient array.
    """
    n_params = args if args else len(weights)
    results = grad_gen_results(func, weights, n_params)

    grad = np.zeros(n_params)
    for k in range(n_params):
        f_plus, f_minus = results[k]
        grad[k] = (f_plus - f_minus) / 2

    return results, grad
    
    
def get_hess(func, weights, args=None):
    """Compute the full Hessian and the gradient from one set of evaluations.

    Args:
        func: callable to differentiate.
        weights: parameter vector.
        args: number of leading parameters to differentiate; a falsy value
            (``None`` or 0) means all of them.

    Returns:
        Tuple ``(hess, results, grad)``: the 2-D Hessian array, the raw
        evaluations from ``hess_gen_results``, and the 1-D gradient array.
    """
    n_params = args if args else len(weights)
    results = hess_gen_results(func, weights, n_params)

    # Fill the strict upper triangle, then mirror it onto the lower triangle.
    upper = np.zeros((n_params, n_params))
    for pair in itertools.combinations(range(n_params), r=2):
        f_pp, f_mp, f_pm, f_mm = results[pair]
        upper[pair] = (f_pp - f_mp - f_pm + f_mm) / denom
    hess = upper + upper.T

    # Diagonal entries and gradient both come from the (k, k) evaluations.
    grad = np.zeros(n_params)
    for k in range(n_params):
        f_plus, f_minus, f_unshifted = results[(k, k)]
        hess[k, k] = (f_plus + f_minus - 2 * f_unshifted) / 2
        grad[k] = (f_plus - f_minus) / 2

    return hess, results, grad

## Visualizing optimization surface

In [None]:
# Evaluate the last QNode on a grid x grid lattice spanning [-2*pi, 2*pi] in
# the first two parameters; the remaining parameters keep their current values.
grid = 200
xs = np.linspace(-2 * np.pi, 2 * np.pi, grid)
ys = np.linspace(-2 * np.pi, 2 * np.pi, grid)

xv, yv = np.meshgrid(xs, ys)
zv = np.zeros((grid, grid))

for i, j in np.ndindex(grid, grid):
    w = weights.copy()
    w[0], w[1] = xv[i, j], yv[i, j]
    zv[i, j] = layers[-1](w)

In [37]:
# Cache the grid axes and the evaluated surface in grid.npz for later reloading.
np.savez("grid.npz", xs=xs, ys=ys, zv=zv)

In [12]:
# Reload the cached surface. The archive is opened in a `with` block so the
# underlying file handle is closed once the three arrays have been read.
with np.load("grid.npz") as g:
    xs = g["xs"]
    ys = g["ys"]
    zv = g["zv"]

In [13]:
# Show the starting point used by the optimizer runs below.
weights

tensor([0.1       , 0.15      , 3.4536312 , 2.73521126, 2.6412488 ], requires_grad=True)

In [14]:
def gradient_descent(func, weights, reps, lr, i, args=ARGS):
    """Run plain gradient descent on the first ``args`` parameters.

    Args:
        func: cost function evaluated on a weight vector.
        weights: starting weight vector (copied, not modified).
        reps: number of update steps.
        lr: learning rate multiplying the gradient.
        i: label inserted into the checkpoint file name
            ``gds_results_<i>.pickle``.
        args: number of leading parameters to update.

    Returns:
        Tuple ``(ws, res_dict, gs, costs)``: the weight vectors (initial one
        first), the raw evaluations per repetition, the gradients, and the
        cost values (initial one first).
    """
    ws = [weights.copy()]
    res_dict = {}
    gs = []
    costs = [func(weights)]

    for r in range(reps):
        res, g = get_grad(func, ws[-1], args)
        res_dict[r] = res
        gs.append(g)

        w_updated = ws[-1].copy()
        w_updated[:args] -= lr * g

        ws.append(w_updated)
        costs.append(func(w_updated))

        if r % 5 == 0:
            print("Calculated for repetition {}".format(r))

        # Checkpoint after every repetition. Store the full per-repetition
        # dictionary (the same object that is returned), not only the
        # evaluations of the latest repetition.
        with open("gds_results_{}.pickle".format(i), "wb") as f:
            pickle.dump([ws, res_dict, gs, costs], f)

    return ws, res_dict, gs, costs

In [15]:
# Run gradient descent on every QNode (one per device) from the same start.
reps = 50
lr = lr_gds
args = ARGS  # not passed on below; gradient_descent falls back to its own default

# Each call writes its history to its own pickle file. The names on the left
# are rebound on every pass, so after the loop they hold the last run only.
for i, l in enumerate(layers):
    print("Calculating for layer {}".format(i))
    ws, res, gs, costs = gradient_descent(l, weights, reps, lr, i)

Calculating for layer 0
Calculated for repetition 0
Calculated for repetition 5
Calculated for repetition 10
Calculated for repetition 15
Calculated for repetition 20
Calculated for repetition 25
Calculated for repetition 30
Calculated for repetition 35
Calculated for repetition 40
Calculated for repetition 45
Calculating for layer 1
Calculated for repetition 0
Calculated for repetition 5
Calculated for repetition 10
Calculated for repetition 15
Calculated for repetition 20
Calculated for repetition 25
Calculated for repetition 30
Calculated for repetition 35
Calculated for repetition 40
Calculated for repetition 45
Calculating for layer 2
Calculated for repetition 0
Calculated for repetition 5
Calculated for repetition 10
Calculated for repetition 15
Calculated for repetition 20
Calculated for repetition 25
Calculated for repetition 30
Calculated for repetition 35
Calculated for repetition 40
Calculated for repetition 45
Calculating for layer 3
Calculated for repetition 0
Calculated f

In [16]:
def newton(func, weights, reps, lr, i, args=ARGS):
    """Run Newton-type updates with a regularized full Hessian.

    Args:
        func: cost function evaluated on a weight vector.
        weights: starting weight vector (copied, not modified).
        reps: number of update steps.
        lr: value forwarded to ``regularize_hess``.
        i: label inserted into the checkpoint file name
            ``new_results_<i>.pickle``.
        args: number of leading parameters to update.

    Returns:
        Tuple ``(ws, res_dict, gs, hs, costs)``: the weight vectors (initial
        one first), the raw evaluations per repetition, the gradients, the
        unregularized Hessians, and the cost values (initial one first).
    """
    ws = [weights.copy()]
    res_dict = {}
    gs = []
    hs = []
    costs = [func(weights)]

    for r in range(reps):
        hess_r, res, g = get_hess(func, ws[-1], args)

        res_dict[r] = res
        gs.append(g)
        hs.append(hess_r)

        w_updated = ws[-1].copy()
        hess_regul = regularize_hess(hess_r, lr)
        # Only the real part of the inverse is used for the step.
        h_inv = np.real(np.linalg.inv(hess_regul))
        w_updated[:args] -= h_inv @ g

        ws.append(w_updated)
        costs.append(func(w_updated))

        if r % 5 == 0:
            print("Calculated for repetition {}".format(r))

        # Checkpoint after every repetition. Store the full per-repetition
        # dictionary (the same object that is returned), not only the
        # evaluations of the latest repetition.
        with open("new_results_{}.pickle".format(i), "wb") as f:
            pickle.dump([ws, res_dict, gs, hs, costs], f)

    return ws, res_dict, gs, hs, costs

In [17]:
# Run the full-Hessian Newton optimizer on every QNode from the same start.
reps = 50
lr = lr_gds  # forwarded to newton, which hands it to regularize_hess

# Each call writes its history to its own pickle file. The names on the left
# are rebound on every pass, so after the loop they hold the last run only.
for i, l in enumerate(layers):
    print("Calculating for layer {}".format(i))
    ws, res, gs, hs, costs = newton(l, weights, reps, lr, i)

Calculating for layer 0
Calculated for repetition 0
Calculated for repetition 5
Calculated for repetition 10
Calculated for repetition 15
Calculated for repetition 20
Calculated for repetition 25
Calculated for repetition 30
Calculated for repetition 35
Calculated for repetition 40
Calculated for repetition 45
Calculating for layer 1
Calculated for repetition 0
Calculated for repetition 5
Calculated for repetition 10
Calculated for repetition 15
Calculated for repetition 20
Calculated for repetition 25
Calculated for repetition 30
Calculated for repetition 35
Calculated for repetition 40
Calculated for repetition 45
Calculating for layer 2
Calculated for repetition 0
Calculated for repetition 5
Calculated for repetition 10
Calculated for repetition 15
Calculated for repetition 20
Calculated for repetition 25
Calculated for repetition 30
Calculated for repetition 35
Calculated for repetition 40
Calculated for repetition 45
Calculating for layer 3
Calculated for repetition 0
Calculated f

In [18]:
def newton_diag(func, weights, reps, lr, ii, args=ARGS):
    """Run Newton-type updates using only the regularized Hessian diagonal.

    Args:
        func: cost function evaluated on a weight vector.
        weights: starting weight vector (copied, not modified).
        reps: number of update steps.
        lr: value forwarded to ``regularize_diag_hess``.
        ii: label inserted into the checkpoint file name
            ``new_d_results_<ii>.pickle``.
        args: number of leading parameters to update.

    Returns:
        Tuple ``(ws, res_dict, gs, hs, costs)``: the weight vectors (initial
        one first), the raw evaluations per repetition, the gradients, the
        unregularized Hessian diagonals, and the cost values (initial one first).
    """
    ws = [weights.copy()]
    res_dict = {}
    gs = []
    hs = []
    costs = [func(weights)]

    for r in range(reps):
        hess_r, res, g = get_hess_diag(func, ws[-1], args)

        res_dict[r] = res
        gs.append(g)
        hs.append(hess_r)

        w_updated = ws[-1].copy()

        hess_regul = regularize_diag_hess(hess_r, lr)
        update = g / hess_regul
        # Skip the step for any component where the division produced +/-inf.
        update[np.isinf(update)] = 0

        w_updated[:args] -= update

        ws.append(w_updated)
        costs.append(func(w_updated))

        if r % 5 == 0:
            print("Calculated for repetition {}".format(r))

        # Checkpoint after every repetition. Store the full per-repetition
        # dictionary (the same object that is returned), not only the
        # evaluations of the latest repetition.
        with open("new_d_results_{}.pickle".format(ii), "wb") as f:
            pickle.dump([ws, res_dict, gs, hs, costs], f)

    return ws, res_dict, gs, hs, costs

In [19]:
# Run the diagonal-Hessian Newton optimizer on every QNode from the same start.
reps = 50
lr = lr_gds  # forwarded to newton_diag, which hands it to regularize_diag_hess

# Each call writes its history to its own pickle file. The names on the left
# are rebound on every pass, so after the loop they hold the last run only.
for i, l in enumerate(layers):
    print("Calculating for layer {}".format(i))
    ws, res, gs, hs, costs = newton_diag(l, weights, reps, lr, i)

Calculating for layer 0
Calculated for repetition 0
Calculated for repetition 5
Calculated for repetition 10
Calculated for repetition 15
Calculated for repetition 20
Calculated for repetition 25
Calculated for repetition 30
Calculated for repetition 35
Calculated for repetition 40
Calculated for repetition 45
Calculating for layer 1
Calculated for repetition 0
Calculated for repetition 5
Calculated for repetition 10
Calculated for repetition 15
Calculated for repetition 20
Calculated for repetition 25
Calculated for repetition 30
Calculated for repetition 35
Calculated for repetition 40
Calculated for repetition 45
Calculating for layer 2
Calculated for repetition 0
Calculated for repetition 5
Calculated for repetition 10
Calculated for repetition 15
Calculated for repetition 20
Calculated for repetition 25
Calculated for repetition 30
Calculated for repetition 35
Calculated for repetition 40
Calculated for repetition 45
Calculating for layer 3
Calculated for repetition 0
Calculated f