# :: Tunable First-Order Inexact Oracles (TFOIO) Auxiliary Functions ::
`Performance analysis of deterministic gradient based methods using (global) tunable first-order inexact oracles.`

**Author** : *Guillaume Van Dessel*, Ph.D. candidate @EPL

**Version** : 3.0 | working version for $d_p$=1

In [1]:
## packages' import
import numpy as np
import scipy.linalg as scla
import matplotlib.pyplot as plt
import warnings

# Silence every warning for the rest of the session. `np.warnings` was only an
# accidental alias of the standard-library module and no longer exists in
# recent NumPy releases, so the standard module is used directly.
warnings.filterwarnings('ignore')

In [2]:
# computation of gamma_N / w_k's / W_N (+ alpha_k's, returned only when mode='FGD' and full=True)

def compute_serial_coeffs(N=151,L=10,mu=0.1,mode='FGD',full=False):
    """Compute the serial coefficients (gamma_N, w_k's, W_N) from scratch.

    Parameters
    ----------
    N : number of coefficients to generate.
    L, mu : constants entering the recursions below.
    mode : 'FGD' selects the accumulated-step recursion; any other value
        selects the geometric weights built from rho = 1 - mu/L.
    full : only used when mode == 'FGD'; if true the individual steps
        (alpha_k's) are appended to the returned tuple.

    Returns
    -------
    mode == 'FGD' : (1, A, A[-1]) or (1, A, A[-1], alpha) when ``full``,
        where A holds the running sums of the steps alpha.
    otherwise     : (rho**N, w, sum(w)) with w[k] = rho**(N-1-k).
    """
    first_step = 1  # a_0, to be taken in ]0,1]

    if mode != 'FGD':
        decay = (1-mu/L)
        weights = decay**(N-1-np.arange(N))
        return decay**N, weights, np.sum(weights)

    steps = [first_step]
    cumulative = [first_step]
    running = first_step
    for _ in np.arange(1,N):
        ratio = (L+mu*running)/L
        # next step: positive root of the quadratic tying it to the running sum
        increment = (ratio+np.sqrt(ratio**2 + 4*ratio*running))/2
        steps.append(increment)
        running += increment
        cumulative.append(running)

    weights = np.array(cumulative)
    if full:
        return 1, weights, weights[-1], np.array(steps)
    return 1, weights, weights[-1]

In [3]:
# computation of gamma_N / w_k's / W_N (+ alpha_k's, returned only when mode='FGD' and full=True)
# => restart from previously computed coefficients: restart_lists is \{w_k\}_{k=0}^{N_{prev}},
# paired with \{alpha_i\}_{i=0}^{N_{prev}} when mode='FGD' and full=True

def compute_serial_coeffs_restart(restart_lists,N=151,L=10,mu=0.1,mode='FGD',full=False):
    """Compute (gamma_N, w_k's, W_N), reusing previously computed coefficients.

    Parameters
    ----------
    restart_lists : previously computed state.
        * mode != 'FGD'            : sequence of weights w_k (may be empty).
        * mode == 'FGD', not full  : sequence of running sums A_k (may be empty).
        * mode == 'FGD', full      : pair ``(A_k's, alpha_k's)``; an empty
          sequence is accepted as "nothing computed yet".
    N : number of coefficients requested. If it does not exceed the stored
        amount, the stored coefficients are truncated instead of extended.
    L, mu : constants entering the recursions.
    mode : 'FGD' or anything else (geometric weights with rho = 1 - mu/L).
    full : in 'FGD' mode, also maintain and return the alpha_k's.

    Returns
    -------
    mode == 'FGD' : (1, A, A[N-1]) or (1, A, A[N-1], alpha) when ``full``.
    otherwise     : (rho**N, w, sum(w)).

    Raises
    ------
    ValueError : in full 'FGD' mode when the two stored sequences differ in length.
    """
    a_0 = 1 # take between ]0,1]

    if mode != 'FGD':
        rho = (1-mu/L)
        n_prev = len(restart_lists)
        if N > n_prev:
            # prepend the missing higher powers rho**(N-1), ..., rho**n_prev
            ret = np.concatenate((rho**np.arange(N-1,n_prev-1,-1),restart_lists),axis=0)
        else:
            # keep the N lowest powers, i.e. the tail of the stored weights
            ret = restart_lists[n_prev-N:]
        return rho**N, ret, np.sum(ret)

    # split the restart state; the length test must look at the coefficients
    # themselves, not at the 2-element pair used in full mode
    if full:
        if len(restart_lists) == 0:
            A_prev, a_prev = [], []
        else:
            A_prev, a_prev = restart_lists[0], restart_lists[1]
        if len(A_prev) != len(a_prev):
            raise ValueError('restart_lists must hold as many alpha_k as A_k values')
    else:
        A_prev, a_prev = restart_lists, None
    n_prev = len(A_prev)

    if N <= n_prev:
        # enough coefficients already available: plain truncation
        if full:
            return 1, A_prev[:N], A_prev[N-1], np.array(a_prev[:N])
        return 1, A_prev[:N], A_prev[N-1]

    if n_prev == 0:
        A = a_0
        A_list = [a_0]
        a_list = [a_0]
    else:
        A = A_prev[-1]
        A_list = list(A_prev)
        a_list = list(a_prev) if full else None

    for _ in np.arange(len(A_list),N):
        base = (L+mu*A)/L
        buf = (base+np.sqrt(base**2 + 4*base*A))/2
        if full:
            a_list.append(buf)
        A += buf
        A_list.append(A)

    ret = np.array(A_list)
    if full:
        return 1, ret, ret[-1], np.array(a_list)
    return 1, ret, ret[-1]

In [10]:
def N_lower_semiok(L=10,mu=0.1,mode='FGD',epsilon=1e-5,R=10,j=1,q=1,la=5e-7,verbose=False):
    """Estimate the smallest N passing the feasibility test, by doubling then bisection.

    A value N passes when
        W_N*epsilon - gamma_N*L*R**2/2 - (sum_k w_k)*delta(la) >= 0,
    with delta(eta) = (q/j)*eta**j and the coefficients supplied by
    ``compute_serial_coeffs_restart``.

    The candidate is first doubled until it passes the test (or reaches the
    hard cap of 1e6); the bracket [1, candidate] is then bisected. The upper
    end of the final bracket is returned as an int.
    """

    def slack(gamma, weights, total):
        # Gamma_N(eps) minus the accumulated error at the finest level la
        return (total*epsilon - (gamma*L*R**2)/2) - np.sum(weights)*((q/j)*la**(j))

    lower, hard_cap = int(1), int(1e6)
    growth = 2 # exp. search parameter
    cache = []
    probe = lower
    infeasible = True

    # exponential phase: look for a feasible upper bracket end
    while infeasible and probe < hard_cap:
        probe = np.floor(probe*growth)
        gamma, cache, total = compute_serial_coeffs_restart(cache,int(probe),L,mu,mode)
        infeasible = slack(gamma, cache, total) < 0

    upper = np.min([probe, hard_cap])
    if verbose:
        print(f'bounds found for N_min: [{int(lower)}, {int(upper)}]')

    # bisection phase; NOTE(review): the lower end N = 1 itself is never evaluated
    while upper-lower > 1:
        probe = np.floor((upper+lower)/2)
        gamma, weights, total = compute_serial_coeffs_restart(cache,int(probe),L,mu,mode)
        if slack(gamma, weights, total) >= 0:
            upper = probe
        else:
            lower = probe

    if verbose:
        print(f'final estimation for N_min: {int(upper)}')
    return int(upper)

In [16]:
# computation of minimum N in order to achieve a worst-case epsilon primal accuracy 

def N_lower(L=10,mu=0.1,mode='FGD',epsilon=1e-5,R=10,j=1,q=1,la=5e-7,verbose=False):
    """Estimate the smallest N passing the feasibility test, with a refined bracket search.

    A value N passes when
        W_N*epsilon - gamma_N*L*R**2/2 - (sum_k w_k)*delta(la) >= 0,
    with delta(eta) = (q/j)*eta**j and the coefficients supplied by
    ``compute_serial_coeffs_restart``.

    The upper bracket end is searched geometrically, restarting from 1 with a
    finer growth factor (2, 1.35, 1.03, 1.005) each time the previous sweep
    reached the cap of 1e6 without success. The bracket is then bisected and
    its upper end returned as an int. With ``verbose`` the progress is printed.
    """

    def slack(gamma, weights, total):
        # Gamma_N(eps) minus the accumulated error at the finest level la
        return (total*epsilon - (gamma*L*R**2)/2) - np.sum(weights)*((q/j)*la**(j))

    lower, hard_cap = int(1), int(1e6)
    growth_schedule = [2,1.35,1.03,1.005]
    additive_step = 10 # minimum range of okay N assumed...
    cache = []
    infeasible = True

    # find a suitable upper bracket end, sweep after sweep
    for growth in growth_schedule:
        if not infeasible:
            break
        probe = lower
        if verbose:
            print('new mult search: '+str(growth))
        while infeasible and probe < hard_cap:
            if growth > 1:
                probe = np.floor(probe*growth)+1
            else:
                probe = int(probe+additive_step)
            gamma, cache, total = compute_serial_coeffs_restart(cache,int(probe),L,mu,mode)
            infeasible = slack(gamma, cache, total) < 0

    found = not infeasible
    upper = probe if found else hard_cap
    if verbose:
        print('best bound ? '+str(found))
        print(f'bounds found for N_min: [{int(lower)}, {int(upper)}]')

    # bisection
    while upper-lower > 1:
        probe = np.floor((upper+lower)/2)
        gamma, weights, total = compute_serial_coeffs_restart(cache,int(probe),L,mu,mode)
        feas = slack(gamma, weights, total) >= 0
        if verbose:
            print(f'{feas!s} for N = {probe!s}')
        if feas:
            upper = probe
        else:
            lower = probe

    if verbose:
        print(f'final estimation for N_min: {int(upper)}')
    return int(upper)

In [11]:
# example run of N_lower in 'FGD' mode with verbose tracing; the recorded output follows
N_lower(L=20,mu=0.05,mode='FGD',epsilon=1e-5,R=10,j=2,q=20.1,la=1e-15,verbose=True)

new mult search: 2
best bound ? True
bounds found for N_min: [1, 511]
False for N = 256.0
True for N = 383.0
True for N = 319.0
True for N = 287.0
False for N = 271.0
True for N = 279.0
True for N = 275.0
False for N = 273.0
True for N = 274.0
final estimation for N_min: 274


274

In [6]:
# representation of the workload sharing process between conv_term and error_term 

def partitioner(N=151,conv_term=0.6e-5,error_term=0.4e-5):
    """Print and plot how the target accuracy splits between its two terms.

    Prints the fraction of ``conv_term + error_term`` taken by each term, then
    shows a three-bar chart: convergence term (orange), error term (red) and
    their sum (blue). ``N`` only appears in the figure title.
    """
    total = conv_term+error_term
    print('percentages : conv term <- '+str(conv_term/total)+' , error term <- '+str(error_term/total))

    heights = [conv_term, error_term]
    heights.append(np.sum(heights))
    positions = np.arange(len(heights))

    plt.subplots(figsize=(9,6))
    plt.grid(axis='y')
    bars = plt.bar(positions, heights)
    plt.title(f'Repartition of the accuracy for N = {int(N)}')
    plt.xticks(positions, ('Convergence Term', 'Error Term','Target Accuracy'))
    for bar, colour in zip(bars, ('orange', 'red', 'blue')):
        bar.set_color(colour)
    plt.show()

In [7]:
# retrieve optimal eta schedules, depending on the cost model (i : inv prop, ii : minus log)

def retrieve_opt_schedule_i(w_k,r,beta,lambda0,j,la,lb,Na,Nb,q):
    """Build the eta schedule for cost model (i) from the multiplier lambda0.

    Each entry is (r*beta / (lambda0*w_k*q)) ** (1/(r+j)); the first ``Nb``
    entries are then overwritten with ``lb`` and the last ``Na`` entries with
    ``la`` (the ``la`` overwrite is applied last). A new array is returned.
    """
    exponent = 1/(r+j)
    schedule = np.power(r*beta/(lambda0*w_k*q), exponent)
    tail_start = int(len(schedule)-Na)
    schedule[:int(Nb)] = lb
    schedule[tail_start:] = la
    return schedule

def retrieve_opt_schedule_ii(w_k,beta,lambda0,j,la,lb,Na,Nb,q):
    """Build the eta schedule for cost model (ii) from the multiplier lambda0.

    Each entry is (beta / (lambda0*w_k*q)) ** (1/j); the first ``Nb`` entries
    are then overwritten with ``lb`` and the last ``Na`` entries with ``la``
    (the ``la`` overwrite is applied last). A new array is returned.
    """
    exponent = 1/(j)
    schedule = np.power(beta/(lambda0*w_k*q), exponent)
    tail_start = int(len(schedule)-Na)
    schedule[:int(Nb)] = lb
    schedule[tail_start:] = la
    return schedule

In [8]:
# solving KKT conditions, depending on the cost model (i : inv prop, ii : minus log )

def kkt_i_imp_bisec(N=151,L=10,mu=0.1,mode='FGD',epsilon=1e-5,R=10,j=1,q=1,r=1,la=5e-7,lb=1e-4,beta=1,bool_lb=True,display=True):
    """Solve the KKT conditions of cost model (i) by scanning Nb and bisecting on Na.

    ``Na`` is the number of trailing weights charged at level ``la`` and ``Nb``
    the number of leading weights charged at level ``lb``; the remaining
    indices are free and determine the multiplier lambda_0.

    Parameters
    ----------
    N, L, mu, mode : forwarded to ``compute_serial_coeffs``.
    epsilon, R : enter Gamma_N(eps) = W_N*epsilon - gamma_N*L*R**2/2.
    j, q : parameters of delta(eta) = (q/j)*eta**j.
    r, beta : parameters of the cost model.
    la, lb : lower / upper levels for eta.
    bool_lb : when false, the upper level is disabled (lb is replaced by 1e8
        and only Nb = 0 is tried).
    display : print progress messages.

    Returns
    -------
    (Na, Nb, lambda_0, w_k, W_N) for the first Nb whose candidate passes all
    KKT checks, or (None, None, None, None, None) when the problem is
    infeasible at level ``la`` or when no candidate passes the checks.
    """
    delta_fun = lambda eta: (q/j)*eta**(j)
    gamma_N,w_k,W_N = compute_serial_coeffs(N,L,mu,mode)
    Gamma_N_eps = W_N*epsilon - (gamma_N*L*R**2)/2
    floor_error = np.sum(w_k)*delta_fun(la)
    status = 'Gamma_N_eps = '+str(Gamma_N_eps)+' , (sum_k w_k) delta(la) = '+str(floor_error)

    # even the finest level everywhere must fit in the budget
    if not (Gamma_N_eps - floor_error >= 0):
        if display:
            print(' => error infeasible || '+status)
        return None,None,None,None,None

    if display:
        print(' => experiment started || '+status)

    if not bool_lb:
        Nb_candidates = np.array([0])
        lb = 100000000
    else:
        Nb_candidates = np.arange(N+1)

    def multiplier(Na, Nb):
        # remaining budget and multiplier when Na trailing / Nb leading indices are clamped
        Psi = Gamma_N_eps - delta_fun(la)*np.sum(w_k[N-Na:N]) - delta_fun(lb)*np.sum(w_k[:Nb])
        Upsilon = np.sum((q*w_k[Nb:N-Na])**(r/(r+j))) * ((r*beta)**(j/(r+j)))/j
        return Psi, (Upsilon/Psi)**((r+j)/j)

    def threshold(level, index):
        # multiplier value at which the unclamped eta at `index` equals `level`
        return (r*beta)/(q*level**(r+j)*w_k[int(index)])

    for Nb in Nb_candidates:

        # bisection on Na: move right while the multiplier keeps increasing
        Na_min,Na_max = 0,N-Nb
        while Na_max-Na_min > 0:
            Na_mid = int(np.floor((Na_max+Na_min)/2))
            _, lambda_prev = multiplier(Na_mid, Nb)
            _, lambda_next = multiplier(Na_mid+1, Nb)
            if lambda_next-lambda_prev>0:
                Na_min = int(Na_mid+1)
            else:
                Na_max = int(Na_mid)

        Na = int(Na_min)
        Psi_N_eps, lambda_0_opt_hat = multiplier(Na, Nb)
        if Psi_N_eps<0:
            continue

        # clamped indices must want to leave the box, free indices must lie inside it
        feas1 = Na<=0 or (lambda_0_opt_hat - threshold(la, N-Na))>=0
        feas2 = Nb<=0 or (lambda_0_opt_hat - threshold(lb, Nb-1))<=0
        feas3 = Na>=N or (lambda_0_opt_hat - threshold(la, N-Na-1))<0
        feas4 = not (Nb<N and bool_lb) or (lambda_0_opt_hat - threshold(lb, Nb))>0

        if feas1 and feas2 and feas3 and feas4:
            if display:
                print(r'/!\ KKT fulfilled for (Na,Nb) = ('+str(Na)+', '+str(Nb)+r') /!\ ')
            return Na,Nb,lambda_0_opt_hat,w_k,W_N

    # no candidate passed: report it instead of failing on unbound results
    if display:
        print(' => error no (Na,Nb) pair fulfils the KKT conditions')
    return None,None,None,None,None
    
    
def kkt_ii_imp_bisec(N=151,L=10,mu=0.1,mode='FGD',epsilon=1e-5,R=10,j=1,q=1,la=5e-7,lb=1e-3,beta=1,bool_lb=True,display=True):
    """Solve the KKT conditions of cost model (ii) by scanning Nb and bisecting on Na.

    ``Na`` is the number of trailing weights charged at level ``la`` and ``Nb``
    the number of leading weights charged at level ``lb``; the remaining
    indices are free and determine the multiplier lambda_0.

    Parameters
    ----------
    N, L, mu, mode : forwarded to ``compute_serial_coeffs``.
    epsilon, R : enter Gamma_N(eps) = W_N*epsilon - gamma_N*L*R**2/2.
    j, q : parameters of delta(eta) = (q/j)*eta**j.
    beta : parameter of the cost model.
    la, lb : lower / upper levels for eta.
    bool_lb : when false, the upper level is disabled (lb is replaced by 1e8
        and only Nb = 0 is tried).
    display : print progress messages.

    Returns
    -------
    (Na, Nb, lambda_0, w_k, W_N) for the first Nb whose candidate passes all
    KKT checks, or (None, None, None, None, None) when the problem is
    infeasible at level ``la`` or when no candidate passes the checks.
    """
    delta_fun = lambda eta: (q/j)*eta**(j)
    gamma_N,w_k,W_N = compute_serial_coeffs(N,L,mu,mode)
    Gamma_N_eps = W_N*epsilon - (gamma_N*L*R**2)/2
    floor_error = np.sum(w_k)*delta_fun(la)
    status = 'Gamma_N_eps = '+str(Gamma_N_eps)+' , (sum_k w_k) delta(la) = '+str(floor_error)

    # even the finest level everywhere must fit in the budget
    if not (Gamma_N_eps - floor_error >= 0):
        if display:
            print(' => error infeasible || '+status)
        return None,None,None,None,None

    if display:
        print(' => experiment started || '+status)

    if not bool_lb:
        Nb_candidates = np.array([0])
        lb = 100000000
    else:
        Nb_candidates = np.arange(N+1)

    def multiplier(Na, Nb):
        # remaining budget and multiplier when Na trailing / Nb leading indices are clamped;
        # the zero power turns the sum into a count of the free indices
        Psi = Gamma_N_eps - delta_fun(la)*np.sum(w_k[N-Na:N]) - delta_fun(lb)*np.sum(w_k[:Nb])
        Upsilon = np.sum((q*w_k[Nb:N-Na])**(0)) * ((beta)**(1))/j
        return Psi, (Upsilon/Psi)**(1)

    def threshold(level, index):
        # multiplier value at which the unclamped eta at `index` equals `level`
        return (beta)/(q*level**(j)*w_k[int(index)])

    for Nb in Nb_candidates:

        # bisection on Na: move right while the multiplier keeps increasing
        Na_min,Na_max = 0,N-Nb
        while Na_max-Na_min > 0:
            Na_mid = int(np.floor((Na_max+Na_min)/2))
            _, lambda_prev = multiplier(Na_mid, Nb)
            _, lambda_next = multiplier(Na_mid+1, Nb)
            if lambda_next-lambda_prev>0:
                Na_min = int(Na_mid+1)
            else:
                Na_max = int(Na_mid)

        Na = int(Na_min)
        Psi_N_eps, lambda_0_opt_hat = multiplier(Na, Nb)
        if Psi_N_eps<0:
            continue

        # clamped indices must want to leave the box, free indices must lie inside it
        feas1 = Na<=0 or (lambda_0_opt_hat - threshold(la, N-Na))>=0
        feas2 = Nb<=0 or (lambda_0_opt_hat - threshold(lb, Nb-1))<=0
        feas3 = Na>=N or (lambda_0_opt_hat - threshold(la, N-Na-1))<0
        feas4 = not (Nb<N and bool_lb) or (lambda_0_opt_hat - threshold(lb, Nb))>0

        if feas1 and feas2 and feas3 and feas4:
            if display:
                print(r'/!\ KKT fulfilled for (Na,Nb) = ('+str(Na)+', '+str(Nb)+r') /!\ ')
            return Na,Nb,lambda_0_opt_hat,w_k,W_N

    # no candidate passed: report it instead of failing on unbound results
    if display:
        print(' => error no (Na,Nb) pair fulfils the KKT conditions')
    return None,None,None,None,None
    
def solve_KKT_i(K=10,N=151,L=10,mu=0.1,mode='FGD',epsilon=1e-5,R=10,j=1,q=1,r=1,la=5e-7,lb=1e-4,beta=1,bool_lb=True,scale='log',display=True):
    """Compute the optimal and the constant eta schedules for cost model (i).

    The multiplier and the clamp counts come from ``kkt_i_imp_bisec``; the
    schedule is rebuilt with ``retrieve_opt_schedule_i``. The cost of a
    schedule is K*len(eta) + sum(beta/eta**r).

    Parameters
    ----------
    K : fixed cost charged per schedule entry.
    scale : 'lin' for a linear plot, anything else for a semilog-y plot.
    display : print the KKT summary and the relative cost gain, then plot both
        schedules, save the figure as a PDF and show it.
    Remaining parameters are forwarded to ``kkt_i_imp_bisec``.

    Returns
    -------
    (eta_opt, eta_opt_const), or (None, None) when the KKT solver returns no
    multiplier.
    """
    Na,Nb,lambda0,w_k,_ = kkt_i_imp_bisec(N,L,mu,mode,epsilon,R,j,q,r,la,lb,beta,bool_lb,display=False)
    if lambda0 is None:
        return None,None

    if bool_lb==False:
        lb = 100000000

    eta_opt = retrieve_opt_schedule_i(w_k,r,beta,lambda0,j,la,lb,Na,Nb,q)
    cost_fun = lambda eta: K*len(eta) + np.sum(beta/eta**r)
    s_n = np.sum(w_k)
    if mode=='FGD':
        gamma_n = 1
        w_n = w_k[-1]
    else:
        gamma_n = (1-(mu/L))**N
        w_n = s_n
    try_cost = cost_fun(eta_opt)

    # constant level spending the whole budget evenly, capped at lb
    eta_opt_const_base = np.min([(((w_n)*epsilon/s_n - (gamma_n*L*R**2)/(2*s_n))*(j/q))**(1/j),lb])
    eta_opt_const = np.ones(N) * eta_opt_const_base
    if eta_opt_const_base<la or eta_opt_const_base>lb:
        try_cost_const = np.nan # constant schedule not admissible
    else:
        try_cost_const = cost_fun(eta_opt_const)

    if display:
        N_list = np.arange(N)
        print(r'/!\ KKT fulfilled for (Na,Nb) = ('+str(Na)+', '+str(Nb)+r') /!\ ')
        print('cost gain: '+str(100*(try_cost_const-try_cost)/(try_cost_const))+' %')

        linear = (scale=='lin')
        draw = plt.plot if linear else plt.semilogy

        plt.figure(figsize=(9,6))
        plt.title(r'Visualization of optimal schedule $\{\eta_k^*\}_{k=0}^{N-1}$')
        plt.grid()
        plt.ylabel(r'$\eta$ value')
        plt.xlabel('iteration index')
        draw(N_list,eta_opt,color='blue')
        draw(N_list,eta_opt_const,color='green')
        draw(N_list,np.ones(N)*la,color='brown')
        if lb < 100000000:
            draw(N_list,np.ones(N)*lb,color='brown')
            if linear:
                plt.legend(['optimal schedule','constant schedule',r'$\ell_a$-level',r'$\ell_b$-level'])
            else:
                plt.legend(['optimal schedule','constant schedule',r'$\ell_a$',r'$\ell_b$'])
        else:
            plt.legend(['optimal schedule','constant schedule',r'$\ell_a$'])
        plt.savefig('kkt_schedules.pdf' if linear else 'kkt_schedules_logscale.pdf')

        plt.show()

    return eta_opt,eta_opt_const
    
    
def solve_KKT_ii(K=10,N=151,L=10,mu=0.1,mode='FGD',epsilon=1e-5,R=10,j=1,q=1,la=5e-7,lb=1e-3,beta=1,bool_lb=True,scale='log',display=True):
    """Compute the optimal and the constant eta schedules for cost model (ii).

    The multiplier and the clamp counts come from ``kkt_ii_imp_bisec``; the
    schedule is rebuilt with ``retrieve_opt_schedule_ii``. The cost of a
    schedule is K*len(eta) - beta*sum(log(eta)).

    Parameters
    ----------
    K : fixed cost charged per schedule entry.
    scale : 'lin' for a linear plot, anything else for a semilog-y plot.
    display : print the KKT summary and the relative cost gain, then plot both
        schedules, save the figure as a PDF and show it.
    Remaining parameters are forwarded to ``kkt_ii_imp_bisec``.

    Returns
    -------
    (eta_opt, eta_opt_const), or (None, None) when the KKT solver returns no
    multiplier.
    """
    Na,Nb,lambda0,w_k,_ = kkt_ii_imp_bisec(N,L,mu,mode,epsilon,R,j,q,la,lb,beta,bool_lb,display=False)
    if lambda0 is None:
        return None,None

    if bool_lb==False:
        lb = 100000000

    eta_opt = retrieve_opt_schedule_ii(w_k,beta,lambda0,j,la,lb,Na,Nb,q)
    cost_fun = lambda eta: K*len(eta) - beta * np.sum(np.log(eta))
    s_n = np.sum(w_k)
    if mode=='FGD':
        gamma_n = 1
        w_n = w_k[-1]
    else:
        gamma_n = (1-(mu/L))**N
        w_n = s_n
    try_cost = cost_fun(eta_opt)

    # constant level spending the whole budget evenly, capped at lb
    eta_opt_const_base = np.min([(((w_n)*epsilon/s_n - (gamma_n*L*R**2)/(2*s_n))*(j/q))**(1/j),lb])
    eta_opt_const = np.ones(N) * eta_opt_const_base
    if eta_opt_const_base<la or eta_opt_const_base>lb:
        try_cost_const = np.nan # constant schedule not admissible
    else:
        try_cost_const = cost_fun(eta_opt_const)

    if display:
        N_list = np.arange(N)
        print(r'/!\ KKT fulfilled for (Na,Nb) = ('+str(Na)+', '+str(Nb)+r') /!\ ')
        print('cost gain: '+str(100*(try_cost_const-try_cost)/(try_cost_const))+' % ')

        linear = (scale=='lin')
        draw = plt.plot if linear else plt.semilogy

        plt.figure(figsize=(9,6))
        plt.title(r'Visualization of optimal schedule $\{\eta_k^*\}_{k=0}^{N-1}$')
        plt.grid()
        plt.ylabel(r'$\eta$ value')
        plt.xlabel('iteration index')
        draw(N_list,eta_opt,color='blue')
        draw(N_list,eta_opt_const,color='green')
        draw(N_list,np.ones(N)*la,color='brown')
        if lb < 100000000:
            draw(N_list,np.ones(N)*lb,color='brown')
            if linear:
                plt.legend(['optimal schedule','constant schedule',r'$\ell_a$-level',r'$\ell_b$-level'])
            else:
                plt.legend(['optimal schedule','constant schedule',r'$\ell_a$',r'$\ell_b$'])
        else:
            plt.legend(['optimal schedule','constant schedule',r'$\ell_a$'])
        plt.savefig('kkt_schedules.pdf' if linear else 'kkt_schedules_logscale.pdf')

        plt.show()

    return eta_opt,eta_opt_const