In [92]:
import sys
import os

import pandas as pd
import numpy as np
import numdifftools as ndt
import statsmodels.api as sm

import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.patches import Patch
from matplotlib.patches import Rectangle
from matplotlib.lines import Line2D
from matplotlib.dates import DateFormatter

from scipy.stats import gamma
from scipy.stats import norm
from scipy.stats import truncnorm
from scipy.stats import vonmises
from scipy.stats import multivariate_normal
from scipy.stats import gaussian_kde
from scipy.stats import circstd
from scipy.special import iv
from scipy.special import expit
from scipy.special import logit
from scipy.special import logsumexp
from scipy.optimize import minimize
from scipy.optimize import minimize_scalar
from scipy.optimize import LinearConstraint
from scipy.optimize import linear_sum_assignment
from scipy.signal import convolve
from scipy.interpolate import interp1d
from scipy.linalg import block_diag

from datetime import datetime
from time import gmtime, strftime

from math import isclose
from copy import deepcopy
from itertools import product
import importlib
import time
import pickle
import random

import helper_funcs
import HHMM
import optimizor
import stoch_optimizor

from helper_funcs import eta_2_log_Gamma
from helper_funcs import eta0_2_log_delta
from helper_funcs import log_Gamma_2_eta
from helper_funcs import log_delta_2_eta0

# Seed NumPy's global RNG and Python's `random` module so reruns of the
# notebook draw the same pseudo-random numbers.
np.random.seed(0)
random.seed(0)

In [None]:
# Which experiment run to analyse. Each date names one results folder; the
# trailing note records the buffering configuration used for that run.
#date = "Sep-19-2022" # No buffer for mini-batches, no buffer for weights
#date = "Sep-15-2022" # Make a buffer for mini-batches, but not for weights
date = "Sep-16-2022" # Make a buffer for weights, but not for mini-batches

# NOTE(review): absolute, machine-specific paths -- these only resolve on the
# original author's machine; consider making the base directory configurable.
param_folder = "/Users/evsi8432/Documents/Research/sublinear-HMM-inference/params/case_study/" + date
plot_folder = "/Users/evsi8432/Documents/Research/sublinear-HMM-inference/plt/case_study/" + date

# Method names and partial_E values substituted into the result file names
# when the pickled runs are loaded.
methods = ["BFGS","SAGA","SVRG","GD","CG","control"]
# K[0] and K[1] appear in the file names ("K-%d-%d"); presumably the number of
# coarse- and fine-scale hidden states -- TODO confirm.
K = [2,3]
partial_Es = [0,0.5,1]

# A run counts as converged at the first trace entry whose gradient norm is below tol.
tol = 1e-3

# Results, all keyed by (method, partial_E):
#   optims             -> the loaded run with the highest final log-likelihood
#   times_to_converge  -> per-seed time at the convergence index
#   epochs_to_converge -> per-seed epoch at the convergence index
#   lls                -> per-seed log-likelihood at the convergence index
optims = {}
times_to_converge = {}
epochs_to_converge = {}
lls = {}

#"experiment_1_2022-08-05_20-27-39"
#"experiment_1_2022-08-05_20-26-58"
#"experiment_1_2022-08-05_20-26-22"
#"experiment_1_2022-08-05_20-25-49"
#"experiment_1_2022-08-05_20-25-36"
#"experiment_1_2022-08-05_20-27-53"
#"experiment_1_2022-08-05_20-25-18"
#"experiment_1_2022-08-05_20-25-12"

#with open("../params/experiment_1_2022-08-05_20-25-49","rb") as f:
#    optims = pickle.load(f)
    

In [None]:
# For every (method, partial_E) configuration, load each random-seed run from
# disk, record where it converged, and remember the run with the highest final
# log-likelihood in `optims`.
#
# NOTE: pickle.load can execute arbitrary code stored in the file -- only point
# param_folder at result files you produced yourself.
# NOTE: `optim` stays bound to the last successfully loaded run after this
# cell finishes; later cells that read a bare `optim` depend on that.
for method in methods:
    for partial_E in partial_Es:

        # these methods are only looked up with partial_E == 0
        if method in ["BFGS","CG","GD","control"] and partial_E > 0:
            continue

        # best final log-likelihood seen so far, and the file it came from
        best_ll = -np.inf
        best_fname = param_folder + "/case_study_K-%d-%d_%s_%.1f_%03d" % (K[0],K[1],method,partial_E,0)

        # per-seed summaries for this configuration
        lls[(method,partial_E)] = []
        epochs_to_converge[(method,partial_E)] = []
        times_to_converge[(method,partial_E)] = []

        for rand_seed in range(100):

            fname = param_folder + "/case_study_K-%d-%d_%s_%.1f_%03d" % (K[0],K[1],method,partial_E,rand_seed)

            # A missing file is expected (not every seed was necessarily run);
            # any other failure is reported with its cause instead of being
            # mislabelled as "does not exist".
            try:
                with open(fname, 'rb') as f:
                    optim = pickle.load(f)
            except FileNotFoundError:
                print("file %s does not exist" % fname)
                continue
            except Exception as err:
                print("file %s could not be loaded: %r" % (fname, err))
                continue
            print("loaded file %s" % fname)

            # check for some issue (likely nan in theta)
            if len(optim.time_trace) == 0:
                print("file %s didn't optimize." % fname)
                continue

            # first trace index whose gradient norm is below tol; fall back to
            # the last index when the run never got below it
            below_tol = np.flatnonzero(np.asarray(optim.grad_norm_trace) < tol)
            if below_tol.size > 0:
                converge_ind = below_tol[0]
            else:
                converge_ind = len(optim.grad_norm_trace)-1

            epochs_to_converge[(method,partial_E)].append(optim.epoch_trace[converge_ind])
            times_to_converge[(method,partial_E)].append(optim.time_trace[converge_ind])
            lls[(method,partial_E)].append(optim.log_like_trace[converge_ind])

            # the "best" run is chosen by its FINAL log-likelihood, not the
            # value at the convergence index
            if optim.log_like_trace[-1] > best_ll:
                optims[(method,partial_E)] = optim
                best_fname = fname
                best_ll = optim.log_like_trace[-1]

        print(best_fname)

In [None]:
def get_bins(dictionary,nbins):
    """Return ``nbins`` evenly spaced bin edges covering every value in ``dictionary``.

    The edges run from 10% of the data range below the overall minimum to 10%
    of the data range above the overall maximum, so histograms drawn from
    different entries of the same dictionary share one set of bins.

    Parameters
    ----------
    dictionary : dict
        Maps any key to a sequence of numbers. Empty sequences are ignored.
    nbins : int
        Number of edges to return (passed straight to ``np.linspace``).

    Returns
    -------
    numpy.ndarray
        ``nbins`` increasing edge positions.

    Raises
    ------
    ValueError
        If no sequence in ``dictionary`` contains a value (the range would be
        undefined and the edges would come out as NaN).
    """
    pooled = [value for values in dictionary.values() for value in values]
    if len(pooled) == 0:
        raise ValueError("get_bins needs at least one value to compute bin edges")

    mini = min(pooled)
    maxi = max(pooled)

    # pad by 10% of the range; when all values coincide the range is zero, so
    # use a fixed pad instead to keep the bins from collapsing to zero width
    span = maxi - mini
    pad = 0.1*span if span > 0 else 0.5

    return np.linspace(mini - pad,maxi + pad,nbins)

        
# For every configuration that produced a fit, draw one histogram per
# diagnostic: time to converge, epochs to converge, and log-likelihood at
# the convergence index.
hist_specs = [(times_to_converge, "time to converge, %s"),
              (epochs_to_converge, "epochs to converge, %s"),
              (lls, "log-likelihoods at convergence, %s")]

for key in optims:

    print(key)

    # keys are (method, partial_E) tuples, so the method name has to be read
    # from the first component; comparing the whole tuple to a string is
    # never true and would let the control runs through
    if key[0] == "control":
        continue

    for series, title_fmt in hist_specs:
        # bins are computed over all configurations so that the histograms
        # of different configurations share the same x-axis binning
        plt.hist(series[key],
                 bins=get_bins(series,50))
        plt.title(title_fmt % str(key))
        plt.show()

# Plot Results

In [93]:
# Methods drawn in the comparison plots (the control run is used only as the
# reference and is not plotted).
methods = ["BFGS","CG","GD","SAGA","SVRG"]

# matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9;
# pyplot.get_cmap returns the same colormap object.
cmap = plt.get_cmap('tab10')

# (colour, line style) for every (method, partial_E) curve
linestyles = {("BFGS",0.0):("black","-"),
              ("CG",0.0)  :("black","--"),
              ("GD",0.0)  :("black",":"),
              ("SAGA",0.0):(cmap(0),"-"),
              ("SAGA",0.5):(cmap(0),"--"),
              ("SAGA",1.0):(cmap(0),":"),
              ("SVRG",0.0):(cmap(1),"-"),
              ("SVRG",0.5):(cmap(1),"--"),
              ("SVRG",1.0):(cmap(1),":")}

# Start from the control run as the reference ("star") solution. Gamma_star
# is initialised here as well so that it is defined even when no other run
# beats the control run.
control_optim = optims[("control",0.0)]
ll_star = control_optim.log_like_trace[-1]
Gamma_star = np.exp(control_optim.log_Gamma)
theta_star = control_optim.theta
eta_star = control_optim.eta
eta0_star = control_optim.eta0

# If any loaded run ends with a higher log-likelihood than the current
# reference, adopt that run's parameters as the reference instead.
for key in optims:
    if ll_star < optims[key].log_like_trace[-1]:
        print("control model has lower likelihood than test model")
        print(key)
        ll_star = optims[key].log_like_trace[-1]
        Gamma_star = np.exp(optims[key].log_Gamma)
        theta_star = optims[key].theta
        eta_star = optims[key].eta
        eta0_star = optims[key].eta0

def plot_ll(optims,x_time=False,xlims=None,ylims=None):
    """Plot the log-likelihood gap to the reference solution for every loaded fit.

    One curve is drawn per (method, partial_E) key present in ``optims``, on a
    logarithmic y-axis, showing ``ll_star - log_like_trace``. The figure is
    saved to ``plot_folder`` and then shown.

    Relies on the module-level names ``methods``, ``linestyles``, ``ll_star``,
    ``plot_folder`` and ``K``.

    Parameters
    ----------
    optims : dict
        Maps (method, partial_E) to a fitted optimizer object exposing
        ``time_trace``, ``epoch_trace`` and ``log_like_trace``.
    x_time : bool, optional
        If True the x-axis is ``time_trace`` ("Seconds of Computation");
        otherwise it is ``epoch_trace`` ("Epoch").
    xlims, ylims : sequence or None, optional
        Axis limits passed to ``plt.xlim`` / ``plt.ylim``. They are applied
        before the figure is written, so the saved file matches what is shown.
    """
    legend = []
    plt.figure(figsize=(8,6))

    for method in methods:

        for partial_E in [0,0.5,1]:

            if (method,partial_E) not in optims:
                continue

            # 1 for partial_E in {0, 0.5}, 10 for partial_E == 1
            M = int(partial_E == 0) + int(partial_E == 0.5) + 10*(partial_E == 1)

            optim = optims[(method,partial_E)]
            lcol, ltyp = linestyles[(method,partial_E)]

            x = optim.time_trace if x_time else optim.epoch_trace

            # asarray so the subtraction also works when the trace is a plain list
            gap = ll_star - np.asarray(optim.log_like_trace)

            plt.plot(x,gap,color=lcol,linestyle=ltyp)

            title = method
            if partial_E:
                title += ", partial E"
            else:
                title += ", no partial E"
            title += ", M = %dT" % M

            legend.append(title)

    plt.ylabel(r"$\ell(\theta^*, \eta^*) - \ell(\theta, \eta)$")
    plt.yscale("log")
    plt.legend(legend,loc='upper center', bbox_to_anchor=(0.5, 1.2),
               ncol=3)
    #plt.legend(legend)

    if x_time:
        plt.xlabel("Seconds of Computation")
        out_name = "/log-like_v_time_K-%d-%d.png"%(K[0],K[1])
    else:
        plt.xlabel("Epoch")
        out_name = "/log-like_v_epoch_K-%d-%d.png"%(K[0],K[1])

    # limits must be set before saving, otherwise the file ignores them
    if xlims is not None:
        plt.xlim(xlims)
    if ylims is not None:
        plt.ylim(ylims)

    plt.savefig(plot_folder + out_name,dpi=500)
    plt.show()

control model has lower likelihood than test model
('SAGA', 0)


In [132]:
def find_perm(optim,eta_star,theta_star):
    """Match the hidden-state labels of ``optim`` to those of a reference fit.

    States are paired by minimising a summed squared-difference cost with
    ``scipy.optimize.linear_sum_assignment``: first the coarse-scale states,
    then the fine-scale states inside each matched pair of coarse states.

    * Coarse cost between reference state ``a`` and optim state ``b``:
      squared difference of the coarse transition matrices' diagonal entries
      plus squared difference of the traces of the corresponding fine-scale
      transition matrices.
    * Fine cost between reference state ``c`` and optim state ``d``: for every
      feature, squared difference of ``'mu'`` and of ``exp('log_sig')``, plus
      the squared difference of the fine transition matrices' diagonal entries.

    ``eta_2_log_Gamma(eta)`` is used as returning an indexable pair whose
    element 0 is the coarse-scale log transition matrix and whose element 1
    holds one fine-scale log transition matrix per coarse state.

    Parameters
    ----------
    optim : object
        Fitted optimizer exposing ``K``, ``eta``, ``theta`` and ``features``.
    eta_star, theta_star
        Reference parameters, structured like ``optim.eta`` / ``optim.theta``.

    Returns
    -------
    optim_states_coarse : numpy.ndarray
        Entry ``a`` is the optim coarse state assigned to reference coarse
        state ``a``.
    optim_states_fine : list of numpy.ndarray
        Indexed by *optim* coarse state ``b``; entry ``c`` of
        ``optim_states_fine[b]`` is the optim fine state assigned to fine
        state ``c`` of the reference coarse state that was matched to ``b``.
    """
    n_coarse, n_fine = optim.K[0], optim.K[1]

    # convert each parameter set once; every quantity below derives from these
    log_Gammas_star = eta_2_log_Gamma(eta_star)
    log_Gammas_optm = eta_2_log_Gamma(optim.eta)

    Gamma_coarse_star = np.exp(log_Gammas_star[0])
    Gamma_coarse_optm = np.exp(log_Gammas_optm[0])
    Gammas_fine_star = np.exp(log_Gammas_star[1])
    Gammas_fine_optm = np.exp(log_Gammas_optm[1])

    # coarse-scale matching: rows are reference states, columns optim states
    coarse_cost = np.zeros((n_coarse,n_coarse))
    for k0_star in range(n_coarse):
        for k0_optm in range(n_coarse):
            diag_gap = Gamma_coarse_star[k0_star,k0_star] - \
                       Gamma_coarse_optm[k0_optm,k0_optm]
            trace_gap = np.trace(Gammas_fine_star[k0_star]) - \
                        np.trace(Gammas_fine_optm[k0_optm])
            coarse_cost[k0_star,k0_optm] = diag_gap**2 + trace_gap**2

    # the row indices are just 0..n-1 for a square cost matrix, so only the
    # column assignment is kept
    _, optim_states_coarse = linear_sum_assignment(coarse_cost)

    # fine-scale matching, one assignment problem per matched coarse pair
    optim_states_fine = [None for _ in range(n_coarse)]

    for k0_star in range(n_coarse):
        k0_optm = optim_states_coarse[k0_star]

        Gamma_fine_star = Gammas_fine_star[k0_star]
        Gamma_fine_optm = Gammas_fine_optm[k0_optm]

        fine_cost = np.zeros((n_fine,n_fine))
        for k1_star in range(n_fine):
            for k1_optm in range(n_fine):
                cost = 0.0
                for feature in optim.features:
                    cost += (theta_star[k0_star][feature]['mu'][k1_star] - \
                             optim.theta[k0_optm][feature]['mu'][k1_optm])**2
                    cost += (np.exp(theta_star[k0_star][feature]['log_sig'][k1_star]) - \
                             np.exp(optim.theta[k0_optm][feature]['log_sig'][k1_optm]))**2
                cost += (Gamma_fine_star[k1_star,k1_star] - \
                         Gamma_fine_optm[k1_optm,k1_optm])**2
                fine_cost[k1_star,k1_optm] = cost

        # stored under the OPTIM coarse index, as callers look it up that way
        _, optim_states_fine[k0_optm] = linear_sum_assignment(fine_cost)

    return optim_states_coarse,optim_states_fine

In [81]:
# Squared parameter distance between the reference ("star") fit and `optim`
# at every recorded epoch, after matching hidden-state labels with find_perm.
#
# NOTE(review): `optim` is whichever run the loading loop assigned last; bind
# it explicitly (e.g. optim = optims[some_key]) if a specific run is intended.
# NOTE(review): the permutation is computed once from the final parameters of
# `optim` and applied to every epoch of the trace.
optim_states_coarse, optim_states_fine = find_perm(optim,eta_star,theta_star)

# now get distances
square_dists = np.zeros(len(optim.epoch_trace))

Coarse_Gamma_star = np.exp(eta_2_log_Gamma(eta_star)[0])
Fine_Gammas_star = np.exp(eta_2_log_Gamma(eta_star)[1])

Coarse_delta_star = np.exp(eta0_2_log_delta(eta0_star)[0])
Fine_deltas_star = np.exp(eta0_2_log_delta(eta0_star)[1])

# NOTE(review): no per-epoch trace of eta0 is visible in this notebook, so the
# final eta0 is used; its contribution is therefore the same at every epoch.
Coarse_delta_optm = np.exp(eta0_2_log_delta(optim.eta0)[0])

for i in range(len(optim.epoch_trace)):

    # parameters as they were at epoch i
    # (assumes eta_trace[i] has the same structure as optim.eta -- TODO confirm)
    theta_i = optim.theta_trace[i]
    Coarse_Gamma_optm = np.exp(eta_2_log_Gamma(optim.eta_trace[i])[0])

    # add theta
    for k0_star in range(optim.K[0]):
        k0_optm = optim_states_coarse[k0_star]
        for k1_star in range(optim.K[1]):
            k1_optm = optim_states_fine[k0_optm][k1_star]
            for feature in optim.features:
                square_dists[i] += (theta_star[k0_star][feature]['mu'][k1_star] - \
                                    theta_i[k0_optm][feature]['mu'][k1_optm])**2
                square_dists[i] += (np.exp(theta_star[k0_star][feature]['log_sig'][k1_star]) - \
                                    np.exp(theta_i[k0_optm][feature]['log_sig'][k1_optm]))**2

    # add coarse Gamma and delta
    for i_star in range(optim.K[0]):
        i_optm = optim_states_coarse[i_star]

        for j_star in range(optim.K[0]):
            j_optm = optim_states_coarse[j_star]

            square_dists[i] += (Coarse_Gamma_star[i_star,j_star] - \
                                Coarse_Gamma_optm[i_optm,j_optm])**2

        square_dists[i] += (Coarse_delta_star[i_star] - \
                            Coarse_delta_optm[i_optm])**2

    # TODO: add fine Gamma and delta (Fine_Gammas_star / Fine_deltas_star are
    # computed above but not used yet)
    

3.372649388213094
20.15328923800974
36.57197783218965
47.853856388752796
64.63449623854945
81.05318483272937
0.0025354452365320647
10.69370707553382
22.25582729410673
26.967391598863294
37.65856322916058
49.22068344773349
0.0025354452365320647
13.531023693412488
27.93046052986406
35.479341452499305
49.00782970067525
63.40726653712682
0.0006812441668023045
9.452938777432612
19.79566687514803
23.927128602514504
33.37938613578031
43.722114233495724
0.0006812441668023045
12.537800886732123
25.965391093747055
33.18171493041305
45.718834572978366
59.14642477999329
0.000515413501028626
8.386476086920242
17.33967582032111
21.04779555383973
29.433756227258936
38.386955960659805
0.000515413501028626
11.792073872749736
24.150871391980097
31.26458891132821
43.056147370576916
55.41494488980727
0.00033094562238955897
8.167397734602341
16.438550658628078
19.983786998545643
28.150853787525598
36.42200671155133
0.00033094562238955897
11.8864749689249
23.87670512727319
31.141018701513314
43.027162724815

2.392733996580258e-05
9.3462060331335
15.998086987566609
22.64973693832323
31.99591904411677
38.64779999854988
2.312343748298268e-05
6.609166363145679
10.577012368621165
14.544651655967286
21.15379489567548
25.12164090115097
2.312343748298268e-05
9.299350250445528
15.957380143220862
22.615203317866825
31.91453044487487
38.572560337650216
2.3360216613556522e-05
6.580885916415898
10.572480416973928
14.563882211078843
21.144744767278123
25.136339267836153
2.3360216613556522e-05
9.253412812555718
15.917534209253565
22.5814628994983
31.8348523518374
38.49897374853525
2.32976439602378e-05
6.551355100591626
10.564651248138292
14.577754353299275
21.12908615624695
25.14238230379361
2.32976439602378e-05
9.209024723768847
15.879990494492732
22.550763222830934
31.75976464895583
38.43073041967971
2.3013545439200606e-05
6.507747329678793
10.528564522562059
14.549193001790371
21.056917317923727
25.077734510806994
2.3013545439200606e-05
9.161319179254143
15.835708221712762
22.509908550516425
31.671204

1.4645796074462092e-05
5.083285850348351
9.184280937938848
13.285149140344947
18.368420344897224
22.46941543248772
1.4645796074462092e-05
7.902972325223058
14.82365388768826
21.744208564969068
29.64716624439605
36.567847806861245
1.454267284608519e-05
5.064648027202233
9.16561321617104
13.26645093007016
18.331084414599545
22.43204960356835
1.454267284608519e-05
7.89030301486982
14.816923191506216
21.74341589307293
29.633704365269907
36.56032454190629
1.4308266325861186e-05
5.046532742715099
9.147929871059757
13.249200441488828
18.2957188759376
22.39711600428226
1.4308266325861186e-05
7.877307907516006
14.80948020066157
21.741525935891552
29.618819535141235
36.55099182828681
1.4185692487620822e-05
5.027842111975614
9.128788468942348
13.22960940157853
18.257437327861656
22.35838368482839
1.4185692487620822e-05
7.861644876926169
14.796393998843456
21.731017696430193
29.592648387663875
36.52739750958117
1.3715074679057907e-05
4.990875009539498
9.091009258594243
13.191020162001886
18.181881

21.332118189780868
28.52148267776164
35.592877433227095
3.3584647976878596e-06
4.174535231263371
8.2412600384635
12.307946888830319
16.482478761628897
20.54920356882902
3.3584647976878596e-06
7.1814412602837026
14.255072096504165
21.328664975891314
28.51010287771022
35.58373371393068
3.158148101560347e-06
4.160724774474686
8.222830811926894
12.284901519279405
16.44562313560599
20.507729173058205
3.158148101560347e-06
7.174827636212753
14.251036535403028
21.32721010449361
28.502034582558263
35.578243481748544
2.901785800450301e-06
4.150636706672349
8.211175743533259
12.271680994053451
16.42231479894
20.482853835800906
2.901785800450301e-06
7.168649747409709
14.247201825007977
21.32572011626553
28.49436696188944
35.57291903948771
2.8353036102625603e-06
4.146370162154068
8.206715189818386
12.267027828415642
16.413395155266098
20.473740182930417
2.8353036102625603e-06
7.167402046018404
14.248778957547056
21.33012348000865
28.49752269072344
35.57889960225209
2.6917546174772444e-06
4.1381397

In [None]:
# Draw the odd-indexed and then the even-indexed entries of square_dists as
# two separate point series on the same axes.
# NOTE(review): presumably the interleaved entries correspond to two different
# kinds of trace record -- confirm, and add axis labels / a legend.
plt.plot(square_dists[1::2],'.')
plt.plot(square_dists[0::2],'.')

In [None]:
def find_distances(optim,theta_star,eta_star,eta0_star):
    """Per-epoch squared distance between ``optim``'s ``'mu'`` parameters and a reference fit.

    Every (coarse, fine) state pair is flattened to the index
    ``K[1]*k0 + k1``. Reference states are matched to optim states with
    ``scipy.optimize.linear_sum_assignment`` on a cost made of, per feature,
    the squared difference of ``'mu'`` and of ``exp('log_sig')``, plus (once
    per pair) the squared difference of the diagonal entries of the full
    transition matrices. The matching uses the final parameters of ``optim``
    and is then applied to every entry of ``optim.theta_trace``.

    Relies on the module-level ``Gamma_star`` for the reference transition
    matrix.

    Parameters
    ----------
    optim : object
        Fitted optimizer exposing ``K``, ``features``, ``theta``,
        ``theta_trace``, ``epoch_trace``, ``eta`` and ``log_Gamma``.
    theta_star, eta_star, eta0_star
        Reference parameters. ``eta0_star`` is currently unused.

    Returns
    -------
    numpy.ndarray
        One value per entry of ``optim.epoch_trace``: the summed squared
        difference of ``'mu'`` over all features and matched state pairs.
        Transition-probability terms are not included yet.
    """
    n_coarse, n_fine = optim.K[0], optim.K[1]
    n_states = n_coarse*n_fine

    # diagonals of the full transition matrices; use the run that was passed
    # in rather than a hard-coded entry of the global `optims`
    Gamma_diag = np.diag(np.exp(optim.log_Gamma))
    Gamma_diag_star = np.diag(Gamma_star)

    # rows: flattened reference states, columns: flattened optim states
    cost_matrix = np.zeros((n_states,n_states))

    for k0_star in range(n_coarse):
        for k1_star in range(n_fine):
            flat_star = n_fine*k0_star + k1_star
            for k0 in range(n_coarse):
                for k1 in range(n_fine):
                    flat_optm = n_fine*k0 + k1
                    cost = 0.0
                    for feature in optim.features:
                        cost += (theta_star[k0_star][feature]['mu'][k1_star] - \
                                 optim.theta[k0][feature]['mu'][k1])**2
                        cost += (np.exp(theta_star[k0_star][feature]['log_sig'][k1_star]) - \
                                 np.exp(optim.theta[k0][feature]['log_sig'][k1]))**2
                    # transition term counted once per state pair, not once
                    # per feature
                    cost += (Gamma_diag_star[flat_star] - \
                             Gamma_diag[flat_optm])**2
                    cost_matrix[flat_star,flat_optm] = cost

    star_states, optim_states = linear_sum_assignment(cost_matrix)

    # now find distances between the ideal parameters and the current parameters
    square_dists = np.zeros(len(optim.epoch_trace))

    # add theta: decode each flattened index back into (coarse, fine) before
    # indexing, since 'mu' is indexed by the fine state within a coarse state
    for i,theta in enumerate(optim.theta_trace):
        for flat_star, flat_optm in zip(star_states,optim_states):
            k0_star, k1_star = divmod(int(flat_star),n_fine)
            k0_optm, k1_optm = divmod(int(flat_optm),n_fine)
            for feature in optim.features:
                square_dists[i] += (theta_star[k0_star][feature]['mu'][k1_star] - \
                                    theta[k0_optm][feature]['mu'][k1_optm])**2

    # TODO: fold the transition matrices into the distance. They are computed
    # here (reference and optim kept under separate names) but not used yet.
    Coarse_Gamma_star = np.exp(helper_funcs.eta_2_log_Gamma(eta_star)[0])
    Coarse_Gammas = np.exp(helper_funcs.eta_2_log_Gamma(optim.eta)[0])

    Fine_Gammas_star = np.exp(helper_funcs.eta_2_log_Gamma(eta_star)[1])
    Fine_Gammas = np.exp(helper_funcs.eta_2_log_Gamma(optim.eta)[1])

    return square_dists

In [None]:
def plot_results(optim,ll_star,method,partial_E,M,features,iters=None):
    """Plot and save the optimisation traces of a single fitted run.

    Produces, in order: log10 of the gradient norm against epoch; for every
    feature the ``'mu'`` traces and the ``'log_sig'`` traces of each
    (coarse, fine) state; the off-diagonal coarse-scale eta traces; and the
    off-diagonal fine-scale eta traces. Each figure is saved under the
    module-level ``plot_folder`` and then shown.

    Every file name carries ``method``, ``partial_E``, ``M`` and the
    module-level ``K``; the per-feature figures additionally carry the
    feature name so that one feature does not overwrite another.

    Parameters
    ----------
    optim : object
        Fitted optimizer exposing ``K``, ``epoch_trace``, ``grad_norm_trace``,
        ``theta_trace`` and ``eta_trace``.
    ll_star
        Unused; kept so existing calls keep working.
    method : str
        Method name, used in titles and file names.
    partial_E
        Truthy selects the "partial E-step" title; its string form is also
        written into the file names.
    M : int
        Shown in titles as "M = <M>T" and written into the file names.
    features : iterable
        Feature keys of ``theta`` to plot.
    iters
        Unused; kept so existing calls keep working.
    """
    title = method
    if partial_E:
        title += ", partial E-step"
    else:
        title += ", no partial E"
    title += ", M = %dT" % M

    x = optim.epoch_trace

    # common tail of every output file name
    file_tag = "%s-%s-%d-K-%d-%d.png"%(method,partial_E,M,K[0],K[1])

    # plot log of gradient of log-likelihood
    plt.plot(x,np.log10(optim.grad_norm_trace))
    # -3 = log10(1e-3), the value `tol` is set to in the configuration cell
    plt.axhline(-3, c = 'k', linestyle = "--")
    plt.title(r"$\log_{10}$ of Gradient Norm v Epoch, %s" % title)
    plt.xlabel("Epoch (k)")
    plt.ylabel(r"$\log ||\widehat \nabla_\theta F^{(k,m)} + \widehat \nabla_\eta G^{(k,m)}||$")
    plt.savefig(plot_folder + "/grad-hat-ll-" + file_tag,dpi=300)
    plt.show()

    for feature in features:

        # plot means
        legend = []
        for k0 in range(optim.K[0]):
            for k1 in range(optim.K[1]):
                plt.plot(x,[theta[k0][feature]['mu'][k1] for theta in optim.theta_trace])
                legend.append(r"$\mu^{(%d,%d)}$"%(k0+1,k1+1))

        plt.title(feature + r" $\mu$ v Epoch, %s" % title)
        plt.xlabel("Epoch")
        plt.legend(legend)
        plt.savefig(plot_folder + "/mu-%s-" % feature + file_tag,dpi=500)
        plt.show()

        # plot log(sigma)
        legend = []
        for k0 in range(optim.K[0]):
            for k1 in range(optim.K[1]):
                plt.plot(x,[theta[k0][feature]['log_sig'][k1] for theta in optim.theta_trace])
                legend.append(r"$\log\left(\sigma^{(%d,%d)}\right)$"%(k0+1,k1+1))

        plt.title(feature + r" $\log\left(\sigma\right)$ v Epoch, %s" % title)
        plt.xlabel("Epoch")
        plt.legend(legend)
        plt.savefig(plot_folder + "/log-sig-%s-" % feature + file_tag,dpi=500)
        plt.show()

    # plot eta_coarse (off-diagonal entries only)
    legend = []
    for k in range(optim.K[0]):
        for l in range(optim.K[0]):
            if k != l:
                plt.plot(x,[eta[0][k,l] for eta in optim.eta_trace])
                legend.append(r"$\eta^{(%d,%d)}$" % (k+1,l+1))
    plt.title(r"$\eta$ v Epoch, %s" % title)
    plt.xlabel("Epoch")
    plt.legend(legend)
    plt.savefig(plot_folder + "/eta-" + file_tag,dpi=500)
    plt.show()

    # plot eta_fine (off-diagonal entries only, one matrix per coarse state)
    legend = []
    for k0 in range(optim.K[0]):
        for i in range(optim.K[1]):
            for j in range(optim.K[1]):
                if i != j:
                    plt.plot(x,[eta[1][k0][i,j] for eta in optim.eta_trace])
                    # 1-based coarse index, like every other legend here
                    legend.append(r"$\eta_%d^{*(%d,%d)}$" % (k0+1,i+1,j+1))
    plt.title(r"$\eta^*$ v Epoch, %s" % title)
    plt.xlabel("Epoch")
    plt.legend(legend)
    plt.savefig(plot_folder + "/eta-fine-" + file_tag,dpi=500)
    plt.show()

    #plt.plot([optim.grad_theta_trace[i][0]['Y']['log_sig'][0] for i in range(len(optim.theta_trace))])
    #plt.plot([optim.grad_theta_trace[i][0]['Y']['log_sig'][1] for i in range(len(optim.theta_trace))])
    #plt.plot([optim.grad_theta_trace[i][0]['Y']['log_sig'][2] for i in range(len(optim.theta_trace))])
    #plt.title("Gradients of Standard Deviations v iteration number")
    #plt.show()

In [None]:
# Create the figure output directory before anything is saved into it.
# makedirs also creates missing parent directories and, with exist_ok=True,
# does nothing when the directory is already there.
os.makedirs(plot_folder, exist_ok=True)

In [None]:
# Draw the log-likelihood gap twice: first against seconds of computation,
# then against epochs.
# Optional zooms: xlims=[0,3600],ylims=[1e-2,1e1] for the time axis,
#                 xlims=[0,100],ylims=[1e-2,1e1] for the epoch axis.
for use_seconds in (True, False):
    plot_ll(optims,x_time=use_seconds)

In [None]:
# Trace plots for every (method, partial_E) configuration that was loaded.
for method in methods:#["EM","SGD","SAG","SVRG","SAGA"]:
    for partial_E in (0, 0.5, 1):
        # PE: whether a partial E-step was used; M: 10 only when partial_E == 1
        PE = partial_E in (0.5, 1)
        M = 10 if partial_E == 1 else 1
        if (method,partial_E) not in optims:
            continue
        plot_results(optims[(method,partial_E)],
                     ll_star,
                     method,
                     PE,
                     M,
                     optims[("control",0.0)].features)

In [None]:
# Display theta of the BFGS run that ended with the highest log-likelihood.
optims[('BFGS',0.0)].theta

In [None]:
# Display eta of the BFGS run that ended with the highest log-likelihood.
optims[('BFGS',0.0)].eta

In [None]:
# Display eta0 of the BFGS run that ended with the highest log-likelihood.
optims[('BFGS',0.0)].eta0