In [1]:
%pylab inline

import networkx as nx
import scipy.sparse
import pandas as pd
from scipy.io import loadmat
from sklearn.datasets import make_sparse_spd_matrix
from sklearn.datasets.base import Bunch
from sklearn.utils.extmath import squared_norm
from sklearn.covariance import empirical_covariance
from sklearn.model_selection import GridSearchCV, ShuffleSplit

from regain import datasets; reload(datasets)
from regain.datasets import is_pos_def, is_pos_semidef
from regain.plot import plot_graph_with_latent_variables
from regain.admm import latent_time_graph_lasso_v3_; reload(latent_time_graph_lasso_v3_)
from regain.admm import time_graph_lasso_; reload(time_graph_lasso_);
from regain.admm import latent_graph_lasso_; reload(latent_graph_lasso_);
from regain import utils; reload(utils)
import time

Populating the interactive namespace from numpy and matplotlib


In [2]:
# %load_ext pymatbridge
# %matlab cd /home/fede/Downloads/lvglasso-pub/
# # %matlab cd /home/veronica/src/lvglasso-pub/

# Generation of tables for results

In [3]:
# One results table per algorithm; `ours` defines the column layout and the
# three baseline tables start empty with exactly the same columns.
ours = pd.DataFrame(
    columns=[
        "n_dim_obs", "time", "iterations", "F1score", "MSE_precision",
        "MSE_observed", "MSE_latent", "mean_rank_error", "note",
    ])
hallac, chandresekeran, friedman = (
    pd.DataFrame(columns=ours.columns) for _ in range(3))

# Performance of the different algorithms

In [4]:
def ltgl_results(data_grid, dataframe, K, K_obs, ells,
                 alpha=np.linspace(0.01, 0.2, 5), tau=np.linspace(0.01, 0.8, 5),
                 beta=np.linspace(0.01, 0.8, 1), eta=np.linspace(0.01, 0.8, 2)):
    """Cross-validate LatentTimeGraphLasso, refit it and record its scores.

    A grid search over ``alpha``, ``tau``, ``beta`` and ``eta`` selects the
    best parameters on ``data_grid``; the model is then refit on the whole of
    ``data_grid`` with those parameters and only that refit is timed.

    Parameters
    ----------
    data_grid : ndarray
        Data handed to ``GridSearchCV.fit`` and to the final ``fit``. The
        length of its first axis sets the ADMM step ``rho = 1 / sqrt(len)``.
    dataframe : pandas.DataFrame
        Table the new result row is appended to (not modified in place).
    K, K_obs, ells
        Reference precision, observed precision and latent matrices passed to
        ``utils.structure_error`` / ``utils.error_norm`` / ``utils.error_rank``.
    alpha, tau, beta, eta : array-like
        Candidate values for the corresponding estimator parameters.

    Returns
    -------
    tuple
        ``(dataframe, alpha, tau, beta, eta)``: the extended table followed by
        the parameter values selected by the grid search.
    """
    # rho is derived from the argument itself rather than from a notebook
    # global, so the function does not depend on hidden kernel state.
    mdl = latent_time_graph_lasso_v3_.LatentTimeGraphLasso(
        bypass_transpose=False, assume_centered=0, verbose=0, rtol=1e-5, tol=1e-5,
        max_iter=500, rho=1. / np.sqrt(data_grid.shape[0]))
    params = dict(tau=tau, eta=eta, alpha=alpha, beta=beta)

    ltgl = GridSearchCV(mdl, params, cv=ShuffleSplit(3, test_size=.5), verbose=True).fit(data_grid)
    alpha = ltgl.best_params_['alpha']
    tau = ltgl.best_params_['tau']
    beta = ltgl.best_params_['beta']
    eta = ltgl.best_params_['eta']

    # Time a single fit with the selected parameters (the search is excluded).
    tic = time.time()
    ll = mdl.set_params(**ltgl.best_params_).fit(data_grid)
    tac = time.time()
    iterations = ll.n_iter_
    F1score = utils.structure_error(K, ll.precision_)['f1']
    # The observed part is compared against precision_ - latent_.
    MSE_observed = utils.error_norm(K_obs, ll.precision_ - ll.latent_)
    MSE_precision = utils.error_norm(K, ll.precision_)
    MSE_latent = utils.error_norm(ells, ll.latent_)
    mean_rank_error = utils.error_rank(ells, ll.latent_)

    # 'estimator' is not one of the table's initial columns; appending the
    # dict adds it as an extra column.
    res = dict(n_dim_obs=K.shape[0],
               time=tac-tic,
               iterations=iterations,
               F1score=F1score,
               MSE_precision=MSE_precision,
               MSE_observed=MSE_observed,
               MSE_latent=MSE_latent,
               mean_rank_error=mean_rank_error,
               note=None,
               estimator=ll)
    dataframe = dataframe.append(res, ignore_index=True)
    return dataframe, alpha, tau, beta, eta

In [5]:
import sys
sys.path.append("/home/fede/src/TVGL/")
import TVGL; reload(TVGL)
import inferGraphL2; reload(inferGraphL2)
from TVGL import tvgl_hallac

def hallac_results(data_grid, dataframe, K, K_obs, ells, beta, alpha):
    """Run the TVGL solver (``tvgl_hallac``) and append its scores to a table.

    Parameters
    ----------
    data_grid : ndarray
        Data array; it is passed to the solver as ``data_grid.transpose(2, 0, 1)``.
    dataframe : pandas.DataFrame
        Table the new result row is appended to (not modified in place).
    K
        Reference precision matrices used for the F1 score and the error norm.
    K_obs, ells
        Unused here; accepted so the signature matches the other ``*_results``
        helpers. The latent-related metrics are recorded as ``None``.
    beta, alpha
        Forwarded to ``tvgl_hallac`` as ``beta`` and ``lamb`` respectively.

    Returns
    -------
    pandas.DataFrame
        ``dataframe`` with one extra row; ``note`` holds the solver status.
    """
    tic = time.time()
    thetaSet, empCovSet, status, gvx = tvgl_hallac(
        data_grid.transpose(2,0,1), lamb=alpha, beta=beta, indexOfPenalty=2)
    tac = time.time()

    if status != "Optimal":
        # Call form prints the same text on Python 2 and is valid on Python 3.
        print("not converged")
    precisions = np.array(thetaSet)
    F1score = utils.structure_error(K, precisions)['f1']
    MSE_observed = None
    MSE_precision = utils.error_norm(K, precisions)
    MSE_latent = None
    mean_rank_error = None

    res = dict(n_dim_obs=K.shape[0],
               time=tac-tic,
               iterations=gvx.n_iter_,
               F1score=F1score,
               MSE_precision=MSE_precision,
               MSE_observed=MSE_observed,
               MSE_latent=MSE_latent,
               mean_rank_error=mean_rank_error,
               note=status,
               estimator=gvx)

    dataframe = dataframe.append(res, ignore_index=True)
    return dataframe

In [6]:
from sklearn.covariance import GraphLasso
def glasso_results(data_grid, dataframe, K, K_obs, ells, alpha):
    """Fit one GraphLasso per slice of ``data_grid`` and append the scores.

    The slices are taken along the last axis of ``data_grid`` (it is iterated
    as ``data_grid.transpose(2, 0, 1)``). A single ``GraphLasso`` instance is
    refit for every slice, and the loop as a whole is timed.

    ``K_obs`` and ``ells`` are not used; the latent-related metrics are stored
    as ``None``. Returns ``dataframe`` with one extra row, in which
    ``iterations`` is the largest ``n_iter_`` seen over the slices and
    ``estimator`` is the (last-fitted) ``GraphLasso`` instance.
    """
    gl = GraphLasso(alpha=alpha, mode='cd', assume_centered=False)

    start = time.time()
    n_iters, fitted = [], []
    for snapshot in data_grid.transpose(2, 0, 1):
        gl.fit(snapshot)
        n_iters.append(gl.n_iter_)
        fitted.append(gl.precision_)
    elapsed = time.time() - start

    precisions = np.array(fitted)
    res = {
        'n_dim_obs': K.shape[0],
        'time': elapsed,
        'iterations': np.max(n_iters),
        'F1score': utils.structure_error(K, precisions)['f1'],
        'MSE_precision': utils.error_norm(K, precisions),
        'MSE_observed': None,
        'MSE_latent': None,
        'mean_rank_error': None,
        'note': None,
        'estimator': gl,
    }
    return dataframe.append(res, ignore_index=True)

In [7]:
from pymatbridge import Matlab
def chandresekeran_results(data_grid, dataframe, K, K_obs, ells, tau, alpha):
    """Run the MATLAB ``ADMMMA.m`` wrapper and append its scores to a table.

    Empirical covariances are computed for every slice along the last axis of
    ``data_grid`` and sent to a freshly started MATLAB session together with
    ``alpha`` and ``tau``. The session is always stopped, even if the MATLAB
    call raises.

    Parameters
    ----------
    data_grid : ndarray
        Data array, iterated as ``data_grid.transpose(2, 0, 1)``.
    dataframe : pandas.DataFrame
        Table the new result row is appended to (not modified in place).
    K, K_obs, ells
        Reference precision, observed precision and latent matrices used by
        the ``utils`` error functions.
    tau, alpha
        Regularisation parameters forwarded to ``ADMMMA.m``.

    Returns
    -------
    pandas.DataFrame
        ``dataframe`` with one extra row; ``time`` and ``iterations`` come
        from the ``elapsed_time`` and ``iter`` fields returned by MATLAB.
    """
    # Covariances are stacked and then transposed so the slice index is the
    # last axis of the array sent to MATLAB.
    # NOTE(review): assume_centered=True here, while the other estimators in
    # this notebook are built with assume_centered=False/0 - confirm intended.
    emp_list = np.array([empirical_covariance(x, assume_centered=True)
                        for x in data_grid.transpose(2,0,1)]).transpose(1,2,0)

    # No solver options are passed from Python: ADMMMA.m only receives the
    # covariances, alpha and tau.
    mlab = Matlab()
    mlab.start()
    try:
        # insert the ADMM_B.m function in the path
        mlab.run_code("addpath(genpath('/home/fede/Downloads/lvglasso-pub/'))")

        result = mlab.run_func("/home/fede/src/slipguru/regain/regain/wrapper/ADMMMA.m",
                               emp_list, alpha, tau)
    finally:
        # Do not leave the MATLAB process running when the call fails.
        mlab.stop()

    ma_output = Bunch(**result['result'])
    ma_output.R = np.array(ma_output.R)
    ma_output.S = np.array(ma_output.S)
    ma_output.L = np.array(ma_output.L)

    F1score = utils.structure_error(K, ma_output.R + ma_output.L)['f1']
    MSE_observed = utils.error_norm(K_obs, ma_output.R)
    MSE_precision = utils.error_norm(K, ma_output.R + ma_output.L)
    MSE_latent = utils.error_norm(ells, ma_output.L)
    mean_rank_error = utils.error_rank(ells, ma_output.L)

    res = dict(n_dim_obs=K.shape[0],
               time=ma_output.elapsed_time,
               iterations=np.max(ma_output.iter),
               F1score=F1score,
               MSE_precision=MSE_precision,
               MSE_observed=MSE_observed,
               MSE_latent=MSE_latent,
               mean_rank_error=mean_rank_error,
               note=None, estimator=ma_output)

    dataframe = dataframe.append(res, ignore_index=True)
    return dataframe



In [None]:
# Sizes of one small synthetic problem.
n_samples, n_dim_obs, n_dim_lat, T = 100, 10, 7, 2

res = datasets.generate_dataset(
    mode='fixed', n_samples=n_samples, n_dim_lat=n_dim_lat,
    n_dim_obs=n_dim_obs, T=T, epsilon=1e-2)
data_list, K, K_obs, ells = res.data, res.thetas, res.thetas_observed, res.ells

# Same data with axis 0 moved to the end; used later for the grid search.
data_grid = np.array(data_list).transpose(1, 2, 0)

# Run experiments

In [None]:
reload(datasets)

# Settings shared by every problem size in the sweep.
n_samples = 100
n_dim_lat = 7
T = 10

data = []
taus, betas, alphas = [], [], []
for n_dim_obs in [10, 100, 1000]:
    print("Start with dimensions: %d" % n_dim_obs)

    # Fresh synthetic dataset for this number of observed variables.
    res = datasets.generate_dataset(
        mode='fixed', n_samples=n_samples, n_dim_lat=n_dim_lat,
        n_dim_obs=n_dim_obs, T=T, epsilon=1e-2)
    data_list, K, K_obs, ells = res.data, res.thetas, res.thetas_observed, res.ells
    data_grid = np.array(data_list).transpose(1, 2, 0)  # to use it later for grid search
    data.append(data_grid)

    # Our method is cross-validated first; the parameters it selects are
    # then reused by the three baselines below.
    print("Cross Validating parameters ...")
    ours, alpha, tau, beta, eta = ltgl_results(data_grid, ours, K, K_obs, ells)
    taus.append(tau)
    alphas.append(alpha)
    betas.append(beta)

    print("starting Graph Lasso")
    friedman = glasso_results(data_grid, friedman, K, K_obs, ells, alpha=alpha)
    print("starting hallac")
    hallac = hallac_results(data_grid, hallac, K, K_obs, ells, beta=beta, alpha=alpha)
    print("starting chandresekeran")
    chandresekeran = chandresekeran_results(data_grid, chandresekeran, K, K_obs, ells, tau, alpha)

Start with dimensions: 10
Cross Validating parameters ...
Fitting 3 folds for each of 50 candidates, totalling 150 fits


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:  1.1min finished


starting Graph Lasso
starting hallac
Use l-2 penalty function
starting chandresekeran
Starting MATLAB on ZMQ socket ipc:///tmp/pymatbridge-f79a9974-cd4e-4717-8da2-706367017808
Send 'exit' command to kill the server
..........MATLAB started and connected!
MATLAB closed
Start with dimensions: 100
Cross Validating parameters ...
Fitting 3 folds for each of 50 candidates, totalling 150 fits


# Results: Chandrasekaran

In [None]:
# One stack of empirical covariances per dataset stored in `data`.
# `emp_list` is left bound to the stack of the last dataset after the loop.
# NOTE(review): the stacks built here keep the slice index on axis 0, whereas
# chandresekeran_results moves it to the last axis before calling MATLAB -
# confirm which layout the consumer of `emp_list` expects.
emps = []
for d in data:
    emp_list = np.array([
        empirical_covariance(x, assume_centered=True)
        for x in np.transpose(d, (2, 0, 1))
    ])
    emps.append(emp_list)

In [11]:
%%matlab -i emp_list,alpha,tau -o R,S,L,obj,res,iter,elapsed_time
% Solve one ADMM_B problem per slice of emp_list (slices along dimension 3).
% alpha and tau are injected from Python through the -i flag of the magic.
% (manual overrides tried previously: alpha = 0.05, beta = 0.25)

% Solver options handed to ADMM_B.
opts.continuation = 1;
opts.num_continuation = 0;
opts.eta = sqrt(100);
opts.muf = 1e-6;
opts.maxiter = 500;
opts.stoptol = 1e-5;
opts.over_relax_par = 1;

% One output cell per slice.
n_slices = size(emp_list,3);
R = cell(1, n_slices);
S = cell(1, n_slices);
L = cell(1, n_slices);
obj = cell(1, n_slices);
res = cell(1, n_slices);
iter = cell(1, n_slices);

tic
for k = 1:n_slices
    emp_cov = emp_list(:,:,k);  % time is the last dimension
    n_dim = size(emp_cov,1);
    opts.mu = n_dim;
    out_B = ADMM_B(emp_cov,alpha,tau,opts);
    % optional progress line:
    % fprintf('ADMM_B: obj: %e, iter: %d, cpu: %3.1f \n',out_B.obj,out_B.iter,solve_B);
    R{k} = out_B.R;
    S{k} = out_B.S;
    L{k} = out_B.L;
    obj{k} = out_B.obj;
    res{k} = out_B.resid;
    iter{k} = out_B.iter;
end
% No trailing semicolon: the elapsed time is echoed in the cell output.
elapsed_time = toc


elapsed_time =

    0.4038



In [None]:
   
# Score the outputs of the %%matlab cell (R, L, iter, elapsed_time) and
# append them as one row of the `chandresekeran` results table.

def _split_blocks(stacked):
    """Cut `stacked` along axis 0 into consecutive blocks of `shape[1]` rows.

    Assumes the MATLAB outputs arrive as equally sized blocks stacked along
    axis 0 - TODO confirm against the %%matlab output conversion.
    """
    size = stacked.shape[1]
    # `//` keeps the block count an integer on Python 2 and Python 3.
    return np.array([stacked[i * size:(i + 1) * size]
                     for i in range(stacked.shape[0] // size)])


R = np.array(R)
L = np.array(L)
iters = np.array(iter)  # `iter` is the MATLAB output, not the builtin
R_ar = _split_blocks(R)
L_ar = _split_blocks(L)
iters_ar = _split_blocks(iters)
print(iters_ar)
iterations = np.max(iters_ar)

# Same metrics and dictionary keys as chandresekeran_results.
F1score = utils.structure_error(K, R_ar + L_ar)['f1']
MSE_observed = utils.error_norm(K_obs, R_ar)
MSE_precision = utils.error_norm(K, R_ar + L_ar)
MSE_latent = utils.error_norm(ells, L_ar)
mean_rank_error = utils.error_rank(ells, L_ar)

# `elapsed_time` is the toc value returned by the MATLAB cell; the name
# `time` refers to the standard-library module and must not be stored.
res = dict(n_dim_obs=K.shape[0],
           time=elapsed_time,
           iterations=iterations,
           F1score=F1score,
           MSE_precision=MSE_precision,
           MSE_observed=MSE_observed,
           MSE_latent=MSE_latent,
           mean_rank_error=mean_rank_error,
           note=None)
chandresekeran = chandresekeran.append(res, ignore_index=True)

In [None]:
# Text dump of our results table followed by the Graph Lasso baseline table.
for frame in (ours, friedman):
    print(frame)