# Scalability
This notebook shows the scalability analysis performed in the paper.
We compare our LTGL model against state-of-the-art software for graphical inference, namely LVGLASSO and TVGL.

<font color='red'><b>Note</b></font>: GL is not included in the comparison, since it is based on coordinate descent and does not involve an eigenvalue decomposition.

In [None]:
from __future__ import print_function

%matplotlib inline
import logging
import time
from itertools import product

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from regain import datasets, utils

In [None]:
import performance_utils as putils

In [None]:
# Benchmark grid: one synthetic dataset per (n_dim_obs, T) combination.
n_times = [20, 50, 100]
n_dims = np.sqrt(np.logspace(2, 5, 10)).astype(int)

n_samples = 200
n_dim_lat = 2

# Seed NumPy's global RNG once, then generate the datasets in the order
# produced by product(n_dims, n_times); results are keyed by (dim, T).
np.random.seed(42)
data = {}
with utils.suppress_stdout():
    for dim, T in product(n_dims, n_times):
        data[(dim, T)] = datasets.make_dataset(
            mode='ma', n_samples=n_samples, n_dim_obs=dim,
            n_dim_lat=n_dim_lat, T=T, epsilon=1e-2)

In [None]:
# Hyper-parameters handed to the solvers in the benchmark loops (all set to 1).
alpha = tau = beta = eta = 1

idx = pd.IndexSlice
methods = ['LTGL', 'GL', 'LVGLASSO', 'TVGL']
scores = sorted(['iterations', 'time', 'note'])

# Empty results table: rows are (method, time), columns are (score, dim).
rows = pd.MultiIndex.from_product([methods, n_times], names=('method', 'time'))
cols = pd.MultiIndex.from_product([scores, n_dims], names=('score', 'dim'))
dff = pd.DataFrame(index=rows, columns=cols)

In [None]:
# Run LTGL, GL and LVGLASSO on every dataset and store the requested scores
# in the (method, time) x (score, dim) results table.
for it, ((n_dim, n_time), sample) in enumerate(sorted(data.items())):
    print("Start with: dim=%d, T=%d (it %d)" % (n_dim, n_time, it))
    K, K_obs, ells = sample.thetas, sample.thetas_observed, sample.ells
    # to use it later for grid search
    data_grid = np.transpose(np.array(sample.data), (1, 2, 0))
    col_sel = idx[:, n_dim]  # every score column for this dimension

    print("starting LTGL ...\r", end='')
    res_l = putils.ltgl_results(
        sample.X, sample.y, K, K_obs, ells,
        alpha=alpha, beta=beta, tau=tau, eta=eta)
    dff.loc[idx['LTGL', n_time], col_sel] = [res_l[s] for s in scores]

    print("starting GL...\r", end='')
    # Use this for the R-implementation
    # res_g = friedman_results(data_grid, K, K_obs, ells, alpha=alpha)
    res_g = putils.glasso_results(data_grid, K, K_obs, ells, alpha=alpha)
    dff.loc[idx['GL', n_time], col_sel] = [res_g[s] for s in scores]

    print("starting LVGLASSO...\r", end='')
    res_c = putils.chandresekeran_results(
        data_grid, K, K_obs, ells, tau=tau, alpha=alpha)
    dff.loc[idx['LVGLASSO', n_time], col_sel] = [res_c[s] for s in scores]

In [None]:
# Checkpoint the results table filled so far. The table is named `dff`;
# `df` is only created later, when the pickle is loaded for plotting, so
# using it here fails on a fresh kernel.
dff.to_pickle("scalability_no_hallac.pkl")

In [None]:
# Set up logging with the standard library: `init_logger` is not defined or
# imported in any visible cell of this notebook.
# NOTE(review): if a project helper with extra behaviour (e.g. logging to a
# file) was intended, import it explicitly and restore the original call.
# The root logger defaults to WARNING, so INFO must be enabled for the
# progress messages of the TVGL run to be shown.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('scalability')

Since this is computationally expensive, we split the computation across two cells: the TVGL run is kept in its own cell below.

In [None]:
# Run TVGL on every dataset. A failure on one configuration is logged with
# its traceback and the loop moves on, so one bad run does not abort the
# whole benchmark (and does not disappear silently either).
for i, (k, dataset) in enumerate(sorted(data.items())):
    n_dim, n_time = k
    logging.info("Start TVGL with: dim=%d, T=%d (it %d)", n_dim, n_time, i)
    K = dataset.thetas
    K_obs = dataset.thetas_observed
    ells = dataset.ells
    data_grid = np.array(dataset.data).transpose(
        1, 2, 0)  # to use it later for grid search

    try:
        # Called through `putils`, like the other solvers in this notebook.
        # NOTE(review): assumes performance_utils exposes hallac_results — confirm.
        res_h = putils.hallac_results(
            data_grid, K, K_obs, ells, beta=beta, alpha=alpha)
        dff.loc[idx['TVGL', n_time], idx[:, n_dim]] = [
            res_h[x] for x in scores]
        # Checkpoint after every configuration: a crash keeps earlier results.
        dff.to_pickle("scalability_hallac.pkl")
    except Exception:
        # `Exception` (not a bare except) so KeyboardInterrupt can still stop
        # a long run.
        logging.exception("TVGL failed for dim=%d, T=%d", n_dim, n_time)

## Plotting

In [None]:
# load pickle
with open("scalability.pkl", 'rb') as f:
    df = pkl.load(f)

df.sortlevel(inplace=True)

idx = pd.IndexSlice
scores = df.columns.levels[0]
n_dims = df.columns.levels[1]
methods = df.index.levels[0]
n_times = df.index.levels[1]

Let's plot a horizontal figure.

In [None]:
# One panel per number of time points; each panel shows the running time of
# every method except GL against the number of unknowns, on log-log axes.
style = ['-', '--', ':']
plotted_methods = [m for m in methods if m != 'GL']

f, ax = plt.subplots(1, len(n_times), sharey=True, figsize=(12, 2), dpi=600)

ax[0].set_ylabel("seconds")
for i, t in enumerate(n_times):
    for j, m in enumerate(plotted_methods):
        ax[i].plot(
            n_dims * (n_dims + 1) * t,
            df.loc[idx[m, t], idx['time', :]].values, ls=style[j], label=m)

    ax[i].set_yscale('log')
    ax[i].set_xscale('log')
    ax[i].set_xlabel(r"number of unknowns at T = %d" % t)
    ax[i].grid(True)  # `grid` expects a bool, not the string 'on'

ax[0].set_yticks([1, 10, 1e2, 1e3, 1e4])
# Single shared legend placed above the second panel; it is kept in `lgd` so
# it can be passed to savefig as an extra artist.
lgd = ax[1].legend(
    bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode="expand",
    borderaxespad=0.)
f.tight_layout()

In [None]:
# Export the figure as a PDF; the legend is passed as an extra artist so the
# tight bounding box takes it into account.
f.savefig(
    "scalability.pdf",
    bbox_inches='tight',
    bbox_extra_artists=(lgd, ),
    transparent=True,
    dpi=600)