# Table of Contents
 <p><div class="lev1 toc-item"><a href="#Optimize-Dynamic-Subgraphs" data-toc-modified-id="Optimize-Dynamic-Subgraphs-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Optimize Dynamic Subgraphs</a></div><div class="lev2 toc-item"><a href="#Initialize-Environment" data-toc-modified-id="Initialize-Environment-11"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Initialize Environment</a></div><div class="lev2 toc-item"><a href="#Generate-List-of-Data" data-toc-modified-id="Generate-List-of-Data-12"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Generate List of Data</a></div><div class="lev2 toc-item"><a href="#Construct-Configuration-Matrices" data-toc-modified-id="Construct-Configuration-Matrices-13"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>Construct Configuration Matrices</a></div><div class="lev2 toc-item"><a href="#NMF-Cross-Validation-Optimizaion" data-toc-modified-id="NMF-Cross-Validation-Optimizaion-14"><span class="toc-item-num">1.4&nbsp;&nbsp;</span>NMF Cross-Validation Optimizaion</a></div>

# Optimize Dynamic Subgraphs

## Initialize Environment

In [None]:
try:
    %load_ext autoreload
    %autoreload 2
    %reset
except:
    print 'NOT IPYTHON'

from __future__ import division

import os
import sys
import glob

import numpy as np
import pandas as pd
import seaborn as sns
import scipy.stats as stats
import scipy.io as io
import h5py
import matplotlib.pyplot as plt
from matplotlib import rcParams

sys.path.append('/Users/akhambhati/Developer/hoth_research/Echobase')
import Echobase
# Short aliases for the Echobase routines referenced in this notebook.
_configuration = Echobase.Network.Transforms.configuration
_subgraph = Echobase.Network.Partitioning.Subgraph
convert_conn_vec_to_adj_matr = _configuration.convert_conn_vec_to_adj_matr
convert_adj_matr_to_cfg_matr = _configuration.convert_adj_matr_to_cfg_matr
nmf = _subgraph.nmf

# Hand matplotlib's rcParams to Echobase's figure formatter and keep the
# object it returns under the same name.
rcParams = Echobase.Plotting.fig_format.update_rcparams(rcParams)

# Root directories used by this notebook.
path_CoreData = '/Users/akhambhati/Remotes/CORE.fMRI_multiband.mmattar/restdata'
path_PeriphData = '/Users/akhambhati/Remotes/RSRCH.NMF_Subnetworks'

# Experiment-specific sub-directories, both located under path_PeriphData.
path_InpData = path_PeriphData + '/e01-Dyne_FuncNetw'
path_ExpData = path_PeriphData + '/e05-DynFuncSubgraph_Population'

# Report every directory that is missing and create it (with parents).
for path in (path_CoreData, path_PeriphData, path_InpData, path_ExpData):
    if os.path.exists(path):
        continue
    print('Path: {}, does not exist'.format(path))
    os.makedirs(path)

## Generate List of Data

In [None]:
# File name (text after the last '/') of every adjacency archive that
# matches the pattern inside the input directory.
adjacency_glob = '{}/Adjacency.*.npz'.format(path_InpData)
subj_date = [full_subj_path.rsplit('/', 1)[-1]
             for full_subj_path in glob.iglob(adjacency_glob)]

# Group dates by subject; both are taken from the 2nd and 3rd dot-separated
# fields of the file name.
subj_ids = {}
for s_d in subj_date:
    subj, date = s_d.split('.')[1:3]
    subj_ids.setdefault(subj, []).append(date)

## Construct Configuration Matrices
*__WARNING: Will Delete Existing Output__*

In [None]:
# Remove the cached configuration-matrix archive (if any) from the experiment
# directory so that it is rebuilt from the input data.
rm_outp = glob.glob("{}/NMF_Optimization.CfgMatr.npz".format(path_ExpData))

# Each entry of the outer list is one group of paths to delete.
for rm_type in [rm_outp]:
    for path in rm_type:
        try:
            os.remove(path)
        except OSError:
            # Best-effort cleanup: report the failure and keep going. Only
            # filesystem errors are caught, so Ctrl-C and programming errors
            # are no longer silently swallowed.
            print("{} not found".format(path))

In [None]:
# Stack the configuration vectors of every input file into one matrix and
# record, for each row, which file it came from.
cfg_matr = []
cfg_name = []
for df_name in subj_date:

    # Read the input data
    df = np.load('{}/{}'.format(path_InpData, df_name))

    # Row label shared by every vector from this file: the 2nd and 3rd
    # dot-separated fields of the file name (computed once per file).
    df_label = '.'.join(df_name.split('.')[1:3])

    # Use the alias bound during initialisation; the previous spelling
    # `conv_adj_matr_to_cfg_matr` is not defined and raised NameError.
    for cfg_vec in convert_adj_matr_to_cfg_matr(df['adj_matr']):
        cfg_matr.append(cfg_vec)
        cfg_name.append(df_label)

# Cache the configuration matrices
cfg_matr = np.array(cfg_matr)
cfg_name = np.array(cfg_name)
np.savez('{}/NMF_Optimization.CfgMatr.npz'.format(path_ExpData),
         cfg_matr=cfg_matr, cfg_name=cfg_name)

## NMF Cross-Validation Optimizaion

In [None]:
# Read the cached configuration matrix back from the experiment directory
cfg_data = np.load('{}/NMF_Optimization.CfgMatr.npz'.format(path_ExpData))
cfg_matr = cfg_data['cfg_matr']

# Parameter grid handed to the cross-validation routine
search_fold = 10
search_rank = list(np.arange(2, 31))
search_alpha = [0.0] #list(np.linspace(0.01, 1.0, 1))
search_beta = [0.0] #list(np.linspace(0.01, 1.0, 1))

# Start from a clean slate: drop any error archive from a previous run
str_path = '{}/NMF_Optimization.Error.npz'.format(path_ExpData)
if os.path.exists(str_path):
    os.remove(str_path)

# Run the cross-validation over the grid defined above
opt_dict = Echobase.Network.Partitioning.Subgraph.optimize_nmf.cross_validation(
    cfg_matr,
    search_alpha,
    search_beta,
    search_rank,
    search_fold,
    n_proc=8,
    str_path=str_path)

# Persist the per-run parameters and errors (str_path is the same file name
# that was cleared above)
np.savez(str_path,
         alpha=opt_dict['alpha'],
         beta=opt_dict['beta'],
         rank=opt_dict['rank'],
         error=opt_dict['error'])

In [None]:
# Load the cross-validation results and average the error per parameter value.
opt_dict = np.load('{}/NMF_Optimization.Error.npz'.format(path_ExpData))

# Pull each array out of the archive once. The object returned by np.load for
# an .npz file reads a member from disk every time it is indexed, so indexing
# it inside the comprehensions below repeated that work on every iteration.
cv_error = opt_dict['error']
cv_rank = opt_dict['rank']
cv_alpha = opt_dict['alpha']
cv_beta = opt_dict['beta']

# Mean error over all runs that share the same rank
error_rank = [cv_error[np.flatnonzero(cv_rank == rank)].mean()
              for rank in np.unique(cv_rank)]

# Runs evaluated at the hard-coded rank of 24; used to pick which alpha/beta
# values are summarised below.
rank_ix = np.flatnonzero(cv_rank == 24)

# NOTE(review): the means below average over runs of every rank, not only
# those in rank_ix -- confirm whether the error should also be restricted to
# rank 24.
error_alpha = [cv_error[np.flatnonzero(cv_alpha == alpha)].mean()
               for alpha in np.unique(cv_alpha[rank_ix])]
error_beta = [cv_error[np.flatnonzero(cv_beta == beta)].mean()
              for beta in np.unique(cv_beta[rank_ix])]

%matplotlib inline
# Plot the mean cross-validation error against each searched parameter and
# mark the minimum with a red 'x'.
#
# The x-values for alpha and beta are restricted to rank_ix, i.e. the same
# sets of values that error_alpha / error_beta were computed for. Plotting
# against np.unique over *all* runs could give x and y different lengths and
# make ax.plot fail.
rank_vals = np.unique(opt_dict['rank'])
alpha_vals = np.unique(opt_dict['alpha'][rank_ix])
beta_vals = np.unique(opt_dict['beta'][rank_ix])

# One figure per parameter, in the order rank, alpha, beta
for param_vals, param_error in [(rank_vals, error_rank),
                                (alpha_vals, error_alpha),
                                (beta_vals, error_beta)]:
    plt.figure()
    ax = plt.subplot(111)
    opt_ix = np.argmin(param_error)
    ax.plot(param_vals, param_error)
    ax.scatter(param_vals[opt_ix], param_error[opt_ix],
               marker='x', color='r', s=30)

# Report the parameter combination chosen by Echobase from the raw results
opt_params = Echobase.Network.Partitioning.Subgraph.optimize_nmf.min_crossval_param(dict(opt_dict))
print('Optimal Rank: {}'.format(opt_params['rank']))
print('Optimal Alpha: {}'.format(opt_params['alpha']))
print('Optimal Beta: {}'.format(opt_params['beta']))