# Table of Contents
 <p><div class="lev1 toc-item"><a href="#Initialize-Environment" data-toc-modified-id="Initialize-Environment-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Initialize Environment</a></div><div class="lev2 toc-item"><a href="#Generate-List-of-Data" data-toc-modified-id="Generate-List-of-Data-11"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Generate List of Data</a></div><div class="lev2 toc-item"><a href="#Construct-Configuration-Matrices" data-toc-modified-id="Construct-Configuration-Matrices-12"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Construct Configuration Matrices</a></div><div class="lev1 toc-item"><a href="#Optimize-Dynamic-Subgraphs" data-toc-modified-id="Optimize-Dynamic-Subgraphs-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Optimize Dynamic Subgraphs</a></div><div class="lev2 toc-item"><a href="#NMF-Cross-Validation-Optimizaion" data-toc-modified-id="NMF-Cross-Validation-Optimizaion-21"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>NMF Cross-Validation Optimizaion</a></div><div class="lev1 toc-item"><a href="#Detect-Dynamic-Subgraphs" data-toc-modified-id="Detect-Dynamic-Subgraphs-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Detect Dynamic Subgraphs</a></div><div class="lev2 toc-item"><a href="#Run-Non-Negative-Matrix-Factorization-Algorithm" data-toc-modified-id="Run-Non-Negative-Matrix-Factorization-Algorithm-31"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>Run Non-Negative Matrix Factorization Algorithm</a></div><div class="lev2 toc-item"><a href="#Consensus-Clustering-of-Dynamic-Subgraphs" data-toc-modified-id="Consensus-Clustering-of-Dynamic-Subgraphs-32"><span class="toc-item-num">3.2&nbsp;&nbsp;</span>Consensus Clustering of Dynamic Subgraphs</a></div><div class="lev3 toc-item"><a href="#Plot-Subgraphs" data-toc-modified-id="Plot-Subgraphs-321"><span class="toc-item-num">3.2.1&nbsp;&nbsp;</span>Plot Subgraphs</a></div><div class="lev1 toc-item"><a href="#Detect-Geometric-Null-Subgraphs" 
data-toc-modified-id="Detect-Geometric-Null-Subgraphs-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Detect Geometric-Null Subgraphs</a></div><div class="lev2 toc-item"><a href="#Generate-Geometric-Adjacency-Matrix" data-toc-modified-id="Generate-Geometric-Adjacency-Matrix-41"><span class="toc-item-num">4.1&nbsp;&nbsp;</span>Generate Geometric Adjacency Matrix</a></div><div class="lev2 toc-item"><a href="#Run-Non-Negative-Matrix-Factorization-Algorithm" data-toc-modified-id="Run-Non-Negative-Matrix-Factorization-Algorithm-42"><span class="toc-item-num">4.2&nbsp;&nbsp;</span>Run Non-Negative Matrix Factorization Algorithm</a></div><div class="lev3 toc-item"><a href="#Plot-Subgraphs" data-toc-modified-id="Plot-Subgraphs-421"><span class="toc-item-num">4.2.1&nbsp;&nbsp;</span>Plot Subgraphs</a></div><div class="lev1 toc-item"><a href="#Detect-Edge-Constrained-Null-Subgraphs" data-toc-modified-id="Detect-Edge-Constrained-Null-Subgraphs-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Detect Edge Constrained Null Subgraphs</a></div><div class="lev2 toc-item"><a href="#Run-Non-Negative-Matrix-Factorization-Algorithm" data-toc-modified-id="Run-Non-Negative-Matrix-Factorization-Algorithm-51"><span class="toc-item-num">5.1&nbsp;&nbsp;</span>Run Non-Negative Matrix Factorization Algorithm</a></div><div class="lev3 toc-item"><a href="#Plot-Subgraphs" data-toc-modified-id="Plot-Subgraphs-511"><span class="toc-item-num">5.1.1&nbsp;&nbsp;</span>Plot Subgraphs</a></div>

# Initialize Environment

In [None]:
# Enable module auto-reloading and clear the interactive namespace.
# NOTE(review): the %-prefixed lines are IPython magics, not Python syntax, so
# this cell presumably only parses inside IPython/Jupyter and the except branch
# is unlikely to be reached in a plain interpreter — confirm before reusing
# this code as a script.
try:
    %load_ext autoreload
    %autoreload 2
    %reset
except:
    print 'NOT IPYTHON'

from __future__ import division

import os
import sys
import glob

import numpy as np
import pandas as pd
import seaborn as sns
import scipy.stats as stats
import scipy.io as io
import h5py
import matplotlib.pyplot as plt
from matplotlib import rcParams

# Make the local Echobase checkout importable, then bind short aliases for the
# routines used throughout this notebook.
sys.path.append('/Users/akhambhati/Developer/hoth_research/Echobase')
import Echobase

_cfg_transforms = Echobase.Network.Transforms.configuration
convert_conn_vec_to_adj_matr = _cfg_transforms.convert_conn_vec_to_adj_matr
convert_adj_matr_to_cfg_matr = _cfg_transforms.convert_adj_matr_to_cfg_matr
nmf = Echobase.Network.Partitioning.Subgraph.nmf

# Replace matplotlib's rcParams with the version returned by Echobase's
# figure-format helper.
rcParams = Echobase.Plotting.fig_format.update_rcparams(rcParams)

# Root directories: raw data, derived data, this experiment's input and output
path_CoreData = '/Users/akhambhati/Remotes/CORE.fMRI_multiband.mmattar/restdata'
path_PeriphData = '/Users/akhambhati/Remotes/RSRCH.NMF_Subnetworks'
path_InpData = path_PeriphData + '/e01-Dyne_FuncNetw'
path_ExpData = path_PeriphData + '/e03b-DynFuncSubgraph-Population'

# Report every directory that is missing and create it
for path in (path_CoreData, path_PeriphData, path_InpData, path_ExpData):
    if os.path.exists(path):
        continue
    print('Path: {}, does not exist'.format(path))
    os.makedirs(path)

# Figures are written relative to the current working directory
if not os.path.exists('./e03b-Figures'):
    os.makedirs('./e03b-Figures')

## Generate List of Data

In [None]:
# Basenames of every adjacency file in the input directory.  glob makes no
# promise about ordering, so the list is sorted to keep the row order of
# everything derived from it reproducible across runs and filesystems.
subj_date = sorted(
    os.path.basename(full_subj_path)
    for full_subj_path in glob.iglob('{}/Adjacency.*.npz'.format(path_InpData)))

# Group the third dot-separated filename field under the second one,
# i.e. {subj: [date, ...]} for names of the form Adjacency.<subj>.<date>...
subj_ids = {}
for s_d in subj_date:
    subj, date = s_d.split('.')[1:3]
    subj_ids.setdefault(subj, []).append(date)

## Construct Configuration Matrices
*__WARNING: Will Delete Existing Output__*

In [None]:
# Delete the cached configuration matrix so the next cell rebuilds it
rm_outp = glob.glob("{}/NMF_Optimization.CfgMatr.npz".format(path_ExpData))

for path in rm_outp:
    try:
        os.remove(path)
    except OSError:
        # Only filesystem errors are expected here (e.g. the file vanished
        # between the glob and the remove); anything else should propagate.
        print("{} not found".format(path))

In [None]:
# Stack the configuration vectors of every input file into one matrix and
# remember, for each row, which file it came from.
cfg_matr = []
cfg_name = []
for df_name in subj_date:
    # Row label: second and third dot-separated fields of the file name
    row_label = '.'.join(df_name.split('.')[1:3])

    # Read the input data; close the archive right away so that one file
    # handle is not leaked per input file.
    df = np.load('{}/{}'.format(path_InpData, df_name))
    try:
        cfg_rows = convert_adj_matr_to_cfg_matr(df['adj_matr'])
    finally:
        df.close()

    for cfg_vec in cfg_rows:
        cfg_matr.append(cfg_vec)
        cfg_name.append(row_label)

# Cache the configuration matrices
cfg_matr = np.array(cfg_matr)
cfg_name = np.array(cfg_name)
np.savez('{}/NMF_Optimization.CfgMatr.npz'.format(path_ExpData),
         cfg_matr=cfg_matr, cfg_name=cfg_name)

# Optimize Dynamic Subgraphs

## NMF Cross-Validation Optimizaion

In [None]:
# Read the cached configuration matrix back from disk
cfg_data = np.load('{}/NMF_Optimization.CfgMatr.npz'.format(path_ExpData))
cfg_matr = cfg_data['cfg_matr']

# Parameter grid handed to the cross-validation routine
search_alpha = list(np.arange(0.01, 1.0, 0.1))
search_beta = list(np.arange(0.01, 1.0, 0.1))
search_rank = list(np.arange(2, 31))
search_fold = 10

# Start from a fresh text log: drop the one left by a previous run
str_path = '{}/NMF_Optimization.Error.txt'.format(path_ExpData)
if os.path.exists(str_path):
    os.remove(str_path)

# Cross-Validation Optimization
optimize_nmf = Echobase.Network.Partitioning.Subgraph.optimize_nmf
opt_dict = optimize_nmf.cross_validation(
    cfg_matr, search_alpha, search_beta, search_rank, search_fold,
    n_proc=7, str_path=str_path)

# Persist the fields of the result that later cells read back
opt_fields = ('alpha', 'beta', 'rank', 'error',
              'pct_sparse_subgraph', 'pct_sparse_coef')
np.savez('{}/NMF_Optimization.Error.npz'.format(path_ExpData),
         **dict((field, opt_dict[field]) for field in opt_fields))

In [None]:
# Reload the cross-validation results and ask Echobase for the best setting
opt_dict = np.load('{}/NMF_Optimization.Error.npz'.format(path_ExpData))
opt_params = Echobase.Network.Partitioning.Subgraph.optimize_nmf.min_crossval_param(dict(opt_dict))
for report_fmt, report_key in (('Optimal Rank: {}', 'rank'),
                               ('Optimal Alpha: {}', 'alpha'),
                               ('Optimal Beta: {}', 'beta')):
    print(report_fmt.format(opt_params[report_key]))


## Plot rank sweep
rank_list = np.unique(opt_dict['rank'])
opt_rank_ix = np.flatnonzero(rank_list == opt_params['rank'])

# Mean and standard error of the cross-validation error at each tested rank
rank_avg_acc, rank_ste_acc = [], []
for rank in rank_list:
    rank_sel = opt_dict['rank'] == rank
    rank_err = opt_dict['error'][rank_sel]
    rank_avg_acc.append(rank_err.mean())
    rank_ste_acc.append(rank_err.std() / np.sqrt(rank_sel.sum()))
error_rank_avg = np.array(rank_avg_acc)
error_rank_ste = np.array(rank_ste_acc)

# Mean curve, +/- one standard error band, and a red cross at the optimum
plt.figure()
ax = plt.subplot(111)
ax.plot(rank_list, error_rank_avg)
rank_band_lo = error_rank_avg - error_rank_ste
rank_band_hi = error_rank_avg + error_rank_ste
ax.fill_between(rank_list, y1=rank_band_lo, y2=rank_band_hi, lw=0, alpha=0.5)
ax.scatter(rank_list[opt_rank_ix], error_rank_avg[opt_rank_ix],
           marker='x', color='r', s=30)

ax.set_ylim([0.35, 0.37])
ax.set_xlim([0, rank_list.max()])
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax.set_xlabel('Number of Subgraphs')
ax.set_ylabel('Cross-Validation Error')


## Plot alpha sweep
alpha_list = np.unique(opt_dict['alpha'])
opt_alpha_ix = np.flatnonzero(alpha_list == opt_params['alpha'])

# Mean and standard error of the cross-validation error at each tested alpha
alpha_avg_acc, alpha_ste_acc = [], []
for alpha in alpha_list:
    alpha_sel = opt_dict['alpha'] == alpha
    alpha_err = opt_dict['error'][alpha_sel]
    alpha_avg_acc.append(alpha_err.mean())
    alpha_ste_acc.append(alpha_err.std() / np.sqrt(alpha_sel.sum()))
error_alpha_avg = np.array(alpha_avg_acc)
error_alpha_ste = np.array(alpha_ste_acc)

# Mean curve, +/- one standard error band, and a red cross at the optimum
plt.figure()
ax = plt.subplot(111)
ax.plot(alpha_list, error_alpha_avg)
alpha_band_lo = error_alpha_avg - error_alpha_ste
alpha_band_hi = error_alpha_avg + error_alpha_ste
ax.fill_between(alpha_list, y1=alpha_band_lo, y2=alpha_band_hi, lw=0, alpha=0.5)
ax.scatter(alpha_list[opt_alpha_ix], error_alpha_avg[opt_alpha_ix],
           marker='x', color='r', s=30)

ax.set_ylim([0.35, 0.37])
ax.set_xlim([0, alpha_list.max()*1.1])
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax.set_xlabel('Alpha')
ax.set_ylabel('Cross-Validation Error')


## Plot beta sweep
beta_list = np.unique(opt_dict['beta'])
opt_beta_ix = np.flatnonzero(beta_list == opt_params['beta'])

# Mean and standard error of the cross-validation error at each tested beta
error_beta_avg = np.array([opt_dict['error'][opt_dict['beta']==beta].mean()
                           for beta in beta_list])
error_beta_ste = np.array([opt_dict['error'][opt_dict['beta']==beta].std() / np.sqrt((opt_dict['beta']==beta).sum())
                           for beta in beta_list])

plt.figure()
ax = plt.subplot(111)
ax.plot(beta_list, error_beta_avg)
# The error band must share the x-values of the beta curve.  It was previously
# drawn against alpha_list, which only lined up because both grids happen to
# be identical.
ax.fill_between(beta_list,
                y1=error_beta_avg - error_beta_ste,
                y2=error_beta_avg + error_beta_ste,
                lw=0, alpha=0.5)
ax.scatter(beta_list[opt_beta_ix], error_beta_avg[opt_beta_ix], marker='x', color='r', s=30)

ax.set_ylim([0.35, 0.37])
ax.set_xlim([0, beta_list.max()*1.1])
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
ax.set_xlabel('Beta')
ax.set_ylabel('Cross-Validation Error')

# Detect Dynamic Subgraphs

## Run Non-Negative Matrix Factorization Algorithm
*__WARNING: Will Delete Existing Output__*

In [None]:
# Delete the per-seed NMF results written by this section.  The pattern is
# restricted to the NMF_Optimization prefix: a bare "*.subgraph_seed-*" would
# also wipe the NMF_GeomNull / NMF_EdgeNull results of the later sections.
rm_outp = glob.glob("{}/NMF_Optimization.subgraph_seed-*.npz".format(path_ExpData))

for path in rm_outp:
    try:
        os.remove(path)
    except OSError:
        # Only filesystem errors are expected here (e.g. the file vanished
        # between the glob and the remove); anything else should propagate.
        print("{} not found".format(path))

In [None]:
# NMF settings shared by every job, and the number of random restarts
param = {'rank': 20,
         'alpha': 0.91,
         'beta': 0.21}
n_seed = 100

from multiprocessing import Pool
parallel_run = True

# Generate a processing joblist: one entry per restart, numbered from 1,
# all pointing at the same cached configuration matrix
cfg_matr_path = glob.glob("{}/NMF_Optimization.CfgMatr.npz".format(path_ExpData))[0]
proc_list = [{'path': cfg_matr_path,
              'param': param,
              'seed': seed+1}
             for seed in xrange(n_seed)]
    
# Setup helper function to map pipeline run
def _nmf_helper(proc_item):
    """Run one randomly initialised NMF on the cached configuration matrix.

    Parameters
    ----------
    proc_item : dict
        Job description with the keys 'path' (npz file holding 'cfg_matr'),
        'param' (dict with 'rank', 'alpha' and 'beta') and 'seed' (job
        number; seeds the RNG and names the output file).

    The factors and error returned by nmf.snmf_bcd are written to
    NMF_Optimization.subgraph_seed-<seed>.npz inside path_ExpData.
    """
    print(" -- Processing Seed: {}".format(proc_item['seed']))

    # Seed NumPy's global RNG from the job number.  Forked pool workers start
    # with a copy of the parent's RNG state, so without this several "seeds"
    # would be initialised with exactly the same random factors.
    np.random.seed(proc_item['seed'])

    # Read the matrix once; every data[...] lookup re-reads it from the archive
    data = np.load(proc_item['path'], mmap_mode='r')
    try:
        cfg_matr = data['cfg_matr']
    finally:
        data.close()
    n_win, n_conn = cfg_matr.shape[0], cfg_matr.shape[1]
    rank = proc_item['param']['rank']

    # Initialize the factors for NMF
    fac_subnet = np.random.uniform(low=0, high=1.0, size=(rank, n_conn))
    fac_coef = np.random.uniform(low=0, high=1.0, size=(rank, n_win))

    # Run NMF Algorithm
    fac_subnet, fac_coef, err = nmf.snmf_bcd(
        cfg_matr,
        alpha=proc_item['param']['alpha'],
        beta=proc_item['param']['beta'],
        fac_subnet_init=fac_subnet,
        fac_coef_init=fac_coef,
        max_iter=100, verbose=False)

    # Cache the NMF result
    np.savez("{}/NMF_Optimization.subgraph_seed-{}.npz".format(path_ExpData,
                                                               proc_item['seed']),
             fac_subnet=fac_subnet, fac_coef=fac_coef, err=err,
             param=proc_item['param'], path=proc_item['path'])

# Run every job, either on a pool of 7 worker processes or serially
if parallel_run:
    mp = Pool(7)
    try:
        mp.map(_nmf_helper, proc_list)
    finally:
        # Release the worker processes; otherwise every re-run of this cell
        # leaves another 7 idle workers behind.
        mp.close()
        mp.join()
else:
    # Explicit loop: a bare map() is lazy on Python 3 and would run nothing
    for proc_item in proc_list:
        _nmf_helper(proc_item)

## Consensus Clustering of Dynamic Subgraphs

In [None]:
seed_paths = glob.glob("{}/NMF_Optimization.subgraph_seed-*.npz".format(path_ExpData))
if not seed_paths:
    # Fail with a clear message instead of an IndexError on the shape lookup
    raise IOError('No NMF_Optimization.subgraph_seed-*.npz files in {}'.format(path_ExpData))

# Aggregate the estimated subgraphs of each seed: one row per subgraph
fac_subnet_seeds = []
for path in seed_paths:
    data = np.load(path, mmap_mode='r')
    try:
        fac_subnet = data['fac_subnet'][:, :]
    finally:
        data.close()
    fac_subnet_seeds.extend(fac_subnet)
fac_subnet_seeds = np.array(fac_subnet_seeds)

# n_fac is the subgraph count of the last seed file that was read
n_fac = fac_subnet.shape[0]
n_obs = fac_subnet_seeds.shape[0]
n_conn = fac_subnet_seeds.shape[1]

# Solver settings shared by both consensus fits (alpha = beta = 0)
_cons_opts = dict(alpha=0.0, beta=0.0, max_iter=100, verbose=False)

# Consensus Subgraphs: factorise the pooled per-seed subgraphs, starting from
# uniform random factors
init_cons_subnet = np.random.uniform(low=0.0, high=1.0, size=(n_fac, n_conn))
init_cons_seeds = np.random.uniform(low=0.0, high=1.0, size=(n_fac, n_obs))
fac_cons_subnet, fac_cons_seeds, err = nmf.snmf_bcd(
    fac_subnet_seeds,
    fac_subnet_init=init_cons_subnet,
    fac_coef_init=init_cons_seeds,
    **_cons_opts)

# Consensus Coefficients: refit on the configuration matrix, with the
# consensus subgraphs as the initial subgraph factor
cfg_matr_path = glob.glob("{}/NMF_Optimization.CfgMatr.npz".format(path_ExpData))[0]
data_cfg = np.load(cfg_matr_path, mmap_mode='r')
n_win = data_cfg['cfg_matr'].shape[0]
init_cons_coef = np.random.uniform(low=0.0, high=1.0, size=(n_fac, n_win))
fac_cons_subnet_2, fac_cons_coef_2, err = nmf.snmf_bcd(
    data_cfg['cfg_matr'],
    fac_subnet_init=fac_cons_subnet,
    fac_coef_init=init_cons_coef,
    **_cons_opts)

# Cache the Consensus NMF result
np.savez("{}/NMF_Optimization.consensus_subgraph.npz".format(path_ExpData),
         fac_subnet=fac_cons_subnet_2, fac_coef=fac_cons_coef_2, err=err)

### Plot Subgraphs

In [None]:
%matplotlib inline

# Load the consensus data
consensus_path = "{}/NMF_Optimization.consensus_subgraph.npz".format(path_ExpData)
data = np.load(consensus_path, mmap_mode='r')
fac_subnet, fac_coef = data['fac_subnet'], data['fac_coef']

# Normalize: scale each factor matrix by its own global maximum
fac_subnet = fac_subnet / fac_subnet.max()
fac_coef = fac_coef / fac_coef.max()

# Dimensions used by the plots below
n_fac, n_conn = fac_subnet.shape[0], fac_subnet.shape[1]
n_win = fac_coef.shape[1]

# Plot subgraph matrix: connections along the rows, subgraphs along the columns
plt.figure()
ax = plt.subplot(111)
matshow_opts = dict(aspect=n_fac/n_conn, cmap='rainbow', vmin=0, vmax=1)
mat = ax.matshow(fac_subnet.T, **matshow_opts)
plt.colorbar(mat, ax=ax)

# Ticks on the left and bottom edges only
for tick_axis, tick_side in ((ax.yaxis, 'left'), (ax.xaxis, 'bottom')):
    tick_axis.set_ticks_position(tick_side)
ax.set_ylabel('Functional Interactions')
ax.set_xlabel('Subgraphs')

plt.savefig('./e03b-Figures/Subgraph-Cfg_Matrix.svg')
plt.close()

# Plot subgraph adjacency: one panel per subgraph on a near-square grid
plt.figure()
# The grid dimensions are cast to plain ints because recent Matplotlib
# releases reject float row/column counts in plt.subplot.  float() keeps the
# division a true division even where `from __future__ import division` is
# not in effect.
n_row = max(1, int(np.floor(np.sqrt(n_fac))))
n_col = int(np.ceil(n_fac / float(n_row)))
for ii, subg in enumerate(fac_subnet):
    adj = convert_conn_vec_to_adj_matr(subg)

    ax = plt.subplot(n_row, n_col, ii+1)
    mat = ax.matshow(adj, cmap='rainbow', vmin=0, vmax=1)
    ax.set_axis_off()

plt.savefig('./e03b-Figures/Subgraph-Adj_Matrices.svg')
plt.show()
plt.close()

# Plot Coefficients: one mean-centred trace per subgraph, offset vertically
# by the subgraph index and scaled by three standard deviations
plt.figure()
ax = plt.subplot(111)

fac_coef = fac_coef.T
norm_fac = fac_coef - fac_coef.mean(axis=0)
trace_color = [66/256., 152/256., 221./256]
for ff in xrange(n_fac):
    trace = norm_fac[:, ff]
    ax.plot(ff + trace / (3*np.std(trace)), color=trace_color)

# Axis Settings
for tick_axis, tick_side in ((ax.yaxis, 'left'), (ax.xaxis, 'bottom')):
    tick_axis.set_ticks_position(tick_side)
ax.set_ylim([-1, n_fac+1])
ax.set_ylabel('Subgraphs')
ax.set_xlabel('Time Windows')

plt.savefig('./e03b-Figures/Subgraph-Coefs.svg')
plt.show()
plt.close()

# Detect Geometric-Null Subgraphs

## Generate Geometric Adjacency Matrix

In [None]:
# Generate Node Distance Matrix from the X/Y/Z columns of the atlas label table
df = pd.read_csv('{}/Atlas/HOA112_Labels.csv'.format(path_CoreData))
n_node = len(df)

dX = np.array(df.X)
dY = np.array(df.Y)
dZ = np.array(df.Z)

# Pairwise coordinate differences via broadcasting: entry (i, j) is node i
# minus node j
diff_x = dX[:, np.newaxis] - dX[np.newaxis, :]
diff_y = dY[:, np.newaxis] - dY[np.newaxis, :]
diff_z = dZ[:, np.newaxis] - dZ[np.newaxis, :]

# Euclidean distance between every pair of nodes, scaled so the largest is 1
dist_matr = np.zeros((n_node, n_node))
dist_matr[:, :] = np.sqrt(diff_x**2 + diff_y**2 + diff_z**2)
dist_matr /= dist_matr.max()

np.savez('{}/geom_adj_matr.npz'.format(path_ExpData),
         dist_matr=dist_matr)

## Run Non-Negative Matrix Factorization Algorithm
*__WARNING: Will Delete Existing Output__*

In [None]:
# Delete the per-seed geometric-null NMF results of a previous run
rm_outp = glob.glob("{}/NMF_GeomNull.subgraph_seed-*.npz".format(path_ExpData))

for path in rm_outp:
    try:
        os.remove(path)
    except OSError:
        # Only filesystem errors are expected here (e.g. the file vanished
        # between the glob and the remove); anything else should propagate.
        print("{} not found".format(path))

In [None]:
# NMF settings shared by every job, and the number of random restarts
param = {'rank': 20,
         'alpha': 0.91,
         'beta': 0.21}
n_seed = 100

from multiprocessing import Pool
parallel_run = True

# Generate a processing joblist: one entry per restart, numbered from 1, each
# pointing at the cached configuration matrix and the node distance matrix
cfg_matr_path = glob.glob("{}/NMF_Optimization.CfgMatr.npz".format(path_ExpData))[0]
proc_list = [{'path': cfg_matr_path,
              'param': param,
              'seed': seed+1,
              'dist_path': '{}/geom_adj_matr.npz'.format(path_ExpData)}
             for seed in xrange(n_seed)]
    
# Setup helper function to map pipeline run
def _nmf_helper(proc_item):
    """Run one NMF on a geometric-null surrogate of the configuration matrix.

    Parameters
    ----------
    proc_item : dict
        Job description with the keys 'path' (npz file holding 'cfg_matr'),
        'dist_path' (npz file holding 'dist_matr'), 'param' (dict with
        'rank', 'alpha' and 'beta') and 'seed' (job number; seeds the RNG
        and names the output file).

    The factors and error returned by nmf.snmf_bcd are written to
    NMF_GeomNull.subgraph_seed-<seed>.npz inside path_ExpData.
    """
    print(" -- Processing Seed: {}".format(proc_item['seed']))

    # Seed NumPy's global RNG from the job number.  Forked pool workers start
    # with a copy of the parent's RNG state, so without this several "seeds"
    # would draw the same permutation and the same initial factors.
    np.random.seed(proc_item['seed'])

    data = np.load(proc_item['path'], mmap_mode='r')
    try:
        cfg_matr = data['cfg_matr']
    finally:
        data.close()
    dist_data = np.load(proc_item['dist_path'])
    try:
        dist_matr = dist_data['dist_matr']
    finally:
        dist_data.close()

    # Generate a geometric null topology from cfg_matr.  A single permutation
    # (one entry per connection) is drawn per job and reused for every row.
    # NOTE(review): the two positional 3s are passed straight to
    # surrogate_trend; their meaning is not visible in this notebook.
    perm_seq = np.random.permutation(cfg_matr.shape[1])
    cfg_matr_null = []
    for cfg_vec in cfg_matr:
        adj_matr = convert_conn_vec_to_adj_matr(cfg_vec)
        adj_null = Echobase.Network.Rewire.geometry.surrogate_trend(adj_matr, dist_matr, 3, 3, perm_seq=perm_seq)
        cfg_matr_null.append(convert_adj_matr_to_cfg_matr(np.expand_dims(adj_null, axis=0)).reshape(-1))
    cfg_matr_null = np.array(cfg_matr_null)

    # Initialize the factors for NMF
    rank = proc_item['param']['rank']
    fac_subnet = np.random.uniform(low=0, high=1.0,
                                   size=(rank, cfg_matr_null.shape[1]))
    fac_coef = np.random.uniform(low=0, high=1.0,
                                 size=(rank, cfg_matr_null.shape[0]))

    # Run NMF Algorithm
    fac_subnet, fac_coef, err = nmf.snmf_bcd(
        cfg_matr_null,
        alpha=proc_item['param']['alpha'],
        beta=proc_item['param']['beta'],
        fac_subnet_init=fac_subnet,
        fac_coef_init=fac_coef,
        max_iter=100, verbose=False)

    # Cache the NMF result
    np.savez("{}/NMF_GeomNull.subgraph_seed-{}.npz".format(path_ExpData,
                                                           proc_item['seed']),
             fac_subnet=fac_subnet, fac_coef=fac_coef, err=err,
             param=proc_item['param'], path=proc_item['path'])

# Run every job, either on a pool of 7 worker processes or serially
if parallel_run:
    mp = Pool(7)
    try:
        mp.map(_nmf_helper, proc_list)
    finally:
        # Release the worker processes; otherwise every re-run of this cell
        # leaves another 7 idle workers behind.
        mp.close()
        mp.join()
else:
    # Explicit loop: a bare map() is lazy on Python 3 and would run nothing
    for proc_item in proc_list:
        _nmf_helper(proc_item)

### Plot Subgraphs

In [None]:
# Pick one geometric-null seed result at random and load its factors
seed_paths = glob.glob("{}/NMF_GeomNull.subgraph_seed-*.npz".format(path_ExpData))
picked_seed_path = np.random.permutation(seed_paths)[0]
data = np.load(picked_seed_path)
fac_subnet, fac_coef = data['fac_subnet'], data['fac_coef']

# Normalize: scale each factor matrix by its own global maximum
fac_subnet = fac_subnet / fac_subnet.max()
fac_coef = fac_coef / fac_coef.max()

# Dimensions used by the plots below
n_fac, n_conn = fac_subnet.shape[0], fac_subnet.shape[1]
n_win = fac_coef.shape[1]
             
# Plot subgraph adjacency: one panel per subgraph on a near-square grid
plt.figure()
# The grid dimensions are cast to plain ints because recent Matplotlib
# releases reject float row/column counts in plt.subplot.  float() keeps the
# division a true division even where `from __future__ import division` is
# not in effect.
n_row = max(1, int(np.floor(np.sqrt(n_fac))))
n_col = int(np.ceil(n_fac / float(n_row)))
for ii, subg in enumerate(fac_subnet):
    adj = convert_conn_vec_to_adj_matr(subg)

    ax = plt.subplot(n_row, n_col, ii+1)
    mat = ax.matshow(adj, cmap='rainbow', vmin=0, vmax=1)
    ax.set_axis_off()

plt.savefig('./e03b-Figures/GeomNull-Subgraph-Adj_Matrices.svg')
plt.show()
plt.close()

# Plot Coefficients: one mean-centred trace per subgraph, offset vertically
# by the subgraph index and scaled by three standard deviations
plt.figure()
ax = plt.subplot(111)

fac_coef = fac_coef.T
norm_fac = fac_coef - fac_coef.mean(axis=0)
trace_color = [66/256., 152/256., 221./256]
for ff in xrange(n_fac):
    trace = norm_fac[:, ff]
    ax.plot(ff + trace / (3*np.std(trace)), color=trace_color)

# Axis Settings
for tick_axis, tick_side in ((ax.yaxis, 'left'), (ax.xaxis, 'bottom')):
    tick_axis.set_ticks_position(tick_side)
ax.set_ylim([-1, n_fac+1])
ax.set_ylabel('Subgraphs')
ax.set_xlabel('Time Windows')

plt.savefig('./e03b-Figures/GeomNull-Subgraph-Coefs.svg')
plt.show()
plt.close()

# Detect Edge Constrained Null Subgraphs

## Run Non-Negative Matrix Factorization Algorithm
*__WARNING: Will Delete Existing Output__*

In [None]:
# Delete the per-seed edge-null NMF results of a previous run
rm_outp = glob.glob("{}/NMF_EdgeNull.subgraph_seed-*.npz".format(path_ExpData))

for path in rm_outp:
    try:
        os.remove(path)
    except OSError:
        # Only filesystem errors are expected here (e.g. the file vanished
        # between the glob and the remove); anything else should propagate.
        print("{} not found".format(path))

In [None]:
# NMF settings shared by every job, and the number of random restarts
param = {'rank': 20,
         'alpha': 0.91,
         'beta': 0.21}
n_seed = 100

from multiprocessing import Pool
parallel_run = True

# Generate a processing joblist: one entry per restart, numbered from 1,
# all pointing at the same cached configuration matrix
cfg_matr_path = glob.glob("{}/NMF_Optimization.CfgMatr.npz".format(path_ExpData))[0]
proc_list = [{'path': cfg_matr_path,
              'param': param,
              'seed': seed+1}
             for seed in xrange(n_seed)]
    
# Setup helper function to map pipeline run
def _nmf_helper(proc_item):
    """Run one NMF on an edge-permuted copy of the configuration matrix.

    Parameters
    ----------
    proc_item : dict
        Job description with the keys 'path' (npz file holding 'cfg_matr'),
        'param' (dict with 'rank', 'alpha' and 'beta') and 'seed' (job
        number; seeds the RNG and names the output file).

    The factors and error returned by nmf.snmf_bcd are written to
    NMF_EdgeNull.subgraph_seed-<seed>.npz inside path_ExpData.
    """
    print(" -- Processing Seed: {}".format(proc_item['seed']))

    # Seed NumPy's global RNG from the job number.  Forked pool workers start
    # with a copy of the parent's RNG state, so without this several "seeds"
    # would draw the same permutation and the same initial factors.
    np.random.seed(proc_item['seed'])

    data = np.load(proc_item['path'], mmap_mode='r')
    try:
        cfg_matr = data['cfg_matr']
    finally:
        data.close()

    # Generate an edge null topology from cfg_matr: the same random
    # reordering of the connections (columns) is applied to every row
    perm_seq = np.random.permutation(cfg_matr.shape[1])
    cfg_matr_null = np.array(cfg_matr[:, perm_seq])

    # Initialize the factors for NMF
    rank = proc_item['param']['rank']
    fac_subnet = np.random.uniform(low=0, high=1.0,
                                   size=(rank, cfg_matr_null.shape[1]))
    fac_coef = np.random.uniform(low=0, high=1.0,
                                 size=(rank, cfg_matr_null.shape[0]))

    # Run NMF Algorithm
    fac_subnet, fac_coef, err = nmf.snmf_bcd(
        cfg_matr_null,
        alpha=proc_item['param']['alpha'],
        beta=proc_item['param']['beta'],
        fac_subnet_init=fac_subnet,
        fac_coef_init=fac_coef,
        max_iter=100, verbose=False)

    # Cache the NMF result
    np.savez("{}/NMF_EdgeNull.subgraph_seed-{}.npz".format(path_ExpData,
                                                           proc_item['seed']),
             fac_subnet=fac_subnet, fac_coef=fac_coef, err=err,
             param=proc_item['param'], path=proc_item['path'])

# Run every job, either on a pool of 7 worker processes or serially
if parallel_run:
    mp = Pool(7)
    try:
        mp.map(_nmf_helper, proc_list)
    finally:
        # Release the worker processes; otherwise every re-run of this cell
        # leaves another 7 idle workers behind.
        mp.close()
        mp.join()
else:
    # Explicit loop: a bare map() is lazy on Python 3 and would run nothing
    for proc_item in proc_list:
        _nmf_helper(proc_item)

### Plot Subgraphs

In [None]:
# Pick one edge-null seed result at random and load its factors
seed_paths = glob.glob("{}/NMF_EdgeNull.subgraph_seed-*.npz".format(path_ExpData))
picked_seed_path = np.random.permutation(seed_paths)[0]
data = np.load(picked_seed_path)
fac_subnet, fac_coef = data['fac_subnet'], data['fac_coef']

# Normalize: scale each factor matrix by its own global maximum
fac_subnet = fac_subnet / fac_subnet.max()
fac_coef = fac_coef / fac_coef.max()

# Dimensions used by the plots below
n_fac, n_conn = fac_subnet.shape[0], fac_subnet.shape[1]
n_win = fac_coef.shape[1]
             
# Plot subgraph adjacency: one panel per subgraph on a near-square grid
plt.figure()
# The grid dimensions are cast to plain ints because recent Matplotlib
# releases reject float row/column counts in plt.subplot.  float() keeps the
# division a true division even where `from __future__ import division` is
# not in effect.
n_row = max(1, int(np.floor(np.sqrt(n_fac))))
n_col = int(np.ceil(n_fac / float(n_row)))
for ii, subg in enumerate(fac_subnet):
    adj = convert_conn_vec_to_adj_matr(subg)

    ax = plt.subplot(n_row, n_col, ii+1)
    mat = ax.matshow(adj, cmap='rainbow', vmin=0, vmax=1)
    ax.set_axis_off()

plt.savefig('./e03b-Figures/EdgeNull-Subgraph-Adj_Matrices.svg')
plt.show()
plt.close()

# Plot Coefficients: one mean-centred trace per subgraph, offset vertically
# by the subgraph index and scaled by three standard deviations
plt.figure()
ax = plt.subplot(111)

fac_coef = fac_coef.T
norm_fac = fac_coef - fac_coef.mean(axis=0)
trace_color = [66/256., 152/256., 221./256]
for ff in xrange(n_fac):
    trace = norm_fac[:, ff]
    ax.plot(ff + trace / (3*np.std(trace)), color=trace_color)

# Axis Settings
for tick_axis, tick_side in ((ax.yaxis, 'left'), (ax.xaxis, 'bottom')):
    tick_axis.set_ticks_position(tick_side)
ax.set_ylim([-1, n_fac+1])
ax.set_ylabel('Subgraphs')
ax.set_xlabel('Time Windows')

plt.savefig('./e03b-Figures/EdgeNull-Subgraph-Coefs.svg')
plt.show()
plt.close()