In [1]:
# %load uoicorr_imports.py
%load_ext autoreload
%autoreload 2

import h5py

import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import correlate
from math import floor, ceil

# Script to add the entire project directory structure to the python path
import sys, os

# Hack to import pyuoi
parent_path, current_dir = os.path.split(os.path.abspath('.'))
while current_dir not in ['nse']:
	parent_path, current_dir = os.path.split(parent_path)
p = os.path.join(parent_path, current_dir)
# Add analysis
if p not in sys.path:
	sys.path.append(p)

# And standard list of subdirectories
if '%s\\pyuoi' % p not in sys.path:
	sys.path.append('%s\\pyuoi' % p)

from pyuoi.linear_model.lasso import UoI_Lasso
from pyuoi.linear_model.elasticnet import UoI_ElasticNet

from postprocess import postprocess_file, postprocess_dir

In [3]:
os.chdir('C:/Users/Ankit/nse/uoicorr')
dat = postprocess_dir('est_comparison')

In [7]:
dat.iloc[0]

AIC_fn         [[[[ 9. 10.  9.  8.], [3. 2. 3. 0.], [2. 2. 2....
AIC_fp         [[[[0. 0. 0. 0.], [0. 0. 0. 0.], [0. 0. 0. 0.]...
AIC_scores     [[[[ 0.01386795 -0.0140506   0.12281205  0.325...
AICc_fn        [[[[0. 0. 7. 0.], [0. 0. 1. 0.], [2. 2. 2. 2.]...
AICc_fp        [[[[0. 0. 3. 0.], [26.  0. 25.  0.], [0. 0. 0....
AICc_scores    [[[[       nan        nan 0.30145801        na...
BIC_fn         [[[[10. 10.  9.  9.], [4. 2. 3. 0.], [3. 2. 2....
BIC_fp         [[[[0. 0. 0. 0.], [0. 0. 0. 0.], [0. 0. 0. 0.]...
BIC_scores     [[[[-1.08816273e-06 -1.40506035e-02  1.2676320...
betadist                                             ['uniform']
betas          [[[[0.         0.         0.         0.       ...
block_size                                       [6, 12, 20, 30]
correlation                                                    0
r2_fn          [[[[2. 2. 4. 1.], [0. 1. 2. 0.], [0. 1. 1. 0.]...
r2_fp          [[[[16. 10. 21. 15.], [15.  6.  6. 26.], [ 3. ...
r2_scores      [[[[ 0.737

In [18]:
dat.iloc[0].r2_scores.va

AttributeError: 'numpy.ndarray' object has no attribute 'values'

### Plot the performance of the four estimation scores on a 2D grid of sparsities and number of samples --> do so for each block_size and correlation strength

In [31]:
d = dat.loc[dat['correlation'] == 0]

# Which block size to use out of [6, 12, 20, 30]
block_idx = 0

# Assemble scores for various sparsities
r2_scores_avg = np.zeros((10, 10))
for i, s in enumerate(np.unique(d.iloc[:, 16])):
    r2_scores_avg[i, :] = np.mean(d.loc[dat['sparsity'] == s].r2_scores.values[0][:, :, :, block_idx], 0)

In [43]:
os.chdir('C:\\Users\\Ankit\\nse\\uoicorr')

In [45]:
correlations = [0, 0.2, 0.4, 0.6, 0.8, 1]
block_sizes = [6, 12, 20, 30]

for cidx, correlation in enumerate(correlations):
    for bidx, block_size in enumerate(block_sizes):

        d = dat.loc[dat['correlation'] == correlation]

        # Assemble scores for various sparsities
        r2_scores_avg = np.zeros((10, 10))
        BIC_scores_avg = np.zeros((10, 10))
        AIC_scores_avg = np.zeros((10, 10))
        AICc_scores_avg = np.zeros((10, 10))

        for i, s in enumerate(np.unique(d.iloc[:, 16])):
            r2_scores_avg[i, :] = np.mean(d.loc[dat['sparsity'] == s].r2_scores.values[0][:, :, :, bidx], 0)
            BIC_scores_avg[i, :] = np.mean(d.loc[dat['sparsity'] == s].BIC_scores.values[0][:, :, :, bidx], 0)
            AIC_scores_avg[i, :] = np.mean(d.loc[dat['sparsity'] == s].AIC_scores.values[0][:, :, :, bidx], 0)
            # AICc will have some nan values so use nanmean
            AICc_scores_avg[i, :] = np.nanmean(d.loc[dat['sparsity'] == s].AICc_scores.values[0][:, :, :, bidx], 0)

        fig, ax = plt.subplots(2, 2, figsize = (15, 15))

        scores = [r2_scores_avg, BIC_scores_avg, AIC_scores_avg, AICc_scores_avg]
        snames = ['R2', 'BIC', 'AIC', 'AICc']
        for j in range(len(scores)):
            a = ax[np.unravel_index(j, (2, 2))]

            a.set_aspect('equal')
            c = a.pcolor(scores[j], cmap = 'Greys', vmin = 0, vmax = 1)
            a.set_xticks(np.arange(11)[:-1]+0.5)
            a.set_yticks(np.arange(11)[:-1]+0.5)
            a.set_ylabel('Sparsity', fontsize = 16)
            a.set_xlabel('Sample size', fontsize = 16)
            a.set_xticklabels(n_features * np.linspace(1, 50, 10).astype(int))
            a.set_yticklabels(np.linspace(0.2, 1, 10))
            # get the current labels 
            labels = [item.get_text() for item in a.get_yticklabels()]
            # Beat them into submission and set them back again
            a.set_yticklabels([str(round(float(label), 2)) for label in labels])
            a.set_title('R^2 score using %s estimation score' % snames[j], fontsize = 16)

        fig.suptitle('Correlation = %f, Block Size = %f' % (correlation, block_size), fontsize = 16)
        fig.savefig('figs/est_comparison/%d_%d.png' % (cidx, bidx))
        plt.close()



### Repeat for selection accuracy

In [63]:
correlations = [0, 0.2, 0.4, 0.6, 0.8, 1]
block_sizes = [6, 12, 20, 30]

for cidx, correlation in enumerate(correlations):
    for bidx, block_size in enumerate(block_sizes):

        d = dat.loc[dat['correlation'] == correlation]

        # Assemble selection accuracy averages
        r2_fp_avg = np.zeros((10, 10))
        BIC_fp_avg = np.zeros((10, 10))
        AIC_fp_avg = np.zeros((10, 10))
        AICc_fp_avg = np.zeros((10, 10))
        
        r2_fn_avg = np.zeros((10, 10))
        BIC_fn_avg = np.zeros((10, 10))
        AIC_fn_avg = np.zeros((10, 10))
        AICc_fn_avg = np.zeros((10, 10))

        r2_sa_avg = np.zeros((10, 10))
        BIC_sa_avg = np.zeros((10, 10))
        AIC_sa_avg = np.zeros((10, 10))
        AICc_sa_avg = np.zeros((10, 10))

        for i, s in enumerate(np.unique(d.iloc[:, 16])):
            r2_fp_avg[i, :] = np.mean(d.loc[dat['sparsity'] == s].r2_fp.values[0][:, :, :, bidx], 0)
            BIC_fp_avg[i, :] = np.mean(d.loc[dat['sparsity'] == s].BIC_fp.values[0][:, :, :, bidx], 0)
            AIC_fp_avg[i, :] = np.mean(d.loc[dat['sparsity'] == s].AIC_fp.values[0][:, :, :, bidx], 0)
            # AICc will have some nan values so use nanmean
            AICc_fp_avg[i, :] = np.nanmean(d.loc[dat['sparsity'] == s].AICc_fp.values[0][:, :, :, bidx], 0)

            r2_fn_avg[i, :] = np.mean(d.loc[dat['sparsity'] == s].r2_fn.values[0][:, :, :, bidx], 0)
            BIC_fn_avg[i, :] = np.mean(d.loc[dat['sparsity'] == s].BIC_fn.values[0][:, :, :, bidx], 0)
            AIC_fn_avg[i, :] = np.mean(d.loc[dat['sparsity'] == s].AIC_fn.values[0][:, :, :, bidx], 0)
            # AICc will have some nan values so use nanmean
            AICc_fn_avg[i, :] = np.nanmean(d.loc[dat['sparsity'] == s].AICc_fn.values[0][:, :, :, bidx], 0)

            non_zero_beta = np.count_nonzero(d.loc[dat['sparsity'] == s].betas.values[0][:, :, bidx, :], 2)
            
            r2_sa_avg[i, :] = 1 - (r2_fp_avg[i, :] + r2_fn_avg[i, :])/(non_zero_beta)
            BIC_sa_avg[i, :] = 1 - (BIC_fp_avg[i, :] + BIC_fn_avg[i, :])/(non_zero_beta)
            AIC_sa_avg[i, :] = 1 - (AIC_fp_avg[i, :] + AIC_fn_avg[i, :])/(non_zero_beta)
            AICc_sa_avg[i, :] = 1 - (AICc_fp_avg[i, :] + AICc_fn_avg[i, :])/(non_zero_beta)
            
            
            
            
        fig, ax = plt.subplots(2, 2, figsize = (15, 15))

        sa = [r2_sa_avg, BIC_sa_avg, AIC_sa_avg, AICc_sa_avg]
        snames = ['R2', 'BIC', 'AIC', 'AICc']
        for j in range(len(scores)):
            a = ax[np.unravel_index(j, (2, 2))]

            a.set_aspect('equal')
            c = a.pcolor(sa[j], cmap = 'Greys', vmin = 0, vmax = 1)
            a.set_xticks(np.arange(11)[:-1]+0.5)
            a.set_yticks(np.arange(11)[:-1]+0.5)
            a.set_ylabel('Sparsity', fontsize = 16)
            a.set_xlabel('Sample size', fontsize = 16)
            a.set_xticklabels(n_features * np.linspace(1, 50, 10).astype(int))
            a.set_yticklabels(np.linspace(0.2, 1, 10))
            # get the current labels 
            labels = [item.get_text() for item in a.get_yticklabels()]
            # Beat them into submission and set them back again
            a.set_yticklabels([str(round(float(label), 2)) for label in labels])
            a.set_title('Selection accuracy score using %s estimation score' % snames[j], fontsize = 16)

        fig.suptitle('Correlation = %f, Block Size = %f' % (correlation, block_size), fontsize = 16)
        fig.savefig('figs/est_comparison/sa_%d_%d.png' % (cidx, bidx))
        plt.close()

In [53]:
r2_fn_avg

array([[ 5.8,  1. ,  2. ,  2.6,  1. ,  0.8,  0. ,  0. ,  0.2,  0.8],
       [ 6.8,  2.4,  0.2,  0.6,  0. ,  0.6,  0.2,  1. ,  1.2,  0. ],
       [15. ,  1.8,  2.4,  3. ,  0. ,  0. ,  0.4,  0. ,  0.2,  0.2],
       [15.4,  3. ,  0.2,  1.6,  0.2,  0.6,  0. ,  1.4,  0. ,  0. ],
       [18. ,  2.6,  0.2,  0. ,  0.4,  0. ,  0. ,  0. ,  0. ,  0. ],
       [21.4,  0.2,  0.6,  0. ,  0. ,  1.2,  0. ,  0. ,  0. ,  0. ],
       [31.4,  1.4,  0. ,  0.2,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ],
       [31. ,  0.8,  0.6,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ],
       [33.8,  1. ,  2.2,  0.2,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ],
       [50. ,  0.8,  0. ,  0.2,  0.6,  0. ,  0. ,  0. ,  0. ,  0. ]])

In [60]:
d.iloc[0].betas[:, :, 0, :].shape

(1, 10, 60)