In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib widget

from os import path
from glob import glob
import pickle

import numpy as np
import pandas as pd
import scipy as sp
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import cm
from IPython.display import display
from IPython.utils.capture import capture_output
from tqdm.auto import tqdm
# Register tqdm's pandas integration (the `progress_apply` family of methods).
# capture_output() swallows anything written to stdout/stderr while registering,
# so the cell produces no output.
with capture_output():
    tqdm.pandas()
import h5py

from ipympl.backend_nbagg import Canvas
# Override the class-level default of the `header_visible` trait so every
# ipympl canvas created after this point starts with it set to False
# (presumably this hides the "Figure N" header bar -- confirm against ipympl docs).
Canvas.header_visible.default_value = False

In [2]:
# Lookup table: stimulus name -> stimulus category.
# Used with `.map(...)` to relabel the `stimulus` level/column of the
# differentiation tables.
stimulus_categories = dict(
    stim_dg_90='complex',
    stim_dgc_90='simple',
    stim_flash_90='simple',
    stim_gabors_90='simple',
    stim_natural_movie_one_shuffled='shuffled',
    natural_movies='natural',
    stim_natural_movie_one='natural',
    stim_natural_movie_three='natural',
    spontaneous='spontaneous',
    stim_sg_90='complex',
)

# Foreground colour per stimulus category: the category's colormap sampled at
# the listed shade, with alpha fixed at 0.8 (second positional argument).
stim_cat_colors = {
    category: colormap(shade, 0.8)
    for category, colormap, shade in (
        ('spontaneous', cm.Greys, 0.8),
        ('simple', cm.Reds, 0.8),
        ('complex', cm.Blues, 0.8),
        ('natural', cm.Greens, 0.8),
        ('shuffled', cm.Purples, 0.4),  # lighter shade than the other categories
    )
}

# Background (lighter, more transparent) colour per stimulus category: the
# category's colormap sampled at the listed shade, with alpha fixed at 0.5.
stim_cat_colors_bg = {
    category: colormap(shade, 0.5)
    for category, colormap, shade in (
        ('spontaneous', cm.Greys, 0.5),
        ('simple', cm.Reds, 0.5),
        ('complex', cm.Blues, 0.5),
        ('natural', cm.Greens, 0.5),
        ('shuffled', cm.Purples, 0.4),  # only entry whose shade is not 0.5
    )
}

In [3]:
# Model name -> path of the pickled differentiation table for that CNN.
# Paths are relative to the notebook's working directory.
cnn_models = dict(
    VGG16='../tiny-blue-dot/differentiation/refactor/CNNs/vgg16_diffn.pkl',
    ResNet50='../tiny-blue-dot/differentiation/refactor/CNNs/resnet_diffn.pkl',
    InceptionV3='../tiny-blue-dot/differentiation/refactor/CNNs/inception_diffn.pkl',
)

# Load every model's pickled table, keyed by model name (same order as cnn_models).
# NOTE: pd.read_pickle unpickles arbitrary objects -- only point this at trusted files.
differentiation = {
    model_name: pd.read_pickle(pickle_path)
    for model_name, pickle_path in cnn_models.items()
}

# differentiation = {
#     k : d / d.columns.get_level_values(1).map(np.sqrt) for k, d in differentiation.items()
# }

# Hard-coded y-axis limits (low, high) per model, applied by the plotting cell
# via ax.set_ylim. The values equal the recorded output of the later cell that
# reads ax.get_ylim() from the figure.
ylims = dict(
    VGG16=(3291.9816826375727, 4367909.777423014),
    ResNet50=(253.73407405560303, 72457426.7240913),
    InceptionV3=(684.8871165012145, 10736165.239076167),
)

# Hard-coded x-axis limits (low, high) per model, applied by the plotting cell
# via ax.set_xlim. The values equal the recorded output of the later cell that
# reads ax.get_xlim() from the figure.
xlims = dict(
    VGG16=(0.5244677678629754, 20.435278098806037),
    ResNet50=(0.49113097065560307, 17.424850357749584),
    InceptionV3=(0.5543381394014095, 13.442327018738055),
)

In [4]:
# Inspection only: used as a bare expression, axes_style() just returns (and the
# notebook displays) the parameter dict of the 'whitegrid' style. It does not
# change any plotting defaults -- that needs `with sns.axes_style(...)` or
# sns.set_style(...).
sns.axes_style('whitegrid')

{'figure.facecolor': 'white',
 'axes.labelcolor': '.15',
 'xtick.direction': 'out',
 'ytick.direction': 'out',
 'xtick.color': '.15',
 'ytick.color': '.15',
 'axes.axisbelow': True,
 'grid.linestyle': '-',
 'text.color': '.15',
 'font.family': ['sans-serif'],
 'font.sans-serif': ['Arial',
  'DejaVu Sans',
  'Liberation Sans',
  'Bitstream Vera Sans',
  'sans-serif'],
 'lines.solid_capstyle': 'round',
 'patch.edgecolor': 'w',
 'patch.force_edgecolor': True,
 'image.cmap': 'rocket',
 'xtick.top': False,
 'ytick.right': False,
 'axes.grid': True,
 'axes.facecolor': 'white',
 'axes.edgecolor': '.8',
 'grid.color': '.8',
 'axes.spines.left': True,
 'axes.spines.bottom': True,
 'axes.spines.right': True,
 'axes.spines.top': True,
 'xtick.bottom': False,
 'ytick.left': False}

In [5]:
# Keys used to select one slice of each differentiation table (diffn[win][sta]).
# NOTE(review): presumably a window length and a state length -- confirm units.
win, sta = 3, 0.1

# Axes pick up their style when they are created, so only the subplots() call
# needs to run inside the style context.
with sns.axes_style('whitegrid', rc={'axes.grid': False}):
    f, axes = plt.subplots(3, 1, figsize=(8, 5), tight_layout=True)

# Loop-invariant outlier styling; it has no visible effect while
# showfliers=False is passed below, but is kept for when outliers are re-enabled.
flierprops = dict(markerfacecolor=cm.Greys(0.5, 0.5), markeredgecolor='none')

# One panel per model, in the iteration order of `differentiation`.
for ax, (k, diffn) in zip(axes, differentiation.items()):
    # Long format: one row per (stimulus, layer, value), missing values dropped.
    _df = diffn[win][sta].melt().dropna()
    # Keep values > 1 only, then attach the stimulus category. Using .assign on
    # the filtered frame returns a new DataFrame instead of writing a column
    # into a slice (which triggers SettingWithCopyWarning).
    _df = _df[_df.value > 1].assign(
        stimulus_categories=lambda d: d.stimulus.map(stimulus_categories)
    )

    sns.boxplot(
        x='layer', y='value', hue='stimulus_categories', data=_df,
        ax=ax, fliersize=0.2, linewidth=0.2, whis=1.5,
        palette=stim_cat_colors_bg, width=0.8,
        flierprops=flierprops, showfliers=False
    )
    # Axis limits are the hard-coded per-model values defined earlier.
    ax.set_xlim(xlims[k])
    ax.set_ylim(ylims[k])
    # Hide the per-panel legend; a single shared legend is added to the top panel below.
    ax.legend().set_visible(False)
    ax.set_ylabel(f'{k}\ndifferentiation')
    ax.set_yscale('log')
    ax.set_xlabel('')
    ax.set_xticks([])
    sns.despine(ax=ax)

# Label only the bottom panel. Address it explicitly rather than relying on the
# loop variable still pointing at the last axes.
axes[-1].set_xlabel('CNN layer hierarchy')
axes[0].legend(ncol=4, fontsize=8, frameon=False)
f.savefig('fig_cnn_differentiation.pdf')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [6]:
# Snapshot the y-limits currently set on each panel, keyed by model name.
# (Panels and models are paired positionally, as in the plotting cell.)
ylims = {
    model_name: panel.get_ylim()
    for panel, model_name in zip(axes, differentiation)
}
ylims

{'VGG16': (3291.9816826375727, 4367909.777423014),
 'ResNet50': (253.73407405560303, 72457426.7240913),
 'InceptionV3': (684.8871165012145, 10736165.239076167)}

In [7]:
# Snapshot the x-limits currently set on each panel, keyed by model name.
# (Panels and models are paired positionally, as in the plotting cell.)
xlims = {
    model_name: panel.get_xlim()
    for panel, model_name in zip(axes, differentiation)
}
xlims

{'VGG16': (0.5244677678629754, 20.435278098806037),
 'ResNet50': (0.49113097065560307, 17.424850357749584),
 'InceptionV3': (0.5543381394014095, 13.442327018738055)}

# Statistical tests for trends

In [8]:
def pearsonr_ci(x,y,alpha=0.05):
    '''Pearson correlation together with a Fisher z-transform confidence interval.

    See https://zhiyzuo.github.io/Pearson-Correlation-CI-in-Python for reference

    Parameters
    ----------
    x, y : iterable
        Paired samples of equal length (list, np.array, pandas Series, or any
        other iterable, including generators).
    alpha : float
        Significance level, strictly between 0 and 1. 0.05 by default, which
        yields a 95% confidence interval.

    Returns
    -------
    dict
        ``r``       : Pearson's correlation coefficient
        ``p``       : the corresponding two-sided p value
        ``ci_low``  : lower bound of the (1 - alpha) confidence interval
        ``ci_high`` : upper bound of the (1 - alpha) confidence interval

        The interval is NaN when there are 3 or fewer samples, because the
        Fisher standard error 1/sqrt(n - 3) is undefined there.

    Raises
    ------
    ValueError
        If ``alpha`` is not strictly between 0 and 1. Errors raised by
        ``scipy.stats.pearsonr`` itself (e.g. mismatched lengths) propagate.
    '''
    # Local import: a bare `import scipy as sp` does not guarantee that the
    # `stats` submodule has been loaded on every SciPy version.
    from scipy import stats

    if not 0 < alpha < 1:
        raise ValueError(f'alpha must be strictly between 0 and 1, got {alpha!r}')

    # Materialise inputs so that len() works for generators and other one-shot iterables.
    x = np.asarray(x if hasattr(x, '__len__') else list(x))
    y = np.asarray(y if hasattr(y, '__len__') else list(y))

    r, p = stats.pearsonr(x, y)

    n = len(x)
    if n > 3:
        r_z = np.arctanh(r)                   # Fisher z-transform of r
        se = 1/np.sqrt(n-3)                   # standard error in z-space
        z = stats.norm.ppf(1-alpha/2)         # two-sided normal critical value
        lo, hi = np.tanh((r_z-z*se, r_z+z*se))  # back-transform the bounds to r-space
    else:
        # Too few samples for the Fisher approximation: report the interval as
        # undefined rather than dividing by zero / taking sqrt of a negative.
        lo = hi = np.nan
    return dict(r=r, p=p, ci_low=lo, ci_high=hi)

In [13]:
# Per-model layer index at which the trend analysis is split.
# NOTE(review): presumably the "elbow" of each model's differentiation curve -- confirm.
elb = {
    'VGG16' : 16,
    'ResNet50' : 11,
    'InceptionV3' : 10
}
corrs = {}
for k, diffn in differentiation.items():
    # Move the first column level into the row index so the remaining columns
    # are layers, then bring that level to the front of the index.
    df = diffn[win][sta].stack(0).swaplevel().sort_index()
    # Replace the row index with each row's stimulus category.
    df.index = df.index.get_level_values('stimulus').map(stimulus_categories)
    df = df.sort_index()
    # Layers 0 .. elb[k]-1 (assumes layer columns are labelled with integers -- TODO confirm).
    early_layers = range(elb[k])
    corrs[f'{k} (up to {elb[k]})'] = df.groupby('stimulus').apply(
        lambda d: pd.Series(
            pearsonr_ci(
                # Flatten the selected layers into paired samples:
                # first row = layer label, second row = value.
                *d[early_layers].reset_index(drop=True)
                .unstack().reset_index('layer').values.T
            )
        )
    )
# Show two decimals for this table only. option_context restores whatever
# precision was active before, instead of forcing a hard-coded value afterwards.
with pd.option_context("display.precision", 2):
    display(pd.concat(corrs))

Unnamed: 0_level_0,Unnamed: 1_level_0,r,p,ci_low,ci_high
Unnamed: 0_level_1,stimulus,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
VGG16 (up to 16),complex,0.45,2.0399999999999998e-48,0.395,0.5
VGG16 (up to 16),natural,0.42,5.35e-36,0.364,0.48
VGG16 (up to 16),shuffled,0.6,2.53e-17,0.496,0.69
VGG16 (up to 16),simple,0.25,2.32e-22,0.203,0.3
ResNet50 (up to 11),complex,0.08,0.0339,0.00633,0.16
ResNet50 (up to 11),natural,0.67,1.93e-73,0.623,0.72
ResNet50 (up to 11),shuffled,0.73,3.23e-19,0.623,0.8
ResNet50 (up to 11),simple,0.12,0.000113,0.0606,0.18
InceptionV3 (up to 10),complex,0.49,5.5300000000000005e-37,0.423,0.55
InceptionV3 (up to 10),natural,0.49,3.88e-31,0.417,0.55


In [14]:
# Per-model layer index at which the trend analysis is split.
# NOTE(review): presumably the "elbow" of each model's differentiation curve -- confirm.
elb = {
    'VGG16' : 16,
    'ResNet50' : 11,
    'InceptionV3' : 10
}
corrs = {}
for k, diffn in differentiation.items():
    # Move the first column level into the row index so the remaining columns
    # are layers, then bring that level to the front of the index.
    df = diffn[win][sta].stack(0).swaplevel().sort_index()
    # Replace the row index with each row's stimulus category.
    df.index = df.index.get_level_values('stimulus').map(stimulus_categories)
    df = df.sort_index()
    corrs[f'{k} ({elb[k]} onwards)'] = df.groupby('stimulus').apply(
        lambda d: pd.Series(
            pearsonr_ci(
                # Flatten layers elb[k] .. len(columns)-2 into paired samples:
                # first row = layer label, second row = value.
                # NOTE(review): the upper bound len(d.columns)-1 leaves out the
                # last column -- confirm that excluding the final layer is intended.
                *d[range(elb[k], len(d.columns)-1)].reset_index(drop=True)
                .unstack().reset_index('layer').values.T
            )
        )
    )
# Show two decimals for this table only. option_context restores whatever
# precision was active before, instead of forcing a hard-coded value afterwards.
with pd.option_context("display.precision", 2):
    display(pd.concat(corrs))

Unnamed: 0_level_0,Unnamed: 1_level_0,r,p,ci_low,ci_high
Unnamed: 0_level_1,stimulus,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
VGG16 (16 onwards),complex,0.32,2.98e-07,0.21,0.43
VGG16 (16 onwards),natural,-0.01,0.877,-0.15,0.13
VGG16 (16 onwards),shuffled,0.11,0.51,-0.21,0.41
VGG16 (16 onwards),simple,0.02,0.691,-0.08,0.12
ResNet50 (11 onwards),complex,-0.47,8.35e-21,-0.54,-0.38
ResNet50 (11 onwards),natural,-0.68,1.28e-41,-0.73,-0.61
ResNet50 (11 onwards),shuffled,-0.84,5.19e-17,-0.9,-0.74
ResNet50 (11 onwards),simple,-0.6,2.62e-53,-0.65,-0.54
InceptionV3 (10 onwards),complex,-0.67,3.37e-25,-0.75,-0.59
InceptionV3 (10 onwards),natural,-0.75,1.34e-28,-0.81,-0.67
