In [1]:
import os
import neptune
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm as tqdm
from scipy.stats import ttest_ind as ttest,pearsonr
import scipy
import xarray as xr
from scipy.spatial.distance import pdist,squareform,cdist
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.cross_decomposition import CCA

# from tensorflow.python import keras as keras
from keras.models import Model

from src.results.experiments import _DateExperimentLoader
from src.results.utils import raw_to_xr, dprime
from src.results.neptune import get_model_files, load_models, load_assemblies, load_params, load_properties,prep_assemblies,NeptuneExperimentRun,generate_convnet_encoders
from src.results.dicarlo import get_dicarlo_su, process_dicarlo,err_neuroids
from src.data_loader import Shifted_Data_Loader
from src.data_generator import ShiftedDataBatcher
import src.rcca

import brainscore
from brainscore.assemblies import walk_coords,split_assembly
from brainscore.assemblies import split_assembly
# from brainscore.metrics import Score

from brainio_base.assemblies import DataAssembly

def set_style():
    # This sets reasonable defaults for font size for
    # a figure that will go in a paper
    sns.set_context("talk")
    
    # Set the font to be serif, rather than sans
    sns.set(font='serif')
    
    # Make the background white, and specify the
    # specific font family
    sns.set_style("white", {
        "font.family": "serif",
        "font.serif": ["Georgia","Times New Roman", "Palatino", "serif"]
    })

Using TensorFlow backend.


In [2]:
os.environ['NEPTUNE_API_TOKEN']="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vdWkubmVwdHVuZS5tbCIsImFwaV9rZXkiOiI3ZWExMTlmYS02ZTE2LTQ4ZTktOGMxMi0wMDJiZTljOWYyNDUifQ=="
neptune.init('elijahc/DuplexAE')
neptune.set_project('elijahc/DuplexAE')
proj_root = '/home/elijahc/projects/vae'

In [3]:
from src.results.neptune import load_configs

In [4]:
conv_eids = [
    'DPX-29',
    'DPX-30',
]
dense_eids = [
    'DPX-10',
    'DPX-16',
#     'DPX-27',
]
# eids = conv_eids+dense_eids
conv_exps = neptune.project.get_experiments(id=conv_eids)
dense_exps = neptune.project.get_experiments(id=dense_eids)
exps = np.array(conv_exps+dense_exps)
s_df = pd.DataFrame(list(load_configs(exps)))
s_df.head()

Unnamed: 0,assembly_fn,augmentation,batch_sz,bg,bg_contrast,dataset,dir,encoder_arch,generator_arch,id,im_translation,n_epochs,recon_weight,su_selectivity_fn,xent_weight,y_dim,z_dim
0,,dynamic,512.0,natural,0.3,fashion_mnist,models/2019-11-04/DPX-29,convnet,resnet,DPX-29,0.75,54000.0,0.0,,15.0,35,35
1,,dynamic,512.0,natural,0.3,fashion_mnist,models/2019-11-04/DPX-30,convnet,resnet,DPX-30,0.75,54000.0,1.0,,15.0,35,35
2,dataset.nc,dynamic,512.0,natural,0.3,fashion_mnist,models/2019-09-25/DPX-10,dense,resnet,DPX-10,0.75,54000.0,1.0,selectivity.pqt,15.0,35,35
3,,dynamic,512.0,natural,0.3,fashion_mnist,models/2019-09-25/DPX-16,dense,resnet,DPX-16,0.75,54000.0,0.0,,15.0,35,35


In [5]:
from sklearn.linear_model import Lasso,LassoCV,MultiTaskLassoCV,MultiTaskLasso,SGDRegressor
from sklearn.svm import SVR,LinearSVR,NuSVR

In [6]:
from sklearn.metrics import explained_variance_score
from sklearn.multioutput import MultiOutputRegressor,MultiOutputEstimator

In [7]:
def lasso(x,neural_data,region=None,brain_region=['IT','V4'], cv=5, variation=[3],sortby='image_id',train_size=0.75):
    var_lookup = stimulus_set[stimulus_set.variation.isin(variation)].image_id.values
    x = x.where(x.image_id.isin(var_lookup),drop=True)
    nd = neural_data.where(neural_data.image_id.isin(var_lookup),drop=True)
    
    num_images = x.shape[0]
    out_recs = []
#     {'region':[],
#             'layer':[],
#             'fve':[],
#             'pearsonr':[],
#             'p-value':[],
#             'neuron':[],
#             'depth':[],
#             'iter':[],
#            }
    
    cv_tr = []
    cv_te = []
    
    for rand_delta in np.arange(cv):
        tr_idx, te_idx, _,_ = train_test_split(np.arange(num_images),np.arange(num_images),train_size=train_size,random_state=np.random.randint(0,50)+rand_delta)
        cv_tr.append(tr_idx)
        cv_te.append(te_idx)
    
    tr,te, _,_ = train_test_split(np.arange(num_images),np.arange(num_images),train_size=train_size,random_state=7)

    
    for br in brain_region:
        nd_reg = nd.sel(region=br)[:,:20]
        if region is None:
            region = np.unique(x.region.values)
        
        with tqdm(region,total=len(region)) as reg_iter:
            for reg in reg_iter:
                if reg == 'pixel':
                    continue
                else:
                    x_reg = x.sel(region=reg)
                    depth = np.unique(x_reg.layer.values)[0]
                reg_iter.set_description('{}{} x {}{}'.format(reg,x_reg.shape,br,nd_reg.shape))
                
#             with tqdm(np.arange(nd_reg.shape[-1])) as neurons:
#                 fve_mean = []
#                 r_mean = []
#                 neurons.set_description('{}{} x {}{}'.format(reg,x_reg.shape,br,nd_reg.shape))
                
#                 for n_idx in neurons:
#             estimator = MultiOutputRegressor(SGDRegressor(early_stopping=True),n_jobs=5)
                estimator=MultiOutputRegressor(LinearSVR(), n_jobs=5)
    #                     estimator = SGDRegressor()
    #                     estimator = SVR()
                estimator.fit(x_reg.values[tr],nd_reg.values[tr])
                y_pred = estimator.predict(x_reg.values[te])
#                 print(type(y_pred))
#                 print(type(nd_reg.values[te]),nd_reg.values[te].shape)
    #             r,pv = pearsonr(nd_reg.values[te],y_pred)
                fve = explained_variance_score(nd_reg.values[te],y_pred)

    #                     score = estimator.score(x_reg.values,nd_reg.values[:,n_idx])

    #                     r,pv = pearsonr(ab_vec[0],ab_vec[1])
    #             r_mean.append(np.nan_to_num(r))
    #             fve_mean.append(fve)
                reg_iter.set_postfix(fve=fve)
                out_recs.append({
                    'region':br,
                    'layer':reg,
                    'fve':fve,
    #                 'cc':r,
    #                 'neuron':n_idx,
                    'depth':depth,
                })
#                     out_dict['region'].append(br)
#                     out_dict['layer'].append(reg)
#                     out_dict['fve'].append(fve)
#                     out_dict['pearsonr'].append(r)
#                     out_dict['neuron'].append(n_idx)
# #                     out_dict['iter'].append(n)
#                     out_dict['depth'].append(depth)

#                     neurons.set_postfix(r_max=np.max(r_mean), r_mean=np.mean(r_mean), r_var=np.var(r_mean), fve_max=np.max(fve_mean), fve_mean=np.mean(fve_mean))
#     print({k:v.shape for k,v in out_dict.items()})

    return pd.DataFrame.from_records(out_recs)

In [8]:
def cca(x,neural_data,region=None, brain_region=['IT','V4'], cv=5, n_components=5, variation=[3],sortby='image_id',train_size=0.75):
    var_lookup = stimulus_set[stimulus_set.variation.isin(variation)].image_id.values
    x = x.where(x.image_id.isin(var_lookup),drop=True)
    nd = neural_data.where(neural_data.image_id.isin(var_lookup),drop=True)
    
    x = x.sortby(sortby)
    nd = nd.sortby(sortby)
    
    assert list(getattr(x,sortby).values) == list(getattr(nd,sortby).values)
    num_images = x.shape[0]
    out_recs = []
#     out_dict = {'region':[],
#             'layer':[],
#             'pearsonr':[],
#             'fve':[],
# #             'p-value':[],
#             'iter':[],
#             'depth':[],
#            }
    
    cv_tr = []
    cv_te = []
    
    kf = KFold(n_splits=cv, shuffle=True, random_state=cv)
    for tr,te in kf.split(np.arange(num_images)):
        cv_tr.append(tr)
        cv_te.append(te)
    
    for rand_delta in np.arange(cv):
        tr_idx, te_idx, _,_ = train_test_split(np.arange(num_images),np.arange(num_images),train_size=train_size,random_state=np.random.randint(0,50)+rand_delta)
        cv_tr.append(tr_idx)
        cv_te.append(te_idx)
    
    for br in brain_region:
        nd_reg = nd.sel(region=br)
        
        if region is None:
            region = np.unique(x.region.values)
            
        for reg in region:
            if reg == 'pixel':
                continue
            x_reg = x.sel(region=reg)
            
            depth = np.unique(x_reg.layer.values)[0]
            with tqdm(zip(np.arange(cv),cv_tr,cv_te), total=cv) as t:
                t.set_description('{}{} x {}{}'.format(reg,x_reg.shape,br,nd_reg.shape))
                
                r_mean = []
                fve_mean = []
                cca_mean = []
                for n,tr,te in t:
                    cca = CCA(n_components=n_components)
                    cca.fit(x_reg.values[tr],nd_reg.values[tr])

                    u,v = cca.transform(x_reg.values[te],nd_reg.values[te])
                    
                    y_pred = cca.predict(x_reg.values[te])
                    y_true = nd_reg.values[te]
                    
                    fve = explained_variance_score(y_true,y_pred,multioutput='uniform_average')
                    r_vals = [pearsonr(y_pred[:,i],y_true[:,i]) for i in range(y_pred.shape[-1])]
                    
                    cca_r = np.mean([pearsonr(u[:,i],v[:,i]) for i in np.arange(n_components)])

#                     r_vals = [pearsonr(ab_vec[0][:,i],ab_vec[1][:,i]) for i in range(ab_vec[0].shape[-1])]
                    
                    r_mean.append(np.mean([r for r,v in r_vals]))
                    cca_mean.append(cca_r)
                    fve_mean.append(fve)
                
                    out_recs.append({
                        'region':br,
                        'layer':reg,
                        'pearsonr': np.mean([r for r,v in r_vals]),
                        'cca_r':cca_r,
                        'fve':fve,
                        'iter':n,
                        'depth':depth,
                    })
                    
                    t.set_postfix(pearson=np.mean(r_mean), cca=np.mean(cca_mean), fve=np.mean(fve_mean))
                    
    return pd.DataFrame.from_records(out_recs)

In [9]:
neural_data = brainscore.get_assembly(name="dicarlo.Majaj2015")
neural_data.load()
stimulus_set = neural_data.attrs['stimulus_set']
# # stimulus_set.to_csv('../data/dicarlo_images/stimulus_set.csv',index=False)
neural_data = process_dicarlo(neural_data)

  xr_data.set_index(append=True, inplace=True, **coords_d)
  result.reset_index(self.multi_group_name, drop=True, inplace=True)
  result.set_index(append=True, inplace=True, **{self.multi_group_name: self.group_coord_names})


In [10]:
sm_imgs = np.load('../data/dicarlo_images/sm_imgs_56x56.npy')

ids3 = stimulus_set[stimulus_set.variation.values==3].image_id.values
sm_ims = list(zip(ids3,sm_imgs[stimulus_set.variation.values==3]))

Xm,Xs = (sm_imgs.mean(),sm_imgs.std())
scaled_sm_imgs = np.clip((sm_imgs-Xm)/Xs,-1,1)

In [11]:
metadata = stimulus_set[['image_id','object_name','category_name','variation','dy_px','dx_px','rxy']].rename(columns={'dx_px':'dx','dy_px':'dy'})
metadata = {k:list(v.values()) for k,v in metadata.to_dict().items()}
# metadata

In [12]:
# dfs = []
# for exp,name in zip(reversed(exps),['no-recon','w/ recon','w/ recon','no-recon']):
#     run = NeptuneExperimentRun(proj_root,neptune_exp=exp)
#     xrs = run.gen_assembly(scaled_sm_imgs, n_units=180, **metadata)
#     lasso_df = lasso(xrs,neural_data,region=None,variation=[0,3],cv=2)
#     lasso_df['model']= name
#     lasso_df['arch']=run.get_config()['encoder_arch']
#     dfs.append(lasso_df)

# lasso_3 = pd.concat(dfs)

In [13]:
obj = ['no-recon','w/ recon','w/ recon','no-recon']
att = []
for exp,o in zip(exps,obj):
    run = NeptuneExperimentRun(proj_root,neptune_exp=exp)
    xr,pcas = run.pca_assembly(scaled_sm_imgs, n_units=None, n_components=0.8, metadata=metadata, pca_kws={'svd_solver':'full'})
    att.append(pcas)

building model DPX-29(arch=convnet, recon=0.0)...
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Compiling model
generating convolutional activations...


PCA: conv_4(5760, 6272):   0%|          | 0/7 [00:00<?, ?it/s]

PCA(n_components=0.8)...





KeyboardInterrupt: 

In [None]:
pca_dfs = []
recs = []
for mod,o in zip(att,obj):
    for k,v in mod.items():
        df = pd.DataFrame.from_records({'fve':v.explained_variance_,
                                        'fve_ratio':v.explained_variance_ratio_,
                                        'component':np.arange(len(v.explained_variance_))})
        df['arch'] = k[:4]
        df['depth'] = int(k[-1:])
        df['layer']=k
        df['objective']=o
        pca_dfs.append(df)
pca_80 = pd.concat(pca_dfs)

In [None]:
pca_80['cum_fve'] = pca_80.groupby(['arch','objective','layer'])['fve_ratio'].transform('cumsum')


In [None]:
pca_80.to_pickle('../data/cca/pca_80fve.pk')

In [None]:
sns.set_context('talk')
g = sns.FacetGrid(data=pca_80,row='objective',col='arch',hue='depth',sharex='col',margin_titles=True,
                  ylim=(0,1),
                  height=4, palette='plasma',legend_out=True,
                 )
# plt.xscale('log')
g.map(sns.lineplot,'component','cum_fve').add_legend()
for a in g.axes.ravel():
    pass
#     a.set_xscale('log')
# g.map(plt.hlines,y=0.8,xmin=0,xmax=600,colors='k',linestyle='dashed')

# sns.lineplot(x='index'y='fve_ratio',hue='')

In [None]:
count_pca_80 = pca_80.groupby(['arch','objective','layer'])['fve'].count().reset_index().rename(columns={'fve':'n_components'})

In [None]:
g = sns.FacetGrid(data=count_pca_80,col='arch',row='objective',sharex='col',sharey=False,margin_titles=True,
#                   ylim=(0,1),
                  height=4, palette='plasma',legend_out=True,
                 )
# plt.xscale('log')
g.map(plt.bar,'layer','n_components')
g.fig.autofmt_xdate(rotation=45)

In [15]:
dfs = []

pca_comps = [500, 250, 100, 100]
obj = ['no-recon','w/ recon','w/ recon','no-recon']

for exp,n_c,obj in zip(exps,pca_comps, obj):
    run = NeptuneExperimentRun(proj_root,neptune_exp=exp)
    xr,pca_objs = run.pca_assembly(scaled_sm_imgs, n_units=None, n_components=n_c, metadata=metadata)
    
    cca_df = cca(xr,neural_data[:,~neural_data.neuroid_id.isin(err_neuroids)],
                 variation=[0,3],cv=6, n_components=1,
                 region=None,brain_region=['IT','V4'],sortby='image_id')
    cca_df['objective']= obj
    cca_df['arch']=run.get_config()['encoder_arch']
    dfs.append(cca_df)

pca_cca_nc_1 = pd.concat(dfs)

building model DPX-29(arch=convnet, recon=0.0)...
Compiling model
generating convolutional activations...


PCA: conv_4(5760, 6272):   0%|          | 0/7 [00:00<?, ?it/s]

PCA(n_components=500)...


PCA: conv_1(5760, 12544): 100%|██████████| 7/7 [00:18<00:00,  3.96s/it]


{'conv_4': PCA(copy=True, iterated_power='auto', n_components=500, random_state=None,
    svd_solver='auto', tol=0.0, whiten=False), 'conv_3': PCA(copy=True, iterated_power='auto', n_components=500, random_state=None,
    svd_solver='auto', tol=0.0, whiten=False), 'conv_2': PCA(copy=True, iterated_power='auto', n_components=500, random_state=None,
    svd_solver='auto', tol=0.0, whiten=False), 'conv_1': PCA(copy=True, iterated_power='auto', n_components=500, random_state=None,
    svd_solver='auto', tol=0.0, whiten=False)}


NameError: name 'KFold' is not defined

In [None]:
pca_cca_nc_1.to_pickle('../data/cca/cca(1_component)_w_pca.pk')

In [None]:
cca_3.head()

In [None]:
sns.set_context('talk')
g = sns.FacetGrid(col='arch',row='model', hue='region', data=pca_cca_3,height=4,sharex=False,sharey='row',margin_titles=True)
g.map(sns.boxenplot,'layer','pearsonr').add_legend()
g.fig.autofmt_xdate(rotation=45)

In [None]:
sns.set_context('talk')
g = sns.FacetGrid(col='region',row='model', hue='arch', data=pca_cca_3,height=4,sharex=False,sharey='row',margin_titles=True)
g.map(sns.boxenplot,'depth','fve').add_legend()
g.fig.autofmt_xdate(rotation=45)

In [None]:
sns.set_context('talk')
g = sns.FacetGrid(col='arch',row='region',hue='model', data=pca_cca_3,height=5,sharex=False)
g.map(sns.boxenplot,'layer','fve').add_legend()
g.fig.autofmt_xdate(rotation=45)

In [None]:
sns.boxplot()

In [None]:
cca_3.head()

In [None]:
sns.set_context('talk')
g = sns.FacetGrid(col='arch',row='region',hue='model', data=cca_3,height=5,sharex=False,palette='viridis')
g.map(sns.boxenplot,'layer','pearsonr').add_legend()
g.fig.autofmt_xdate(rotation=45)

In [None]:
sns.boxenplot(x='depth',y='fve',hue='model',data=cca_3.query('arch == "dense"'))

In [None]:
sns.lineplot(x='layer',y='pearsonr',style='model',hue='region',
#                  data=conv_cca.query('{} == "{}"'.format(split_on,col)),
             data=cca_3.query('region == "IT"'),)
    
plt.xticks(rotation=45)

In [None]:
cca_df_all = cca(xrs,neural_data,variation=[0,3,6],cv=35,region=['conv_1','conv_2','conv_3','conv_4','y_enc','z_enc'],sortby='image_id')

In [None]:
sns.set_context('talk')
cca_df_all['model']='no-recon'
cca_df['model'] = 'no-recon'
g = sns.FacetGrid(col='region',row='model',data=cca_df,height=5)
g.map(sns.stripplot,'layer','pearsonr')
g.fig.autofmt_xdate(rotation=45)

In [None]:
sns.lineplot(x='layer',y='pearsonr',style='model',hue='region',
#                  data=conv_cca.query('{} == "{}"'.format(split_on,col)),
             data=cca_df,)
    
plt.xticks(rotation=45)

In [None]:
import src.rcca as rcca
from sklearn.cross_decomposition import CCA

def dicarlo_cca(data,stimulus_set,region,variation=[3],cv=10):
    
    
#     print(data.image_id.values)
#     print(nd.image_id.values)
    
    print('same order? \t',list(data.sortby('image_id').image_id.values) == list(nd.sortby('image_id').image_id.values))
    
    print('model.shape\t',data.shape)
    print('dicarlo.shape\t',nd.shape)
    out_dict = {'region':[],
#                 'variation':[],
#                 'rdm':[],
                'layer':[],
                'pearsonr':[],
                'p-value':[],
                'iter':[],
               }
    xrs = []
    ab_vectors = []
    ccas = []
        
    cv_tr = []
    cv_te = []
    
    num_images = data.shape[0]
    print(num_images)
    
    for rand_delta in np.arange(cv):
        tr_idx, te_idx, _,_ = train_test_split(np.arange(num_images),np.arange(num_images),train_size=0.75,random_state=np.random.randint(0,50)+rand_delta)
        cv_tr.append(tr_idx)
        cv_te.append(te_idx)
        
    for reg in region:
        sub_dat = data.sel(region=reg)
#         print(sub_dat)
        
        for brain_region in ['V4','IT']:
            
            pairing = '{} x {}'.format(reg,brain_region)
            for n, tr,te in tqdm(zip(np.arange(cv),cv_tr,cv_te),total=cv,desc=pairing):
                cca = CCA(n_components=1)
                cca.fit(sub_dat.values[tr],nd.sel(region=brain_region).values[tr])
            
                ab_vec = cca.transform(sub_dat.values[te],nd.sel(region=brain_region).values[te])
        
                r,pv = pearsonr(ab_vec[0],ab_vec[1])

                out_dict['region'].append(brain_region)
                out_dict['layer'].append(reg)
                out_dict['pearsonr'].append(r[0])
                out_dict['p-value'].append(pv[0])
                out_dict['iter'].append(n)
            
#             print(out_dict)
        
#         ccas.append(cca)
        
#         cca_score = r
        
#         cca_score = cca.score(sub_dat.values,nd.sel(region='IT').values)
        
#         cca = CCA(kernelcca = False, reg = 0.001, numCC = 2)
    
#         X_tr, X_te, y_tr, y_te = train_test_split(np.arange(2560),np.arange(2560))
        
#         data_vecs = [sub_dat.values,sub_dat.values,nd.sel(region='IT').values,nd.sel(region='IT').values]
        
#         idxs = [X_tr, X_te, y_tr, y_te]
        
#         X_tr,X_te, y_tr, y_te = tuple([d[idx] for d,idx in zip(data_vecs,idxs)])
        
# #         ,nd.sel(region='IT').values
        
#         print(X_tr.shape,y_tr.shape)
#         print(X_te.shape,y_te.shape)
        
#         cca.train([X_tr,y_tr])
        
#         cca_score = cca.validate([X_te,y_te])
#         print([t.shape for t in cca_score])

#         xrs.append(cca_score)
        
    return out_dict
        