In [1]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import xarray

from tqdm import tqdm as tqdm
from brainscore.metrics import Score
from brainscore.assemblies import walk_coords
from scipy.stats import pearsonr
from src.results.experiments import *
from src.results.experiments import _DateExperimentLoader
from src.models import EConvNet,GResNet
from src.trainer import Trainer
from sklearn.linear_model import LinearRegression,Ridge,RidgeCV

from scipy.stats import gaussian_kde
from scipy.stats import norm
from brainscore.metrics.correlation import Correlation, CrossCorrelation
from brainscore.metrics.regression import pearsonr_correlation,CrossRegressedCorrelation,pls_regression,linear_regression
from brainscore.metrics.behavior import I2n
from brainscore.assemblies import split_assembly
from brainio_base.assemblies import DataAssembly

from sklearn.model_selection import train_test_split

Using TensorFlow backend.


In [2]:
from keras.layers import Input

In [9]:
# Build the encoder graph on a symbolic 56x56x1 image input.
image_in = Input(shape=(56,56,1))
# EConvNet is imported from src.models; `blocks` and `z_dim` are forwarded as given.
EBuilder = EConvNet(blocks=[32,64,128,256],z_dim=35)
# `out` is whatever EBuilder.build returns for this input; it is wrapped in a Model in the next cell.
out = EBuilder.build(image_in)

In [10]:
from keras.models import Model
# Wrap the encoder input/output in a Keras Model so its layer table can be printed.
mod = Model(image_in,out)
mod.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 56, 56, 1)         0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 28, 28, 32)        320       
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 14, 14, 64)        18496     
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 7, 7, 128)         73856     
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 4, 4, 256)         295168    
_________________________________________________________________
batch_normalization_2 (Batch (None, 4, 4, 256)         1024      
_________________________________________________________________
activation_2 (Activation)    (None, 4, 4, 256)         0         
__________

In [5]:
# NOTE(review): `config` is not defined by any earlier cell, so this cell raises
# NameError on a fresh kernel (see the recorded error below). Define/load `config` first.
G_builder = GResNet(y_dim=config.y_dim,z_dim=config.z_dim,dec_blocks=config.dec_blocks,flatten_out=False)

NameError: name 'config' is not defined

In [None]:
# Build the generator/encoder builders from `config` and hand them to the Trainer.
# NOTE(review): neither `config` nor `DL` is defined in the visible cells — they must
# exist in the kernel before this cell can run.
G_builder = GResNet(y_dim=config.y_dim,z_dim=config.z_dim,dec_blocks=config.dec_blocks,flatten_out=False)
E_builder = EConvNet(blocks=config.enc_blocks,z_dim=config.z_dim,)
trainer = Trainer(config,DL,E_builder,G_builder,)

In [None]:
def process_dicarlo(assembly,avg_repetition=True,variation=3,tasks=['ty','tz','rxy']):
    """Reduce a neural assembly to group-averaged responses for one variation level.

    The assembly's stimulus set gains four derived columns (``dy_deg``,
    ``dx_deg``, ``dy_px``, ``dx_px``) in place. The assembly is then restricted
    to ``variation``, grouped, averaged over ``presentation`` within each group,
    squeezed along ``time_bin`` and transposed.

    Parameters
    ----------
    assembly : assembly whose ``attrs['stimulus_set']`` exposes ``ty``, ``tz``
        and ``degrees`` columns and supports ``query``.
    avg_repetition : when False, ``'repetition'`` is added to the grouping keys
        so repetitions stay separate instead of being averaged together.
    variation : value used for ``assembly.sel(variation=...)`` and for
        filtering the stimulus set attached to the result.
    tasks : additional coordinates to group by. The default list is only read,
        never mutated.

    Returns
    -------
    The transposed, averaged assembly; its ``stimulus_set`` attribute is set to
    the rows of the stimulus set matching ``variation`` before transposing.
    """
    stim = assembly.attrs['stimulus_set']

    # Offsets in degrees: tz drives the y offset, ty drives the x offset.
    stim['dy_deg'] = stim.tz*stim.degrees
    stim['dx_deg'] = stim.ty*stim.degrees
    # NOTE(review): 32 is presumably a pixels-per-degree factor — confirm.
    stim['dy_px'] = stim.dy_deg*32
    stim['dx_px'] = stim.dx_deg*32

    # Write the augmented table back onto the input assembly.
    assembly.attrs['stimulus_set'] = stim

    group_keys = ['category_name', 'object_name', 'image_id']+tasks
    if not avg_repetition:
        group_keys.append('repetition')

    reduced = (
        assembly.sel(variation=variation)
        .multi_groupby(group_keys)     # (2)
        .mean(dim='presentation')
        .squeeze('time_bin')    #   (3)
    )
    reduced.attrs['stimulus_set'] = stim.query('variation == {}'.format(variation))

    return reduced.T

In [None]:
# Averaged responses for variation 3 (the process_dicarlo default, "med") and variation 6 ("hi").
# NOTE(review): `neural_data` is not defined in any visible cell — load it before running this.
med_data = process_dicarlo(neural_data)
hi_data = process_dicarlo(neural_data,variation=6)
# lo_data = process_dicarlo(neural_data,variation=0)
# Per-region views of each variation level.
v4_med = med_data.sel(region='V4')
it_med = med_data.sel(region='IT')

v4_hi = hi_data.sel(region='V4')
it_hi = hi_data.sel(region='IT')

In [None]:
med_data

# Load Our Experiments
- Lg Feedforward (2019-06-03)
    - (3000,2000,500,70)
- Sm Feedforward (2019-05-24)
    - (3000,2000,500,15)
- Convolutional

In [None]:
# Load the experiment stored under the 2019-06-03 date key (listed as "Lg Feedforward" above).
# The commented-out lines are alternative dates that were tried.
# lg_ff = _DateExperimentLoader('2019-06-25')
lg_ff = _DateExperimentLoader('2019-06-03')
# sm_ff = _DateExperimentLoader('2019-05-24')
lg_ff.load()

In [None]:
# Name the three loaded assemblies by position.
# NOTE(review): the xent/both/recon labels assume the loader returns assemblies in this
# fixed order — confirm against _DateExperimentLoader.
lg_xent = lg_ff.assemblies[0]
lg_both = lg_ff.assemblies[1]
lg_recon = lg_ff.assemblies[2]

In [None]:
lg_xent

In [None]:
from brainscore.assemblies import split_assembly

In [None]:
from sklearn.linear_model import LinearRegression,Ridge

In [None]:
# Candidate ridge penalties: ten log-spaced values from 1e-2 to 1e2.
alphas = tuple(np.logspace(start=-2, stop=2, num=10))

In [None]:
# Cross-validated ridge over the `alphas` grid; store_cv_values keeps the per-alpha CV values
# so they can be inspected after fitting.
est = RidgeCV(alphas=alphas,store_cv_values=True)
# split_assembly is unpacked here as a (train, test) pair of the IT subset.
tr,te = split_assembly(med_data.sel(region='IT'))

In [None]:
# Regress the `tz` coordinate of each training presentation on the training responses.
est.fit(tr.values,y=tr['tz'])

In [None]:
# Selected penalty, followed by the stored CV values averaged over the first axis.
print(est.alpha_)
est.cv_values_.mean(axis=0)

In [None]:
# Joint density of the two translation coordinates of the "med" stimuli.
# NOTE(review): the factor 8 is an unexplained scale — presumably degrees; confirm and name it.
sns.kdeplot(med_data.ty*8,med_data.tz*8)

In [None]:
def SUCorrelation(da,neuroid_coord,correlation_vars,exclude_zeros=True):
    """Absolute Pearson correlation between every unit and every stimulus property.

    Parameters
    ----------
    da : assembly indexed as ``da[:, unit_mask]`` (units on the second axis)
        with a ``presentation`` dimension.
    neuroid_coord : name of the coordinate that identifies a unit, used both
        for grouping and for ``da.sel``.
    correlation_vars : sequence of named series (each needs a ``.name``), one
        per property, aligned with the presentations of ``da``.
    exclude_zeros : when True, units whose responses sum to exactly zero over
        ``presentation`` are dropped before correlating.

    Returns
    -------
    Score with dims ``('neuroid', 'task')`` holding ``|r|``; the unit-level
    coordinates of ``da`` are carried over and ``task`` holds the series names.
    """
    if exclude_zeros:
        # NOTE(review): the mask comes from a groupby but is applied positionally;
        # this assumes groupby returns units in the same order as `da` — confirm.
        nz_neuroids = da.groupby(neuroid_coord).sum('presentation').values!=0
        da = da[:,nz_neuroids]

    neuroid_ids = da[neuroid_coord].values
    correlations = np.empty((len(neuroid_ids),len(correlation_vars)))
    for i,nid in tqdm(enumerate(neuroid_ids),total=len(neuroid_ids)):
        # The unit's response vector does not depend on the property, so
        # select it once per unit instead of once per (unit, property) pair.
        n_act = da.sel(**{neuroid_coord:nid}).squeeze()
        for j,prop in enumerate(correlation_vars):
            r,_ = pearsonr(n_act,prop)
            correlations[i,j] = np.abs(r)

    # Keep only the coordinates that live on the unit dimension.
    neuroid_dim = da[neuroid_coord].dims
    c = {coord: (dims, values) for coord, dims, values in walk_coords(da) if dims == neuroid_dim}
    c['task']=('task',[v.name for v in correlation_vars])
    result = Score(correlations,
                       coords=c,
                       dims=('neuroid','task'))
    return result

def result_to_df(SUC,corr_var_labels):
    """Flatten a per-unit score into a table with one column per task.

    The unit coordinates of ``SUC.neuroid`` become ordinary columns (via
    ``to_dataframe().reset_index()``), then for every label in
    ``corr_var_labels`` a column of that name is filled with
    ``SUC.sel(task=label).values``.
    """
    frame = SUC.neuroid.to_dataframe()
    frame = frame.reset_index()

    for task_label in corr_var_labels:
        task_scores = SUC.sel(task=task_label)
        frame[task_label] = task_scores.values

    return frame

class MURegressor(object):
    """Fit one regressor per random sub-population of units.

    For each of ``n_splits`` repetitions a set of unit indices is drawn, the
    assembly is restricted to those units, split into a (train, test) pair by
    ``split_assembly`` and a fresh ``estimator()`` is attached to it.

    Parameters
    ----------
    da : assembly indexed as ``da[:, unit_indices]`` with a ``neuroid_id`` coordinate.
    train_frac : stored on the instance only.
        NOTE(review): it is not forwarded to ``split_assembly`` — confirm whether it should be.
    n_splits : number of sub-populations / estimators.
    n_units : units drawn per sub-population (with replacement, so an index can
        repeat). ``None`` uses every unit in every split.
    estimator : zero-argument callable returning an object with
        ``fit``/``predict``/``score``.
    """
    def __init__(self,da,train_frac=0.8,n_splits=5,n_units=None,estimator=Ridge):
        n_total = len(da.neuroid_id)
        if n_units is not None:
            # NOTE(review): randrange samples with replacement; confirm duplicates are intended.
            self.neuroid_idxs = [np.array([random.randrange(n_total) for _ in range(n_units)]) for _ in range(n_splits)]
        else:
            # Previously this case left `neuroid_idxs` unset and failed below
            # with AttributeError; fall back to using all units.
            self.neuroid_idxs = [np.arange(n_total) for _ in range(n_splits)]

        self.original_data = da
        self.train_frac = train_frac
        self.n_splits = n_splits
        self.n_units = n_units

        # Keep the raw (train, test) pairs so they can be inspected later.
        self.splits = [split_assembly(self.original_data[:,n_idxs]) for n_idxs in tqdm(self.neuroid_idxs,total=n_splits,desc='CV-splitting')]
        self.train = [tr for tr,te in self.splits]
        self.test = [te for tr,te in self.splits]

        self.estimators = [estimator() for _ in range(n_splits)]

    def fit(self,y_coord):
        """Fit every estimator on its own training split; the target is coordinate ``y_coord``. Returns self."""
        for mod,train in tqdm(zip(self.estimators,self.train),total=len(self.train),desc='fitting'):
            mod.fit(X=train.values,y=train[y_coord])

        return self

    def predict(self,X=None):
        """Return one prediction per estimator: on ``X`` if given, otherwise on each estimator's own test split."""
        if X is not None:
            return [e.predict(X) for e in self.estimators]
        else:
            return [e.predict(te.values) for e,te in zip(self.estimators,self.test)]

    def score(self,y_coord):
        """Return each estimator's ``score`` on its own test split against coordinate ``y_coord``."""
        return [e.score(te.values,te[y_coord].values) for e,te in zip(self.estimators,self.test)]

In [None]:
def stratified_regressors(data, filt='region',n_units=126,y_coords=['ty','tz'],task_names=None,estimator=Ridge):
    """Score multi-unit regressors separately for every value of a grouping coordinate.

    Parameters
    ----------
    data : assembly supporting ``data[filt].values`` and ``data.sel(**{filt: value})``.
    filt : coordinate whose unique values define the subsets (e.g. region or layer).
    n_units : units sampled per MURegressor split.
    y_coords : coordinates to regress, one set of regressors per coordinate
        (the default list is only read).
    task_names : labels written to the ``task`` column, paired with
        ``y_coords``; defaults to ``y_coords`` itself.
    estimator : estimator class forwarded to MURegressor.

    Returns
    -------
    Long-format DataFrame with columns ``region`` (the subset value, whatever
    ``filt`` is), ``performance`` and ``task``.
    """
    subsets = np.unique(data[filt].values)
    if task_names is None:
        task_names = y_coords
    dfs = []
    for y,task in zip(y_coords,task_names):
        print('regressing {}...'.format(y))
        # Forward the caller's estimator; it used to be hard-coded to Ridge here,
        # which silently ignored the `estimator` argument.
        regressors = {k:MURegressor(data.sel(**{filt:k}),n_units=n_units,estimator=estimator).fit(y_coord=y) for k in subsets}
        df = pd.DataFrame.from_records({k:v.score(y_coord=y) for k,v in regressors.items()})
        # The subset column is always called 'region' because the plotting cells use hue='region'.
        df = df.melt(var_name='region',value_name='performance')
        df['task']=task
        dfs.append(df)

    return pd.concat(dfs)

In [None]:
# Regression performance per region for the hi (variation 6) and med (variation 3) data,
# sampling 100 units per split and requesting RidgeCV as the estimator.
hi_df = stratified_regressors(hi_data,y_coords=['ty','tz','rxy'],n_units=100,
#                               task_names=['tx','ty','rxy'],
                              estimator=RidgeCV)
med_df = stratified_regressors(med_data, y_coords=['ty','tz','rxy'],n_units=100,
#                                task_names=['tx','ty','rxy'],
                               estimator=RidgeCV)

In [None]:
sns.barplot(x='task',y='performance',hue='region',hue_order=['V4','IT'],data=med_df)

In [None]:
sns.barplot(x='task',y='performance',hue='region',hue_order=['V4','IT'],data=hi_df)

In [None]:
# Keep only units whose `layer` is 2, 3 or 4.
# NOTE(review): `lg_both_top` is not used by any visible cell (the next cell passes `lg_both`).
lg_both_top = lg_both[:,lg_both.layer.isin([2,3,4])]

In [None]:
# Per-layer regression performance for the `lg_both` model, 50 units per split.
# The grouping value ends up in the 'region' column even though filt='layer'.
both_df = stratified_regressors(lg_both,filt='layer',y_coords=['tx','ty','rxy'],n_units=50)

In [None]:
# Per-layer regression performance for the `lg_xent` model, 50 units per split.
# lg_xent_top = lg_xent[:,lg_xent.layer.isin([2,3,4])]
xent_df = stratified_regressors(lg_xent,filt='layer',y_coords=['tx','ty','rxy'],n_units=50)

In [None]:
both_df.head()

In [None]:
sns.boxplot(x='task',y='performance',hue='region',data=both_df)

In [None]:
sns.boxplot(x='task',y='performance',hue='region',data=xent_df)

In [None]:
both_regressors

In [None]:
med_v4_MUR.score(y_coord='ty')

In [None]:
# Inspect split shapes and the sampled unit indices of a regressor.
# NOTE(review): `med_MUR_dicarlo` is not defined in any visible cell; only the last
# expression's value is displayed, so the first line's result is discarded.
[(tr.shape,te.shape) for tr,te in med_MUR_dicarlo.splits]
[n for n in med_MUR_dicarlo.neuroid_idxs]

In [None]:
properties = ['tx','ty',
#               'rxy',
             ]

In [None]:
# One named Series per translation coordinate of lg_both, then the per-unit |r| against each.
corr_vars_both = [
    pd.Series(lg_both[coord].values, name=coord)
    for coord in ['tx', 'ty']
]
corr_both = SUCorrelation(
    lg_both,
    neuroid_coord='neuroid_id',
    correlation_vars=corr_vars_both,
)

In [None]:
# One named Series per translation coordinate of lg_xent, then the per-unit |r| against each.
corr_vars_xent = [
    pd.Series(lg_xent[coord].values, name=coord)
    for coord in ['tx', 'ty']
]
corr_xent = SUCorrelation(
    lg_xent,
    neuroid_coord='neuroid_id',
    correlation_vars=corr_vars_xent,
)

In [None]:
# One named Series per entry of `properties` for lg_recon, then the per-unit |r| against each.
corr_vars_recon = [
    pd.Series(lg_recon[coord].values, name=coord)
    for coord in properties
]
corr_recon = SUCorrelation(
    lg_recon,
    neuroid_coord='neuroid_id',
    correlation_vars=corr_vars_recon,
)

In [None]:
# The DiCarlo coordinates are relabelled to match the model tasks: ty -> 'tx', tz -> 'ty';
# rxy keeps its name.
dicarlo_hi_corr_vars = [
    pd.Series(hi_data[coord],name=label)
    for coord,label in (('ty','tx'),('tz','ty'),('rxy','rxy'))
]
corr_dicarlo_hi = SUCorrelation(
    hi_data,
    neuroid_coord='neuroid_id',
    correlation_vars=dicarlo_hi_corr_vars,
    exclude_zeros=True,
)

dicarlo_med_corr_vars = [
    pd.Series(med_data[coord],name=label)
    for coord,label in (('ty','tx'),('tz','ty'),('rxy','rxy'))
]
corr_dicarlo_med = SUCorrelation(
    med_data,
    neuroid_coord='neuroid_id',
    correlation_vars=dicarlo_med_corr_vars,
    exclude_zeros=True,
)


# dicarlo_lo_corr_vars = [
#     pd.Series(lo_data['ty'],name='tx'),
#     pd.Series(lo_data['tz'],name='ty'),
# ]
# corr_dicarlo_lo = SUCorrelation(lo_data,neuroid_coord='neuroid_id',correlation_vars=dicarlo_lo_corr_vars,exclude_zeros=True)



In [None]:
# Per-unit correlation tables for the two variation levels, tagged with their level.
dicarlo_med_df = result_to_df(corr_dicarlo_med,['tx','ty','rxy'])
dicarlo_med_df['variation']=3

dicarlo_hi_df = result_to_df(corr_dicarlo_hi,['tx','ty','rxy'])
dicarlo_hi_df['variation']=6

# dicarlo_lo_df = result_to_df(corr_dicarlo_lo,['tx','ty'])
# dicarlo_lo_df['variation']=0
# dicarlo_lo_df['norm_ty'] = dicarlo_lo_df['ty']

# The KDE cells further down query `dicarlo_df` by variation (6 and 3), so the
# combined table must actually be built; it was previously only in a comment.
dicarlo_df = pd.concat([dicarlo_hi_df,dicarlo_med_df])
# dicarlo_df['norm_ty'] = dicarlo_df['ty']/2

# dicarlo_df = pd.concat([dicarlo_df,dicarlo_lo_df])

In [None]:
# Per-unit correlation tables for the three models; `norm_ty` is a plain copy of `ty`.
# NOTE(review): `both_df` and `xent_df` were assigned earlier to the stratified-regression
# results; this cell rebinds both names, so the earlier boxplot cells depend on run order.
both_df = result_to_df(corr_both,['tx','ty'])
both_df['norm_ty'] = both_df.ty

xent_df = result_to_df(corr_xent,['tx','ty'])
xent_df['norm_ty'] = xent_df.ty

recon_df = result_to_df(corr_recon,['tx','ty'])
recon_df['norm_ty'] = recon_df.ty

In [None]:
def plot_kde(x,y,df,by='region',order=None):
    """Draw one 2-D KDE panel of ``df[x]`` vs ``df[y]`` per value of ``df[by]``.

    Parameters
    ----------
    x, y : column names plotted on the horizontal / vertical axis.
    df : DataFrame holding ``x``, ``y`` and ``by``.
    by : column whose values define the panels.
    order : explicit panel values, left to right; defaults to the distinct
        values of ``df[by]`` in order of first appearance.

    Panels share both axes, limited to (0.0, 0.8). Nothing is returned; the
    figure is drawn through matplotlib.
    """
    if order is not None:
        subsets = order
    else:
        subsets = df[by].drop_duplicates().values

    plot_scale = 5
    # squeeze=False always yields a 2-D array of Axes, so a single subset no
    # longer returns a bare Axes that cannot be zipped over.
    fig,axs = plt.subplots(1,len(subsets),figsize=(plot_scale*len(subsets),plot_scale),sharex=True,sharey=True,
                           squeeze=False,
                           subplot_kw={
                               'xlim':(0.0,0.8),
                               'ylim':(0.0,0.8)
                           })

    for ax,sub in zip(axs[0],subsets):
        # NOTE(review): the subset value is compared as a quoted string; confirm
        # this matches numeric columns such as `layer`.
        sub_df = df.query('{} == "{}"'.format(by,sub))
        sns.kdeplot(sub_df[x],sub_df[y],ax=ax)
        ax.set_title("{}: {}".format(by,sub))

In [None]:
# med_data

In [None]:
def plot_bars(y,df,by='region',order=None):
    """Draw one bar panel of ``df[y]`` per value of ``df[by]``.

    Parameters
    ----------
    y : column whose values are plotted as bar heights.
    df : DataFrame holding ``y`` and ``by``.
    by : column whose values define the panels (also used as the bar category).
    order : explicit panel values, left to right; defaults to the distinct
        values of ``df[by]`` in order of first appearance.

    Nothing is returned; the figure is drawn through matplotlib.
    """
    if order is not None:
        subsets = order
    else:
        subsets = df[by].drop_duplicates().values

    plot_scale = 5
    # squeeze=False keeps `axs` 2-D so the single-subset case is iterable too.
    fig,axs = plt.subplots(1,len(subsets),figsize=(plot_scale*len(subsets),plot_scale),sharex=True,sharey=True,
                           squeeze=False,
                           subplot_kw={
                               'xlim':(0.0,0.8),
                               'ylim':(0.0,0.8)
                           })

    for ax,sub in zip(axs[0],subsets):
        # NOTE(review): the subset value is compared as a quoted string; confirm
        # this matches numeric columns such as `layer`.
        sub_df = df.query('{} == "{}"'.format(by,sub))
        # Pass the filtered frame: without `data=` the column names `by`/`y`
        # had nothing to refer to and `sub_df` was never plotted.
        sns.barplot(x=by,y=y,data=sub_df,ax=ax)
        ax.set_title("{}: {}".format(by,sub))

# plot_bars(y='tx',df=both_df,by='layer',order=np.arange(5))

In [None]:
sns.barplot(x='layer',y='ty',data=xent_df)

In [None]:
plot_kde('tx','ty',both_df,by='layer',order=np.arange(5))

In [None]:
plot_kde('tx','ty',xent_df,by='layer',order=np.arange(5))

In [None]:
plot_kde('tx','norm_ty',recon_df,by='layer',order=np.arange(5))

In [None]:
sns.set_context('talk')
plot_kde('tx','ty',dicarlo_df.query('variation == 6'),by='region',order=['V4','IT'])

In [None]:
plot_kde('tx','ty',dicarlo_df.query('variation == 3'),by='region',order=['V4','IT'])

In [None]:
# g = corr.groupby('region')

# corr_res = corr.reindex(task=corr.task,neuroid=corr.neuroid_id)
# Only the name should change. The chained form `corr = corr.name = 'both'`
# rebinds `corr` to the string first and then fails setting `.name` on it.
# NOTE(review): `corr` is not defined in any visible cell — presumably one of the
# corr_* results above; confirm which.
corr.name = 'both'
corr.reset_coords()

# g.groups
# for l,grp in g:
#     res_grp = grp.dropna('neuroid')
#     res_grp.name=label
#     res_grp = res_grp.reindex(task=res_grp.task,neuroid=res_
#     print(res_grp)
#     res_grp.to_dataframe(name='label').head()

In [None]:
# Group the non-NaN units by region and collect one flattened DataFrame per region.
# NOTE(review): `agg_dfs` is not initialised in any visible cell (needs e.g. `agg_dfs = []`),
# and every frame is written under the literal column name 'label', not the loop variable.
g = corr.dropna(dim='neuroid').reset_index(corr.dims).groupby('region')
for label,group in g:
    agg_dfs.append(group.reset_index(group.dims).to_dataframe(name='label'))

In [None]:
corr_dicarlo

In [None]:
lg.groupby('neuroid_id').groups

In [None]:
from scipy.stats import pearsonr,pearson3

class XArraySUCorrelation(object):
    """Configuration holder for a single-unit correlation over an assembly.

    Stores the assembly, the stimulus coordinate to correlate against, the
    coordinate identifying units and the correlation function. No computation
    is implemented yet.
    """
    def __init__(self,assembly,stimulus_coords='tx',neuroid_coord='neuroid_id',func=pearsonr):
        self.assembly = assembly
        # The parameter is `stimulus_coords`; the body used to read the
        # undefined name `stimulus_coord`, so every construction raised NameError.
        self.stimulus_coord = stimulus_coords
        self.neuroid_coord = neuroid_coord
        self.func = func
        
        

In [None]:
# NOTE(review): scratch cell — pearsonr is called without the two arrays it correlates
# (compare `pearsonr(n_act,prop)` in SUCorrelation), so this raises TypeError. Remove or complete.
pearsonr()

In [None]:
# compact_data = data.multi_groupby(['category_name', 'object_name', 'image_id'])
# compact_data = compact_data.mean(dim='presentation')
# compact_data = compact_data.squeeze('time_bin')  # (3)

In [None]:

# compact_data = compact_data.T  # (4)

In [None]:
# stimulus_set['y_pix'] = scaler.fit_transform(stimulus_set.ty.values.reshape(-1,1))
# stimulus_set['z_pix'] = scaler.fit_transform(stimulus_set.tz.values.reshape(-1,1))

stimulus_set.head()

In [None]:
# Summary statistics of the position columns for the variation-6 stimuli.
# NOTE(review): `stimulus_set` is not defined as a global in any visible cell, and `tx` here
# is a table of stimuli, not the 'tx' task name used elsewhere — consider renaming.
tx = stimulus_set.query('variation == 6')
tx[['ty','tz','x','y','x_px','y_px']].describe()

In [None]:
sns.kdeplot(tx.ty,tx.tz,shade=True)

In [None]:
sns.scatterplot(v4_resp.x,v4_resp.y)

In [None]:
from matplotlib import image

def resp_dist(dat, presentation = None):
    """Show one stimulus image next to the distribution of responses it evoked.

    Parameters
    ----------
    dat : assembly indexed as ``dat[:, presentation]`` whose ``presentation``
        entry unpacks into five values.
    presentation : column index to show; a random one is drawn when None.

    Uses the global ``stimulus_set`` to find the image file and its metadata,
    prints a few metadata columns, and returns ``(distplot_axes, metadata_rows)``.
    """
    _, (image_ax, hist_ax) = plt.subplots(1,2,figsize=(10,5))

    if presentation is None:
        n_presentations = dat.values.shape[1]
        presentation = random.randrange(n_presentations)

    responses = dat[:,presentation]
    # NOTE(review): assumes the presentation index holds exactly these five levels in this order — confirm.
    _, obj_name, image_id, tz, ty = responses.presentation.values.tolist()

    image_path = stimulus_set.get_image(image_id)
    props = stimulus_set.query('image_id == "{}"'.format(image_id))

    # Right panel: normalised histogram/density of the responses.
    g = sns.distplot(responses.values,norm_hist=True,ax=hist_ax)

    # Left panel: the stimulus, titled with its object name and scaled offsets.
    # NOTE(review): the 'yz' label is printed for the `ty` value — possibly a typo; left unchanged.
    image_ax.imshow(image.imread(image_path))
    image_ax.set_title('{} tz:{} yz:{}'.format(obj_name, tz*8,ty*8))
    # Mark the stored pixel position, shifted by 128 on both axes.
    image_ax.scatter(props.x_px.values+128,props.y_px.values+128)

    print(props['image_file_name'].values)
    print(props[['ty','tz']])
    print(props[['x','y','x_px','y_px']])

    return g,props

g,props = resp_dist(v4_resp)
props

In [None]:
# Variation-6 responses kept per repetition: select, group, average within each group,
# then drop the time_bin dimension.
x = (
    neural_data.sel(variation=6)  # (1)
    .multi_groupby(['category_name', 'object_name', 'image_id','repetition','ty','tz'])  # (2)
    .mean(dim='presentation')
    .squeeze('time_bin')
)

In [None]:
def xr_to_df(x):
    """Convert an assembly into a DataFrame with one column per unit.

    ``x.values`` is transposed so that each column is one unit, labelled with
    ``x.neuroid_id``. Three columns are appended in this order: ``class``
    (object name), ``dy`` (taken from ``x.tz``) and ``dx`` (taken from ``x.ty``).
    """
    frame = pd.DataFrame(x.values.T,columns=x.neuroid_id.values)

    # tz feeds dy and ty feeds dx.
    extra_columns = (
        ('class', x.object_name.values),
        ('dy', x.tz.values),
        ('dx', x.ty.values),
    )
    for col_name, col_values in extra_columns:
        frame[col_name] = col_values

    return frame

In [None]:
v4_resp.object_name.values

In [None]:
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import MultiLabelBinarizer,LabelBinarizer

In [None]:
# 5-fold cross-validated linear SVM predicting category from responses.
# NOTE(review): `v4_resp` is not defined in any visible cell; `.values.T` presumably puts
# presentations on rows — confirm.
clf = LinearSVC(C=1,max_iter=10000,verbose=1)
cross_val_score(clf,v4_resp.values.T,v4_resp.category_name.values,verbose=1,cv=5,n_jobs=5)

In [None]:
v4_resp

In [None]:
# Same cross-validated linear SVM, applied to a second response assembly.
# NOTE(review): `IT_resp` is not defined in any visible cell; this cell duplicates the
# previous classifier cell apart from the data — a helper function would avoid the copy.
clf = LinearSVC(C=1,max_iter=10000,verbose=1)
cross_val_score(clf,IT_resp.values.T,IT_resp.category_name.values,verbose=1,cv=5,n_jobs=5)

In [None]:


# SVC was used here without being imported (only LinearSVC is imported above).
from sklearn.svm import SVC

labels = v4_resp.object_name.values
# Fit the binarizer once and transform the labels with it. The previous draft
# referenced an undefined `labeler` and called transform() with no arguments
# on a fresh, unfitted LabelBinarizer for every class.
# NOTE(review): a one-hot label matrix is assumed to be the intent — confirm.
labeler = LabelBinarizer().fit(labels)
binary_labels = labeler.transform(labels)

classifier = SVC(C=10)
# cross_val_score(classifier,v4_resp.values.T,v4_resp.object_name.values,cv=5,verbose=True)

In [None]:
MultiLabelBinarizer()

In [None]:
classifier.predict()

In [None]:
# Split the per-repetition assembly `x` by region and flatten each part with xr_to_df.
v4 = x.sel(region='V4')
v4_df = xr_to_df(v4)

it = x.sel(region='IT')
it_df = xr_to_df(it)

In [None]:
# Open a saved model dataset from disk.
# NOTE(review): absolute, user-specific path — this only works on the original machine;
# consider a configurable data directory.
ds = xarray.open_dataset('/home/elijahc/projects/vae/models/2019-06-03/xent_15_recon_25/label_corruption_0.0/dataset.nc')

In [None]:
# Pick the 'Only Recon' variable and display its coordinates.
# The cell previously ended in a dangling `da.coords.`, which is a SyntaxError.
da = ds['Only Recon']
da.coords

In [None]:
# Per-unit selectivity to the dx / dy stimulus offsets, for V4 and IT.
# NOTE(review): `dicarlo_r` is not defined or imported in any visible cell — confirm where
# it lives (it may come from the star import at the top).
v4_x_sel = dicarlo_r(v4.values.T,prop=v4_df.dx)
v4_y_sel = dicarlo_r(v4.values.T,prop=v4_df.dy)

it_x_sel = dicarlo_r(it.values.T,prop=it_df.dx)
it_y_sel = dicarlo_r(it.values.T,prop=it_df.dy)

# v4_class_sel = dprime(v4_df,num_units=len(v4_resp.neuroid_id),col='class',mask_missing=False)

In [None]:
v4_results = pd.DataFrame({
    'dx':v4_x_sel,
    'dy':v4_y_sel
})

In [None]:
# Cross-validated PLS regression scored with Pearson correlation.
# NOTE(review): the V4 assembly is passed as both arguments, i.e. it is scored against
# itself — confirm this is the intended sanity check.
metric = CrossRegressedCorrelation(regression=pls_regression(),correlation=pearsonr_correlation())
v4_score = metric(v4,v4)

In [None]:
v4_r

In [None]:
# The dangling attribute access `v4_r.` was a SyntaxError; display the object instead.
# NOTE(review): `v4_r` is not defined in any visible cell.
v4_r

In [None]:
v4_df.head()

In [None]:
# resp_dist(v4_resp,random_n=False)

In [None]:
v4_resp

In [None]:
# Resolve and print the file path of the first image listed in the stimulus set.
image_path = stimulus_set.get_image(stimulus_set['image_id'][0])
print(image_path)