In [15]:
from __future__ import division

import numpy as np
import os
from glob import glob

from PIL import Image
from copy import deepcopy

from sklearn import linear_model, datasets, neighbors
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn import svm

%matplotlib inline
from scipy.misc import imread, imresize
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns

import pandas as pd

import scipy.stats as stats
from scipy.stats import norm
import sklearn

In [16]:
# Directory holding the per-subject, per-ROI feature matrices (.npy) and the
# metadata tables (.csv). Set NEUROSKETCH_FEATURES_DIR to run the notebook on
# another machine; the original location is used when it is unset or empty.
path_to_data = os.environ.get('NEUROSKETCH_FEATURES_DIR') or '/data/jefan/neurosketch_features_3'

In [17]:
# Index the contents of the data directory.
#   METAS / FEATS : sorted file names ending in 'csv' / 'npy'
#   SUBS  / ROIS  : subject id and ROI name parsed from each FEATS entry,
#                   i.e. the first and second '_'-separated fields
# NOTE(review): later cells look up METAS with an index computed on FEATS, so
# the two sorted lists are presumably expected to pair up one-to-one — confirm.
_dir_listing = os.listdir(path_to_data)

def _with_extension(ext):
    """Return the sorted names in the data directory whose last '.'-field equals ext."""
    return sorted([fname for fname in _dir_listing if fname.split('.')[-1]==ext])

METAS = _with_extension('csv')
FEATS = _with_extension('npy')
SUBS = np.array(['{}_neurosketch'.format(fname.split('_')[0]) for fname in FEATS])
ROIS = np.array([fname.split('_')[1] for fname in FEATS])
roi_list = np.array(['V1','LOC','fusiform','IT','occitemp'])
sub_list = np.unique(SUBS)

In [18]:
## Cross-run decoding: for every subject x ROI, load the (transposed) feature
## matrix, z-score it within run, then fit a classifier on one run and score
## it on the other run, in both directions.

def normalize(X):
    """Z-score every column of X, treating rows as observations.

    The per-column standard deviation is floored at 1e-6 so that a constant
    column yields zeros instead of a division by zero.
    """
    X = X - X.mean(0)
    X = X / np.maximum(X.std(0), 1e-6)
    return X


SPLITS = []
for sub in sub_list:
    Splits = []
    for roi in roi_list:
        # single-argument parenthesised print: same output under Python 2 and 3
        print('Analyzing {} of {} ...'.format(roi, sub))

        ## exactly one feature file must match this subject/ROI pair
        _feat_ind = (SUBS==sub) & (ROIS==roi)
        assert np.sum(_feat_ind)==1
        feat_ind = np.flatnonzero(_feat_ind)[0]

        ## take in single subject's feature matrix, take transpose so that its
        ## rows line up with the rows of the metadata table
        F = np.load(os.path.join(path_to_data,FEATS[feat_ind])).transpose()
        assert F.shape[0]==160

        ## read in that subject's metadata file
        # NOTE(review): assumes the sorted csv list pairs one-to-one with the
        # sorted npy list, so that feat_ind selects the matching file — confirm.
        M = pd.read_csv(os.path.join(path_to_data,METAS[feat_ind]))
        assert len(M)==F.shape[0], 'metadata rows do not match feature rows'

        ## positional row indices of each run (independent of M's index labels)
        run1_inds = np.flatnonzero(M.run_num.values==1)
        run2_inds = np.flatnonzero(M.run_num.values==2)

        ## z-score within run and divide into train/test split by run.
        ## Each run's rows are normalized and kept as their own matrix, so the
        ## features stay aligned with the labels below even when the trials of
        ## the two runs are not stored as two contiguous blocks.
        F1 = normalize(F[run1_inds,:])
        F2 = normalize(F[run2_inds,:])
        Labels1 = M.label.values[run1_inds]
        Labels2 = M.label.values[run2_inds]

        ## splits[0]: fit on run1, test on run2
        ## splits[1]: fit on run2, test on run1
        splits = []
        for (Xtrain, ytrain, Xtest, ytest) in [(F1, Labels1, F2, Labels2),
                                               (F2, Labels2, F1, Labels1)]:
            clf = linear_model.LogisticRegression(penalty='l2',C=1,random_state=1).fit(Xtrain, ytrain)
            score = clf.score(Xtest, ytest)
            splits.append(score)
        Splits.append(splits)
    SPLITS.append(Splits)

## accuracy array with axes (subject, ROI, train/test direction)
SPLITS = np.array(SPLITS)

Analyzing V1 of 0110171_neurosketch ...
Analyzing LOC of 0110171_neurosketch ...
Analyzing fusiform of 0110171_neurosketch ...
Analyzing IT of 0110171_neurosketch ...
Analyzing occitemp of 0110171_neurosketch ...
Analyzing V1 of 0110172_neurosketch ...
Analyzing LOC of 0110172_neurosketch ...
Analyzing fusiform of 0110172_neurosketch ...
Analyzing IT of 0110172_neurosketch ...
Analyzing occitemp of 0110172_neurosketch ...
Analyzing V1 of 0111171_neurosketch ...
Analyzing LOC of 0111171_neurosketch ...
Analyzing fusiform of 0111171_neurosketch ...
Analyzing IT of 0111171_neurosketch ...
Analyzing occitemp of 0111171_neurosketch ...
Analyzing V1 of 0112171_neurosketch ...
Analyzing LOC of 0112171_neurosketch ...
Analyzing fusiform of 0112171_neurosketch ...
Analyzing IT of 0112171_neurosketch ...
Analyzing occitemp of 0112171_neurosketch ...
Analyzing V1 of 0112172_neurosketch ...
Analyzing LOC of 0112172_neurosketch ...
Analyzing fusiform of 0112172_neurosketch ...
Analyzing IT of 01121

In [None]:
# Mean accuracy per ROI: average over the two train/test directions (axis 2),
# then over subjects (axis 0). list(...) materializes the pairs so the cell
# shows them even where zip returns a lazy iterator (Python 3).
list(zip(roi_list, SPLITS.mean(2).mean(0)))

In [None]:
# One scatter panel per ROI on a 2x3 grid. Every point is one row of SPLITS
# (one subject): x is the accuracy when testing on run 2, y the accuracy when
# testing on run 1.
plt.figure(figsize=(16,16))
sns.set_context('poster')
for roi_ind, roi in enumerate(roi_list):
    this_roi = SPLITS[:, roi_ind, :]
    ax = plt.subplot(2, 3, roi_ind + 1)
    ax.scatter(this_roi[:, 0], this_roi[:, 1])
    ax.set_xlabel('test on run 2')
    ax.set_ylabel('test on run 1')
    ax.set_title('decoding acc. {}'.format(roi))

In [None]:
# Bar chart of mean decoding accuracy per ROI, saved as a PDF.
fig = plt.figure(figsize=(10,6))
bar_width = 0.50
opacity = 0.8
index = np.arange(len(roi_list))

# Collapse the two train/test directions first: one accuracy per subject x ROI.
subject_acc = SPLITS.mean(2)
yvals = subject_acc.mean(0)

# Error bars span two standard errors of the mean across subjects.
# NOTE(review): np.std defaults to ddof=0 (population std); confirm that this,
# rather than the sample std (ddof=1), is what is wanted for the SEM.
sem = subject_acc.std(0) / np.sqrt(len(sub_list))
bar_height = sem * 2

plt.bar(index, yvals, bar_width,
        yerr=bar_height, alpha=opacity, color=(0.6,0.2,0.2))
tick = plt.xticks(index, roi_list)

# Dashed reference line at 0.25 — presumably chance level for a 4-way
# classification; verify against the number of label classes.
plt.axhline(y=0.25, linewidth=2, color=(0.1,0.1,0.1), linestyle='dashed')
plt.ylim(0, 0.5)
plt.ylabel('classifier accuracy')
plt.tight_layout()
plt.savefig('/home/jefan/sketchfeat/sketches/plots/object_decoding_accuracy_localizer_runs.pdf')