In [None]:
# set current directory (where this repo is located)
import os
PROJECT_ROOT = '/home/young/workspace/reconstruction/recon-mnistc'
os.chdir(PROJECT_ROOT)
print('current directory:', os.getcwd())

In [2]:
# load required libraries & modules
%load_ext autoreload
%autoreload 2

from tqdm.notebook import tqdm
import pprint
import time
import warnings
# warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import torch

from utils import *
from loaddata import *
from visualization import *
from ourmodel import *

torch.set_grad_enabled(False)
torch.set_printoptions(sci_mode=False)

DATA_DIR = '../data'
DEVICE = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
# DEVICE = torch.device('cpu')

BATCHSIZE = 1000

PATH_MNISTC = '../data/MNIST_C/'
CORRUPTION_TYPES = ['identity', 
         'shot_noise', 'impulse_noise','glass_blur','motion_blur',
         'shear', 'scale',  'rotate',  'brightness',  'translate',
         'stripe', 'fog','spatter','dotted_line', 'zigzag',
         'canny_edges']

N_MINI_PER_CORRUPTION = 1000

ACC_TYPE = "entropy"

# general helper funtions for model testing
def load_model(args):
    # load model
    model = RRCapsNet(args).to(args.device) 
    model.load_state_dict(torch.load(args.load_model_path))
    return model

def load_args(load_model_path, args_to_update, verbose=False):
    params_filename = os.path.dirname(load_model_path) + '/params.txt'
    assert os.path.isfile(params_filename), "No param flie exists"
    args = parse_params_wremove(params_filename, removelist = ['device']) 
    args = update_args(args, args_to_update)
    args.load_model_path = load_model_path
    if verbose:
        pprint.pprint(args.__dict__, sort_dicts=False)
    return args


###########################
# evaluate on mnist-c original version
############################
@torch.no_grad()
def evaluate_model_on_mnistc_original(corruption, model, verbose=False, save_hooks=False,  max_batch_num=None):
    path_images = os.path.join(PATH_MNISTC, corruption, 'test_images.npy')
    path_labels = os.path.join(PATH_MNISTC, corruption, 'test_labels.npy')

    # convert to torch
    images = np.load(path_images)
    labels = np.load(path_labels)
    transform_tohot = T.Lambda(lambda y: torch.zeros(10, dtype=torch.float).scatter_(0, torch.tensor(y), value=1))
    images_tensorized = torch.stack([T.ToTensor()(im) for im in images])
    labels_tensorized = torch.stack([transform_tohot(label) for label in labels])
    # print(images_tensorized.shape) #torch.Size([10000, 1, 28, 28])
    # print(labels_tensorized.shape) #torch.Size([10000, 10])

    # create dataloader
    kwargs = {'num_workers': 1, 'pin_memory': True} if DEVICE == 'cuda' else {}
    dataset = TensorDataset(images_tensorized, labels_tensorized)
    dataloader = DataLoader(dataset, batch_size=BATCHSIZE, shuffle=False, drop_last=False, **kwargs)

    if save_hooks:
        def get_attention_outputs():
            def hook(model, input, output):
                x_mask_step.append(output[0].detach())
                x_input_step.append(output[1].detach())
            return hook

        def get_capsule_outputs():
            def hook(model, input, output):
                objcaps_step.append(output[0].detach())
                coups_step.append(torch.stack(output[1]['coups'], dim=1))
                betas_step.append(torch.stack(output[1]['betas'], dim=1)) 
                if 'rscores' in output[1].keys():
                    rscores_step.append(torch.stack(output[1]['rscores'], dim=1))
                if 'recon_coups' in output[1].keys():
                    recon_coups_step.append(torch.stack(output[1]['recon_coups'], dim=1))
                if 'outcaps_len' in output[1].keys():
                    outcaps_len_step.append(torch.stack(output[1]['outcaps_len'], dim=1))
                if 'outcaps_len_before' in output[1].keys():
                    outcaps_len_before_step.append(torch.stack(output[1]['outcaps_len_before'], dim=1))
            return hook
        
        outputs = {}

        x_input_step_all = []; x_mask_step_all = []; objcaps_step_all = []

        coups_step_all = []; betas_step_all= []; rscores_step_all=[]; recon_coups_step_all=[] 
        outcaps_len_step_all=[]; outcaps_len_before_step_all=[]

    x_all, y_all, gtx_all, loss_all, acc_all, objcaps_len_step_all, x_recon_step_all = [],[],[],[],[],[],[]
    
    model.eval()      
    
    # get input and gt
    i=0
    for data in dataloader:
        x, y = data
        gtx = None
        
        if max_batch_num:
            if i == max_batch_num:
                break
                
#         if i == max_batch_num:
#             x, y = data
#             gtx = None

        # for hooks over other model output
        x_input_step = []; x_mask_step = []; objcaps_step = []

        if save_hooks:

            # for hooks over dynamic routing
            coups_step = []; betas_step= []; rscores_step=[]; recon_coups_step=[] 
            outcaps_len_step=[]; outcaps_len_before_step=[]

            hook1 = model.input_window.register_forward_hook(get_attention_outputs())
            hook2 = model.capsule_routing.register_forward_hook(get_capsule_outputs())

        # evaluate and append results 
        losses, acc, objcaps_len_step, x_recon_step = evaluate(model, x, y, args, acc_type=ACC_TYPE, gtx=gtx)

        if verbose:
            print("==> On this sigle test batch: test_loss=%.5f, test_loss_class=%.5f, test_loss_recon=%.5f, test_acc=%.4f"
                  % (losses[0], losses[1], losses[2], acc))   

        # main input and output append
        x_all.append(x)
        y_all.append(y)
        if gtx:
            gtx_all.append(gtx)
        #         loss_all.append(losses[0])
        acc_all.append(acc)
        objcaps_len_step_all.append(objcaps_len_step)
        x_recon_step_all.append(x_recon_step)

        if save_hooks:

            # hook variables append
            x_input_step_all.append(torch.stack(x_input_step, dim=1))
            x_mask_step_all.append(torch.stack(x_mask_step, dim=1))
            objcaps_step_all.append(torch.stack(objcaps_step, dim=1))

            coups_step_all.append(torch.stack(coups_step, dim=1))
            betas_step_all.append(torch.stack(betas_step, dim=1))
            if rscores_step:
                rscores_step_all.append(torch.stack(rscores_step, dim=1))
            if recon_coups_step:
                recon_coups_step_all.append(torch.stack(recon_coups_step, dim=1))
            if outcaps_len_step:
                outcaps_len_step_all.append(torch.stack(outcaps_len_step, dim=1))
            if outcaps_len_before_step:
                outcaps_len_before_step_all.append(torch.stack(outcaps_len_before_step, dim=1))

            hook1.remove()
            hook2.remove()        
        
        
        i+=1
        

    
        
    # concat and add to outputs dictionary
    x_all = torch.cat(x_all, dim=0)
    y_all = torch.cat(y_all, dim=0)
    if gtx:
        gtx_all = torch.cat(gtx_all, dim=0)
    else:
        gtx_all = gtx
    acc_all = torch.cat(acc_all, dim=0)
    objcaps_len_step_all = torch.cat(objcaps_len_step_all, dim=0)
    x_recon_step_all = torch.cat(x_recon_step_all, dim=0)
    
    if save_hooks:
        outputs['x_input']= torch.cat(x_input_step_all, dim=0)
        outputs['x_mask']= torch.cat(x_mask_step_all, dim=0)
        outputs['objcaps']= torch.cat(objcaps_step_all, dim=0)

        outputs['coups'] = torch.cat(coups_step_all, dim=0)
        outputs['betas'] = torch.cat(betas_step_all, dim=0)
        if rscores_step_all:
            outputs['rscores'] = torch.cat(rscores_step_all, dim=0)
        if recon_coups_step_all:
            outputs['recon_coups'] = torch.cat(recon_coups_step_all, dim=0)
        if outcaps_len_step_all:
            outputs['outcaps_len'] = torch.cat(outcaps_len_step_all, dim=0)
        if outcaps_len_before_step_all:
            outputs['outcaps_len_before'] = torch.cat(outcaps_len_before_step_all, dim=0)
            
        return x_all, gtx_all, y_all, acc_all, objcaps_len_step_all, x_recon_step_all, outputs  

    else:
        return x_all, gtx_all, y_all, acc_all, objcaps_len_step_all, x_recon_step_all
    
    

@torch.no_grad()
def evaluate_cnn_on_mnistc_original(corruption, cnn, max_batch_num=None):
    path_images = os.path.join(PATH_MNISTC, corruption, 'test_images.npy')
    path_labels = os.path.join(PATH_MNISTC, corruption, 'test_labels.npy')

    # convert to torch
    images = np.load(path_images)
    labels = np.load(path_labels)
    transform_tohot = T.Lambda(lambda y: torch.zeros(10, dtype=torch.float).scatter_(0, torch.tensor(y), value=1))
    images_tensorized = torch.stack([T.ToTensor()(im) for im in images])
    labels_tensorized = torch.stack([transform_tohot(label) for label in labels])
    # print(images_tensorized.shape) #torch.Size([10000, 1, 28, 28])
    # print(labels_tensorized.shape) #torch.Size([10000, 10])

    # create dataloader
    kwargs = {'num_workers': 1, 'pin_memory': True} if DEVICE == 'cuda' else {}
    dataset = TensorDataset(images_tensorized, labels_tensorized)
    dataloader = DataLoader(dataset, batch_size=BATCHSIZE, shuffle=False, drop_last=False, **kwargs)

    # save output
    x_all, y_all, pred_all, acc_all, class_prob_all = [],[],[], [],[]
    cnn.eval() 

    # get input and gt
    i=0
    for data in dataloader:
        x, y = data
        gtx = None
        
        if max_batch_num:
            if i == max_batch_num:
                break

#         if i == max_batch_num:
#             x, y = data
#             gtx = None
                    
        data, target = x.to(DEVICE),  y.to(DEVICE)
        target = target.argmax(dim=1, keepdim=True)
        output = cnn(data)
        #                 test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
        pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
        acc = pred.eq(target.view_as(pred))

        x_all.append(data)
        y_all.append(target.flatten())
        pred_all.append(pred.flatten())
        acc_all.append(acc.flatten().float())
        class_prob_all.append(output)
        
        i+=1


    # concat and add to outputs dictionary
    x_all = torch.cat(x_all, dim=0)
    y_all = torch.cat(y_all, dim=0)
    pred_all = torch.cat(pred_all, dim=0)
    acc_all = torch.cat(acc_all, dim=0)
    class_prob_all = torch.cat(class_prob_all, dim=0)

    return x_all, y_all, class_prob_all, pred_all, acc_all

In [3]:
import matplotlib
import collections
import json
import random

def save_imgarr(imgarr, filename='test.png', scale=8):
    h, w, _ = imgarr.shape
    fig, axes = plt.subplots(figsize=(h*scale, w*scale))
    fig.subplots_adjust(top=1.0, bottom=0, right=1.0, left=0, hspace=0, wspace=0) 
    axes.imshow(imgarr, cmap='gray_r')
    axes.axis('off')
    plt.savefig(filename, dpi=1, format='png') 
    plt.close(fig)

# experiment 1: timestep vs RT on accurate trials

In [30]:
########
# get nsteps for correct trials 
########

task='mnist_c_original'
train=False #train or test dataset
print_args=False


# load model
args_to_update = {'device':DEVICE, 'batch_size':BATCHSIZE, 
                 'time_steps': 5, 'routings': 3,'mask_threshold': 0.1}

load_model_path = './models/rrcapsnet/rrcapsnet_best.pt' #run1

args = load_args(load_model_path, args_to_update, print_args)
model = load_model(args)

#  obtain model prediction

d_triallist = {}
CORRUPTION_INTEREST = ['identity', 'glass_blur','motion_blur', 'impulse_noise','shot_noise',
        'fog','dotted_line','spatter', 'zigzag']

for corruption in CORRUPTION_INTEREST :

    d_triallist[corruption] = {}
    
    if task =='mnist_c_original':
        print("original is used")
        x, gtx, y_hot, acc_model, objcaps_len_step, x_recon_step = \
        evaluate_model_on_mnistc_original(corruption, model, verbose=False, save_hooks=False)
        print(f'==> corruption type: {corruption}, this batch acc: {acc_model.mean().item()}')
    else:
        raise NotImplementedError

    # get model prediction
    objcaps_len_step_narrow = objcaps_len_step.narrow(dim=2,start=0, length=args.num_classes)
    # pred_model = objcaps_len_step_narrow.max(dim=-1)[1][:,-1] #torch.Size([1000, 3])

    if ACC_TYPE=='hypothesis':
        if args.time_steps==1:
            y_pred = objcaps_len_step_narrow[:,-1]
            accs = topkacc(y_pred, y_true, topk=1)
        else:
            acc_model_check, pred_model, nstep  = compute_hypothesis_based_acc(objcaps_len_step_narrow, y_hot, only_acc=False)

    elif ACC_TYPE == 'entropy':    
        if args.time_steps==1:
            y_pred = objcaps_len_step_narrow[:,-1]
            accs = topkacc(y_pred, y_true, topk=1)
        else: 
            acc_model_check, pred_model, nstep, no_stop_condition, entropy_model =compute_entropy_based_acc(objcaps_len_step_narrow, y_hot, threshold=0.6, use_cumulative = False, only_acc= False)

    assert round(acc_model.mean().item(), 4) == round(acc_model_check.float().mean().item(), 4)

    # get accurate trials
    nstep_masked = acc_model.cpu().numpy()*nstep.cpu().numpy()
    id_incorrect = np.where(nstep_masked==0)[0].tolist()
    id_correct = np.where(nstep_masked!=0)[0].tolist()
    print('\n========  ', corruption, '  ========')
    print('correct: ', len(id_correct) )
    print('incorrect: ', len(id_incorrect) )
#     d_triallist[corruption]['correct'] = id_correct
#     d_triallist[corruption]['incorrect'] = id_incorrect
    
    # trial ids that take N steps to finish recognition
    POSSIBLE_NSTEP = [1, 2, 3, 4, 5]
    for n in POSSIBLE_NSTEP:
        id_step = np.where(nstep_masked==n)[0].tolist()
        d_triallist[corruption][f'step{n}'] = id_step
        print(f'step{n}: ', len(id_step))   




TASK: mnist_recon_low (# targets: 1, # classes: 10, # background: 0)
TIMESTEPS #: 5
ENCODER: resnet w/ None projection
...resulting primary caps #: 288, dim: 8
ROUTINGS # 3
Object #: 10, BG Capsule #: 0
DECODER: fcn, w/ None projection
...recon only one object capsule: True
...use recon mask for attention: True
...with mask type bool, threshold 0.1, apply_method match

original is used
==> corruption type: identity, this batch acc: 0.9937999844551086

correct:  9938
incorrect:  62
step1:  9847
step2:  26
step3:  7
step4:  1
step5:  57
original is used
==> corruption type: glass_blur, this batch acc: 0.9411999583244324

correct:  9412
incorrect:  588
step1:  8551
step2:  442
step3:  77
step4:  22
step5:  320
original is used
==> corruption type: motion_blur, this batch acc: 0.9667999744415283

correct:  9668
incorrect:  332
step1:  9052
step2:  360
step3:  36
step4:  15
step5:  205
original is used
==> corruption type: impulse_noise, this batch acc: 0.9702999591827393

correct:  9703
i

In [14]:
# save dictionary and summary stats
import json
with open('./results/nstep_trialid.json', 'w') as fp:
    json.dump(d_triallist, fp)
    
# get counts for each condition (nstep)
cols = list(d_triallist)
rows = list(d_triallist[cols[0]])
countdf = pd.DataFrame(columns=cols, index=rows)

for c in cols:
    for r in rows:
        countdf.loc[r,c] = len(d_triallist[c][r])
        
countdf.to_csv('./results/corruption_nstep_count.csv')
countdf

Unnamed: 0,identity,glass_blur,motion_blur,impulse_noise,shot_noise,fog,dotted_line,spatter,zigzag
step1,9847,8551,9052,7750,9609,912,9164,9535,7662
step2,26,442,360,1552,43,8265,565,117,1184
step3,7,77,36,188,15,401,48,20,217
step4,1,22,15,55,2,75,7,4,72
step5,57,320,205,158,97,81,63,148,157


In [None]:
########
# sample stimuli from each condition
##########
# low: 20 trials from step 1
# med: 20 trials from step2 and 3
# high: 20 trials from step4 and 5
import gc
import json

with open('./results/nstep_trialid.json', 'r') as fp:
    d_triallist = json.load(fp)
    
corruption = 'identity'
# ['identity', 'glass_blur','motion_blur', 'impulse_noise','shot_noise',
#         'fog','dotted_line','spatter', 'zigzag']


N_TRIAL_PER_CONDITON=20

id_step1_sampled = random.sample(d_triallist[corruption]['step1'], N_TRIAL_PER_CONDITON)
id_step23_sampled = random.sample(d_triallist[corruption]['step2']+d_triallist[corruption]['step3'], N_TRIAL_PER_CONDITON)
id_step45_sampled = random.sample(d_triallist[corruption]['step4']+d_triallist[corruption]['step5'], N_TRIAL_PER_CONDITON)

#########
# load model predictions for stimuli info
#########
task='mnist_c_original'
train=False #train or test dataset
print_args=False

args_to_update = {'device':DEVICE, 'batch_size':BATCHSIZE, 
                 'time_steps': 5, 'routings': 3,'mask_threshold': 0.1}

load_model_path = './models/rrcapsnet/rrcapsnet_best.pt' #run1

args = load_args(load_model_path, args_to_update, print_args)
model = load_model(args)

if task =='mnist_c_original':
    print("original is used")
    x, gtx, y_hot, acc_model, objcaps_len_step, x_recon_step = \
    evaluate_model_on_mnistc_original(corruption, model, verbose=False, save_hooks=False)
    print(f'==> corruption type: {corruption}, this batch acc: {acc_model.mean().item()}')
else:
    raise NotImplementedError

# get model prediction
objcaps_len_step_narrow = objcaps_len_step.narrow(dim=2,start=0, length=args.num_classes)
# pred_model = objcaps_len_step_narrow.max(dim=-1)[1][:,-1] #torch.Size([1000, 3])

if ACC_TYPE=='hypothesis':
    if args.time_steps==1:
        y_pred = objcaps_len_step_narrow[:,-1]
        accs = topkacc(y_pred, y_true, topk=1)
    else:
        acc_model_check, pred_model, nstep  = compute_hypothesis_based_acc(objcaps_len_step_narrow, y_hot, only_acc=False)

elif ACC_TYPE == 'entropy':    
    if args.time_steps==1:
        y_pred = objcaps_len_step_narrow[:,-1]
        accs = topkacc(y_pred, y_true, topk=1)
    else: 
        acc_model_check, pred_model, nstep, no_stop_condition, entropy_model =compute_entropy_based_acc(objcaps_len_step_narrow, y_hot, threshold=0.6, use_cumulative = False, only_acc= False)

assert round(acc_model.mean().item(), 4) == round(acc_model_check.float().mean().item(), 4)

########
# generate and save stimuli
########
path_save = './stimuli/stimuli-exp1-step5/'
POSSIBLE_NSTEP = list(d_triallist[corruption])

for step in POSSIBLE_NSTEP:              
    if step=='step1':
        trialid_to_visualize = id_step1_sampled 
    elif step=='step2' or step=='step3':
        trialid_to_visualize = [ti for ti in d_triallist[corruption][step] if ti in id_step23_sampled]
    elif step=='step4' or step=='step5':
        trialid_to_visualize = [ti for ti in d_triallist[corruption][step] if ti in id_step45_sampled]    

    for trialid in trialid_to_visualize:
        our_pred = pred_model[trialid].cpu().item()
        gt =y_hot.max(dim=1)[1][trialid].cpu().item()

        # save image x8 original pixel size
        imgarray = x[trialid].numpy()
        filename = f'{corruption}_{step}_t{trialid}_g{gt}_o{our_pred}.png'
        save_imgarr(np.transpose(imgarray,(1,2,0)), filename= path_save+filename)

print('all images are saved')
gc.collect()


In [14]:
## change filenames
# import os
# path = './stimuli/stimuli-exp1-step4/'

# for f in os.listdir(path):
#     if not f.startswith('.'):
#         fsplit = f.split('_')
#         stepsize = fsplit[-1].split('.')[0]
#         corruption =  fsplit[:-4]
#         newf = '_'.join(corruption + [stepsize] + fsplit[-4:-1] ) + '.png'
#         os.rename(path+f, path+newf)

# generate experiment source file

In [28]:
############################
# create trial file
############################
path_exp = './stimuli/stimuli-exp1-step5/'
# masklist = ['./stimuli/stimuli-mask/'+f for f in os.listdir('./stimuli/stimuli-mask/') if not f.startswith('.')]
masklist = [f for f in os.listdir('./stimuli/stimuli-mask/') if not f.startswith('.')] #when only filenames (not entire path) is used to create source file

df = pd.DataFrame(columns=['trialtype', 'corruption',  'cond', 'nstep', 'imgID', 'gt', 'our', 'imgpath', 'maskpath'] )


i=1
for f in os.listdir(path_exp):
    if not f.startswith('.'):
        
        corruption = '_'.join(f.split('_')[:-4])
        if corruption == 'identity': #use as practice trials
            trialtype = 'prac'
        else:
            trialtype = 'exp'
        
        nstep = int(f.split('_')[-4][-1])
        if nstep == 1:
            cond = 'low'
        elif nstep == 2 or nstep ==3:
            cond = 'med'
        elif nstep == 4 or nstep ==5:
            cond = 'high'
        else:
            raise NotImplementedError
        
        imgID = int(f.split('_')[-3][1:])
        gt = int(f.split('_')[-2][1:])
        our = int(f.split('_')[-1][1])
#         imgpath = path_exp + f
        imgpath = f
        maskpath= random.sample(masklist,1)[0]

        df.loc[i] = [trialtype, corruption, cond, nstep, imgID, gt, our, imgpath, maskpath]   
        i+=1
        
df = df.sort_values(by=['trialtype', 'corruption', 'cond']).reset_index(drop=True)
df_prac = df[df['trialtype']=='prac'].copy()
df_exp = df[df['trialtype']=='exp'].copy()

print('df_prac length ', len(df_prac), ' df_exp length ', len(df_exp))
print(df_exp.corruption.unique())

df_prac length  10  df_exp length  480
['dotted_line' 'fog' 'glass_blur' 'impulse_noise' 'motion_blur'
 'shot_noise' 'spatter' 'zigzag']


In [29]:
###############
# separate exp df into 5 unique sets; 5 sets * 96 images (8corruptions*4high*4med*4low)
###############
N_SET = 5
df_exp['cumcount'] = df_exp.groupby(['corruption', 'cond']).cumcount()+1
df_exp['setnum'] = df_exp['cumcount'].apply(lambda x: int(x%N_SET + 1))
df_exp = df_exp.drop(columns =['cumcount'])

sourcedf_all = []
for i in range(1, N_SET+1):
    expset = df_exp[df_exp.setnum==i] 
    expset = expset.sample(frac=1).reset_index(drop=True) #shuffle
    combined = df_prac.merge(expset, how='outer') # merge with prac
    combined = combined.reset_index(drop=True)
    combined['setnum'] = i # since prac parts has no set numbers
    sourcedf_all.append(combined)
#     combined.to_csv(f'exp1_source{i}.csv', index=False)

df_all = pd.concat(sourcedf_all)
# print('source csvs are saved to disk')

In [30]:
df_all.head(20)

Unnamed: 0,trialtype,corruption,cond,nstep,imgID,gt,our,imgpath,maskpath,setnum
0,prac,identity,low,1,1770,7,7,identity_step1_t1770_g7_o7.png,mask_1.png,1
1,prac,identity,low,1,9570,1,1,identity_step1_t9570_g1_o1.png,mask_5.png,1
2,prac,identity,low,1,2114,5,5,identity_step1_t2114_g5_o5.png,mask_6.png,1
3,prac,identity,low,1,4758,3,3,identity_step1_t4758_g3_o3.png,mask_9.png,1
4,prac,identity,low,1,9397,9,9,identity_step1_t9397_g9_o9.png,mask_8.png,1
5,prac,identity,low,1,1202,8,8,identity_step1_t1202_g8_o8.png,mask_6.png,1
6,prac,identity,low,1,5872,1,1,identity_step1_t5872_g1_o1.png,mask_8.png,1
7,prac,identity,low,1,6251,4,4,identity_step1_t6251_g4_o4.png,mask_9.png,1
8,prac,identity,low,1,1106,6,6,identity_step1_t1106_g6_o6.png,mask_6.png,1
9,prac,identity,med,2,3778,5,5,identity_step2_t3778_g5_o5.png,mask_3.png,1


In [31]:
df_all.to_csv('./stimuli/datasource-exp1.csv', index=False)

In [4]:
######
# how many unique images overlap?
######
import pandas as pd
df_all = pd.read_csv('./stimuli/datasource-exp1.csv')
df_exp = df_all[df_all['trialtype']=='exp']
# df_exp.imgID.nunique() #449
# len(df_exp.imgID) # 480 --> 31 overlap, making 6%