In [None]:
import pandas as pd 
import numpy as np 
import pathlib as path
import matplotlib.pyplot as plt
import random
from PIL import Image

# Loading data 

In [None]:
# NOTE: these are absolute, machine-specific locations; adjust them before
# running the notebook on another computer.

# Pilot folder: this is where the similarity CSV (similarity_fungi.csv) is read from.
PATH_RESULTS_FOLDER = path.Path(
    r'C:\Users\User\Desktop\pilot'
)

# Folder holding the target images.
PATH_TARGET_IMAGES_FOLDER = path.Path(
    r'C:\Users\User\Desktop\pilot\selected_images_best_mean'
)

# Folder holding all images.
PATH_ALL_IMAGES_FOLDER = path.Path(
    r'C:\Users\User\Desktop\human similarity database\all images\DF20_300_manualy_selected\manualy'
)

# Folder where outputs are saved.
PATH_OUTPUTS = path.Path(
    r'C:\Users\User\Desktop\pilot\Outputs'
)

In [None]:
# Read the pairwise similarity table from the results folder and preview it.
similarity_csv_path = PATH_RESULTS_FOLDER / "similarity_fungi.csv"
similarity_df = pd.read_csv(similarity_csv_path)
similarity_df.head()

In [None]:
# Keep only the relevant data: the image names and the similarity across levels.
# `errors='ignore'` makes the cell safe to re-run: `similarity_df` is
# reassigned here, so on a second execution the columns are already gone and a
# plain drop would raise KeyError.
columns_to_drop = ['Unnamed: 0', 'model', 'category1', 'category2']
similarity_df = similarity_df.drop(columns=columns_to_drop, errors='ignore')
similarity_df.head(5)

# Experimental Setup Parameters 

In [None]:
# Each sub-list names the level columns that are averaged together into one
# similarity score; one best-matching image is picked per sub-list.
# As configured here: the first group is 'level_0' on its own, the second group
# is levels 1-3 and the third group is levels 4-6.
levels = [
    ['level_0'],
    ['level_1', 'level_2', 'level_3'],
    ['level_4', 'level_5', 'level_6'],
]


In [None]:
# Parameters for the second part of the notebook, where the actual trials are
# created from the experimental setup table.
number_subjects_each_image = 10  # subjects per image
total_number_levels = 3          # number of level groups
number_target_images = 6         # number of target images

### Loading the selected images data and saving names 

In [None]:
# Load the selected images from the sub-folders of PATH_TARGET_IMAGES_FOLDER and
# keep their names in the list `selected_images`.
# The selected images are .jpg files, but the image names in the similarity
# table end in .pkl, so the suffix is swapped.
# The paths are sorted because glob does not guarantee an order, and the
# matching done later depends on the order in which targets are processed.
selected_images_paths = sorted(PATH_TARGET_IMAGES_FOLDER.glob("*/*.jpg"))
# with_suffix only touches the file extension (str.replace would rewrite every
# '.jpg' occurrence inside the name).
selected_images = [image.with_suffix('.pkl').name for image in selected_images_paths]

print(f'The total number of selected images is : {len(selected_images)}')
print('The selected images names look like:')
print(selected_images[0:3])

# PART 1 : Creating experimental setup table
#### For each selected image, we find the image with the highest average similarity within each specified group of levels.

In [None]:
# Greedy matching of distractor images to target images.
#
# For every target image (in the order of `selected_images`) and for every
# group of levels in `levels`, pick the candidate image whose mean similarity
# over that group is highest. A candidate is any image that shares a row with
# the target in `similarity_df` and that is
#   * not a selected (target) image itself, and
#   * not already used as a match for an earlier target or an earlier group.
n_level_groups = len(levels)
layer_matches = [[] for _ in range(n_level_groups)]  # layer_matches[k][i]: match of target i for group k
selected_matches = []                                # every match picked so far, in picking order

target_names = set(selected_images)

for target_image in selected_images:
    # keeping only the relevant rows, i.e. those that involve the target image
    target_is_first = similarity_df['image1'] == target_image
    target_is_second = similarity_df['image2'] == target_image
    current_image_df = similarity_df[target_is_first | target_is_second]

    # name of the image paired with the target in each row
    # (the target can sit in either of the two columns)
    partner = pd.Series(
        np.where(current_image_df['image1'] == target_image,
                 current_image_df['image2'],
                 current_image_df['image1']),
        index=current_image_df.index,
    )

    # removing rows whose partner is a target image (this also covers a row
    # pairing the target with itself) or an image that has already been picked
    excluded = target_names.union(selected_matches)
    partner = partner[~partner.isin(excluded)]
    current_image_df = current_image_df.loc[partner.index]

    for idx, level in enumerate(levels):
        if current_image_df.empty:
            raise ValueError(
                f'No candidate images left for target {target_image!r} '
                f'(level group {idx}: {level})'
            )

        # row with the highest mean similarity over the current level group
        best_row = current_image_df[level].mean(axis=1).idxmax()
        image_name = partner.loc[best_row]

        selected_matches.append(image_name)
        layer_matches[idx].append(image_name)

        # dropping the matched image so it can't be picked again; every row of
        # that image is removed, not only the best one, so a pair that is
        # listed more than once cannot bring the same image back
        partner = partner[partner != image_name]
        current_image_df = current_image_df.loc[partner.index]

# Per-group list names used by the earlier version of this cell, kept so that
# code referring to them keeps working (empty when fewer than three groups).
layer_matches_0, layer_matches_1, layer_matches_2 = (layer_matches + [[], [], []])[:3]

layer_columns = [f'layer{group + 1}' for group in range(n_level_groups)]
experiment_setup_df = pd.DataFrame(
    list(zip(selected_images, *layer_matches)),
    columns=['target_image'] + layer_columns,
)
experiment_setup_df.head(5)

## Let's have a look at the pairs

In [None]:
def plot_images_with_distractors_for_pilot(experiment_setup_df,PATH_TARGET_IMAGES_FOLDER,PATH_OUTPUTS,ncols=4,nrows=20,im_range=np.arange(0,20),fig_name='default',path_all_images_folder=None):
    """Plot each target image next to its three matched images and save the figure.

    One figure row is drawn per entry of ``im_range``: the target image in the
    first column, followed by the images named in the ``layer1``, ``layer2``
    and ``layer3`` columns of ``experiment_setup_df``.

    Parameters
    ----------
    experiment_setup_df : pandas.DataFrame
        Table with the columns ``target_image``, ``layer1``, ``layer2`` and
        ``layer3`` holding image names ending in ``.pkl``.
    PATH_TARGET_IMAGES_FOLDER : pathlib.Path
        Folder that is searched recursively for the target ``.jpg`` files.
    PATH_OUTPUTS : pathlib.Path
        Folder the figure is written to (created if it does not exist).
    ncols, nrows : int
        Size of the subplot grid. The grid is enlarged when it would be too
        small for the four image columns or for ``len(im_range)`` rows.
    im_range : iterable of int
        Positional row indices of ``experiment_setup_df`` to plot.
    fig_name : str
        File name (without extension) of the saved ``.svg`` and ``.png``.
    path_all_images_folder : pathlib.Path, optional
        Folder containing the matched images. Defaults to the notebook-level
        ``PATH_ALL_IMAGES_FOLDER``.

    Returns
    -------
    matplotlib.figure.Figure
        The figure that was saved.

    Raises
    ------
    ValueError
        If a target image cannot be found under ``PATH_TARGET_IMAGES_FOLDER``.
    """
    if path_all_images_folder is None:
        path_all_images_folder = PATH_ALL_IMAGES_FOLDER

    image_columns = ['target_image', 'layer1', 'layer2', 'layer3']
    im_range = list(im_range)
    # honour the requested grid, but never make it too small for what is drawn
    ncols = max(ncols, len(image_columns))
    nrows = max(nrows, len(im_range))

    # file name -> path of every target image; the first hit wins, which is
    # what a list.index() lookup over the same listing would return
    target_paths = {}
    for img_path in PATH_TARGET_IMAGES_FOLDER.rglob('*.jpg'):
        target_paths.setdefault(img_path.name, img_path)

    # TODO: add a function to look at the selected pairs when you have all images in folder
    # squeeze=False keeps `axes` two-dimensional even for a single row
    fig,axes=plt.subplots(nrows=nrows,ncols=ncols,figsize=(ncols*2*2,nrows*2*2),sharex=True,sharey=True,squeeze=False,gridspec_kw = {'wspace':0.2, 'hspace':0})
    fig.patch.set_facecolor('white')

    for row_pos, im_ind in enumerate(im_range):
        setup_row = experiment_setup_df.iloc[im_ind]
        for col_pos, column in enumerate(image_columns):
            jpg_name = setup_row[column].replace('.pkl','.jpg')
            if column == 'target_image':
                if jpg_name not in target_paths:
                    raise ValueError(f'{jpg_name} not found under {PATH_TARGET_IMAGES_FOLDER}')
                image_path = target_paths[jpg_name]
            else:
                image_path = path_all_images_folder / jpg_name

            # the context manager closes the file once the image has been drawn
            with Image.open(image_path) as img:
                axes[row_pos,col_pos].imshow(img)
            axes[row_pos,col_pos].set_title(setup_row[column].replace('.pkl',''),color='k')

    # hide the tick marks on every panel
    for ax in axes.flat:
        ax.set_xticks([])
        ax.set_yticks([])

    # save the figure
    PATH_OUTPUTS.mkdir(parents=True, exist_ok=True)
    svg_name=fig_name + '.svg'
    png_name=fig_name + '.png'

    fig.savefig(PATH_OUTPUTS / svg_name,format='svg',bbox_inches='tight')
    fig.savefig(PATH_OUTPUTS / png_name,format='png',bbox_inches='tight')
    return fig


In [None]:
# Setup-table rows 0-19, saved under the name 'all_layer1'.
fig1 = plot_images_with_distractors_for_pilot(
    experiment_setup_df,
    PATH_TARGET_IMAGES_FOLDER,
    PATH_OUTPUTS,
    ncols=4,
    nrows=20,
    im_range=np.arange(0, 20),
    fig_name='all_layer1',
)


In [None]:
# Setup-table rows 20-39, saved under the name 'all_layer2'.
fig2 = plot_images_with_distractors_for_pilot(
    experiment_setup_df,
    PATH_TARGET_IMAGES_FOLDER,
    PATH_OUTPUTS,
    ncols=4,
    nrows=20,
    im_range=np.arange(20, 40),
    fig_name='all_layer2',
)


In [None]:
# Setup-table rows 40-59, saved under the name 'all_layer3'.
fig3 = plot_images_with_distractors_for_pilot(
    experiment_setup_df,
    PATH_TARGET_IMAGES_FOLDER,
    PATH_OUTPUTS,
    ncols=4,
    nrows=20,
    im_range=np.arange(40, 60),
    fig_name='all_layer3',
)


In [None]:
# TODO: allow inserting a list of bad distractor images and replacing them.

# List of "bad" distractors: the plan is to remove them and replace them with
# the second-best matches. For now this cell only reports where they occur.
bad_distractors=['2238482279-94198','2868474407-362883']


layer_cols=[col for col in experiment_setup_df.columns if 'layer' in col]
for bad_image in bad_distractors:
    for layer in layer_cols:
        # regex=False: the ID is matched literally;
        # na=False: missing entries are not reported as matches
        is_bad = experiment_setup_df[layer].str.contains(bad_image, regex=False, na=False)
        for location in np.flatnonzero(is_bad.to_numpy()):
            print(f'{bad_image} found in {layer} at row {location}')

In [None]:
# Mean of every numeric column of the similarity table, over all image pairs.
# numeric_only=True skips the image-name columns, which hold strings and make
# DataFrame.mean() raise a TypeError on recent pandas versions.
mean_similarity_ax = similarity_df.mean(numeric_only=True).plot()
mean_similarity_ax.set(
    xlabel='similarity column',
    ylabel='mean over all image pairs',
    title='Mean similarity per column',
);

# PART 2 : Creating the experiment setup trials from table

In [None]:
# Repeat every row of the setup table (subjects per image x number of levels)
# times, keeping the copies of one row next to each other.
repeats_per_image = number_subjects_each_image * total_number_levels
experiment_setup_df_dup = pd.DataFrame(
    np.repeat(experiment_setup_df.values, repeats_per_image, axis=0),
    columns=experiment_setup_df.columns,
)

In [None]:
# Build the pool of all trials from the duplicated setup table.
#
# The duplicated table is walked in consecutive blocks of
# `number_subjects_each_image` rows. The blocks cycle through layer1, layer2,
# ... so that, for every target image, each layer contributes one block of
# (target_image, pair, layer) trials.
if total_number_levels != len(levels):
    raise ValueError(
        f'total_number_levels ({total_number_levels}) does not match '
        f'the number of level groups in `levels` ({len(levels)})'
    )

block_size = number_subjects_each_image
trial_blocks = []

for block_idx in range(len(selected_images) * len(levels)):
    # layer ids are 1-based and wrap around after the last layer
    layer_id = block_idx % total_number_levels + 1
    layer_name = f'layer{layer_id}'

    # current section of the duplicated data frame
    block_start = block_idx * block_size
    block_rows = experiment_setup_df_dup.iloc[block_start:block_start + block_size]

    trial_blocks.append(
        block_rows[['target_image', layer_name]]
        .rename(columns={layer_name: 'pair'})   # common column name for concat
        .assign(layer=layer_id)                 # remember which layer the pair came from
    )

# a single concat instead of growing the frame inside the loop
if trial_blocks:
    total_trials_pool = pd.concat(trial_blocks, axis=0)
else:
    total_trials_pool = pd.DataFrame(columns=['target_image', 'pair', 'layer'])


In [None]:
# Preview the first rows of the assembled trial pool.
total_trials_pool.head(n=5)


In [None]:
# NOTE(review): this cell used to rebuild `total_trials_pool` from the names
# `target` and `pair`, which are not defined anywhere in the notebook, so it
# failed on a fresh kernel and would have discarded the pool built above.
# It now only fixes the column order of the existing pool and previews it.
total_trials_pool = total_trials_pool.reindex(columns=['target_image', 'pair', 'layer'])
total_trials_pool.head()

In [None]:
# Create, for every subject, an encoding-phase CSV and a test-phase CSV.
# Both files are written to the current working directory.

counter_dict = {}  # '<target_image>-<pair>' -> number of times the combination was drawn

# Trials are removed from a working copy, so `total_trials_pool` itself stays
# intact and the cell can be re-run from the full pool.
remaining_trials_pool = total_trials_pool.copy()

for subject in range(1): # number_subjects_each_image*total_number_levels*number_target_images):

    # ---------- encoding phase ----------
    subject_df = remaining_trials_pool.copy()
    drawn_trials = []
    while len(subject_df)>0:

        # pick a random trial among those still available to this subject
        trial_pos = np.random.choice(len(subject_df))
        random_trial = subject_df.iloc[trial_pos]
        drawn_trials.append(random_trial)

        # counting in dict
        key_name =  random_trial['target_image'] + '-' + random_trial['pair']
        counter_dict[key_name] = counter_dict.get(key_name, 0) + 1

        # removing the trial from the pool shared by all subjects
        remaining_trials_pool = remaining_trials_pool.drop(random_trial.name, axis = 0)

        # removing all trials with the selected target image, so that each
        # subject sees every target image at most once
        subject_df = subject_df[subject_df['target_image'] != random_trial['target_image']]

    # One frame built from all drawn rows at once. The object dtype is kept on
    # purpose: the arrow rows added below contain None, which would otherwise
    # turn the integer 'layer' column into floats in the saved CSV.
    sub_encoding_trials_df = (
        pd.DataFrame(drawn_trials, columns=total_trials_pool.columns)
        .astype(object)
        .reset_index(drop=True)
    )

    # creating the test frame before adding the arrow trials - they are not
    # relevant for the test phase
    sub_test_trials_df = sub_encoding_trials_df.copy()

    # adding the 'correct' column, with None for all image trials
    sub_encoding_trials_df['correct'] = None

    # inserting random arrows - left and right every 14 images
    top_idx = [13,28,43,57]          # base positions of the arrow trials
    random_noise = [0,1,2,-2,-1]     # jitter added to each base position
    arrows = ['right.png','left.png']

    for base_idx in top_idx:
        chosen_noise = random.choice(random_noise)
        chosen_arrow = random.choice(arrows)
        # the half-step index places the new row between two existing rows
        # once the frame is sorted by index
        chosen_idx = base_idx + chosen_noise - 0.5
        correct = chosen_arrow.split('.')[0]

        new_row = [chosen_arrow, None, None,correct]

        # adding the row, then renumbering the rows consecutively
        sub_encoding_trials_df.loc[chosen_idx] = new_row
        sub_encoding_trials_df = sub_encoding_trials_df.sort_index().reset_index(drop=True)

    sub_encoding_trials_df.to_csv(f'sub_encoding{subject}.csv', index=False)

    # ---------- test phase ----------
    correct_column = []

    for row in sub_test_trials_df.index:
        if np.random.randint(0,2) == 0:
            # no swap: the target image stays in the first column
            correct_column.append('left')
        else:
            # swap target and pair, so the target image ends up in the second column
            sub_test_trials_df.loc[row,['target_image','pair']] = sub_test_trials_df.loc[row,['pair','target_image']].values
            correct_column.append('right')

    sub_test_trials_df['correct'] = correct_column
    sub_test_trials_df.columns = ['image1', 'image2', 'layer', 'correct']

    sub_test_trials_df.to_csv(f'sub_test{subject}.csv', index=False)

In [None]:
# Report how many times each target/pair combination was drawn.
print('The total trials for each combination is:')
for combination in counter_dict:
    print(combination, ' : ', counter_dict[combination])