In [32]:
import json
import pandas as pd
import time
import numpy as np
import scipy.stats
import matplotlib.pyplot as plt
from scipy.stats import spearmanr
import os
pd.options.mode.chained_assignment = None 
%matplotlib inline

In [76]:
def individual_consistency_check(trait_name, sub_folder_prefix, plot_or_not):
    """Compute each subject's consistency on the images they rated twice.

    Reads ``./<sub_folder_prefix><trait_name>/likert_data.csv`` (columns used:
    ``subId``, ``imgName``, ``rating``, ``rt``, ``trial_index``). For every
    subject, the images that appear exactly twice are paired up and the
    Spearman correlation between the first and the second rating is computed.

    A subject is left out of the returned arrays (and reported on stdout)
    when they used fewer than 3 distinct rating values, or when the
    correlation is undefined (NaN), e.g. fewer than two repeated images or
    constant ratings within one of the two repeats.

    Parameters
    ----------
    trait_name : str
        Trait folder name, e.g. 'attractive'.
    sub_folder_prefix : str
        '' for gt data, 'modifae_' for modifae data, any other value
        (e.g. 'modifae_new_') is labelled 'modified new'.
    plot_or_not : bool
        If true, draw one bubble scatter plot per subject (first vs. second
        rating; bubble area grows with the number of identical pairs).

    Returns
    -------
    (rho_array, p_array) : tuple of numpy.ndarray
        Spearman rho and p-value of every subject that was kept.
    """
    if sub_folder_prefix == '':
        task_word = 'raw images'
    elif sub_folder_prefix == 'modifae_':
        task_word = 'modified '
    else:
        task_word = 'modified new'

    likert_data = pd.read_csv('./' + sub_folder_prefix + trait_name + '/likert_data.csv')

    # Number subjects (from 1) and images (from 0) in order of first appearance.
    # NOTE(review): assumes subId / imgName contain no missing values.
    likert_data['subNum'] = pd.factorize(likert_data['subId'])[0] + 1
    likert_data['imgNum'] = pd.factorize(likert_data['imgName'])[0]

    likert_data = likert_data.sort_values(by=['subNum', 'imgNum'], ascending=True)
    likert_data = likert_data[['subNum', 'imgNum', 'rating', 'rt', 'imgName', 'trial_index']].copy()
    likert_data['rating'] = likert_data['rating'].astype(np.int64)

    n_subjects = likert_data['subNum'].nunique()

    rho_lst = []
    p_lst = []
    for sub_idx in range(1, n_subjects + 1):
        # Work on a copy so adding 'freq' never writes into a view of likert_data.
        cur_sub_data = likert_data[likert_data['subNum'] == sub_idx].copy()
        cur_sub_data['freq'] = cur_sub_data.groupby('imgNum')['imgNum'].transform('count')
        num_rating_used = cur_sub_data['rating'].nunique()

        # Keep only images rated exactly twice. A stable sort keeps the two
        # rows of an image in their existing (file) order, so the even rows
        # are reliably the first rating and the odd rows the second one.
        repeats = cur_sub_data[cur_sub_data['freq'] == 2]
        repeats = repeats.sort_values(by=['imgNum'], kind='mergesort')
        repeat_ratings = repeats['rating'].values
        first_half = repeat_ratings[::2]
        second_half = repeat_ratings[1::2]

        if len(first_half) < 2:
            # A rank correlation needs at least two pairs.
            rho, p_value = float('nan'), float('nan')
        else:
            rho, p_value = spearmanr(first_half, second_half)

        if num_rating_used < 3 or np.isnan(rho):
            # Excluded subject: report it instead of letting it distort
            # (or turn into NaN) the group mean and std.
            print('{} {} {} {} {}'.format(sub_idx, trait_name, task_word, num_rating_used, rho))
        else:
            rho_lst.append(rho)
            p_lst.append(p_value)

        # Nothing to draw for a subject without any repeated image.
        if plot_or_not and len(first_half) > 0:
            total_coor = list(zip(first_half, second_half))
            unique_coor = sorted(set(total_coor))
            # Bubble size is proportional to how often a rating pair occurs.
            size = [total_coor.count(coor) for coor in unique_coor]

            x_unique_coor, y_unique_coor = zip(*unique_coor)

            colors = np.random.rand(len(x_unique_coor))

            plt.title('sub' + str(sub_idx)+'rho = {:.2f}, p = {:.2f}'.format(rho, p_value))
            plt.xlabel('First repeat')
            plt.ylabel('Second repeat')
            plt.scatter(x_unique_coor, y_unique_coor, s=np.array(size)*100, alpha=0.5, c=colors)
            plt.show()

    rho_array = np.asarray(rho_lst)
    p_array = np.asarray(p_lst)
    # 'sub num' is the total number of subjects in the file, including excluded ones.
    print('{}, {}, sub num = {}, rho ave = {:.2f}, std = {:.2f}'.format(task_word, trait_name, n_subjects, rho_array.mean(), rho_array.std()))

    return rho_array, p_array

In [77]:
# Traits that were rated, and the folder prefix of each image source
# ('' = raw images, 'modifae_' = modified, 'modifae_new_' = modified new).
trait_lst = [
    'attractive',
    'aggressive',
    'trustworthy',
    'intelligent',
]
img_source_lst = [
    '',
    'modifae_',
    'modifae_new_',
]

# Print the consistency summary of every image source for each trait,
# followed by a divider line once a trait is finished (no plots).
for cur_trait in trait_lst:
    for cur_source in img_source_lst:
        individual_consistency_check(cur_trait, cur_source, plot_or_not=False)
    print('==========')

raw images, attractive, sub num = 15, rho ave = 0.50, std = 0.45
modified , attractive, sub num = 16, rho ave = 0.70, std = 0.34
modified new, attractive, sub num = 15, rho ave = 0.61, std = 0.38
14 aggressive raw images 2 -0.502518907629606
raw images, aggressive, sub num = 15, rho ave = 0.38, std = 0.42
modified , aggressive, sub num = 15, rho ave = 0.27, std = 0.37
modified new, aggressive, sub num = 14, rho ave = 0.39, std = 0.49
5 trustworthy raw images 2 -0.19191919191919196
raw images, trustworthy, sub num = 15, rho ave = 0.27, std = 0.33
modified , trustworthy, sub num = 14, rho ave = 0.39, std = 0.37
modified new, trustworthy, sub num = 12, rho ave = 0.57, std = 0.43
13 intelligent raw images 2 0.393939393939394
raw images, intelligent, sub num = 15, rho ave = 0.61, std = 0.29
1 intelligent modified  1 nan
modified , intelligent, sub num = 15, rho ave = 0.45, std = 0.38
modified new, intelligent, sub num = 12, rho ave = 0.75, std = 0.25
