## Load libraries

In [40]:
import os, sys
import pandas as pd
from IPython.display import clear_output
import importlib
import numpy as np
import re
import base64
import ast
import warnings
import random
import string
from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import spearmanr,pearsonr, kendalltau
from sklearn.metrics.pairwise import cosine_distances, cosine_similarity
from scipy.stats import entropy, ttest_ind,ttest_rel
from tqdm import tqdm
import statsmodels.api as sm
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")

import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['font.family'] = 'sans-serif'
mpl.rcParams['font.sans-serif'] = 'Helvetica'
warnings.filterwarnings('ignore')

pd.options.mode.chained_assignment = None

import scripts.utils as utils

%load_ext autoreload
%autoreload 2

import hashlib

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


###  directory & file hierarchy

In [None]:
# Project layout. Every path is derived from the repository root, which is
# resolved as two levels above the current working directory.
proj_dir = os.path.abspath('../..')
stimuli_dir = os.getcwd()
results_dir, analysis_dir, data_dir = (
    os.path.join(proj_dir, leaf) for leaf in ('results', 'analysis', 'data')
)
# plot_dir = os.path.join(results_dir,'plots')
# csv_dir = os.path.join(results_dir,'csv')
exp_dir = os.path.abspath(os.path.join(proj_dir, 'experiments'))

## make the project's helper modules importable (appended at most once)
_helpers_dir = os.path.join(proj_dir, 'utils')
if _helpers_dir not in sys.path:
    sys.path.append(_helpers_dir)

def make_dir_if_not_exists(dir_name):
    """Create ``dir_name`` (including missing parents) if needed and return it.

    Parameters
    ----------
    dir_name : str or path-like
        Directory to create.

    Returns
    -------
    str or path-like
        ``dir_name`` unchanged, so the call can be used inline when building paths.

    Raises
    ------
    FileExistsError
        If ``dir_name`` already exists but is not a directory.
    """
    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.exists() test when several processes create the same directory.
    os.makedirs(dir_name, exist_ok=True)
    return dir_name

###  helper functions

In [None]:
# def SDI(data):
#     """
#     Calculate the Simpson's Diversity Index for a list of data.
#     """
#     N = np.sum(data)
#     n = np.array(data)
    
#     p = n / N
#     print(p)
#     return 1 - np.sum(p ** 2)


# def mm_normalize(row, min_max_dict):
#     min_recog = min_max_dict[row['uniqueID']]['min']
#     max_recog = min_max_dict[row['uniqueID']]['max']
#     if (max_recog - min_recog) == 0:
#         return row['mean_accuracy']
#     norm_acc = (row['mean_accuracy'] - min_recog) / (max_recog - min_recog)
#     return norm_acc




# def compute_similarities(df, spose_embeds, spose_cols, random=False):
#     similarities = []
#     for _, row in df.iterrows():
#         if random:
#             target_concept = np.random.choice(spose_embeds['concept'].unique())
#         else:
#             target_concept = row.uniqueID
        
#         target_embed = spose_embeds[spose_embeds['concept'] == target_concept][spose_cols].values[0]
#         responses = row.response_list
#         response_embeds = [spose_embeds[spose_embeds['concept'] == resp][spose_cols].values[0] for resp in responses]
#         mean_response_embed = np.mean(response_embeds, axis=0)
#         similarity_score = cosine_similarity(target_embed.reshape(1, -1), mean_response_embed.reshape(1, -1))[0][0]
#         similarities.append(similarity_score)
#     return similarities

# def spose_permutation_test(df, spose_embeds, spose_cols, n_permutations=10):
#     # Compute original similarities
#     df['similarity_score'] = compute_similarities(df, spose_embeds, spose_cols)
#     df_agg = df.groupby('uniqueID').agg({'similarity_score': 'mean', 'correct': 'mean'}).reset_index()
#     original_r, _ = pearsonr(df_agg.similarity_score, df_agg.correct)
    
#     # Permutation test
#     permuted_rs = []
#     for _ in range(n_permutations):
#         df['similarity_score'] = compute_similarities(df, spose_embeds, spose_cols, random=True)
#         df_agg = df.groupby('uniqueID').agg({'similarity_score': 'mean', 'correct': 'mean'}).reset_index()
#         r, _ = pearsonr(df_agg.similarity_score, df_agg.correct)
#         permuted_rs.append(r)
    
#     p_value = np.mean(np.abs(permuted_rs) >= np.abs(original_r))
    
#     return original_r, permuted_rs, p_value

# # Run the permutation test
# r, permuted_rs, p_value = spose_permutation_test(things_draw_recog_df, spose_embeds, spose_cols)

# print(f"Original r: {r}")
# print(f"P-value: {p_value}")

In [None]:
# plt.figure(figsize=(10, 6))
# plt.hist(permuted_rs, bins=50, edgecolor='black')
# plt.axvline(r, color='red', linestyle='dashed', linewidth=2)
# plt.title('Distribution of Permuted Correlation Coefficients')
# plt.xlabel('Pearson r')
# plt.ylabel('Frequency')
# plt.show()

In [None]:
def encrypt_worker_id(worker_id, salt='dot2025'):
    """
    Pseudonymise a worker ID by hashing it together with a salt.

    Despite the name this is a one-way SHA-256 hash, not reversible encryption:
    the same (worker_id, salt) pair always yields the same digest.

    Parameters:
    -----------
    worker_id : str or any
        The worker ID; it is converted with ``str()`` before hashing.
    salt : str, optional
        String appended to the worker ID before hashing.

    Returns:
    --------
    str
        The SHA-256 digest as a 64-character hexadecimal string.
    """
    # The salt is appended (not prepended) to the stringified ID.
    payload = "".join((str(worker_id), salt))
    digest = hashlib.sha256(payload.encode('utf-8'))
    return digest.hexdigest()

### load dataframes

In [None]:
### THINGS concepts metadata
things_main_df = pd.read_csv(os.path.join(data_dir, 'things_concepts.tsv'), sep='\t')
### dict that maps words to uniqueIDs
concept2uid = dict(zip(things_main_df['Word'], things_main_df['uniqueID']))
### array of THINGS concept uniqueIDs, in file order
things1854concepts = things_main_df['uniqueID'].values
### dict that maps each uniqueID to its 'All Bottom-up Categories' entry (wordnet(?) categories)
concept2category_dict = dict(
    zip(things_main_df['uniqueID'], things_main_df['All Bottom-up Categories'])
)

### embeddings from GPT circa 2023
things_gpt_word_embeds = pd.read_csv(
    os.path.join(data_dir, 'things1854_gpt_embeddings.csv'), index_col=0
)
### embeddings from definitions of THINGS concepts 2023
things_gpt_def_embeds = pd.read_csv(
    os.path.join(data_dir, 'things1854_def_gpt_embeddings.csv'), index_col=0
)

In [None]:
### directories holding the CSV exports of the recognition and production (drawing) tasks
things_draw_recog_dir = os.path.join(data_dir, 'thingsdraw_recog')
things_draw_prod_dir = os.path.join(data_dir, 'things_drawing_1854_complete')

### os.listdir returns entries in arbitrary order; sorting the file names makes the row order
### of the concatenated dataframes reproducible across machines and runs.
things_draw_recog_files = sorted(
    f for f in os.listdir(things_draw_recog_dir)
    if f.endswith('sketches.csv') and os.path.isfile(os.path.join(things_draw_recog_dir, f))
)
things_draw_prod_files = sorted(
    f for f in os.listdir(things_draw_prod_dir)
    if f.endswith('.csv') and os.path.isfile(os.path.join(things_draw_prod_dir, f))
)

### read every listed file and concatenate into a single dataframe per task
things_draw_recog_df = pd.concat(
    [pd.read_csv(os.path.join(things_draw_recog_dir, f), index_col='Unnamed: 0') for f in things_draw_recog_files],
    ignore_index=True,
)
things_draw_prod_df = pd.concat(
    [pd.read_csv(os.path.join(things_draw_prod_dir, f), index_col='Unnamed: 0') for f in things_draw_prod_files],
    ignore_index=True,
)
### demographics for the recognition task
things_draw_recog_demo_df = pd.read_csv(os.path.join(things_draw_recog_dir, 'things_draw_recog_demographics.csv'))

### quality control: a sketch is kept only if it has at least 2 'valid' counts
qc_df = pd.read_csv(os.path.join(data_dir, 'qc_df_tmp.csv'))
valid_counts = qc_df.groupby('sketch_id')['valid'].sum()
valid_ids = valid_counts[valid_counts >= 2].index.values


### THINGS External data

In [None]:
### THINGS sparse (SPoSE) embeddings: headerless tab-separated file, so the embedding
### columns get integer labels; the concept uniqueID is attached as an extra column.
spose_embeds = pd.read_csv(
    os.path.join(data_dir, 'THINGS_spose.txt'),
    sep='\t',
    header=None,
)
spose_cols = spose_embeds.columns.tolist()  # captured before 'concept' is added
spose_embeds['concept'] = things1854concepts

#### THINGS memorability data
THINGS_mem = pd.read_csv(os.path.join(data_dir, 'THINGS_memorability.csv'))

### THINGS+ data: dict that maps uniqueID -> category
things_plus_df = pd.read_csv(
    os.path.join(data_dir, 'THINGSplus_categories.tsv'),
    sep='\t',
)
things_plus_dict = dict(zip(things_plus_df['uniqueID'], things_plus_df['category']))

### THINGS image recognizability
things_image_recongizability = pd.read_csv(
    os.path.join(data_dir, 'THINGS_recognizability.csv'),
    index_col=0,
).reset_index()

In [None]:
### mean image recognizability per concept, best-recognized concepts first
(
    things_image_recongizability
    .groupby('uniqueID')['recognizability']
    .mean()
    .sort_values(ascending=False)
)

In [None]:
### compute accuracy on practice trials
### practice trials are the rows whose filename is 'cat' or missing (NaN stringifies to 'nan')
_is_practice = things_draw_recog_df['filename'].astype(str).isin(['cat', 'nan'])
recog_practice_trials = things_draw_recog_df[_is_practice].reset_index(drop=True)

### `response` is a stringified dict; ast.literal_eval parses it without executing code
recog_practice_trials['response_list'] = recog_practice_trials['response'].apply(
    lambda resp: list(ast.literal_eval(resp).values())
)

### a practice trial counts as correct if any submitted label is in the accepted list.
### Column-wise Series.apply is used instead of row-wise DataFrame.apply(axis=1): it is
### faster and still returns a Series when there are no practice rows.
### NOTE(review): 'chihuaha' is kept verbatim -- confirm it matches the label vocabulary.
_accepted_practice_labels = ['cat', 'dog', 'kitten', 'fox', 'chihuaha', 'puppy']
recog_practice_trials['accuracy'] = recog_practice_trials['response_list'].apply(
    lambda labels: any(animal in labels for animal in _accepted_practice_labels)
)

### workers who failed a practice trial (a worker appears once per failed trial)
practice_trial_fail_ids = recog_practice_trials.loc[
    ~recog_practice_trials['accuracy'].astype(bool), 'workerID'
].to_list()

print(f'mean accuracy on the cat drawing recognition trials: {recog_practice_trials["accuracy"].mean()}')


In [24]:
### number of distinct workerID values per study (a missing ID counts as one value)
_n_draw_workers = things_draw_prod_df['workerID'].nunique(dropna=False)
_n_recog_workers = things_draw_recog_df['workerID'].nunique(dropna=False)
print(f"Number of subjects in drawing study prior to excluding for valid workerIDS: {_n_draw_workers}")
print(f"Number of subjects in recognition study prior to excluding for valid workerIDS: {_n_recog_workers}")

Number of subjects in drawing study prior to excluding for valid workerIDS: 1315
Number of subjects in recognition study prior to excluding for valid workerIDS: 1557


In [None]:
### Clean the recognition and production data: drop practice trials, parse responses,
### score each recognition trial, apply exclusions, then split the production data into
### sketch trials and familiarity trials.
prod_exclusions = ['61bb389740db417c1a138cad'] ### exclude these workers from the analysis
### NOTE: the values below are matched against the production `_id` column further down,
### i.e. they exclude individual production rows, not workers.
missing_meta_sketches = ['641b5bfc88da294e33e7779c', '641b5bae88da294e33e77519',
       '641b5dac88da294e33e78290', '642b11d7d30b092e53f1ca40',
       '642b2d962fc03e2456b46930', '642b55e7fb5d0a582eae4db6']  ### exclude these `_id` values from the analysis

things_draw_recog_df = things_draw_recog_df[~things_draw_recog_df['filename'].astype(str).isin(['cat','nan'])].reset_index(drop=True) ### remove practice trials
### `response` is a stringified dict; keep only its values, in order, as a list of labels
things_draw_recog_df['response_list'] =  things_draw_recog_df['response'].apply(lambda x: list(ast.literal_eval(x).values()))

### add category info 
### (THINGSplus category of the target and of every response; 'other' when not in things_plus_dict)
things_draw_recog_df['category'] = things_draw_recog_df['uniqueID'].apply(lambda x: things_plus_dict[x] if x in things_plus_dict.keys() else 'other')
things_draw_recog_df['cat_response_list'] = things_draw_recog_df['response_list'].apply(lambda x: [things_plus_dict[i] if i in things_plus_dict.keys() else 'other' for i in x])

## remove any rows where response_list is empty
## (this must happen before the top1_* columns below, which index element 0)
things_draw_recog_df = things_draw_recog_df[things_draw_recog_df['response_list'].apply(lambda x: len(x)>0)].reset_index(drop=True)

### sketch_id is the part of filename_recog before the first '.'
things_draw_recog_df['sketch_id']=things_draw_recog_df.filename_recog.apply(lambda x: str(x).split('.')[0])
### correct / cat_correct: the target appears anywhere among the responses
things_draw_recog_df['correct'] = things_draw_recog_df.apply(lambda x: x['uniqueID'] in (x['response_list']), axis=1)
things_draw_recog_df['cat_correct'] = things_draw_recog_df.apply(lambda x: x['category'] in (x['cat_response_list']), axis=1)
### top1_*: only the first response is compared with the target
things_draw_recog_df['top1_correct'] = things_draw_recog_df.apply(lambda x: x['response_list'][0] == x['uniqueID'], axis=1)
things_draw_recog_df['top1_cat_correct'] = things_draw_recog_df.apply(lambda x: x['cat_response_list'][0] == x['category'], axis=1)


### do some exclusions
### recognition: keep quality-controlled sketches, drop workers who failed a practice trial
things_draw_recog_df = things_draw_recog_df[things_draw_recog_df.sketch_id.isin(valid_ids)]
things_draw_recog_df = things_draw_recog_df[~things_draw_recog_df['workerID'].isin(practice_trial_fail_ids)].reset_index(drop=True)
### production: drop excluded workers and the excluded `_id` rows
things_draw_prod_df = things_draw_prod_df[~things_draw_prod_df['workerID'].isin(prod_exclusions)].reset_index(drop=True)
things_draw_prod_df = things_draw_prod_df[~things_draw_prod_df['_id'].isin(missing_meta_sketches)].reset_index(drop=True)


### sketch trials, ordered by session and trial index
sketch_trials_df = things_draw_prod_df[things_draw_prod_df.trial_type=='sketchpad'].sort_values(by=['sessionID','trial_index']).reset_index(drop=True)
### display_label is the text between the first '>' and the next '<' in the prompt
sketch_trials_df['display_label'] = sketch_trials_df['prompt'].apply(lambda x: x.split('>')[1].split('<')[0])   


## familiarity trials
## (survey-multi-choice trials with trial_index below 50)
fam_trials_df = things_draw_prod_df[(things_draw_prod_df.trial_type=='survey-multi-choice')&(things_draw_prod_df.trial_index<50)].sort_values(by=['sessionID','trial_index']).reset_index(drop=True)




In [None]:
### sizes of the production data before rows with a missing workerID are dropped
_n_sketches_before = sketch_trials_df.shape[0]
_n_subjects_before = things_draw_prod_df['workerID'].nunique(dropna=False)
print(f"Number of sketches prior to excluding for valid workerIDS: {_n_sketches_before}")
print(f"Number of subjects prior to excluding for valid workerIDS: {_n_subjects_before}")


In [None]:
### drop sketches with a missing workerID (NaN stringifies to 'nan')
sketch_trials_df = sketch_trials_df.loc[sketch_trials_df['workerID'].astype(str).ne('nan')]
print(f"Number of sketches after excluding for valid workerIDS: {len(sketch_trials_df)}")
### keep only the sketches that passed quality control
sketch_trials_df = sketch_trials_df.loc[sketch_trials_df['_id'].isin(valid_ids)].reset_index(drop=True)
print(f"Number of sketches after excluding for quality control sketches: {len(sketch_trials_df)}")

### same missing-workerID filter for the familiarity trials
fam_trials_df = fam_trials_df.loc[fam_trials_df['workerID'].astype(str).ne('nan')]

## subset recognition data to only include sketches that we know to be valid
_valid_sketch_ids = sketch_trials_df['_id'].unique()
things_draw_recog_df = things_draw_recog_df.loc[
    things_draw_recog_df['sketch_id'].isin(_valid_sketch_ids)
].reset_index(drop=True)


In [None]:
### Parse the familiarity responses and attach each one to the sketch trial it follows.

### `response` is a stringified dict; only parse strings so that re-running the cell
### (when the column already holds dicts) does not fail.
fam_trials_df['response'] = fam_trials_df['response'].apply(
    lambda resp: ast.literal_eval(resp) if isinstance(resp, str) else resp
)
### a concept counts as recognised/familiar when question Q0 was answered 'Yes'
fam_trials_df['recog'] = fam_trials_df['response'].apply(lambda resp: resp['Q0'] == 'Yes')
mean_fam_df = fam_trials_df.groupby('concept')['recog'].mean().reset_index()

### Lookup keyed by (sessionID, trial_index, concept); the first familiarity row wins when a
### key repeats. This replaces a full scan of fam_trials_df for every sketch row.
_fam_lookup = {}
for _key, _recog in zip(
    zip(fam_trials_df['sessionID'], fam_trials_df['trial_index'], fam_trials_df['concept']),
    fam_trials_df['recog'],
):
    _fam_lookup.setdefault(_key, _recog)

### the matching familiarity trial is the one right after the sketch trial (trial_index + 1)
### in the same session and for the same concept
_familiar = []
for _key in zip(
    sketch_trials_df['sessionID'],
    sketch_trials_df['trial_index'] + 1,
    sketch_trials_df['concept'],
):
    if _key in _fam_lookup:
        _familiar.append(_fam_lookup[_key])
    else:
        # The old branch called DataFrame.append (removed in pandas 2.0) and discarded the
        # result; unmatched sketches are simply left as missing.
        _familiar.append(np.nan)
        print('not matched')
sketch_trials_df['familiar'] = _familiar
        

### Saving out CSVs


In [41]:
### export the cleaned production (drawing) trials:
### worker IDs are replaced by their salted hash and _id is renamed to sketch_id
sketch_trials_df['subject_id'] = sketch_trials_df['workerID'].apply(encrypt_worker_id)

_prod_export_cols = [
    'subject_id', '_id', 'trial_index', 'concept', 'familiar',
    'strokes', 'undo_history', 'rt', 'time_elapsed', 'pointer_device',
]
sketch_trials_out = sketch_trials_df[_prod_export_cols].rename(columns={'_id': 'sketch_id'})
sketch_trials_out.to_csv(
    os.path.join(data_dir, 'things-drawings-prod-clean.csv'),
    index=False,
)

In [42]:
### export the cleaned recognition trials, with worker IDs replaced by their salted hash
things_draw_recog_df['subject_id'] = things_draw_recog_df['workerID'].apply(encrypt_worker_id)

_recog_export_cols = [
    'subject_id', 'trial_index', 'rt', 'concept', 'uniqueID',
    'response', 'filename', 'sketch_id', 'response_list',
]
recog_trials_out = things_draw_recog_df[_recog_export_cols]
recog_trials_out.to_csv(
    os.path.join(data_dir, 'things-drawings-recog-clean.csv'),
    index=False,
)

In [None]:
## here for each sketch_id in the recognition task, we want to get the familiarity rating from the familiarity trials in the production task

## One lookup Series (sketch `_id` -> familiar; first row wins if an `_id` repeats)
## replaces a full scan of sketch_trials_df for every recognition row.
_familiar_by_sketch = (
    sketch_trials_df
    .drop_duplicates(subset='_id', keep='first')
    .set_index('_id')['familiar']
)

## recognition rows whose sketch has no production row are reported and left as missing
_has_sketch_row = things_draw_recog_df['sketch_id'].isin(_familiar_by_sketch.index)
invalids = things_draw_recog_df.loc[~_has_sketch_row, 'sketch_id'].tolist()
for _ in invalids:
    print('not matched')

things_draw_recog_df['familiar'] = things_draw_recog_df['sketch_id'].map(_familiar_by_sketch)
        


In [None]:
### this creates a drawing x concept recognition vector where the number in each cell tells us
### the number of times that drawing was labeled as that concept
_sketch_keys = ['filename', 'sketch_id', 'concept', 'uniqueID', 'category', 'num_strokes', 'familiar']
recog_response_vec_df = (
    things_draw_recog_df.explode('response_list')
    .groupby(_sketch_keys)['response_list']
    .value_counts()
    .unstack(fill_value=0)
    .reset_index()
)

non_label_concepts = np.setdiff1d(things1854concepts, recog_response_vec_df.columns) ### labels that were never used
## add a zero-filled column to recog_response_vec_df for every concept that was never used as a label
for concept in non_label_concepts:
    recog_response_vec_df[concept] = 0

### Per-drawing accuracies are computed once per filename with a single groupby and then
### looked up, instead of filtering the whole trial table again for every drawing.
_acc_by_filename = things_draw_recog_df.groupby('filename')[
    ['correct', 'top1_correct', 'cat_correct', 'top1_cat_correct']
].mean()

recog_response_vec_df['mean_accuracy'] = recog_response_vec_df['filename'].map(_acc_by_filename['correct'])
recog_response_vec_df['mean_top1_accuracy'] = recog_response_vec_df['filename'].map(_acc_by_filename['top1_correct'])


### this is the same as above but uses category labels from things plus instead of things object concepts
recog_response_vec_cat_df = (
    things_draw_recog_df.explode('cat_response_list')
    .groupby(_sketch_keys)['cat_response_list']
    .value_counts()
    .unstack(fill_value=0)
    .reset_index()
)

recog_response_vec_cat_df['mean_accuracy'] = recog_response_vec_cat_df['filename'].map(_acc_by_filename['cat_correct'])
recog_response_vec_cat_df['mean_top1_accuracy'] = recog_response_vec_cat_df['filename'].map(_acc_by_filename['top1_cat_correct'])


In [None]:
### sort items by THINGS concept uniqueID
recog_response_vec_df = recog_response_vec_df.sort_values('uniqueID')
recog_response_vec_cat_df = recog_response_vec_cat_df.sort_values('uniqueID')

## Results!

#### Summary Stats

In [None]:
### number of distinct labels given on each recognition trial
things_draw_recog_df['num_labels'] = things_draw_recog_df['response_list'].apply(
    lambda labels: np.unique(labels).size
)

### number of distinct sketches per concept, computed once and summarised three ways
_sketches_per_concept = things_draw_recog_df.groupby('concept')['sketch_id'].nunique()
print(f"average number of sketches per concept: {_sketches_per_concept.mean()}")
print(f"max number of sketches per concept: {_sketches_per_concept.max()}")
print(f"min number of sketches per concept: {_sketches_per_concept.min()}")

In [None]:
### group things_draw_recog_df by filename and count the non-missing '_id' entries in each group
_labels_per_file = things_draw_recog_df.groupby('filename')['_id'].count()
print(f"median number of times each sketch was labeled: {_labels_per_file.median()}")

In [None]:
### number of recognition rows per sketch_id, averaged over sketches
_rows_per_sketch = things_draw_recog_df.groupby('sketch_id').size()
print('average number of times each sketch was labeled:', _rows_per_sketch.mean())

In [None]:
### trial-level accuracy when the target may appear anywhere among the responses
print(
    f'mean top k recognition accuracy is {things_draw_recog_df.correct.mean()},\n'
    f'and mean top k accuracy at the category level is {things_draw_recog_df.cat_correct.mean()}'
)

In [None]:
### trial-level accuracy when only the first response is scored
print(
    f'mean top 1 recognition accuracy is {things_draw_recog_df.top1_correct.mean()}, \n'
    f'and mean top 1 accuracy at the category level is {things_draw_recog_df.top1_cat_correct.mean()}'
)

In [None]:
#### most easily recognized concepts (category-level accuracy), top 30
(
    recog_response_vec_cat_df
    .groupby('concept')['mean_accuracy']
    .mean()
    .sort_values(ascending=False)
    .head(30)
)

In [None]:
### mean accuracy across all drawings of a single concept
recog_response_vec_df.loc[recog_response_vec_df['concept'] == 'mustache', 'mean_accuracy'].mean()

In [None]:
### the 30 concepts with the lowest mean recognition accuracy
(
    recog_response_vec_df
    .groupby('concept')['mean_accuracy']
    .mean()
    .sort_values(ascending=False)
    .tail(30)
)

In [None]:

### bar plot of mean accuracy per concept, bars ordered from lowest to highest
plt.figure(figsize=(15, 8))
_concepts_by_accuracy = (
    recog_response_vec_df
    .groupby('concept')['mean_accuracy']
    .mean()
    .sort_values(ascending=True)
    .index
)
sns.barplot(
    data=recog_response_vec_df,
    x='concept',
    y='mean_accuracy',
    order=_concepts_by_accuracy,
    errorbar=None,
    color='gray',
)
# plt.xticks(rotation=90)
plt.ylabel('mean recognizability', fontsize=25)
plt.xlabel('concepts', fontsize=25)
## hide x ticks
plt.xticks([])
plt.yticks(fontsize=25)
# plt.title('mean recognition accuracy across concepts', fontsize=25)
# plt.savefig('VSS2023_mean_acc.pdf')
plt.savefig('ms_all_concepts_mean_acc.pdf')
plt.show()

In [None]:
### concepts that no participant ever used as a label
print('these labels were never used \n:{}'.format(non_label_concepts))

In [None]:
## aggregate recog_response_vec_df per concept: average the accuracy columns and
## sum the label counts of every column in things1854concepts
_agg_spec = {'mean_accuracy': 'mean', 'mean_top1_accuracy': 'mean'}
_agg_spec.update(dict.fromkeys(things1854concepts, 'sum'))

## one row per (concept, familiar) pair
recog_response_vec_df_split = (
    recog_response_vec_df.groupby(['uniqueID', 'familiar']).agg(_agg_spec).reset_index()
)

## one row per concept
recog_response_vec_df_agg = (
    recog_response_vec_df.groupby(['uniqueID']).agg(_agg_spec).reset_index()
)

In [None]:
### concept-level summary (mean, sd, max, min) of top-1 and any-match accuracy
for _headline, _acc in (
    ('the mean top 1 accuracy is:', recog_response_vec_df_agg['mean_top1_accuracy']),
    ('the mean any-match accuracy is:', recog_response_vec_df_agg['mean_accuracy']),
):
    print(_headline, _acc.mean())
    print('sd:', _acc.std(), '\n', 'max:', _acc.max(), '\n', 'min:', _acc.min())

In [None]:

# Split the per-(concept, familiar) mean accuracies by the familiarity flag
_familiar_flag = recog_response_vec_df_split['familiar']
familiar_true = recog_response_vec_df_split.loc[_familiar_flag == True, 'mean_accuracy']
familiar_false = recog_response_vec_df_split.loc[_familiar_flag == False, 'mean_accuracy']

# Independent-samples t-test between the two groups
t_stat, p_value = ttest_ind(familiar_true, familiar_false)

# Report group means/SDs followed by the test result
print(f'Mean accuracy (familiar=True): {familiar_true.mean()}')
print(f'Standard deviation (familiar=True): {familiar_true.std()}')
print(f'Mean accuracy (familiar=False): {familiar_false.mean()}')
print(f'Standard deviation (familiar=False): {familiar_false.std()}')
print(f'T-statistic: {t_stat}')
print(f'P-value: {p_value}')

In [None]:
### per-concept min/max of image recognizability (used by the min-max normalisation that is
### currently commented out)
min_max_recognizability_dict = things_image_recongizability.groupby('uniqueID')['recognizability'].agg(['min', 'max']).to_dict('index')

### note: We now use the recognizability_homonyms variable instead as it is fairer
### The per-concept mean is computed once with a groupby and then looked up, instead of
### filtering the whole image table for every concept. The NumPy mean is kept so that a
### missing value inside a concept still propagates to that concept's mean.
_mean_image_recog = (
    things_image_recongizability
    .groupby('uniqueID')['recognizability_homonyms']
    .agg(lambda scores: scores.to_numpy().mean())
)
recog_response_vec_df_agg['image_recongizability'] = recog_response_vec_df_agg['uniqueID'].map(_mean_image_recog)
# recog_response_vec_df_agg['mean_accuracy_normalized'] = recog_response_vec_df_agg.apply(lambda x: mm_normalize(x, min_max_recognizability_dict), axis=1)
### despite its name this is a difference score: drawing accuracy minus image recognizability
recog_response_vec_df_agg['mean_accuracy_normalized'] = recog_response_vec_df_agg['mean_accuracy'] - recog_response_vec_df_agg['image_recongizability']

In [None]:
### number of concepts whose drawings are recognised better than their images
### (mean_accuracy_normalized > 0)
len(recog_response_vec_df_agg.loc[recog_response_vec_df_agg['mean_accuracy_normalized'] > 0])

### the 30 concepts with the highest mean_accuracy_normalized
_advantage_cols = ['uniqueID', 'mean_accuracy_normalized', 'mean_accuracy', 'image_recongizability']
(
    recog_response_vec_df_agg[_advantage_cols]
    .sort_values(by='mean_accuracy_normalized', ascending=False)
    .head(30)
)

In [None]:
### share of concepts with a positive drawing-minus-image difference, rounded to 3 decimals
_n_sketch_advantaged = recog_response_vec_df_agg[recog_response_vec_df_agg["mean_accuracy_normalized"] > 0].shape[0]
_n_concepts_total = recog_response_vec_df_agg.shape[0]
print(f'proportion of sketch advantaged concepts - {np.round(_n_sketch_advantaged / _n_concepts_total, 3)}')

In [None]:
### do a fisher transformed t-test to compare mean_accuracy and image_recongizability

def safe_arctanh(r):
    """Return arctanh(r) after limiting r to [-0.9999, 0.9999].

    Limiting the input keeps the result finite for values at or beyond +/-1,
    where arctanh is infinite or undefined. Accepts scalars or array-likes.
    """
    bounded = np.minimum(np.maximum(r, -0.9999), 0.9999)
    return np.arctanh(bounded)


### concept-level recognizability of drawings and of images, as aligned arrays
drawing_recog = recog_response_vec_df_agg['mean_accuracy'].values
image_recog = recog_response_vec_df_agg['image_recongizability'].values

### both arrays go through the clipped arctanh transform before the paired t-test
image_recog_fisher = safe_arctanh(image_recog)
drawing_recog_fisher = safe_arctanh(drawing_recog)

### paired test, images first: a positive t means images score higher than drawings
t_stat, p_val = ttest_rel(image_recog_fisher, drawing_recog_fisher)
print(f"t-statistic: {t_stat}, p-value: {p_val}")

### descriptive statistics on the untransformed scores (NumPy std, i.e. ddof=0)
print(f"mean recognizability for drawings: {drawing_recog.mean()}")
print(f"standard deviation recognizability for drawings: {drawing_recog.std()}")
print(f"mean recognizability for images: {image_recog.mean()}")
print(f"standard deviation recognizability for images: {image_recog.std()}")


In [None]:
### Pearson correlation between image and drawing recognizability across concepts
r, p = pearsonr(
    image_recog,
    drawing_recog,
)
print(f"Pearson r between image recognizability and drawing recognizability: {r}")
print(f"P-value: {p}")



In [None]:

### seaborn "tab20" colour palette; individual colours are picked by index when plotting
tab20 = sns.color_palette("tab20")

In [None]:
### scatter of drawing vs. image recognizability, one point per concept
plt.figure(figsize=(8, 8))
sns.scatterplot(
    data=recog_response_vec_df_agg,
    x='image_recongizability',
    y='mean_accuracy',
    color=tab20[1],
)

### identity line: points above it are concepts recognised better from drawings
plt.plot([0, 1], [0, 1], color='black', linestyle='--')
plt.xlabel('image recognizability', fontsize=25)
plt.ylabel('drawing recognizability', fontsize=25)
### add some horizontal and vertical lines at .5
# plt.axhline(y=.5, color='gray', linestyle='--')
# plt.axvline(x=.5, color='gray', linestyle='--')
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)

sns.despine()

plt.savefig('image_vs_drawing_recog.pdf', bbox_inches='tight', dpi=300)
plt.show()


In [None]:


### bar plot of the drawing-minus-image recognizability difference per concept,
### bars ordered from lowest to highest
plt.figure(figsize=(15,8))
sns.barplot(x='uniqueID',y='mean_accuracy_normalized',data=recog_response_vec_df_agg,order=recog_response_vec_df_agg.groupby('uniqueID').mean_accuracy_normalized.mean().sort_values(ascending=True).index, errorbar=None,color='gray')
# plt.xticks(rotation=90)
plt.ylabel('photo-relative mean \n sketch recognizability',fontsize=25)
plt.xlabel('concepts',fontsize=25)
## hide x ticks
plt.xticks([])
plt.yticks(fontsize=25)
# plt.title('image-relative recognizability scores', fontsize=25)
### remove the top/right spines *before* saving, so the exported PDF matches the figure shown
### (the despine call was previously noted in a comment but never made)
sns.despine()
plt.savefig('all_concepts_im_relative_recognizability.pdf')

plt.show()


### rank every concept (1 = highest) by difference score, drawing accuracy and image recognizability

recog_response_vec_df_agg['normed_acc_rank'] = recog_response_vec_df_agg['mean_accuracy_normalized'].rank(ascending=False)
recog_response_vec_df_agg['acc_rank'] = recog_response_vec_df_agg['mean_accuracy'].rank(ascending=False)
recog_response_vec_df_agg['image_acc_rank'] = recog_response_vec_df_agg['image_recongizability'].rank(ascending=False)

In [None]:

# Function to perform the bootstrapping and compute Kendall Tau
def bootstrap_kendall_tau(df, n_iterations=1000, sample_size=20,
                          group_col='uniqueID',
                          value_col='recognizability_homonyms',
                          random_state=None):
    """Bootstrap the rank agreement between two resampled sets of group means.

    On every iteration two independent samples of ``sample_size`` rows are drawn
    with replacement from each group, the mean of ``value_col`` is computed per
    group for both samples, and Kendall's tau between the two vectors of group
    means is recorded.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``group_col`` and ``value_col``.
    n_iterations : int
        Number of bootstrap iterations.
    sample_size : int
        Rows drawn (with replacement) per group and per sample.
    group_col, value_col : str
        Grouping column and the column whose per-group mean is compared.
    random_state : None, int, numpy RandomState or Generator
        ``None`` keeps using NumPy's global random state; anything else makes
        the run reproducible.

    Returns
    -------
    list of float
        One Kendall tau per iteration (``nan`` when a vector of means is constant).
    """
    # A bare integer seed would be re-applied on every draw, making both samples
    # identical; wrap it once in a stateful generator that advances between draws.
    if isinstance(random_state, (int, np.integer)):
        random_state = np.random.RandomState(random_state)

    # GroupBy.sample keeps the grouping column in its output, unlike
    # groupby.apply(lambda g: g.sample(...)), whose handling of the grouping
    # column is deprecated in recent pandas releases.
    grouped = df.groupby(group_col)
    kendall_taus = []

    for _ in tqdm(range(n_iterations), desc="Bootstrapping"):
        # Two independent draws of `sample_size` rows per group
        sample1 = grouped.sample(n=sample_size, replace=True, random_state=random_state)
        sample2 = grouped.sample(n=sample_size, replace=True, random_state=random_state)

        # Per-group means; both results are sorted by group key, so they line up
        mean_sample1 = sample1.groupby(group_col)[value_col].mean()
        mean_sample2 = sample2.groupby(group_col)[value_col].mean()

        kendall_taus.append(kendalltau(mean_sample1, mean_sample2)[0])

    return kendall_taus

kendall_taus = bootstrap_kendall_tau(things_image_recongizability)

# Histogram of the bootstrapped coefficients
plt.hist(kendall_taus, edgecolor='black', bins=20)
plt.title('Distribution of Kendall Tau Coefficients')
plt.xlabel('Kendall Tau Coefficient')
plt.ylabel('Frequency')
plt.show()

# Mean and (population) standard deviation of the bootstrapped coefficients
tau_mean = np.mean(kendall_taus)
tau_sd = np.std(kendall_taus)
print(f'Mean Kendall Tau: {tau_mean}')
print(f'Standard Deviation of Kendall Tau: {tau_sd}')

In [None]:
# Kendall's tau between the image-recognizability ranks and the drawing-accuracy ranks.
# (Spearman alternative, disabled:)
# print(spearmanr(recog_response_vec_df_agg['image_acc_rank'],recog_response_vec_df_agg['acc_rank']))
image_ranks = recog_response_vec_df_agg['image_acc_rank']
drawing_ranks = recog_response_vec_df_agg['acc_rank']
print("correlation between mean drawing recognizability and image recognizability:")
print(kendalltau(image_ranks, drawing_ranks))

In [None]:
### Kendall tau divided by the mean bootstrapped tau in `kendall_taus`
print("noise corrected correlation between mean drawing recognizability and image recognizability:")
observed_tau = kendalltau(
    recog_response_vec_df_agg['image_acc_rank'],
    recog_response_vec_df_agg['acc_rank'],
)[0]
print(observed_tau / np.mean(kendall_taus))


In [None]:
### For every row of both response tables, store the Shannon entropy of the
### normalised response vector ('entropy') and utils.SDI of the raw vector ('sdi').
concept_columns = things1854concepts.tolist()

for response_table in (recog_response_vec_df_agg, recog_response_vec_df_split):
    for row_label, record in response_table.iterrows():
        counts = np.array(record[concept_columns])
        # normalise so the response vector sums to 1 before taking the entropy
        proportions = counts / np.sum(counts)
        response_table.loc[row_label, 'entropy'] = entropy(proportions.astype(float))
        response_table.loc[row_label, 'sdi'] = utils.SDI(record[concept_columns])



In [None]:
## Pearson correlation between the per-row entropy and SDI columns
entropy_sdi_corr = pearsonr(
    recog_response_vec_df_agg['entropy'].values,
    recog_response_vec_df_agg['sdi'].values,
)
print(f"correlation between entropy and sdi across all concepts: \n {entropy_sdi_corr}")


In [None]:
# Pearson correlation between mean accuracy and SDI
accuracy_sdi_corr = pearsonr(
    recog_response_vec_df_agg['mean_accuracy'].values,
    recog_response_vec_df_agg['sdi'].values,
)
print(f"correlation between mean accuracy and sdi across all concepts: \n {accuracy_sdi_corr}")

In [None]:
# Pearson correlation between mean top-1 accuracy and SDI
top1_sdi_corr = pearsonr(
    recog_response_vec_df_agg['mean_top1_accuracy'].values,
    recog_response_vec_df_agg['sdi'].values,
)
print(f"correlation between mean top 1 accuracy and sdi across all concepts: \n {top1_sdi_corr}")

In [None]:
# Pearson correlation between `mean_accuracy` and `entropy`, restricted to rows
# flagged familiar == False.
# NOTE(review): the printed label says "mean top accuracy"; the column used is
# `mean_accuracy` — confirm which one is intended.
unfamiliar_rows = recog_response_vec_df_split[recog_response_vec_df_split.familiar==False]
unfamiliar_corr = pearsonr(
    unfamiliar_rows['mean_accuracy'].values,
    unfamiliar_rows['entropy'].values,
)
print(f"correlation between mean top accuracy and entropy across all concepts when observer did not recognize the concept:\n{unfamiliar_corr}")

In [None]:
# Split the per-familiarity table into its familiar and unfamiliar rows.
familiar_flag = recog_response_vec_df_split.familiar
fam_df = recog_response_vec_df_split[familiar_flag==True]
unfam_df = recog_response_vec_df_split[familiar_flag==False]

In [None]:
quart_fam_df = []
quart_unfam_df =[]

# Bin the familiar rows into (up to) 10 equal-frequency bins of top-1 accuracy.
# The column keeps its historical name `quartile_id` although the bins are deciles.
fam_df['quartile_id'] = pd.qcut(fam_df['mean_top1_accuracy'],10,labels=False,duplicates='drop')
# unfam_df['quartile_id'] = pd.qcut(unfam_df['mean_top1_accuracy'],10,labels=False,duplicates='drop')

# One lookup (concept -> bin of its first familiar row) built once, instead of
# re-filtering fam_df for every row. Concepts absent from fam_df get NaN rather
# than raising an IndexError.
decile_by_concept = fam_df.drop_duplicates('uniqueID').set_index('uniqueID')['quartile_id']

# Unfamiliar rows and the aggregated table inherit the bin of the same concept in fam_df.
unfam_df['quartile_id'] = unfam_df['uniqueID'].map(decile_by_concept)
recog_response_vec_df_agg['quartile_id'] = recog_response_vec_df_agg['uniqueID'].map(decile_by_concept)

In [None]:

# Mean top-1 accuracy per `quartile_id` bin of the aggregated table (x axis labelled 'Decile').
agg_bin_means = (
    recog_response_vec_df_agg
    .groupby(['quartile_id'])
    .mean_top1_accuracy.mean()
    .sort_values(ascending=False)
    .reset_index()
)

plt.figure(figsize=(15, 8))
sns.lineplot(data=agg_bin_means, x='quartile_id', y='mean_top1_accuracy',
             color=tab20[0], linewidth=3, errorbar=None)

plt.xlabel('Decile', fontsize=25)
plt.ylabel('Mean Accuracy', fontsize=25)
plt.xticks([])  # hide the x ticks
plt.yticks(fontsize=25)
plt.ylim(0, 1)
# plt.title('Mean Recognition Accuracy', fontsize=25)
plt.savefig('VSS2023_mean_acc_line.pdf')

In [None]:
tab20 = sns.color_palette("tab20")

# Mean top-1 accuracy per `quartile_id` bin, familiar rows only.
fam_bin_means = (
    fam_df
    .groupby(['quartile_id'])
    .mean_top1_accuracy.mean()
    .sort_values(ascending=False)
    .reset_index()
)

plt.figure(figsize=(15, 10))
sns.lineplot(data=fam_bin_means, x='quartile_id', y='mean_top1_accuracy',
             color=tab20[0], linewidth=3, errorbar=None)
plt.xlabel('Decile', fontsize=25)
plt.ylabel('Mean Accuracy', fontsize=25)
plt.xticks([])  # hide the x ticks
plt.yticks(fontsize=25)
plt.ylim(0, 1)
# plt.title('Mean Recognition Accuracy', fontsize=25)
# NOTE(review): a later cell saves to this same filename and will overwrite this figure.
plt.savefig('VSS2023_mean_acc_line_split.pdf')


In [None]:
# Mean top-1 accuracy per `quartile_id` bin: familiar rows (tab20[0]) and
# unfamiliar rows (tab20[1]) drawn on the same axes.
plt.figure(figsize=(15, 8))
for split_rows, line_color in ((fam_df, tab20[0]), (unfam_df, tab20[1])):
    split_bin_means = (
        split_rows
        .groupby(['quartile_id'])
        .mean_top1_accuracy.mean()
        .sort_values(ascending=False)
        .reset_index()
    )
    sns.lineplot(data=split_bin_means, x='quartile_id', y='mean_top1_accuracy',
                 color=line_color, linewidth=3, errorbar=None)

plt.xlabel('Decile', fontsize=25)
plt.ylabel('Mean Accuracy', fontsize=25)
plt.xticks([])  # hide the x ticks
plt.yticks(fontsize=25)
plt.ylim(0, 1)
# plt.title('Mean Recognition Accuracy', fontsize=25)
plt.savefig('VSS2023_mean_acc_line_split.pdf')


In [None]:

# Bar chart of SDI per concept, bars ordered from the highest to the lowest mean SDI.
sdi_order = (
    recog_response_vec_df_agg
    .groupby('uniqueID')
    .sdi.mean()
    .sort_values(ascending=False)
    .index
)

plt.figure(figsize=(15, 8))
sns.barplot(data=recog_response_vec_df_agg, x='uniqueID', y='sdi',
            order=sdi_order, color='gray', errorbar=None)
# plt.xticks(rotation=90)
plt.xlabel('Concept', fontsize=25)
plt.ylabel('SDI', fontsize=25)
plt.xticks([])  # hide the x ticks
plt.yticks(fontsize=25)
plt.ylim(.5, 1)
plt.title('SDI', fontsize=25)
plt.savefig('VSS2023_SDI.pdf')

plt.show()

In [None]:



# Mean SDI per `quartile_id` bin: familiar rows (tab20[0]) and unfamiliar rows (tab20[1]).
plt.figure(figsize=(15, 8))
for sdi_rows, sdi_color in ((fam_df, tab20[0]), (unfam_df, tab20[1])):
    sdi_bin_means = (
        sdi_rows
        .groupby(['quartile_id'])
        .sdi.mean()
        .sort_values(ascending=False)
        .reset_index()
    )
    sns.lineplot(data=sdi_bin_means, x='quartile_id', y='sdi',
                 color=sdi_color, linewidth=3, errorbar=None)

plt.xlabel('Decile', fontsize=25)
plt.ylabel('SDI', fontsize=25)
plt.xticks([])  # hide the x ticks
plt.yticks(fontsize=25)
plt.ylim(.5, 1)
# plt.title('Mean Recognition Accuracy', fontsize=25)
plt.savefig('VSS2023_SDI_split.pdf')


In [None]:

# Scatter of SDI (x) against mean accuracy (y) for the aggregated table.
plt.figure(figsize=(8, 8))
sns.scatterplot(
    data=recog_response_vec_df_agg,
    x='sdi',
    y='mean_accuracy',
    color=tab20[1],
)
plt.xlabel("Simpson's diversity index", fontsize=20)
plt.ylabel('drawing recognizability', fontsize=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.ylim(0, 1)
# plt.savefig('VSS2023_SDI_v_accuracy.pdf')
sns.despine()
plt.tight_layout()
plt.savefig('SDI_v_mean_accuracy.pdf')
# plt.suptitle('Mean Overall Accuracy vs SDI',fontsize=25)
plt.show()


In [None]:
### Scatter of SDI (x) against mean top-1 accuracy (y) in recog_response_vec_df_agg
plt.figure(figsize=(8, 8))
sns.scatterplot(
    data=recog_response_vec_df_agg,
    x='sdi',
    y='mean_top1_accuracy',
    color=tab20[1],
)
plt.xlabel('SDI', fontsize=20)
plt.ylabel('Mean Top 1 Accuracy', fontsize=20)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
# plt.title('Mean Top1 Accuracy vs SDI',fontsize=25)
plt.show()


In [None]:
# Copy the `recog` column of mean_fam_df into the aggregated table as `mean_fam`.
# NOTE(review): the assignment aligns on index labels, so this presumes both
# frames share the same index — confirm.
recog_response_vec_df_agg['mean_fam'] = mean_fam_df['recog']
# Pearson correlation between mean familiarity and mean accuracy (displayed as cell output)
pearsonr(recog_response_vec_df_agg['mean_fam'].values,recog_response_vec_df_agg['mean_accuracy'].values)

In [None]:


# Bar chart of `mean_accuracy_normalized` per concept, bars ordered from the
# lowest to the highest per-concept mean.
# NOTE(review): this cell repeats an earlier cell of the notebook and writes to
# the same PDF, so whichever runs last determines the saved figure.
concept_order = (
    recog_response_vec_df_agg
    .groupby('uniqueID')
    .mean_accuracy_normalized.mean()
    .sort_values(ascending=True)
    .index
)

plt.figure(figsize=(15,8))
sns.barplot(x='uniqueID',y='mean_accuracy_normalized',data=recog_response_vec_df_agg,order=concept_order, errorbar=None,color='gray')
# plt.xticks(rotation=90)
plt.ylabel('photo-relative mean \n sketch recognizability',fontsize=25)
plt.xlabel('concepts',fontsize=25)
## hide x ticks
plt.xticks([])
plt.yticks(fontsize=25)
# plt.title('image-relative recognizability scores', fontsize=25)
# despine before saving, matching the other exported figures in this notebook
sns.despine()
plt.savefig('all_concepts_im_relative_recognizability.pdf')

plt.show()


### rank every row of recog_response_vec_df_agg on three measures; with
### ascending=False, rank 1 goes to the largest value
recog_response_vec_df_agg['normed_acc_rank'] = recog_response_vec_df_agg['mean_accuracy_normalized'].rank(ascending=False)
recog_response_vec_df_agg['acc_rank'] = recog_response_vec_df_agg['mean_accuracy'].rank(ascending=False)
recog_response_vec_df_agg['image_acc_rank'] = recog_response_vec_df_agg['image_recongizability'].rank(ascending=False)

In [None]:
### Scatter of mean familiarity (x) against mean accuracy (y) in recog_response_vec_df_agg
plt.figure(figsize=(8, 8))
sns.scatterplot(
    data=recog_response_vec_df_agg,
    x='mean_fam',
    y='mean_accuracy',
)
plt.title('Mean Overall Accuracy vs Familiarity', fontsize=25)
plt.xlabel('Mean Familiarity', fontsize=20)
plt.ylabel('Mean Accuracy', fontsize=20)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.show()


In [None]:
## For every concept, split its rows into (up to) 4 quantile bins of `num_strokes`
## and average stroke count and both accuracy measures within each bin.
per_concept_bins = []
for this_concept in things1854concepts:
    ds = recog_response_vec_df[recog_response_vec_df.uniqueID==this_concept]
    ds['quartile_id'] = pd.qcut(ds['num_strokes'], 4, labels=False, duplicates='drop')

    ds_g = (
        ds.groupby(['uniqueID', 'quartile_id'])
        [['num_strokes', 'mean_top1_accuracy', 'mean_accuracy']]
        .mean()
        .reset_index()
    )
    per_concept_bins.append(ds_g)

# Stack the per-concept summaries into one table with a fresh 0..n-1 index.
quart_recog_df = pd.concat(per_concept_bins).reset_index(drop=True)


In [None]:

# Every column of the GPT word-embedding table except the last one.
# NOTE(review): only referenced by the disabled alternative below.
gpt_feat_cols = things_gpt_word_embeds.columns[:-1]


# Alternative distance matrix from the GPT word embeddings (disabled):
# cosine_dist_df = pd.DataFrame(cosine_distances(things_gpt_word_embeds[gpt_feat_cols]), columns=things_gpt_word_embeds['concept'].unique(), index=things_gpt_word_embeds['concept'].unique())

# Pairwise cosine distances between the SPoSE embeddings, labelled by concept.
# assumes the rows of spose_embeds follow the order of things1854concepts — TODO confirm
cosine_dist_df = pd.DataFrame(cosine_distances(spose_embeds[spose_cols]), columns=things1854concepts, index=things1854concepts)

# For each concept: every OTHER concept, ordered from nearest to farthest.
# The concept itself is removed by label instead of by slicing off the first
# sorted entry, so a second concept at distance 0 can no longer displace it;
# the stable sort keeps tied concepts in their original order.
nearest_concepts = {
    concept: cosine_dist_df[concept].drop(concept).sort_values(kind='stable').index.tolist()
    for concept in cosine_dist_df.columns
}


In [None]:
def _semantic_rank(target, response):
    """Rank of `response` among `target`'s neighbours: 0 for an exact match, 1 for the nearest neighbour."""
    if response == target:
        return 0
    # nearest_concepts[target] excludes the target itself, so list position 0 is
    # the nearest WRONG concept. Add 1 so that rank 0 is reserved for correct
    # guesses; otherwise nearest-neighbour errors are indistinguishable from hits
    # and get dropped by the later `rank != 0` filters.
    return nearest_concepts[target].index(response) + 1


def _best_semantic_rank(target, responses):
    """Smallest rank over all `responses`; None when no response is a known neighbour."""
    if any(response == target for response in responses):
        return 0
    neighbours = nearest_concepts[target]
    return min(
        (neighbours.index(response) + 1 for response in responses if response in neighbours),
        default=None,
    )


# Rank of the first guess only
things_draw_recog_df['top1_rank'] = \
    things_draw_recog_df.apply(lambda x: _semantic_rank(x.uniqueID, x.response_list[0]), axis=1)

# Best (smallest) rank over every guess in the response list
things_draw_recog_df['topk_rank'] = \
    things_draw_recog_df.apply(lambda x: _best_semantic_rank(x.uniqueID, x.response_list), axis=1)



In [None]:
# Split each comma-separated `answer` string into a list of responses.
things_image_recongizability['response_list'] = things_image_recongizability['answer'].map(
    lambda answer: answer.split(', ')
)

# One row per individual response: explode the lists, rename the column to
# 'response', and rebuild a 0..n-1 index.
things_image_recongizability_trialwise = (
    things_image_recongizability
    .explode('response_list')
    .rename(columns={'response_list': 'response'})
    .reset_index(drop=True)
)

# Lookup from a trial's display label to its uniqueID.
display_label2uniqueID = dict(zip(
    things_image_recongizability_trialwise.display_label,
    things_image_recongizability_trialwise.uniqueID,
))

# things_image_recongizability_trialwise['response'] = things_image_recongizability_trialwise['response'].apply(lambda x: "baton (conductor's baton)" if x == '"baton (conductors baton)"' else x)
# things_image_recongizability_trialwise['response'] = things_image_recongizability_trialwise['response'].apply(lambda x: "tick (insect)" if x == '<em class="Highlight ht22e5d362-63a9-4678-acc3-95564bce8064" highlight="true" match="tick" loopnumber="807296746" style="font-style: inherit;">tick</em> (insect)' else x)
# # things_image_recongizability_trialwise = things_image_recongizability_trialwise[things_image_recongizability_trialwise.response!='229'].reset_index(drop=True)
# things_image_recongizability_trialwise = things_image_recongizability_trialwise[things_image_recongizability_trialwise.response!='229) 1px 1px; border-radius: 3px; background-color: rgb(255'].reset_index(drop=True)

### keep only rows whose response is a key of display_label2uniqueID, then reset the index
is_known_label = things_image_recongizability_trialwise.response.isin(display_label2uniqueID.keys())
things_image_recongizability_trialwise = (
    things_image_recongizability_trialwise[is_known_label].reset_index(drop=True)
)


In [None]:
# Spot check: show the uniqueID stored for the display label 'tick (insect)'
display_label2uniqueID['tick (insect)']

In [None]:

# Rank of each image response among the target's semantic neighbours:
# 0 for an exact match with the target's display label, otherwise the 1-based
# position of the response in nearest_concepts[target].
# nearest_concepts[...] excludes the target itself, so list position 0 is the
# nearest WRONG concept; the "+ 1" keeps rank 0 reserved for correct responses,
# which is what the later `rank != 0` filters rely on.
things_image_recongizability_trialwise['topk_rank'] = \
    things_image_recongizability_trialwise.apply(
        lambda x: 0 if x.response == x.display_label
        else nearest_concepts[display_label2uniqueID[x.display_label]].index(display_label2uniqueID[x.response]) + 1,
        axis=1)

In [None]:
# Display the trials whose top-k rank differs from their top-1 rank
things_draw_recog_df[~(things_draw_recog_df['topk_rank']==things_draw_recog_df['top1_rank'])]

In [None]:
### For every recognition trial compute two cosine similarities to the target's embedding:
###   similarity_score      - the highest similarity reached by any single response
###   similarity_score_mean - the similarity of the mean response embedding

# Concept -> embedding lookup built once, replacing a full boolean scan of
# spose_embeds per target and per response. setdefault keeps the FIRST row of a
# concept, like the `.values[0]` lookup it replaces.
embed_by_concept = {}
for concept_name, concept_vector in zip(spose_embeds['concept'].values, spose_embeds[spose_cols].values):
    embed_by_concept.setdefault(concept_name, concept_vector)

max_similarities = []
mean_similarities = []
for target_concept, responses in zip(things_draw_recog_df['uniqueID'], things_draw_recog_df['response_list']):
    target_embed = embed_by_concept[target_concept].reshape(1, -1)
    # one row per response; raises ValueError on an empty response list
    response_embeds = np.vstack([embed_by_concept[this_response] for this_response in responses])

    # similarity of the target to every response in one call
    max_similarities.append(cosine_similarity(target_embed, response_embeds).max())

    mean_response_embed = response_embeds.mean(axis=0, keepdims=True)
    mean_similarities.append(cosine_similarity(target_embed, mean_response_embed)[0][0])

# Write both columns in one go (positional, in row order) rather than one .loc write per row.
things_draw_recog_df['similarity_score'] = max_similarities
things_draw_recog_df['similarity_score_mean'] = mean_similarities

            

In [None]:
# Per-concept means of both similarity scores and of `correct`.
things_draw_recog_df_agg = (
    things_draw_recog_df
    .groupby('uniqueID')
    .agg(
        similarity_score=('similarity_score', 'mean'),
        correct=('correct', 'mean'),
        similarity_score_mean=('similarity_score_mean', 'mean'),
    )
    .reset_index()
)


In [None]:
# Pearson correlation between the per-concept mean of `similarity_score` and mean `correct`
pearsonr(things_draw_recog_df_agg['similarity_score'],things_draw_recog_df_agg['correct'])

In [None]:
# things_draw_recog_df[['uniqueID','similarity_score','correct']].to_csv(os.path.join(data_dir,'things_draw_recog_similarity.csv'),index=False)

In [None]:
# Pearson correlation between the per-concept mean of `similarity_score_mean` and mean `correct`
pearsonr(things_draw_recog_df_agg['similarity_score_mean'],things_draw_recog_df_agg['correct'])



In [None]:


## Scatter of per-concept `similarity_score` (x) against `correct` (y)
plt.figure(figsize=(8, 8))
sns.scatterplot(
    data=things_draw_recog_df_agg,
    x='similarity_score',
    y='correct',
    color=tab20[1],
)
plt.xlabel('mean semantic similarity between guesses and labels', fontsize=20)
plt.ylabel('drawing recognizability', fontsize=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.xlim(.3, 1)
plt.ylim(0, 1)
# plt.title('Mean Accuracy vs. Similarity Score',fontsize=25)
sns.despine()
plt.savefig('accuracy_vs_similarity.pdf', bbox_inches='tight', dpi=300)
plt.show()



In [None]:
# Distribution of top-1 ranks over the trials whose top-1 rank is non-zero.
nonzero_top1 = things_draw_recog_df[things_draw_recog_df['top1_rank'] != 0]

plt.figure(figsize=(15, 8))
sns.histplot(data=nonzero_top1, x='top1_rank', stat='probability', bins=30, legend=False)
# plt.title('Top 1 Rank Distribution')
plt.xlabel('Top 1 Rank', fontsize=20)
plt.ylabel('Proportion', fontsize=20)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)

## white axes background
ax = plt.gca()
ax.set_facecolor('white')



In [None]:
# Distribution of top-k ranks over the trials whose top-k rank is non-zero.
# The rows were already filtered on `topk_rank` and the axis is labelled
# 'Top k Rank', but the histogram used to be drawn from `top1_rank`; it now
# plots the column it is labelled with.
nonzero_topk = things_draw_recog_df.loc[things_draw_recog_df['topk_rank'] != 0, 'topk_rank']
# topk_rank may hold missing values (no response matched a known neighbour);
# coerce to numeric and drop them so the column is binned as numbers.
nonzero_topk = pd.to_numeric(nonzero_topk, errors='coerce').dropna()

plt.figure(figsize=(15,8))
sns.histplot(x=nonzero_topk, bins=30,legend=False,stat='probability')
# plt.title('Top k Rank Distribution')
plt.xlabel('Top k Rank', fontsize=20)
plt.ylabel('Proportion', fontsize=20)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)

## set facecolor to be white
ax = plt.gca()
ax.set_facecolor('white')



### Some examples

In [None]:
# Top-1 rank distribution of the non-zero-rank trials whose `concept` is 'pom-pom'.
is_pompom_error = (things_draw_recog_df['top1_rank']!=0) & (things_draw_recog_df['concept']=='pom-pom')

plt.figure(figsize=(15, 8))
sns.histplot(data=things_draw_recog_df[is_pompom_error], x='top1_rank',
             stat='probability', bins=30, color=tab20[1], legend=False)
plt.title('Pom-pom', fontsize=25)
plt.xlabel('Rank', fontsize=20)
plt.ylabel('Proportion', fontsize=20)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.xlim(0, 1850)
## white axes background
ax = plt.gca()
ax.set_facecolor('white')
plt.savefig('VSS2023_rank_dist_pompom.pdf')


In [None]:
# from scipy.stats import gamma


# unif_df = pd.DataFrame({'x':np.arange(1854),'y':np.ones(1854)})

# xs = []
# # ys = []
# for i in range(1,1854):
#     this_y = 2/i*10000
#     for j in range(int(this_y)):
#         xs.append(i)
# exp_df = pd.DataFrame({'x':xs})

# plt.figure(figsize=(15,8))
# sns.histplot(data=unif_df,x='x', bins=40,legend=False,stat='probability',color=tab20[1])
# plt.title('Uniform', fontsize=25)
# plt.xlabel('Rank', fontsize=20)
# plt.ylabel('Proportion', fontsize=20)
# plt.xticks(fontsize=18)
# plt.yticks([],fontsize=18)
# plt.xlim(0,1850)
# plt.ylim(0,.06)
# ## set facecolor to be white
# ax = plt.gca()
# ax.set_facecolor('white')
# plt.savefig('VSS2023_uniform.pdf')

# plt.figure(figsize=(15,8))
# sns.histplot(data=exp_df,x='x', bins=40,legend=False,stat='probability',color=tab20[1])
# plt.title('Skew', fontsize=25)
# plt.xlabel('Rank', fontsize=20)
# plt.ylabel('Proportion', fontsize=20)
# plt.xticks(fontsize=18)
# plt.yticks([],fontsize=18)
# plt.xlim(0,1850)
# plt.ylim(0,.06)
# ## set facecolor to be white
# ax = plt.gca()
# ax.set_facecolor('white')
# plt.savefig('VSS2023_left_skew.pdf')


### Comparing recognition performance to other measures

In [None]:
# concept -> 'CR' score. Elsewhere in this notebook 'CR' is stored as
# `memorability` and 'dim_based_typ' as `typicality`, so the `typicality` column
# must not be filled from 'CR'.
THINGS_mem_dict = dict(zip(THINGS_mem['concept'],THINGS_mem['CR']))
THINGS_typ_dict = dict(zip(THINGS_mem['concept'],THINGS_mem['dim_based_typ']))
### fix
# THINGS_mem_dict['goalpost'] = 0
# Concepts missing from THINGS_mem map to NaN.
things_draw_recog_df['memorability'] = things_draw_recog_df['uniqueID'].map(THINGS_mem_dict)
things_draw_recog_df['typicality'] = things_draw_recog_df['uniqueID'].map(THINGS_typ_dict)

In [None]:
# ### fit a mixed effects model predicting similarity_score from mean_mem and num_strokes with random intercepts for subject 
# import statsmodels.formula.api as smf

# reg_df = things_draw_recog_df.dropna(subset=['typicality','similarity_score','num_strokes'])

# things_draw_recog_df["workerID"] = things_draw_recog_df["workerID"].astype(str)
# md = smf.mixedlm("similarity_score ~ typicality + num_strokes ", reg_df, groups=reg_df["workerID"])
# mdf = md.fit()
# print(mdf.summary())


In [None]:
# np.corrcoef(reg_df['similarity_score'],reg_df['typicality'])

In [None]:
# Per concept: cumulative proportion of guesses by rank (top-1 and top-k), using
# only the trials whose top-1 rank is non-zero.
top1_ranks, topk_ranks = [], []
cumulative_props_top1, cumulative_props_topk = [], []
concept_list_top1_rank, concept_list_topk_rank = [], []

for this_concept in things1854concepts:
    ds = things_draw_recog_df[things_draw_recog_df['uniqueID']==this_concept]
    ds = ds[ds.top1_rank!=0]  # both curves are filtered on top1_rank

    # normalised counts per rank, ordered by rank, accumulated
    top1_curve = ds['top1_rank'].value_counts(normalize=True).sort_index().cumsum()
    topk_curve = ds['topk_rank'].value_counts(normalize=True).sort_index().cumsum()

    top1_ranks.append(top1_curve.index)
    cumulative_props_top1.append(top1_curve.values)
    concept_list_top1_rank.append([this_concept] * top1_curve.index.shape[0])

    topk_ranks.append(topk_curve.index)
    cumulative_props_topk.append(topk_curve.values)
    concept_list_topk_rank.append([this_concept] * topk_curve.index.shape[0])

# Flatten the per-concept pieces into long 1-D arrays.
top1_ranks = np.concatenate(top1_ranks)
cumulative_props_top1 = np.concatenate(cumulative_props_top1)
topk_ranks = np.concatenate(topk_ranks)
cumulative_props_topk = np.concatenate(cumulative_props_topk)
concept_list_top1_rank = np.concatenate(concept_list_top1_rank)
concept_list_topk_rank = np.concatenate(concept_list_topk_rank)

# Long-form tables; ranks are divided by 1854.
cumulative_prop_df_top1 = pd.DataFrame({
    'concept': concept_list_top1_rank,
    'rank': top1_ranks,
    'cumulative_prop': cumulative_props_top1,
})
cumulative_prop_df_top1['rank'] = cumulative_prop_df_top1['rank']/1854

cumulative_prop_df_topk = pd.DataFrame({
    'concept': concept_list_topk_rank,
    'rank': topk_ranks,
    'cumulative_prop': cumulative_props_topk,
})
cumulative_prop_df_topk['rank'] = cumulative_prop_df_topk['rank']/1854


In [None]:

# concept_list_topk_rank=[]
# topk_ranks = []
# cumulative_props_topk=[]


# for this_concept in things1854concepts:
#     ds = things_image_recongizability[things_image_recongizability['uniqueID']==this_concept]
#     ds = ds[ds.top1_rank!=0] ### get all the rows where the guess was not the correct object concept
#     topk_ranks.append(ds['topk_rank'].value_counts(normalize=True).sort_index().cumsum().index)
#     cumulative_props_topk.append(ds['topk_rank'].value_counts(normalize=True).sort_index().cumsum().values)

#     concept_list_topk_rank.append([this_concept]*ds['topk_rank'].value_counts(normalize=True).sort_index().cumsum().index.shape[0])

# topk_ranks = np.concatenate(topk_ranks)
# cumulative_props_topk = np.concatenate(cumulative_props_topk)
# concept_list_topk_rank = np.concatenate(concept_list_topk_rank)


# cumulative_prop_df_topk_images = pd.DataFrame({'concept':concept_list_topk_rank,'rank':topk_ranks,'cumulative_prop':cumulative_props_topk})
# cumulative_prop_df_topk_images['rank'] = cumulative_prop_df_topk_images['rank']/1854

In [None]:

def compute_cumulative_props(df, concepts, rank_column, normalize_factor):
    """Build per-concept cumulative rank distributions of the non-zero ranks.

    For each concept in ``concepts`` (matched against ``df['uniqueID']``), rows
    whose ``rank_column`` equals 0 are discarded, the remaining ranks are counted
    as proportions, ordered by rank and accumulated.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``uniqueID`` column and ``rank_column``.
    concepts : iterable
        Concepts to include, in output order.
    rank_column : str
        Column holding the ranks (0 is treated as "excluded").
    normalize_factor : number
        Every rank is divided by this value.

    Returns
    -------
    pandas.DataFrame
        Columns ``concept``, ``rank`` (rank / normalize_factor) and
        ``cumulative_prop``, with a 0..n-1 index. Concepts with no remaining rows
        contribute no rows; the frame is empty (same columns) when nothing remains.
    """
    columns = ['concept', 'rank', 'cumulative_prop']

    # Filter and split once instead of re-scanning the whole frame per concept.
    nonzero = df[df[rank_column] != 0]
    ranks_by_concept = {
        concept: ranks for concept, ranks in nonzero.groupby('uniqueID')[rank_column]
    }

    pieces = []
    for this_concept in concepts:
        ranks = ranks_by_concept.get(this_concept)
        if ranks is None:
            continue
        rank_counts = ranks.value_counts(normalize=True).sort_index().cumsum()
        if rank_counts.empty:  # e.g. every rank of this concept is missing
            continue
        pieces.append(pd.DataFrame({
            'concept': this_concept,
            'rank': rank_counts.index.to_numpy(),
            'cumulative_prop': rank_counts.to_numpy(),
        }))

    # np.concatenate on an empty list raises, so handle "nothing to stack" explicitly.
    if not pieces:
        return pd.DataFrame(columns=columns)

    cumulative_prop_df = pd.concat(pieces, ignore_index=True)[columns]
    cumulative_prop_df['rank'] = cumulative_prop_df['rank'] / normalize_factor

    return cumulative_prop_df

# Concepts to summarise and the divisor applied to every rank
concepts = things1854concepts
normalize_factor = 1854

# Drawings: cumulative rank distributions for the first guess and for the best of all guesses
cumulative_prop_df_top1_drawings, cumulative_prop_df_topk_drawings = (
    compute_cumulative_props(things_draw_recog_df, concepts, rank_col, normalize_factor)
    for rank_col in ('top1_rank', 'topk_rank')
)

# Images: only a top-k rank is available
cumulative_prop_df_topk_images = compute_cumulative_props(
    things_image_recongizability_trialwise, concepts, 'topk_rank', normalize_factor
)

# Preview the first rows of each table
for preview_df in (cumulative_prop_df_top1_drawings,
                   cumulative_prop_df_topk_drawings,
                   cumulative_prop_df_topk_images):
    print(preview_df.head())

In [None]:
# One cumulative-proportion line per concept (top-1 ranks of drawings).
plt.figure(10, figsize=(10, 10))
sns.lineplot(
    data=cumulative_prop_df_top1_drawings,
    x='rank',
    y='cumulative_prop',
    hue='concept',
    legend=False,
)
plt.title('Cumulative Proportion of Top 1 Rank', fontsize=25)
plt.xlabel('Rank', fontsize=20)
plt.ylabel('Cumulative Proportion', fontsize=20)

In [None]:
# Cumulative proportion against normalised top-1 rank, pooled over concepts,
# with a dashed gray diagonal for reference.
plt.figure(10, figsize=(10, 10))
sns.lineplot(
    data=cumulative_prop_df_top1_drawings,
    x='rank',
    y='cumulative_prop',
    legend=False,
)
# plt.title('Proportion ',fontsize=25)
plt.xlabel('Semantic distance of top1 guesses', fontsize=20)
plt.ylabel('Cumulative Proportion', fontsize=20)
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.tick_params(labelsize=20)
diagonal_top1 = np.arange(0, 1, .002)
plt.plot(diagonal_top1, np.arange(0, 1, .002), linestyle='--', color='gray', linewidth=2)
# plt.savefig('overall_AUC.pdf',dpi=300,bbox_inches='tight')


In [None]:
# Cumulative proportion against normalised top-k rank, pooled over concepts,
# with a dashed gray diagonal for reference.
plt.figure(10, figsize=(10, 10))
sns.lineplot(
    data=cumulative_prop_df_topk_drawings,
    x='rank',
    y='cumulative_prop',
    legend=False,
)
# plt.title('Proportion ',fontsize=25)
plt.xlabel('Semantic distance of top-k guesses', fontsize=20)
plt.ylabel('Cumulative Proportion', fontsize=20)
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.tick_params(labelsize=20)
diagonal_topk = np.arange(0, 1, .002)
plt.plot(diagonal_topk, np.arange(0, 1, .002), linestyle='--', color='gray', linewidth=2)
# plt.savefig('overall_AUC.pdf',dpi=300,bbox_inches='tight')


In [None]:
from sklearn.metrics import auc

In [None]:
def _anchored_auc(curve):
    """Area under one concept's cumulative curve after anchoring it at (0, 0) and (1, 1).

    `curve` is that concept's slice of a cumulative-proportion table, or None when
    the concept has no rows; in that case only the two anchors remain.
    """
    if curve is None:
        ranks = np.array([], dtype=float)
        props = np.array([], dtype=float)
    else:
        ranks = curve['rank'].to_numpy(dtype=float)
        props = curve['cumulative_prop'].to_numpy(dtype=float)
    return auc(np.concatenate(([0.0], ranks, [1.0])),
               np.concatenate(([0.0], props, [1.0])))


# Output column -> cumulative-proportion table it is computed from. The column
# names are spelled once here: the previous version declared 'auc_top_k_images'
# but filled 'auc_topk_images', leaving a spurious all-NaN column in auc_df.
auc_sources = {
    'auc_top1_drawings': cumulative_prop_df_top1_drawings,
    'auc_topk_drawings': cumulative_prop_df_topk_drawings,
    'auc_topk_images': cumulative_prop_df_topk_images,
}
# Split every table by concept once instead of filtering it inside the loop.
curves_by_concept = {
    auc_col: {concept: curve for concept, curve in table.groupby('concept')}
    for auc_col, table in auc_sources.items()
}

auc_rows = []
for this_concept in things1854concepts:
    auc_row = {'concept': this_concept}
    for auc_col, concept_curves in curves_by_concept.items():
        auc_row[auc_col] = _anchored_auc(concept_curves.get(this_concept))
    auc_rows.append(auc_row)

# Built in one go (no repeated pd.concat); the result has a unique 0..n-1 index.
auc_df = pd.DataFrame(auc_rows, columns=['concept'] + list(auc_sources))


In [None]:
# Display the per-concept AUC table
auc_df

In [None]:
# plt.figure(10,figsize=(10,10))
# sns.lineplot(data=cum_prop_df[cum_prop_df.concept=='camel'],x='rank',y='cum_prop',legend=False, linewidth=5)
# plt.title(f'Camel AUC: {0.98}',fontsize=25)
# plt.xlabel('Semantic distance of guesses', fontsize=20)
# plt.xlim(0,1)
# plt.ylim(0,1)
# plt.tick_params(labelsize=20)
# plt.ylabel('Cumulative Proportion', fontsize=20)
# plt.plot(np.arange(0,1,.002),np.arange(0,1,.002),linewidth=2,color='gray',linestyle='--')
# plt.savefig('camel_AUC.pdf',dpi=300,bbox_inches='tight')


In [None]:
# plt.figure(10,figsize=(10,10))
# sns.lineplot(data=cum_prop_df[cum_prop_df.concept=='bagpipe'],x='rank',y='cum_prop',legend=False, linewidth=5)
# plt.title(f'Bagpipe AUC: {0.43}',fontsize=25)
# plt.xlim(0,1)
# plt.ylim(0,1)
# plt.xlabel('Semantic distance of guesses', fontsize=20)
# plt.ylabel('Cumulative Proportion', fontsize=20)
# plt.tick_params(labelsize=20)
# plt.plot(np.arange(0,1,.002),np.arange(0,1,.002),linewidth=2,color='gray',linestyle='--')
# plt.savefig('bagpipe_AUC.pdf',dpi=300,bbox_inches='tight')


In [None]:
### smallest image top-k AUC over all concepts (this shows the minimum, not a count of rows above .5)
auc_df.auc_topk_images.min()

In [None]:

# Source table and the (column, panel title) pairs to plot
data = auc_df
variables = [
    ('auc_top1_drawings', 'Drawings (top 1 guesses)'),
    ('auc_topk_drawings', 'Drawings (top k guesses)'),
    ('auc_topk_images', 'Images (top k guesses)')
]

# Long form: one row per (measure, value), with the measure renamed to its panel title
measure_labels = dict(variables)
long_df = pd.melt(data, value_vars=list(measure_labels),
                  var_name='measure', value_name='value')
long_df['measure'] = long_df['measure'].map(measure_labels)

# One violin per measure
g = sns.FacetGrid(long_df, col='measure', sharex=True, sharey=True, height=10, aspect=0.8)
g.map_dataframe(sns.violinplot, y='value', saturation=1, cut=0, bw_method=.1, inner='quartile', color=tab20[1])
g.set(ylim=(0, 1))

# Panel titles are the measure labels
g.set_titles(col_template='{col_name}', size=30)


# Per panel: white background, mean with a 1.96 * SEM error bar, dashed line at 0.5
for ax in g.axes.flat:
    ax.set_facecolor('white')
    ax.tick_params(axis='y', labelsize=30)
    # the panel title equals the measure label, so it selects this panel's values
    panel_values = long_df[long_df['measure'] == ax.get_title()]['value']
    mean_auc = np.mean(panel_values)
    std_auc = np.std(panel_values)
    n = len(panel_values)
    ci = 1.96 * (std_auc / np.sqrt(n))
    ax.errorbar(x=0, y=mean_auc, yerr=ci, fmt='o', color='black', ecolor='black', capsize=5, markersize=10)
    ax.axhline(y=0.5, color='gray', linestyle='--', zorder=999, linewidth=2)
    ax.yaxis.set_tick_params(left=True, labelleft=True)  # show y ticks and labels on every panel
    ax.set_ylabel('semantic neighbor preference', fontsize=35)
    ax.set_ylim(0.3, 1)

# Horizontal spacing between panels
g.figure.subplots_adjust(wspace=0.3)

# Export and show
plt.savefig('SNP_violin_facet.pdf', dpi=300, bbox_inches='tight')
plt.show()

In [None]:


## Means and standard deviations of the three AUC measures, then their pairwise Pearson correlations
top1_drawings_auc = auc_df['auc_top1_drawings']
topk_drawings_auc = auc_df['auc_topk_drawings']
topk_images_auc = auc_df['auc_topk_images']

print(f"mean auc_top1_drawings: {np.mean(top1_drawings_auc)}")
print(f"mean auc_topk_drawings: {np.mean(topk_drawings_auc)}")
print(f"mean auc_topk_images: {np.mean(topk_images_auc)}")

print(f"std auc_top1_drawings: {np.std(top1_drawings_auc)}")
print(f"std auc_topk_drawings: {np.std(topk_drawings_auc)}")
print(f"std auc_topk_images: {np.std(topk_images_auc)}")

### pairwise correlations between the three measures

print(f"correlation between auc_top1_drawings and auc_topk_drawings: {pearsonr(top1_drawings_auc, topk_drawings_auc)}")

print(f"correlation between auc_top1_drawings and auc_topk_images: {pearsonr(top1_drawings_auc, topk_images_auc)}")

print(f"correlation between auc_topk_drawings and auc_topk_images: {pearsonr(topk_drawings_auc, topk_images_auc)}")

In [None]:
# Attach the 'CR' (stored as memorability) and 'dim_based_typ' (stored as
# typicality) columns of THINGS_mem to every concept in auc_df.
# THINGS_mem is indexed by concept once (first row per concept) instead of being
# re-filtered for every row of auc_df.
things_mem_by_concept = THINGS_mem.drop_duplicates('concept').set_index('concept')

# Fail with a readable message rather than an opaque IndexError when a concept is absent.
concepts_without_mem = sorted(set(auc_df['concept']) - set(things_mem_by_concept.index))
if concepts_without_mem:
    raise KeyError(f"concepts missing from THINGS_mem: {concepts_without_mem}")

auc_df['memorability'] = auc_df['concept'].map(things_mem_by_concept['CR'])
auc_df['typicality'] = auc_df['concept'].map(things_mem_by_concept['dim_based_typ'])

In [None]:
# Source table and the (column, panel title) pairs to plot
data = auc_df
variables = [
    ('auc_top1_drawings', 'Drawings (top 1 guesses)'),
    ('auc_topk_drawings', 'Drawings (top k guesses)'),
    ('auc_topk_images', 'Images (top k guesses)')
]

# Long form keeping typicality alongside every (measure, value) pair;
# the measure is renamed to its panel title
measure_labels = dict(variables)
long_df = pd.melt(data, id_vars=['typicality'], value_vars=list(measure_labels),
                  var_name='measure', value_name='value')
long_df['measure'] = long_df['measure'].map(measure_labels)

# One regression scatter per measure
g = sns.FacetGrid(long_df, col='measure', sharex=True, sharey=True, height=8, aspect=1)
g.map_dataframe(sns.regplot, x='typicality', y='value', line_kws={'color':'gray'}, scatter_kws={'color':tab20[1]})

# Panel titles are the measure labels
# g.set_axis_labels('typicality', 'semantic neighbour preference',size=20)
g.set_titles(col_template='{col_name}', size=30)

# Restrict the x axis to (.3, 1)
g.set(xlim=(.3, 1))

# Per panel: white background, large tick labels, explicit axis labels
for ax in g.axes.flat:
    ax.set_facecolor('white')
    ax.tick_params(axis='both', labelsize=30)
    ax.yaxis.set_tick_params(left=True, labelleft=True)  # show y ticks and labels on every panel
    ax.set_xlabel('typicality', fontsize=35)
    ax.set_ylabel('semantic neighbor preference', fontsize=35)

# Horizontal spacing between panels, leaving room for the y-axis labels
g.figure.subplots_adjust(wspace=0.2)

# Export and show
plt.savefig('typicality_vs_snp_facet.pdf', dpi=300, bbox_inches='tight')
plt.show()

# Pearson correlation of typicality with each measure
typicality_values = data['typicality']
for var, label in variables:
    r, p = pearsonr(typicality_values, data[var])
    print(f'Pearson correlation between typicality and {label}: r={r:.2f}, p={p:.2e}')

In [None]:
# Relate memorability to semantic neighbor preference: one regression panel
# per AUC measure, followed by the Pearson correlation for each measure.
data = auc_df
variables = [
    ('auc_top1_drawings', 'Drawings (top 1 guesses)'),
    ('auc_topk_drawings', 'Drawings (top k guesses)'),
    ('auc_topk_images', 'Images (top k guesses)')
]

# column name -> panel label
measure_labels = dict(variables)

# Long form: one row per (memorability, measure) pair, with readable measure labels
long_df = data.melt(
    id_vars=['memorability'],
    value_vars=list(measure_labels),
    var_name='measure',
    value_name='value',
)
long_df['measure'] = long_df['measure'].map(measure_labels)

# x-axis ticks every 0.1 from 0.6 to 1.0, labelled with one decimal place
x_ticks = np.arange(0.6, 1.01, 0.1)
x_tick_labels = [f'{x:.1f}' for x in x_ticks]

# One regression panel per measure
g = sns.FacetGrid(long_df, col='measure', sharex=True, sharey=True, height=8, aspect=1)
g.map_dataframe(sns.regplot, x='memorability', y='value', line_kws={'color':'gray'}, scatter_kws={'color':tab20[1]})
g.set_axis_labels('memorability', 'semantic neighbour preference', size=20)
g.set_titles(col_template='{col_name}', size=30)
g.set(xlim=(0.6, 1), xticks=x_ticks)

# Per-panel formatting; the axis labels set here replace the grid-level ones above
for ax in g.axes.flat:
    ax.set_facecolor('white')
    ax.tick_params(axis='both', labelsize=30)
    ax.yaxis.set_tick_params(left=True, labelleft=True)  # show y ticks/labels on every panel
    ax.set_xticklabels(x_tick_labels)
    ax.set_xlabel('memorability', fontsize=35)
    ax.set_ylabel('semantic neighbor preference', fontsize=35)

# Horizontal spacing between panels, leaving room for the y-axis labels
g.figure.subplots_adjust(wspace=0.2)

# Write the figure to disk, then display it
plt.savefig('memorability_vs_snp_facet.pdf', dpi=300, bbox_inches='tight')
plt.show()

# Pearson correlation per measure, using only rows where both values are present
memorability_values = data['memorability'].values
for var, label in variables:
    measure_values = data[var].values
    mask = ~(np.isnan(memorability_values) | np.isnan(measure_values))

    r, p = pearsonr(memorability_values[mask], measure_values[mask])

    print(f'Pearson correlation between memorability and {label}: r={r:.2f}, p={p:.2e}')



In [None]:
# Convert every entry of things1854concepts to str, element by element
to_str = np.vectorize(str)
things1854concepts_str = to_str(things1854concepts)

# Show the concepts whose name starts with 'head'
starts_with_head = np.char.startswith(things1854concepts_str, 'head')
things1854concepts_str[starts_with_head]

In [None]:
# Plot the pen trajectory of one example drawing, one color per stroke.
# Select the row once and parse its string-encoded stroke and undo fields.
example_sketch = things_draw_prod_df[things_draw_prod_df['_id']=='641a2ef188da294e33e766ae']
test_strokes = ast.literal_eval(example_sketch.strokes.values[0])
test_undos = ast.literal_eval(example_sketch.undo_history.values[0])
# NOTE(review): bare lookup whose result is discarded (not the cell's last expression)
concept2category_dict['tiara']


plt.figure(figsize=(5,5))
# 'viridis' yields 6 colors by default, so indexing by stroke number raised
# IndexError for drawings with more than 6 strokes; grow the palette when
# needed while keeping the same 6 colors for short drawings.
cm = sns.color_palette('viridis', n_colors=max(6, len(test_strokes)))

for i, stroke in enumerate(test_strokes):

    # skip the 'end' marker events of each stroke
    xs = [x['x'] for x in stroke if x['action']!='end']
    ys = [x['y'] for x in stroke if x['action']!='end']
    # y is subtracted from 550 to flip the vertical axis (presumably the canvas height — confirm)
    plt.scatter(np.array(xs),550-np.array(ys), color=cm[i])
plt.xlim(0,550)
plt.ylim(0,550)
plt.xticks([])
plt.yticks([])
ax = plt.gca()
ax.set_facecolor('white')

plt.show()

In [None]:

def render_grid(df, grid_size=8, random_state=52,
                save_path='sketch_grid_strokes.pdf', canvas_size=550):
    """Draw a random sample of sketches as a square grid of stroke scatter plots.

    Each sampled row is drawn in its own panel, titled with the row's concept
    (underscores shown as spaces). Strokes are colored by their order within
    the drawing using the 'magma_r' palette.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a 'strokes' column holding a string literal of a list of
        strokes (each a list of dicts with 'x', 'y' and 'action' keys) and a
        'concept' column holding a string.
    grid_size : int
        Number of rows and of columns in the grid.
    random_state : int
        Seed passed to ``df.sample`` so the same sketches are drawn each run.
    save_path : str or None
        Where the figure is written; ``None`` skips saving.
    canvas_size : int
        Side length used for the axis limits and to flip the y coordinate
        (presumably the drawing canvas size in pixels — confirm).

    Notes
    -----
    If ``df`` has fewer than ``grid_size ** 2`` rows, all rows are drawn and
    the remaining panels are left blank instead of raising. The 'undo_history'
    column is no longer parsed because it was never used for drawing.
    """
    n_panels = grid_size ** 2
    sampled_rows = df.sample(n=min(n_panels, len(df)), random_state=random_state)

    # squeeze=False keeps `axes` two-dimensional even when grid_size == 1
    fig, axes = plt.subplots(grid_size, grid_size, figsize=(20, 20), squeeze=False)
    flat_axes = axes.flatten()

    cm = sns.color_palette('magma_r', n_colors=100)
    color_offset = 5  # skip the first few entries of the palette
    last_color = len(cm) - 1

    for ax, (_, row) in zip(flat_axes, sampled_rows.iterrows()):
        strokes = ast.literal_eval(row['strokes'])

        for i, stroke in enumerate(strokes):
            # drop the 'end' marker events of each stroke
            points = [pt for pt in stroke if pt['action'] != 'end']
            xs = np.array([pt['x'] for pt in points])
            ys = np.array([pt['y'] for pt in points])
            # clamp the index so drawings with many strokes reuse the last
            # color instead of running off the end of the palette
            stroke_color = cm[min(i + color_offset, last_color)]
            ax.scatter(xs, canvas_size - ys, color=stroke_color)

        ax.set_title(row['concept'].replace('_', ' '), fontsize=20)

        ax.set_xlim(0, canvas_size)
        ax.set_ylim(0, canvas_size)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_facecolor('white')

    # blank out panels that received no sketch
    for ax in flat_axes[len(sampled_rows):]:
        ax.axis('off')

    plt.tight_layout()
    if save_path is not None:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
    plt.show()


In [None]:

# Example usage: draw a grid of sketches sampled from sketch_trials_df with the default grid size
render_grid(sketch_trials_df)

In [None]:
## Project the spose_cols columns of spose_embeds onto three principal components
## and store each component as its own column for plotting
from sklearn.decomposition import PCA
pca = PCA(n_components=3)
pca_result = pca.fit_transform(spose_embeds[spose_cols].values)
for component_idx, component_col in enumerate(['pca-one', 'pca-two', 'pca-three']):
    spose_embeds[component_col] = pca_result[:, component_idx]
print('Explained variation per principal component: {}'.format(pca.explained_variance_ratio_))


In [None]:
# Label each concept with its things_plus_dict category, or 'other' when it has none
spose_embeds['category'] = spose_embeds['concept'].apply(lambda concept: things_plus_dict.get(concept, 'other'))

# Scatter the first two principal components for the 'drink' concepts only
drink_embeds = spose_embeds[spose_embeds['category'] == 'drink']
sns.scatterplot(data=drink_embeds, x='pca-one', y='pca-two', hue='concept', legend=True)
### anchor the legend to the right of the axes, outside the plot
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5), ncol=1)



In [None]:
# Label each concept with its things_plus_dict category, or 'other' when it has none
spose_embeds['category'] = spose_embeds['concept'].apply(lambda concept: things_plus_dict.get(concept, 'other'))

# All concepts on the first two principal components, colored by category
plt.figure(figsize=(8,8))
sns.scatterplot(data=spose_embeds, x='pca-one', y='pca-two', hue='category', legend=False)

# Strip ticks and axis labels
plt.xticks([])
plt.yticks([])
plt.xlabel('')
plt.ylabel('')
plt.savefig('VSS2023_pca.pdf')


### survey plot

In [None]:

# Collect the free-text survey trials and extract each participant's state.
survey_df = things_draw_prod_df[things_draw_prod_df.trial_type=='survey-text'].reset_index(drop=True)
# Parse the string-encoded responses column-wise (same approach as the
# demographics cell) rather than with a row-wise DataFrame.apply.
survey_df['response'] = survey_df['response'].apply(ast.literal_eval)
# Normalise the 'State' answer: lowercase, surrounding whitespace removed
survey_df['state'] = survey_df['response'].apply(lambda x: x['State'].lower().strip())



In [None]:
# Pull the 'survey' trials and expand each parsed response into one column per key
is_survey_trial = things_draw_prod_df.trial_type=='survey'
demog_responses = things_draw_prod_df[is_survey_trial].reset_index()
demog_responses['response'] = demog_responses['response'].apply(ast.literal_eval)

# The keys of the first response decide which columns are created
first_response = demog_responses.response[0]
for key in first_response:
    demog_responses[key] = demog_responses.response.apply(lambda resp, field=key: resp[field])
    


In [None]:
# Count respondents per participantEthnicity value (presumably one of the columns expanded from the parsed survey responses — confirm)
demog_responses.participantEthnicity.value_counts()

In [None]:
# Lookup from free-text state answers (full names, abbreviations, misspellings
# and city/state combinations seen in the survey) to two-letter postal codes.
# Answers that do not identify a state map to the empty string.
state_dict = {
    'alabama': 'AL', 'al': 'AL', 'ala': 'AL', 
    'alas': 'AK', 'alaska': 'AK', 'ak': 'AK',
    'arizona': 'AZ', 'az': 'AZ', 'phoenix, az': 'AZ',
    'arkansas': 'AR', 'arkansans': 'AR', 'ar': 'AR',
    'california': 'CA', 'calif': 'CA', 'ca': 'CA', 'california ': 'CA', 'san diego county, ca':'CA',
    'colorado': 'CO', 'colo': 'CO', 'co': 'CO','colorad': 'CO','colorado, usa, 80013': 'CO','colorado, westminister': 'CO','denver': 'CO',
    'connecticut': 'CT', 'conn': 'CT', 'ct': 'CT',
    'delaware': 'DE', 'del': 'DE', 'de': 'DE', 
    'district of columbia': 'DC', 'dc': 'DC',
    'florida': 'FL', 'fla': 'FL', 'fl': 'FL','fl kissimmee': 'FL',
    'georgia': 'GA', 'ga': 'GA','georgia ': 'GA','gerogia': 'GA','georgia, usa': 'GA','atlanta, georgia': 'GA','atlanta':'GA','atlanta ga':'GA',
    'hawaii': 'HI', 'hi': 'HI', 
    'idaho': 'ID', 'id': 'ID', 
    'illinois': 'IL', 'ill': 'IL', 'il': 'IL','illionois': 'IL','illinois ': 'IL',
    'indiana': 'IN', 'ind': 'IN', 'in': 'IN', 
    'iowa': 'IA', 'ia': 'IA',
    'kansas': 'KS', 'kan': 'KS', 'ks': 'KS', 
    'kentucky': 'KY', 'ky': 'KY', 'ky.': 'KY','kentucky ': 'KY',
    # 'la/new orleans' was previously mapped to 'CA'; New Orleans is in Louisiana
    'louisiana': 'LA', 'la': 'LA', 'la/new orleans': 'LA',
    'maine': 'ME', 'me': 'ME', 
    'maryland': 'MD', 'md': 'MD', 
    'massachusetts': 'MA', 'mass': 'MA', 'ma': 'MA',
    'michigan': 'MI', 'mich': 'MI', 'mi': 'MI', 'michihan': 'MI', 'michigan ': 'MI',
    'minnesota': 'MN', 'minn': 'MN', 'mn': 'MN',
    'mississippi': 'MS', 'miss': 'MS', 'ms': 'MS', 
    'missouri': 'MO', 'mo': 'MO', 'missouri ': 'MO',
    'montana': 'MT', 'mt': 'MT',
    'nebraska': 'NE', 'nebr': 'NE', 'ne': 'NE',
    'nevada': 'NV', 'nev': 'NV', 'nv': 'NV',
    'new hampshire': 'NH', 'nh': 'NH', 
    'new jersey': 'NJ', 'nj': 'NJ', 
    'new mexico': 'NM', 'nm': 'NM',
    'new york': 'NY', 'ny': 'NY', 'nyc': 'NY',
    'north carolina': 'NC', 'n carolina': 'NC', 'nc': 'NC', 'wake forest, nc': 'NC',
    'north dakota': 'ND', 'n dakota': 'ND', 'nd': 'ND',
    'ohio': 'OH', 'oh': 'OH', 'columbus, ohio': 'OH',
    'oklahoma': 'OK', 'okla': 'OK', 'ok': 'OK','oklahoma ':'OK',
    'oregon': 'OR', 'ore': 'OR', 'or': 'OR', 
    'pennsylvania': 'PA', 'pa': 'PA', 'penn': 'PA', 'pennsylvania ': 'PA',
    'rhode island': 'RI', 'ri': 'RI',
    'south carolina': 'SC', 's carolina': 'SC', 'sc': 'SC',
    'south dakota': 'SD', 's dakota': 'SD', 'sd': 'SD', 
    'tennessee': 'TN', 'tenn': 'TN', 'tn': 'TN',
    'texas': 'TX', 'tex': 'TX', 'tx': 'TX', 
    'utah': 'UT', 'ut': 'UT',
    'vermont': 'VT', 'vt': 'VT',
    'virginia': 'VA', 'va': 'VA','virginia beach': 'VA','virginia ': 'VA','virginias': 'VA',
    'washington': 'WA', 'wash': 'WA', 'wa': 'WA','washington state': 'WA',
    'west virginia': 'WV', 'wv': 'WV', 
    'wisconsin': 'WI', 'wis': 'WI', 'wi': 'WI',
    'wyoming': 'WY', 'wyo': 'WY', 'wy': 'WY',
    # answers that could not be resolved to a state
    '':'','1':'','north':'','united state':'',
}

# Map each survey_df.state answer (already lowercased and stripped when the column
# was created) to its postal code; an answer missing from state_dict raises KeyError
survey_df['state_short'] = survey_df['state'].apply(lambda x:state_dict[x])

In [None]:


# Number of survey responses per postal code
state_counts = survey_df['state_short'].value_counts()

### pair every code with its count, ordered alphabetically by code
code_count_pairs = zip(state_counts.index, state_counts.values)
state_counts_dict = dict(sorted(code_count_pairs, key=lambda pair: pair[0]))
state_counts_dict

In [None]:
import plotly.graph_objs as go
import json

# Postal codes and their response counts, in matching order
state_codes = list(state_counts_dict.keys())
state_totals = list(state_counts_dict.values())

# Choropleth trace: each state shaded by its response count
choropleth = go.Choropleth(
    locations=state_codes,
    locationmode='USA-states',
    z=state_totals,
    colorscale='magma_r',
    colorbar_title='Count',
)

# Scattergeo trace: one marker per state sized by its count, with "<state>: <count>" hover text
marker_outline = dict(width=1, color='rgba(0, 0, 0, 0.5)')
bubble_marker = dict(
    size=state_totals,
    sizemode='diameter',
    sizeref=2,
    color='rgba(255, 0, 0, 0.7)',
    line=marker_outline,
    symbol='circle',
)
hover_labels = [f"{state}: {count}" for state, count in state_counts_dict.items()]
scattergeo = go.Scattergeo(
    mode='markers',
    locationmode='USA-states',
    locations=state_codes,
    marker=bubble_marker,
    hoverinfo='text',
    text=hover_labels,
)

# Only the choropleth trace is placed on the figure; scattergeo is built but not added
fig = go.Figure(data=[choropleth])



In [None]:

# Geo settings: USA scope, 'albers usa' projection, centred at lon -95 / lat 37
usa_geo = dict(
    scope='usa',
    projection=go.layout.geo.Projection(type='albers usa'),
    center=dict(lon=-95, lat=37),
)

# Apply the geo settings and fix the figure size
fig.update_layout(geo=usa_geo, height=500, width=800)

### demographics

In [None]:
# Inspect the unparsed response field of the 'survey' trials in things_draw_prod_df
things_draw_prod_df[things_draw_prod_df.trial_type=='survey'].response

In [None]:
### keep only the demographic rows whose workerID also appears in things_draw_recog_df

has_recog_trials = things_draw_recog_demo_df.workerID.isin(things_draw_recog_df.workerID)
things_draw_recog_demo_df = things_draw_recog_demo_df[has_recog_trials]

In [None]:
### when a workerID occurs on several rows, keep only its first row
repeated_worker_row = things_draw_recog_demo_df.duplicated(subset=['workerID'], keep='first')
things_draw_recog_demo_df = things_draw_recog_demo_df[~repeated_worker_row]

In [None]:
# Count participants per reported 'participant_sex' value (lowercased, stripped).
# NOTE(review): responses were previously parsed with eval(), which would execute
# any code embedded in a participant-supplied string; ast.literal_eval accepts
# only Python literals. Assumes each response is a dict literal — confirm.
things_draw_recog_demo_df.response.apply(lambda x: ast.literal_eval(x)['participant_sex'].lower().strip()).value_counts()

In [None]:
# Mean of the reported 'participant_age' values, each converted to int.
# NOTE(review): responses were previously parsed with eval(), which would execute
# any code embedded in a participant-supplied string; ast.literal_eval accepts
# only Python literals. Assumes each response is a dict literal — confirm.
things_draw_recog_demo_df.response.apply(lambda x: int(ast.literal_eval(x)['participant_age'])).mean()


In [None]:
# Number of distinct workerID values in things_draw_recog_df
things_draw_recog_df.workerID.nunique()

In [None]:
# Mean age of a random resample of things_draw_recog_demo_df, drawing (with
# replacement) as many rows as there are distinct workerIDs in things_draw_prod_df.
# The ages are parsed once up front instead of re-parsing a response on every draw.
# NOTE(review): responses were previously parsed with eval(); ast.literal_eval
# accepts only Python literals. Assumes each response is a dict literal — confirm.
recog_ages = things_draw_recog_demo_df.response.apply(lambda x: int(ast.literal_eval(x)['participant_age']))
n_draws = things_draw_prod_df.workerID.nunique()

# A single draw of n_draws rows with replacement replaces n_draws separate
# one-row samples; no seed is set, so the result varies between runs as before.
ages = recog_ages.sample(n=n_draws, replace=True).tolist()
np.mean(ages)


### prod df statistics

In [None]:
# 'survey' trials from things_draw_prod_df, re-indexed, keeping the first row per workerID
is_prod_survey_trial = things_draw_prod_df.trial_type=='survey'
prod_demographic_trials = (
    things_draw_prod_df[is_prod_survey_trial]
    .reset_index(drop=True)
    .drop_duplicates(subset=['workerID'],keep='first')
)

In [None]:
# Count participants per reported 'participantSex' value (lowercased, stripped).
# The responses are parsed with ast.literal_eval rather than eval(): it accepts
# only Python literals, so code embedded in a participant-supplied string is never
# executed, and it is how the same survey responses are parsed elsewhere in this file.
prod_demographic_trials.response.apply(lambda x: ast.literal_eval(x)['participantSex'].lower().strip()).value_counts()

In [None]:
# NOTE(review): evaluates to 50; presumably a participant total minus the per-category
# counts from the participantSex tally — confirm what each number refers to
1314 - 735 - 511 -18

### scratch