In [None]:
import pandas as pd
import os
import os.path as osp
from PIL import Image
import requests
from io import BytesIO
import numpy as np

# --- Configuration: which experiment slice is screened and where its files live ---

# Identifier of the slice under review.
# NOTE(review): this name shadows the built-in `slice` for the rest of the
# notebook; it is kept unchanged so that other cells keep working.
slice = '0_0'

# Directory the item images are loaded from in the review cells.
IMG_LOCATION = osp.abspath('../generated_items/')

# Root directory of the generated data; all paths below are derived from it.
data_dir = osp.abspath('../generated_data/')

# The credentials folder is named after the slice, with '-' in place of '_'.
credentials_dir = osp.join(data_dir, 'credentials_data', '-'.join(slice.split('_')))

# Per-slice inputs/outputs all sit under <data_dir>/experiment_slices.
_slices_dir = osp.join(data_dir, 'experiment_slices')
_results_dir = osp.join(_slices_dir, 'results', slice)

users_file = osp.join(_slices_dir, f'argilla_users_{slice}.csv')
anns_file = osp.join(_results_dir, 'collected_annotations.csv')
screening_file = osp.join(_results_dir, 'annotation_screening.csv')

In [None]:
# Raw annotations for this slice; the printed length is the row count before
# any merging, to be compared with the count printed at the end of the cell.
_anns = pd.read_csv(anns_file, index_col=0)
print(len(_anns))

completion_data = pd.read_csv(osp.join(data_dir, 'completion_data.csv'), index_col=0)

# The second CSV column becomes the index; `username` and `valid` have to stay
# regular columns because they are selected for the merge below.
users_df = pd.read_csv(users_file, index_col=1)

# os.listdir returns entries in arbitrary order and also lists hidden files and
# sub-directories (e.g. `.ipynb_checkpoints`). Read the credential files in a
# stable order and skip everything that is hidden or not a regular file.
credential_paths = [
    osp.join(credentials_dir, name)
    for name in sorted(os.listdir(credentials_dir))
    if not name.startswith('.')
]
credential_dfs = [
    pd.read_csv(path, index_col=0)
    for path in credential_paths
    if osp.isfile(path)
]
credential_df = pd.concat(credential_dfs).reset_index()
# Keep only the credentials whose Status is 'USED'.
credential_df = credential_df[credential_df.Status == 'USED']

# Attach each user's `valid` flag. pd.merge defaults to an inner join, so
# credentials without a matching `username` in users_df are dropped here.
credential_df = pd.merge(credential_df, users_df[['username', 'valid']], left_on='Username', right_on='username')

# Attach participant id and `valid` flag to every annotation (inner join again:
# annotations from users without a retained credential row are dropped).
anns = pd.merge(
    _anns,
    credential_df[['Username', 'Participant Id', 'valid']],
    left_on='user_name',
    right_on='Username'
).rename(columns={'Participant Id': 'prolific_id'})
# Row count after merging; compare with the first printed number.
print(len(anns))

In [None]:
# Display the credential table for visual inspection.
credential_df

In [None]:
# Keep only the rows whose `valid` flag is set, in both the annotation table
# and the credential table. Cells that read `anns` / `credential_df` after
# this one therefore only see valid users.
ann_is_valid = anns['valid']
anns = anns[ann_is_valid]

credential_is_valid = credential_df['valid']
credential_df = credential_df[credential_is_valid]

In [None]:
# Number of non-null `raw_annotation` entries per user (indexed by user_name).
user_ann_counts = anns.groupby('user_name')['raw_annotation'].count()

# Credentials joined with those counts, fewest annotations first.
credential_df.merge(
    user_ann_counts,
    left_on='Username',
    right_index=True,
).sort_values(by='raw_annotation')

In [None]:
# How many annotations each item received ...
anns_per_item = anns.groupby('item_id').size()
# ... and how many items share each of those counts.
anns_per_item.value_counts()

In [None]:
# How many annotations each user submitted ...
anns_per_user = anns.groupby('user_name').size()
# ... and how many users share each of those counts.
anns_per_user.value_counts()

# per Annotator

In [None]:
# Columns copied from the credential table, and the columns that start out
# empty in the screening file.
_SCREENING_ID_COLUMNS = ['username', 'Participant Id']
_SCREENING_BLANK_COLUMNS = ['ok', 'comments', 'requested_return']


def _blank_screening_rows(credentials):
    """Return one screening row per row of `credentials`, blank columns set to ''.

    `DataFrame.assign` returns a new frame, so nothing is written into a slice
    of `credentials`; assigning columns on such a slice in place is what
    triggers pandas' SettingWithCopyWarning. The index of `credentials` is kept.
    """
    blanks = {column: '' for column in _SCREENING_BLANK_COLUMNS}
    return credentials[_SCREENING_ID_COLUMNS].assign(**blanks)


if not osp.isfile(screening_file):
    # First run for this slice: start the screening file from the credentials.
    print('make new file...')
    screening_df = _blank_screening_rows(credential_df)
    screening_df.to_csv(screening_file)
else:
    # Later runs: continue from whatever was saved before.
    print('use existing file...')
    screening_df = pd.read_csv(screening_file, index_col=0)

# add new entries from credentials file
additional_entries = credential_df.loc[
    ~credential_df.username.isin(screening_df.username.values)]

if len(additional_entries) > 0:
    print(f'expanding file with {len(additional_entries)} additional entries...')
    additional_entries = _blank_screening_rows(additional_entries)

    # Append the new users, renumber the rows and persist the extended table.
    screening_df = pd.concat([screening_df, additional_entries]).reset_index(drop=True)
    screening_df.to_csv(screening_file)

In [None]:
def _as_flag(value, yes_values):
    """Turn one screening cell into a bool.

    A cell counts as True when it equals one of `yes_values`. Values that are
    already booleans are passed through unchanged, so re-running this cell
    keeps the flags instead of resetting them all to False (`True == 'y'` is
    False). Anything else (empty string, NaN, other text) maps to False.
    """
    if isinstance(value, (bool, np.bool_)):
        return bool(value)
    return value in yes_values


# `ok` is set by the marker 'y'; `requested_return` by 'y' or 'returned'.
screening_df.ok = screening_df.ok.map(lambda x: _as_flag(x, ['y']))
screening_df.requested_return = screening_df.requested_return.map(
    lambda x: _as_flag(x, ['y', 'returned']))

In [None]:
# Display the screening table for visual inspection.
screening_df

In [None]:
# User whose annotations are reviewed in this cell.
user = 'xxlsnhpt'

# A user with exactly this many annotations is reported as "complete".
EXPECTED_ANNS_PER_USER = 37
# Seed for the shuffle below: sorting by item_id first and then shuffling with
# a fixed seed gives the same review order on every run, independent of the
# row order of `anns`.
REVIEW_SHUFFLE_SEED = 0

user_anns = (
    anns.loc[anns.user_name == user]
    .sort_values('item_id')
    .sample(frac=1.0, random_state=REVIEW_SHUFFLE_SEED)
)
status = "complete" if len(user_anns) == EXPECTED_ANNS_PER_USER else "INCOMPLETE"
print(f'{user} : {len(user_anns)} entries ({status})', '\n------------------------\n')

for _row_label, ann in user_anns.iterrows():
    print(ann.item_id)

    # The image is read from the local IMG_LOCATION folder; only the last
    # path component of the URL is used to find it.
    img_name = osp.split(ann.image_url)[-1]
    img_path = osp.join(IMG_LOCATION, img_name)
    # resize() returns a new in-memory image, so the file can be closed
    # before the thumbnail is displayed.
    with Image.open(img_path) as full_img:
        img = full_img.resize((256,256))
    display(img)

    print(ann.raw_annotation)

    print('\n--------------------\n')

# per Item

In [None]:
# Number of items shown by this cell (the first rows of the per-item table).
N_PREVIEW_ITEMS = 11

# One row per item; every other column becomes the list of that item's values.
agg_anns = anns.groupby('item_id').agg(list).reset_index()

for _row_label, item_row in agg_anns.head(N_PREVIEW_ITEMS).iterrows():
    # The image is read from the local IMG_LOCATION folder; only the last path
    # component of the item's first image URL is used to find it.
    img_name = osp.split(item_row.image_url[0])[-1]
    img_path = osp.join(IMG_LOCATION, img_name)
    with Image.open(img_path) as img:
        display(img)

    # All annotations for this item, each prefixed with its annotator.
    for annotation, annotator in zip(item_row.raw_annotation, item_row.user_name):
        print(f'{annotator}\t:\t{annotation}')

    print('\n-------------------------------\n')