In [None]:
import argilla as rg
from argilla_utils import build_info_dataset, build_completion_dataset, build_annotation_dataset, make_password, make_name
import pandas as pd
from tqdm.autonotebook import tqdm
import os
import os.path as osp
import datetime

# display mode handed to build_annotation_dataset:
# 'inline', 'side' or 'grid', default: 'grid'
MODE = 'grid'

# Prolific settings (handed to build_completion_dataset)
COMPLETION_CODE = "CODE"
COMPLETION_URL = "https://app.prolific.com/submissions/complete?cc=CODE"

# random seed
RANDOM_STATE = 123

# directories
data_dir = osp.abspath('../generated_data/')
experiment_slice_dir = osp.join(data_dir, 'experiment_slices')
backup_dir = osp.join(data_dir, 'experiment_slices', 'backups')

# shell-style KEY=VALUE file holding the Argilla server credentials
credential_file = 'group_argilla_credentials.sh'

# files covering all users / partitions (located in data_dir)
full_partition_file = 'argilla_partitions.csv'
full_user_file = 'argilla_users.csv'

# files of the experiment slice to modify (located in experiment_slice_dir)
slice_partition_file = 'argilla_partitions_0.csv'
slice_user_file = 'argilla_users_0_0.csv'

# the sibling user file: '_1' if the selected user file is the '_0' one, otherwise '_0'
_other_suffix = "_1" if "_0." in slice_user_file else "_0"
other_slice_user_file = 'argilla_users_0' + _other_suffix + '.csv'

# names for files holding additions to the slice
add_partition_file = 'ADD_' + slice_partition_file
add_user_file = 'ADD_' + slice_user_file

# username of the existing slice user whose entry is duplicated
copy_from_user = 'gxxwaytb'

In [None]:
# parse credentials
# Every line of the credential file that contains '=' is read as KEY=VALUE.
with open(credential_file, 'r') as f:
    lines = f.readlines()

content_lines = [c.strip() for c in lines if "=" in c]
credentials = {}
for l in content_lines:
    # Split on the FIRST '=' only: values such as base64-padded API keys or
    # URLs with query strings contain '=' themselves and must not be truncated.
    key, value = l.split('=', 1)
    credentials[key] = value


# connect as owner to argilla server
rg.init(
    api_url=credentials['ARGILLA_API_URL'],
    api_key=credentials['OWNER_API_KEY'],
    #extra_headers={"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}
)

# print owner info
print(rg.User.me())

print(credentials['ARGILLA_API_URL'])

In [None]:
def _load_indexed_csv(filepath, description):
    """Announce and read a CSV file, using its first column as the index.

    Parameters
    ----------
    filepath : str
        Location of the CSV file.
    description : str
        Short label for the file, used in the progress message only.

    Returns
    -------
    pandas.DataFrame
        The file content, indexed by its first column.
    """
    print(f'load {description} from {filepath} ...')
    return pd.read_csv(filepath, index_col=0)


# user files of the selected slice and of its sibling
slice_user_filepath = osp.join(experiment_slice_dir, slice_user_file)
slice_user_df = _load_indexed_csv(slice_user_filepath, 'slice credentials')

other_slice_user_filepath = osp.join(experiment_slice_dir, other_slice_user_file)
other_slice_user_df = _load_indexed_csv(other_slice_user_filepath, 'other slice credentials')

# partition file of the selected slice
slice_partition_filepath = osp.join(experiment_slice_dir, slice_partition_file)
slice_partition_df = _load_indexed_csv(slice_partition_filepath, 'slice partitions')

# full (non-sliced) user and partition files; the messages name them as such
# so they can be told apart from the slice files in the cell output
full_user_filepath = osp.join(data_dir, full_user_file)
full_user_df = _load_indexed_csv(full_user_filepath, 'full credentials')

full_partition_filepath = osp.join(data_dir, full_partition_file)
full_partition_df = _load_indexed_csv(full_partition_filepath, 'full partitions')

In [None]:
# get entry for origin user

user_entry = slice_user_df.loc[slice_user_df.username == copy_from_user]
assert len(user_entry) == 1, (
    f'expected exactly one entry for user {copy_from_user!r}, found {len(user_entry)}'
)
user_entry = user_entry.iloc[0]
user_idx = user_entry.name      # index label of the origin row in slice_user_df

user_workspace = user_entry.workspace
user_partition = user_entry.partition

# select new workspace

# consider every known user (full file and both slice files) so that the
# new annotator index does not collide with a workspace that is already taken
full_and_slice_df = pd.concat([full_user_df, slice_user_df, other_slice_user_df])

partition_users = full_and_slice_df.loc[full_and_slice_df.partition == user_partition]
partition_user_workspaces = partition_users.workspace.tolist()
# The new workspace below is named '<partition>_<annotator idx>'; existing
# workspaces are assumed to follow the same scheme. Take the token after the
# LAST underscore so a partition name containing '_' cannot be mistaken for
# the annotator index.
all_partition_annotator_idx = [
    int(workspace_name.rsplit('_', 1)[1])
    for workspace_name in partition_user_workspaces
]

new_user_partition_annotator_idx = max(all_partition_annotator_idx) + 1
new_user_workspace = f'{user_partition}_{new_user_partition_annotator_idx}'

workspace_partition_map = {new_user_workspace: user_partition}

# make new username

new_user_name = ''
existing_names = full_and_slice_df.username.tolist()
while new_user_name == '' or new_user_name in existing_names:
    # ensure that generated user names are valid
    new_user_name = make_name()
new_password = make_password()

assert new_user_name not in existing_names

# make new entry

# Item assignment is used on purpose: attribute assignment on a Series only
# updates the row if the label already exists and otherwise silently sets a
# plain Python attribute, so the value would never reach the saved file.
new_user_entry = user_entry.copy()
new_user_entry['username'] = new_user_name
new_user_entry['password'] = new_password
new_user_entry['workspace'] = new_user_workspace
new_user_entry['partition_annotator_idx'] = new_user_partition_annotator_idx

In [None]:
# display the new user entry for a visual check
new_user_entry

In [None]:
# save backup of user file

time = datetime.datetime.now()
time_str = time.strftime("%Y-%m-%d_%H-%M-%S")
backup_filename = slice_user_file.replace('.csv', f'_SAVE_{time_str}.csv')

# create the backup directory if needed (no error when it already exists)
os.makedirs(backup_dir, exist_ok=True)

backup_path = osp.join(backup_dir, backup_filename)
print(f'saving backup to {backup_path}')
slice_user_df.to_csv(backup_path)

# set valid flag to false
# Select the origin row by its username rather than by its index label:
# the row appended below keeps the index label of the row it was copied
# from, so labels in this file are not guaranteed to be unique and a
# label-based lookup could invalidate other users as well.
origin_user_mask = slice_user_df.username == copy_from_user
slice_user_df.loc[origin_user_mask, 'valid'] = False

# update slice df: append the new entry as one additional row
new_slice_user_df = pd.concat([
    slice_user_df,
    pd.DataFrame(new_user_entry).T
])

# overwrite the slice user file with the updated table
new_slice_user_df.to_csv(slice_user_filepath)

In [None]:
# create the workspace for the new user on the server
rg.Workspace.create(new_user_workspace)

# create the new user as an annotator with access to that workspace only
rg.User.create(
    username=new_user_entry.username,
    password=new_user_entry.password,
    workspaces=[new_user_entry.workspace],
    role="annotator",
)

# fetch the workspace object by name (needed for dataset creation);
# exactly one workspace with that name must exist
workspaces = list(filter(
    lambda candidate: candidate.name == new_user_workspace,
    rg.Workspace.list(),
))
assert len(workspaces) == 1
workspace = workspaces[0]

In [None]:
# display the workspace object selected for dataset creation
workspace

In [None]:
# build the three datasets for the new workspace
info_dataset = build_info_dataset(workspace)
completion_dataset = build_completion_dataset(workspace, COMPLETION_CODE, COMPLETION_URL)
annotation_dataset = build_annotation_dataset(
    workspace,
    workspace_partition_map,
    slice_partition_df,
    credentials['IMG_LOCATION'],
    MODE,
    RANDOM_STATE,
)

# push the datasets to the workspace in reversed order:
# completion first, then annotation, then info;
# each dataset is named after the 'dataset_name' metadata of its first record
for dataset in (completion_dataset, annotation_dataset, info_dataset):
    dataset.push_to_argilla(
        name=dataset[0].metadata['dataset_name'],
        workspace=workspace.name,
        show_progress=False,
    )