In [None]:
import argilla as rg
from argilla_utils import build_info_dataset, build_completion_dataset, build_annotation_dataset
import pandas as pd
from tqdm.autonotebook import tqdm
import os.path as osp

# Argilla & data settings
RESET_ARGILLA = True        # delete workspaces, users and datasets
TEST_MODE = False           # restrict dataset size
if TEST_MODE:
    # NOTE: these two limits only exist when TEST_MODE is enabled
    TEST_K_SCENES = 5
    TEST_K_TANGRAMS = 14
MODE = 'grid'               # 'inline', 'side' or 'grid', default: 'grid'
assert MODE in {'inline', 'side', 'grid'}, f'invalid mode: {MODE}'
N_GROUPS = 2                # number of groups between which the tangrams are distributed, default: 2
ANNOTATIONS_PER_ITEM = 10   # default: 10

# Prolific settings
COMPLETION_CODE = "CODE"
# The URL is derived from COMPLETION_CODE so that replacing the placeholder
# code in one place can never leave the link pointing at a different code.
COMPLETION_URL = f"https://app.prolific.com/submissions/complete?cc={COMPLETION_CODE}"

# random seed
RANDOM_STATE = 123

# paths
# directory holding the partition/user CSV files (resolved relative to the
# current working directory)
data_dir = osp.abspath('../generated_data/experiment_slices/')
# KEY=VALUE file with the Argilla server URL, API key and image location
credential_file = 'group_argilla_credentials.sh'

partition_file = 'argilla_partitions_0.csv'
user_file = 'argilla_users_0_0.csv'

### Connecting to Argilla

In [None]:
# parse credentials
# The credential file is read as KEY=VALUE lines; lines without "=" are ignored.
with open(credential_file, 'r') as f:
    lines = f.readlines()
    content_lines = [c.strip() for c in lines if "=" in c]
    # Split on the FIRST "=" only: values such as URLs with query strings or
    # padded API keys may contain "=" themselves and must not be truncated.
    credentials = dict(line.split('=', 1) for line in content_lines)


# connect as owner to argilla server
rg.init(
    api_url=credentials['ARGILLA_API_URL'],
    api_key=credentials['OWNER_API_KEY'],
    # disabled in the original; presumably for token-protected deployments — confirm before enabling
    #extra_headers={"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}
)

# print owner info
rg.User.me()

In [None]:
# sanity check: display the server URL that was parsed from the credential file
credentials['ARGILLA_API_URL']

In [None]:
# Both CSV files are looked up inside the experiment-slice directory.
user_filepath = osp.join(data_dir, user_file)
partition_filepath = osp.join(data_dir, partition_file)

# user table; the first CSV column is used as the frame's index
print(f'load credentials from {user_filepath} ...')
user_df = pd.read_csv(user_filepath, index_col=0)

# partition table, read the same way
print(f'load partitions from {partition_filepath} ...')
partition_df = pd.read_csv(partition_filepath, index_col=0)

In [None]:
# Inspect the user table, but keep the plaintext `password` column out of the
# rendered cell output so it is not stored in the saved notebook.
# `user_df` itself is left untouched; errors='ignore' keeps the cell working
# if the column is absent.
user_df.drop(columns=['password'], errors='ignore')

In [None]:
# inspect the partition table loaded from `partition_file`
partition_df

In [None]:
if RESET_ARGILLA:
    # Clean slate: first every workspace together with its datasets, then
    # every user whose role is 'annotator'.
    print('existing workspaces:', [w.name for w in rg.Workspace.list()])
    print('remove existing workspaces...')

    for stale_workspace in tqdm(rg.Workspace.list()):
        # each dataset listed for the workspace is re-fetched by name and
        # deleted before the workspace itself is removed
        for listed_dataset in rg.FeedbackDataset.list(workspace=stale_workspace.name):
            remote_dataset = rg.FeedbackDataset.from_argilla(
                name=listed_dataset.name,
                workspace=stale_workspace.name,
            )
            remote_dataset.delete()

        stale_workspace.delete()

    annotators = [account for account in rg.User.list() if account.role == 'annotator']
    print('existing annotators:', [account.username for account in annotators])
    print('remove existing annotator users...')

    for annotator in tqdm(annotators):
        annotator.delete()

### Create Argilla Workspaces and Users

In [None]:
# create argilla workspaces

# one workspace name per row of the user table
workspace_names = user_df.workspace.to_list()
# the part of a workspace name before its first underscore is stored as the
# partition key for that workspace
workspace_partition_map = {w: w.split('_')[0] for w in workspace_names}
for workspace_name in tqdm(workspace_names):
    rg.Workspace.create(workspace_name)

# Keep only the workspaces created above. Listing the server unfiltered would
# also return workspaces that were not created from `user_df` (e.g. when
# RESET_ARGILLA is off); those have no entry in `workspace_partition_map`
# and must not receive datasets.
ann_workspaces = [w for w in rg.Workspace.list() if w.name in workspace_partition_map]

In [None]:
# create argilla users

# One annotator account per row of the user table.
for _, row in tqdm(user_df.iterrows(), total=len(user_df)):
    account_kwargs = dict(
        username=row.username,
        password=row.password,
        # NOTE: the original comment mentions also assigning info and finish
        # workspaces; only the row's own workspace is passed here.
        workspaces=[row.workspace],
        role="annotator",
    )
    rg.User.create(**account_kwargs)

# query the server again so `annotators` reflects the accounts that now exist
annotators = [account for account in rg.User.list() if account.role == 'annotator']

In [None]:
# each row of the user table yields one workspace and one annotator, so the
# two listings are expected to have the same length
assert len(ann_workspaces) == len(annotators)

### Create Records and Push to Workspaces

In [None]:
for workspace in tqdm(ann_workspaces):

    # build the three datasets for this workspace
    info_dataset = build_info_dataset(workspace)
    completion_dataset = build_completion_dataset(workspace, COMPLETION_CODE, COMPLETION_URL)
    annotation_dataset = build_annotation_dataset(
        workspace,
        workspace_partition_map,
        partition_df,
        credentials['IMG_LOCATION'],
        MODE,
        RANDOM_STATE,
    )

    # Upload order is completion -> annotation -> info (noted as "reversed
    # order" in the original). Each dataset is named after the 'dataset_name'
    # metadata entry of its first record.
    for dataset in (completion_dataset, annotation_dataset, info_dataset):
        dataset.push_to_argilla(
            name=dataset[0].metadata['dataset_name'],
            workspace=workspace.name,
            show_progress=False,
        )