In [None]:
# Prerequisites
import json
import os
from eye_ai.eye_ai import EyeAI

import pandas as pd
from pathlib import Path, PurePath
import logging

from deriva_ml import DatasetBag, Workflow, ExecutionConfiguration
from deriva_ml import MLVocab as vc
# Root logger: INFO level with "timestamp - level - message" lines.
# force=True is passed so this configuration is applied even if the root
# logger was already configured earlier in the kernel session.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    force=True,
)

In [None]:
# Login
# Authenticate against the catalog host via Globus before any catalog access.
from deriva.core.utils.globus_auth_utils import GlobusNativeLogin

# Target server; switch to the commented-out line to run against dev instead.
host = 'www.eye-ai.org'
# host = 'dev.eye-ai.org'
catalog_id = "eye-ai"

gnl = GlobusNativeLogin(host=host)
if not gnl.is_logged_in([host]):
    # No local server / no browser: the login is requested in a form that
    # presumably suits a headless notebook session -- TODO confirm.
    gnl.login(
        [host],
        no_local_server=True,
        no_browser=True,
        refresh_tokens=True,
        update_bdbag_keychain=True,
    )
    print("Login Successful")
else:
    print("You are already logged in.")

In [None]:
# The cache directory and the working directory both point at /data.
cache_dir = working_dir = '/data'

# Client object used by every later cell to talk to the catalog.
EA = EyeAI(
    hostname=host,
    catalog_id=catalog_id,
    cache_dir=cache_dir,
    working_dir=working_dir,
)

In [None]:
# Register a workflow-type vocabulary term.
# NOTE(review): the term added here is "Test Workflow", while the Workflow
# below is declared with workflow_type="Data_Model_Changes" -- confirm that
# "Data_Model_Changes" already exists in the catalog vocabulary.
EA.add_term(
    vc.workflow_type,
    "Test Workflow",
    description="A test Workflow for new DM",
)

# Workflow instance describing this notebook.
new_workflow = Workflow(
    workflow_type="Data_Model_Changes",
    url="https://github.com/informatics-isi-edu/eye-ai-exec/blob/main/notebooks/schema_changes/fix_diag_exec.ipynb",
    name="Fill diag exec_rid",
)

# Shared configuration passed to every EA.create_execution(...) call below.
config = ExecutionConfiguration(
    description="fill execution rid for UI annotation on prod",
    workflow=new_workflow,
)


In [None]:
def batchinsert(table, entities, batch_size=2000):
    """Insert ``entities`` into ``table`` in consecutive batches.

    Args:
        table: Object exposing ``insert(rows, nondefaults=...)``.
        entities: Sequence of rows; must support ``len()`` and slicing.
        batch_size: Maximum number of rows sent per ``insert`` call
            (defaults to 2000).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.

    An empty ``entities`` sequence is a no-op: no insert call is made.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    total = len(entities)
    # Stepping by batch_size covers the full batches and the trailing partial
    # batch in one loop, and never divides by the (possibly zero) input size.
    for start in range(0, total, batch_size):
        stop = min(start + batch_size, total)
        # RCB/RCT are passed as nondefaults; presumably this makes the server
        # keep the supplied values instead of its defaults -- TODO confirm
        # against the datapath insert documentation.
        table.insert(entities[start:stop], nondefaults={'RCB', 'RCT'})
        logging.info("Processed batch: %d to %d", start, stop)

In [None]:
def _insert_with_new_execution(table, records):
    """Create a new execution, stamp its RID on ``records`` and insert them into ``table``.

    Returns the list of row dicts that was handed to ``batchinsert``.
    """
    execution = EA.create_execution(config)
    # Work on a copy so the caller's DataFrame slice is not modified.
    stamped = records.copy()
    stamped['Execution'] = execution.execution_rid
    # One dict per row, with null-valued columns left out of the dict.
    entities = stamped.apply(
        lambda row: {k: v for k, v in row.items() if pd.notnull(v)}, axis=1).tolist()
    batchinsert(table, entities)
    return entities


def update_exec_rid(diag_records):
    """Insert diagnosis rows into ``Image_Diagnosis`` with an ``Execution`` value attached.

    Rows are grouped by their ``Diagnosis_Tag`` value:

    * ``'CNN_Prediction'`` rows are skipped.
    * ``'Initial Diagnosis'`` rows are inserted together under one new execution.
    * For every other tag, rows are split by ``RCB`` value and each non-empty
      (tag, RCB) group is inserted under its own new execution.

    Args:
        diag_records: DataFrame with at least ``Diagnosis_Tag`` and ``RCB`` columns.

    Returns:
        dict keyed by tag. For ``'Initial Diagnosis'`` the value is the list of
        inserted row dicts; for other tags it is a dict mapping each ``RCB``
        value that had rows to its list of inserted row dicts.
    """
    results = {}
    diagnosis = EA.domain_path.Image_Diagnosis
    tags = diag_records['Diagnosis_Tag'].unique()
    graders = diag_records['RCB'].unique()
    for tag in tags:
        if tag == 'CNN_Prediction':
            continue

        # Filter by tag once; the per-grader split below reuses this slice.
        tag_records = diag_records[diag_records['Diagnosis_Tag'] == tag]

        if tag == 'Initial Diagnosis':
            results[tag] = _insert_with_new_execution(diagnosis, tag_records)
            continue

        print('else')
        results[tag] = {}
        for grader in graders:
            grader_records = tag_records[tag_records['RCB'] == grader]
            if len(grader_records) == 0:
                # This RCB value has no rows for the current tag.
                print("The DataFrame is empty.")
            else:
                results[tag][grader] = _insert_with_new_execution(diagnosis, grader_records)
    return results



In [None]:
# Load the three diagnosis exports; paths are relative to the notebook's
# current working directory.
newlac_diag = pd.read_csv(filepath_or_buffer='newlac_diag.csv')
lac_diag = pd.read_csv(filepath_or_buffer='lac_diag.csv')
rot_diag = pd.read_csv(filepath_or_buffer='rot_diag.csv')

In [None]:
# Process the rows loaded from lac_diag.csv. This call performs catalog
# inserts via update_exec_rid; re-running the cell repeats them
# (TODO confirm whether the catalog rejects duplicates).
re_lac = update_exec_rid(lac_diag)

In [None]:
# Process the rows loaded from rot_diag.csv. This call performs catalog
# inserts via update_exec_rid; re-running the cell repeats them
# (TODO confirm whether the catalog rejects duplicates).
re_rot = update_exec_rid(rot_diag)

In [None]:
# Process the rows loaded from newlac_diag.csv. This call performs catalog
# inserts via update_exec_rid; re-running the cell repeats them
# (TODO confirm whether the catalog rejects duplicates).
re_newlac = update_exec_rid(newlac_diag)