In [1]:
from pathlib import Path
import sys

# Define the base repository directory
repo_dir = Path.home() / "Desktop" / "eye_ai" / "Github"  # Update to your GitHub repo location

# Update the load path so Python can find modules for the model
sys.path.insert(0, str(repo_dir / "deriva-ml"))
sys.path.insert(0, str(repo_dir / "eye-ai-ml"))

# Reload extensions if needed
%load_ext autoreload
%autoreload 2


In [2]:
# Prerequisites
import json
import os
from eye_ai.eye_ai import EyeAI
import pandas as pd
from pathlib import Path, PurePath
import logging

from deriva_ml.deriva_ml_base import MLVocab as vc, Status
from deriva_ml.execution_configuration import ExecutionConfiguration, Workflow, Execution

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)

In [3]:
from deriva.core.utils.globus_auth_utils import GlobusNativeLogin
# host = 'dev.eye-ai.org'
# catalog_id = "428"

host = 'www.eye-ai.org'
catalog_id = "21"
# catalog_id = "eye-ai"

gnl = GlobusNativeLogin(host=host)
if gnl.is_logged_in([host]):
    print("You are already logged in.")
else:
    gnl.login([host], no_local_server=True, no_browser=True, refresh_tokens=True, update_bdbag_keychain=True)
    print("Login Successful")

2024-11-21 11:50:47,971 - INFO - Creating client of type <class 'globus_sdk.services.auth.client.native_client.NativeAppAuthClient'> for service "auth"
2024-11-21 11:50:47,973 - INFO - Finished initializing AuthLoginClient. client_id='8ef15ba9-2b4a-469c-a163-7fd910c9d111', type(authorizer)=<class 'globus_sdk.authorizers.base.NullAuthorizer'>


You are already logged in.


In [4]:
# Variables to configure the rest of the notebook.
cache_dir = Path.home() / 'Desktop/test_cache'
working_dir = Path.home() / 'Desktop/test_cache'
EA = EyeAI(hostname = host, catalog_id = catalog_id, cache_dir= cache_dir, working_dir=working_dir)

2024-11-21 11:50:50,783 - INFO - Creating client of type <class 'globus_sdk.services.auth.client.native_client.NativeAppAuthClient'> for service "auth"
2024-11-21 11:50:50,784 - INFO - Finished initializing AuthLoginClient. client_id='8ef15ba9-2b4a-469c-a163-7fd910c9d111', type(authorizer)=<class 'globus_sdk.authorizers.base.NullAuthorizer'>
2024-11-21 11:50:52,629 - INFO - Loading dirty model.  Consider commiting and tagging: 1.1.0.post76+git.403fdd90.dirty


# Configuration

In [5]:
# Add Workflow Vocab terms
EA.add_term(vc.workflow_type, "Data_Model_Changes", description="Workflows to support data model changes")
# Workflow instance
new_workflow = Workflow(
    name="Fill diag exec_rid",
    url="https://github.com/informatics-isi-edu/eye-ai-exec/blob/Data_Model_Update/notebooks/schema_changes/fill_diag_exec_rid.ipynb",
    workflow_type="Data_Model_Changes"
)
# Configuration instance
config = ExecutionConfiguration(
    bdbags=[],
    models = [],
    execution=Execution(description="fill execution rid for UI annotation on dev"),
    workflow=new_workflow)
# Initialize execution
# configuration_record = EA.initialize_execution(config)
# execution_rid = configuration_record.execution_rid

In [6]:
def check_dup(dataset_rid: str):
    results = subject_dataset.path\
        .filter(subject_dataset.Dataset == dataset_rid) \
        .link(subject, on=subject_dataset.Subject == subject.RID) \
        .link(observation, on=subject.RID == observation.Subject) \
        .link(image, on=observation.RID == image.Observation) \
        .link(diagnosis, on=image.RID == diagnosis.Image)

    diag_records = pd.DataFrame(results.entities().fetch())
    diag_records = pd.merge(diag_records, pd.DataFrame(EA.user_list()), how="left", left_on='RCB', right_on='ID')
    diag_records = diag_records[diag_records['Diagnosis_Tag'] != 'CNN_Prediction']
    return diag_records[diag_records.duplicated(subset=["Image", "Diagnosis_Tag","Full_Name"], keep="first")]

In [8]:
subject_dataset = EA.domain_schema_instance.Subject_Dataset
subject = EA.domain_schema_instance.Subject
image = EA.domain_schema_instance.Image
observation = EA.domain_schema_instance.Observation
diagnosis = EA.domain_schema_instance.Diagnosis

lac = '2-1S12'
new_lac = '2-N93J'

In [9]:
lac_dup = check_dup(lac)
new_lac_dup = check_dup(new_lac)

In [11]:
lac_dup

Unnamed: 0,RID,RCT,RMT,RCB,RMB,Execution,Image,Feature_Name,Diagnosis_Image,Image_Quality,Diagnosis_Tag,Diagnosis_Status,Cup/Disk_Ratio,Comments,Process,ID,Full_Name


In [12]:
def batchUpdate(table, entities, update_cols):
    n = len(entities)
    batch_num = min(2000, n)
    for i in range(n//batch_num):
        table.update(entities[i*batch_num: (i+1)*batch_num], [table.RID], update_cols)
        logging.info("Processed batch: %d to %d", i * batch_num, (i + 1) * batch_num)
    if (i+1)*batch_num < n:
        table.update(entities[(i+1)*batch_num: n], [table.RID], update_cols)
        logging.info("Processed batch: %d to %d", (i + 1) * batch_num, n)

In [13]:


def update_exec_rid(dataset_rid: str):
    results = subject_dataset.path\
        .filter(subject_dataset.Dataset == dataset_rid) \
        .link(subject, on=subject_dataset.Subject == subject.RID) \
        .link(observation, on=subject.RID == observation.Subject) \
        .link(image, on=observation.RID == image.Observation) \
        .link(diagnosis, on=image.RID == diagnosis.Image)

    diag_records = pd.DataFrame(results.entities().fetch())
    diag_records = pd.merge(diag_records, pd.DataFrame(EA.user_list()), how="left", left_on='RCB', right_on='ID')
    tags = diag_records['Diagnosis_Tag'].unique()
    graders = diag_records['Full_Name'].unique()
    for tag in tags:
        if tag == 'CNN_Prediction':
            continue
        elif tag == 'Initial Diagnosis':
            update_rec = diag_records[diag_records['Diagnosis_Tag'] == tag][['RID', 'Execution']]
            configuration_record = EA.initialize_execution(config)
            execution_rid = configuration_record.execution_rid
            update_rec['Execution'] = execution_rid
            update_entities = update_rec.to_dict(orient='records')
            batchUpdate(diagnosis, update_entities, [diagnosis.Execution])
            EA.update_status(Status.completed, "update execution rid to diagnosis", execution_rid)
        else:
            for grader in graders:
                update_rec = diag_records[(diag_records['Diagnosis_Tag'] == tag) & (diag_records['Full_Name'] == grader)][['RID', 'Execution']]
                if len(update_rec) == 0:
                    print("The DataFrame is empty.")
                else:
                    configuration_record = EA.initialize_execution(config)
                    execution_rid = configuration_record.execution_rid
                    update_rec['Execution'] = execution_rid
                    update_entities = update_rec.to_dict(orient='records')
                    batchUpdate(diagnosis, update_entities, [diagnosis.Execution])
                    EA.update_status(Status.completed, "update execution rid to diagnosis", execution_rid)


In [14]:
rotterdam = '1-EATE'
result = update_exec_rid(rotterdam)

2024-11-21 11:52:05,430 - INFO - Configuration validation successful!
2024-11-21 11:52:09,410 - INFO - Processed batch: 0 to 2000
2024-11-21 11:52:10,316 - INFO - Processed batch: 2000 to 4000
2024-11-21 11:52:11,154 - INFO - Processed batch: 4000 to 6000
2024-11-21 11:52:12,178 - INFO - Processed batch: 6000 to 8000
2024-11-21 11:52:13,286 - INFO - Processed batch: 8000 to 10000
2024-11-21 11:52:14,281 - INFO - Processed batch: 10000 to 12000
2024-11-21 11:52:15,162 - INFO - Processed batch: 12000 to 14000
2024-11-21 11:52:16,145 - INFO - Processed batch: 14000 to 16000
2024-11-21 11:52:17,222 - INFO - Processed batch: 16000 to 18000
2024-11-21 11:52:18,215 - INFO - Processed batch: 18000 to 20000
2024-11-21 11:52:19,252 - INFO - Processed batch: 20000 to 22000
2024-11-21 11:52:20,129 - INFO - Processed batch: 22000 to 24000
2024-11-21 11:52:21,197 - INFO - Processed batch: 24000 to 26000
2024-11-21 11:52:22,560 - INFO - Processed batch: 26000 to 28000
2024-11-21 11:52:23,598 - INFO -

In [15]:
lac = '2-1S12'
new_lac = '2-N93J'
result = update_exec_rid(lac)

2024-11-21 11:56:46,207 - INFO - Configuration validation successful!
2024-11-21 11:56:48,667 - INFO - Processed batch: 0 to 2000
2024-11-21 11:56:49,197 - INFO - Processed batch: 2000 to 3200
2024-11-21 11:56:49,424 - INFO - Configuration validation successful!


The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.


2024-11-21 11:56:50,667 - INFO - Processed batch: 0 to 13
2024-11-21 11:56:50,779 - INFO - Configuration validation successful!
2024-11-21 11:56:52,078 - INFO - Processed batch: 0 to 30
2024-11-21 11:56:52,301 - INFO - Configuration validation successful!


The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.


2024-11-21 11:56:53,854 - INFO - Processed batch: 0 to 800
2024-11-21 11:56:54,158 - INFO - Configuration validation successful!


The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.


2024-11-21 11:56:56,142 - INFO - Processed batch: 0 to 1000
2024-11-21 11:56:56,262 - INFO - Configuration validation successful!
2024-11-21 11:56:57,866 - INFO - Processed batch: 0 to 1000
2024-11-21 11:56:57,979 - INFO - Configuration validation successful!
2024-11-21 11:56:59,820 - INFO - Processed batch: 0 to 1000
2024-11-21 11:56:59,935 - INFO - Configuration validation successful!
2024-11-21 11:57:01,683 - INFO - Processed batch: 0 to 1000
2024-11-21 11:57:01,804 - INFO - Configuration validation successful!
2024-11-21 11:57:03,427 - INFO - Processed batch: 0 to 1000
2024-11-21 11:57:03,620 - INFO - Configuration validation successful!
2024-11-21 11:57:05,443 - INFO - Processed batch: 0 to 1000
2024-11-21 11:57:05,555 - INFO - Configuration validation successful!
2024-11-21 11:57:07,277 - INFO - Processed batch: 0 to 1000
2024-11-21 11:57:07,390 - INFO - Configuration validation successful!
2024-11-21 11:57:09,014 - INFO - Processed batch: 0 to 1000
2024-11-21 11:57:09,124 - INFO

The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.
The DataFrame is empty.


2024-11-21 11:57:20,405 - INFO - Processed batch: 0 to 2000
2024-11-21 11:57:21,280 - INFO - Processed batch: 2000 to 4000
2024-11-21 11:57:22,301 - INFO - Processed batch: 4000 to 6000
2024-11-21 11:57:23,224 - INFO - Processed batch: 6000 to 8000
2024-11-21 11:57:24,145 - INFO - Processed batch: 8000 to 10000
2024-11-21 11:57:25,148 - INFO - Processed batch: 10000 to 12000
2024-11-21 11:57:26,354 - INFO - Processed batch: 12000 to 14000
2024-11-21 11:57:27,308 - INFO - Processed batch: 14000 to 16000
2024-11-21 11:57:28,241 - INFO - Processed batch: 16000 to 18000
2024-11-21 11:57:29,214 - INFO - Processed batch: 18000 to 20000
2024-11-21 11:57:30,068 - INFO - Processed batch: 20000 to 22000
2024-11-21 11:57:31,350 - INFO - Processed batch: 22000 to 24000
2024-11-21 11:57:32,649 - INFO - Processed batch: 24000 to 26000
2024-11-21 11:57:33,558 - INFO - Processed batch: 26000 to 28000
2024-11-21 11:57:34,486 - INFO - Processed batch: 28000 to 30000
2024-11-21 11:57:35,389 - INFO - Proc

In [108]:
update_exec_rid(new_lac)

2024-11-20 23:15:48,841 - INFO - Configuration validation successful!
2024-11-20 23:15:52,275 - INFO - Processed batch: 0 to 2000
2024-11-20 23:15:53,737 - INFO - Processed batch: 2000 to 4000
2024-11-20 23:15:54,703 - INFO - Processed batch: 4000 to 6000
2024-11-20 23:15:55,828 - INFO - Processed batch: 6000 to 8000
2024-11-20 23:15:56,732 - INFO - Processed batch: 8000 to 10000


DataPathException: DETAIL:  Key ("Execution", "Image", "Feature_Name")=(5-R6C2, 2-D2GY, Diagnosis) already exists.

409 Client Error: CONFLICT for url: [https://dev.eye-ai.org/ermrest/catalog/428/attributegroup/eye-ai:Diagnosis/RID;Execution] Details: b'Request conflicts with state of server. Detail: Input data violates model. ERROR:  duplicate key value violates unique constraint "Execution_Image_Image_Diagnosis_assoc_key"\nDETAIL:  Key ("Execution", "Image", "Feature_Name")=(5-R6C2, 2-D2GY, Diagnosis) already exists.\n\n'