In [1]:
repo_dir = "Repos"   # Set this to be where your github repos are located.
%load_ext autoreload
%autoreload 2

# Update the load path so python can find modules for the model
import sys
from pathlib import Path
sys.path.insert(0, str(Path.home() / repo_dir / "deriva-ml"))
sys.path.insert(0, str(Path.home() / repo_dir / "eye-ai-ml"))

In [2]:
# Prerequisites
import json
import os
from eye_ai.eye_ai import EyeAI
import pandas as pd
from pathlib import Path, PurePath
import logging
from deriva.chisel import Model, Schema, Table, Column, Key, ForeignKey, builtin_types
from deriva_ml.deriva_ml_base import DerivaML, DerivaMLException, FileUploadState, UploadState, ColumnDefinition, BuiltinTypes


# Configure the root logger for INFO-and-above records with a timestamped format.
# force=True replaces any handlers that were already attached to the root logger.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True,
)

In [3]:
from deriva.core.utils.globus_auth_utils import GlobusNativeLogin

# Alternative target kept for reference:
# host = 'dev.eye-ai.org'
# catalog_id = "428"

# Server and catalog used by the rest of the notebook.
host = 'www.eye-ai.org'
catalog_id = "21"

# Only start a login flow when there is no existing session for this host.
gnl = GlobusNativeLogin(host=host)
if not gnl.is_logged_in([host]):
    gnl.login(
        [host],
        no_local_server=True,
        no_browser=True,
        refresh_tokens=True,
        update_bdbag_keychain=True,
    )
    print("Login Successful")
else:
    print("You are already logged in.")

2024-11-06 09:57:30,698 - INFO - Creating client of type <class 'globus_sdk.services.auth.client.native_client.NativeAppAuthClient'> for service "auth"
2024-11-06 09:57:30,698 - INFO - Finished initializing AuthLoginClient. client_id='8ef15ba9-2b4a-469c-a163-7fd910c9d111', type(authorizer)=<class 'globus_sdk.authorizers.base.NullAuthorizer'>


You are already logged in.


In [4]:
# Directory in which to cache materialized BDBags for datasets.
cache_dir = '/data'
# Directory in which to place output files for later upload.
working_dir = '/data'

# Client object used for all catalog operations in the cells below.
EA = EyeAI(
    hostname=host,
    catalog_id=catalog_id,
    cache_dir=cache_dir,
    working_dir=working_dir,
)

2024-11-06 09:58:04,968 - INFO - Creating client of type <class 'globus_sdk.services.auth.client.native_client.NativeAppAuthClient'> for service "auth"
2024-11-06 09:58:04,969 - INFO - Finished initializing AuthLoginClient. client_id='8ef15ba9-2b4a-469c-a163-7fd910c9d111', type(authorizer)=<class 'globus_sdk.authorizers.base.NullAuthorizer'>
2024-11-06 09:58:06,537 - INFO - have working dir at /data/lizhiwei/EyeAI_working


In [5]:
def insert(table, mapping_list, num_up):
    """Insert ``mapping_list`` into ``table`` in consecutive batches.

    Every batch is sent as ``table.insert(batch, nondefaults={'RCB'})``.
    NOTE(review): ``nondefaults={'RCB'}`` presumably makes the server keep the
    ``RCB`` value supplied in each row instead of filling in its default --
    confirm against the deriva datapath documentation.

    Args:
        table: Object exposing ``insert(entities, nondefaults=...)``.
        mapping_list: Sequence of row dicts; it is sliced into batches, so it
            must support ``len()`` and slicing.
        num_up: Maximum number of rows per ``table.insert`` call. Must be a
            positive integer.

    Returns:
        list: The return value of each ``table.insert`` call, in batch order.
        Empty when ``mapping_list`` is empty.

    Raises:
        ValueError: If ``num_up`` is not positive.
    """
    if num_up <= 0:
        raise ValueError(f"num_up must be a positive batch size, got {num_up!r}")

    ingestion = []
    n = len(mapping_list)
    # A stepped range covers the full batches and the shorter final batch alike,
    # so no separate remainder handling is needed.
    for start in range(0, n, num_up):
        end = min(start + num_up, n)
        entities = table.insert(mapping_list[start:end], nondefaults={'RCB'})
        ingestion.append(entities)
        logging.info(f"Updated indices: {start} to {end}")
    return ingestion

In [6]:
# Value columns attached to the Image_Diagnosis feature in addition to its
# vocabulary terms. Both use the deriva_ml BuiltinTypes enum so the two
# ColumnDefinition objects are declared the same way.
column_cdratio = ColumnDefinition(
    name='Cup/Disk_Ratio',
    type=BuiltinTypes.float4,
    nullok=True
)

column_comments = ColumnDefinition(
    name='Comments',
    type=BuiltinTypes.text,
    nullok=True
)
Process_table = EA.model.schemas['eye-ai'].tables['Process']

# Vocabulary terms of the feature; the same names are reused below when the
# matching columns are made nullable.
diagnosis_terms = ("Diagnosis_Image", "Image_Quality", "Diagnosis_Tag", "Diagnosis_Status")

feature_diagnosis = EA.create_feature(feature_name = "Image_Diagnosis", 
                                      table = "Image", 
                                      terms = list(diagnosis_terms), 
                                      metadata = [column_cdratio, column_comments, Process_table],
                                      comment="Image level diagnosis")

# Rename the table 'Execution_Image_Image_Diagnosis' (presumably the one
# create_feature just produced -- confirm) to the shorter 'Image_Diagnosis'.
EA.model.schemas['eye-ai'].tables['Execution_Image_Image_Diagnosis'].alter(table_name='Image_Diagnosis')
table = EA.model.schemas['eye-ai'].tables['Image_Diagnosis']

# Make the Process, Execution and term columns nullable.
process_column = table.column_definitions['Process']
process_column.alter(nullok=True)
execution_column = table.column_definitions['Execution']
execution_column.alter(nullok=True)
for col in diagnosis_terms:
    column_def = table.column_definitions[col]
    column_def.alter(nullok=True)

# feature_image_annot = EA.create_feature(feature_name = "Image_Annotation", 
#                                         table = "Image", 
#                                         terms = ["Annotation_Function", "Annotation_Type"], 
#                                         assets = ["Execution_Assets"],
#                                         comment="Laterality of fundus images")
                                        

# table = EA.model.schemas['eye-ai'].tables['Execution_Image_Image_Annotation']
# execution_column = table.column_definitions['Execution']
# execution_column.alter(nullok=True)

In [8]:
from deriva.core import ErmrestCatalog, get_credential
protocol = 'https'
# hostname = 'dev.eye-ai.org'
hostname = 'www.eye-ai.org'
catalog_number = 'eye-ai'
# catalog_number = '21'
credential = get_credential(hostname)
catalog = ErmrestCatalog(protocol, hostname, catalog_number, credential)

# Get the path builder interface for this catalog
pb = catalog.getPathBuilder()

model_root = catalog.getCatalogModel()

eye_ai = pb.schemas['eye-ai']
ml = pb.schemas['deriva-ml']

# Read every row of the existing Diagnosis table into a DataFrame.
diag_table = eye_ai.Diagnosis
diag_entities = pd.DataFrame(diag_table.path.entities())

# RID -> Name lookup for each vocabulary table, used to replace the RID values
# stored in the old rows with term names.
diag_map = {e["RID"]: e["Name"] for e in eye_ai.Diagnosis_Image_Vocab.entities()}
tag_map = {e["RID"]: e["Name"] for e in eye_ai.Diagnosis_Tag.entities()}
status_map = {e["RID"]: e["Name"] for e in eye_ai.Diagnosis_Status.entities()}
qua_map = {e["RID"]: e["Name"] for e in eye_ai.Image_Quality_Vocab.entities()}

# Build the frame to ingest as one chain, leaving diag_entities untouched so
# the cell gives the same result when re-run:
#   1. drop the RID/RCT/RMT columns (RCB and RMB are kept),
#   2. tag every row with the feature name,
#   3. translate vocabulary RIDs to names, column by column,
#   4. rename the two vocabulary columns.
diag_ingest = (
    diag_entities
    .drop(columns=['RID', 'RCT', 'RMT'])
    .assign(Feature_Name='Diagnosis')
    .replace({"Diagnosis_Vocab": diag_map,
              "Diagnosis_Tag": tag_map,
              "Diagnosis_Status": status_map,
              "Image_Quality_Vocab": qua_map})
    .rename(columns={"Diagnosis_Vocab": "Diagnosis_Image",
                     "Image_Quality_Vocab": "Image_Quality"})
)

# One dict per row, with keys whose value is missing (None/NaN) left out.
ingest_list = [
    {k: v for k, v in record.items() if v is not None and not pd.isna(v)}
    for record in diag_ingest.to_dict(orient='records')
]
# ingest_list
diag_ingest

2024-11-06 10:03:26,241 - INFO - Creating client of type <class 'globus_sdk.services.auth.client.native_client.NativeAppAuthClient'> for service "auth"
2024-11-06 10:03:26,242 - INFO - Finished initializing AuthLoginClient. client_id='8ef15ba9-2b4a-469c-a163-7fd910c9d111', type(authorizer)=<class 'globus_sdk.authorizers.base.NullAuthorizer'>


Unnamed: 0,RCB,RMB,Cup/Disk_Ratio,Image,Process,Diagnosis_Image,Diagnosis_Tag,Diagnosis_Status,Comments,Image_Quality,Execution,Feature_Name
0,https://auth.globus.org/3769492a-b197-4063-952...,https://auth.globus.org/b2541312-d274-11e5-913...,,1-7NVA,,No Glaucoma,Initial Diagnosis,,,,,Diagnosis
1,https://auth.globus.org/3769492a-b197-4063-952...,https://auth.globus.org/b2541312-d274-11e5-913...,,1-7NVC,,No Glaucoma,Initial Diagnosis,,,,,Diagnosis
2,https://auth.globus.org/3769492a-b197-4063-952...,https://auth.globus.org/b2541312-d274-11e5-913...,,1-7NVE,,No Glaucoma,Initial Diagnosis,,,,,Diagnosis
3,https://auth.globus.org/3769492a-b197-4063-952...,https://auth.globus.org/b2541312-d274-11e5-913...,,1-7NVG,,No Glaucoma,Initial Diagnosis,,,,,Diagnosis
4,https://auth.globus.org/3769492a-b197-4063-952...,https://auth.globus.org/b2541312-d274-11e5-913...,,1-7NVJ,,No Glaucoma,Initial Diagnosis,,,,,Diagnosis
...,...,...,...,...,...,...,...,...,...,...,...,...
190077,https://auth.globus.org/b2541312-d274-11e5-913...,https://auth.globus.org/b2541312-d274-11e5-913...,,2-E992,2-CCCJ,No Glaucoma,Initial Diagnosis,,,,,Diagnosis
190078,https://auth.globus.org/b2541312-d274-11e5-913...,https://auth.globus.org/b2541312-d274-11e5-913...,,2-E994,2-CCCJ,No Glaucoma,Initial Diagnosis,,,,,Diagnosis
190079,https://auth.globus.org/b2541312-d274-11e5-913...,https://auth.globus.org/b2541312-d274-11e5-913...,,2-E996,2-CCCJ,No Glaucoma,Initial Diagnosis,,,,,Diagnosis
190080,https://auth.globus.org/b2541312-d274-11e5-913...,https://auth.globus.org/b2541312-d274-11e5-913...,,2-E998,2-CCCJ,No Glaucoma,Initial Diagnosis,,,,,Diagnosis


In [7]:
# Look up the 'Diagnosis' table through the domain schema of the EyeAI client.
new_diag_feature = EA.domain_schema_instance.tables['Diagnosis']
# NOTE(review): the line below presumably deletes the table's existing rows
# before a re-ingest -- confirm before uncommenting.
# new_diag_feature.path.delete()

In [9]:
 # dataset.update(entities, [dataset.id], [dataset.description, 'title'])
# Show how many records were prepared for ingestion.
len(ingest_list)


190082

In [10]:
# Resolve the target table, then push the prepared records in batches of 5000.
new_diag_feature = EA.domain_schema_instance.tables['Diagnosis']
ingestion = insert(
    table=new_diag_feature,
    mapping_list=ingest_list,
    num_up=5000,
)
# Display the per-batch results returned by insert().
ingestion

2024-11-06 10:05:29,054 - INFO - Updated indices: 0 to 5000
2024-11-06 10:05:31,544 - INFO - Updated indices: 5000 to 10000
2024-11-06 10:05:34,038 - INFO - Updated indices: 10000 to 15000
2024-11-06 10:05:36,948 - INFO - Updated indices: 15000 to 20000
2024-11-06 10:05:39,471 - INFO - Updated indices: 20000 to 25000
2024-11-06 10:05:41,993 - INFO - Updated indices: 25000 to 30000
2024-11-06 10:05:44,957 - INFO - Updated indices: 30000 to 35000
2024-11-06 10:05:47,338 - INFO - Updated indices: 35000 to 40000
2024-11-06 10:05:49,882 - INFO - Updated indices: 40000 to 45000
2024-11-06 10:05:52,275 - INFO - Updated indices: 45000 to 50000
2024-11-06 10:05:54,910 - INFO - Updated indices: 50000 to 55000
2024-11-06 10:05:57,786 - INFO - Updated indices: 55000 to 60000
2024-11-06 10:06:00,919 - INFO - Updated indices: 60000 to 65000
2024-11-06 10:06:03,698 - INFO - Updated indices: 65000 to 70000
2024-11-06 10:06:06,229 - INFO - Updated indices: 70000 to 75000
2024-11-06 10:06:08,848 - INFO 

[<deriva.core.datapath._ResultSet at 0x7ff330a3d2a0>,
 <deriva.core.datapath._ResultSet at 0x7ff330a3c7f0>,
 <deriva.core.datapath._ResultSet at 0x7ff339433100>,
 <deriva.core.datapath._ResultSet at 0x7ff330a3ed70>,
 <deriva.core.datapath._ResultSet at 0x7ff33f55c670>,
 <deriva.core.datapath._ResultSet at 0x7ff330a3e9e0>,
 <deriva.core.datapath._ResultSet at 0x7ff330a3f490>,
 <deriva.core.datapath._ResultSet at 0x7ff33f4a1e10>,
 <deriva.core.datapath._ResultSet at 0x7ff330a3d210>,
 <deriva.core.datapath._ResultSet at 0x7ff330a3e410>,
 <deriva.core.datapath._ResultSet at 0x7ff33f4a0490>,
 <deriva.core.datapath._ResultSet at 0x7ff330a3e110>,
 <deriva.core.datapath._ResultSet at 0x7ff330a3eb00>,
 <deriva.core.datapath._ResultSet at 0x7ff33f4a1300>,
 <deriva.core.datapath._ResultSet at 0x7ff330a3e710>,
 <deriva.core.datapath._ResultSet at 0x7ff33f4a27d0>,
 <deriva.core.datapath._ResultSet at 0x7ff33f4a22c0>,
 <deriva.core.datapath._ResultSet at 0x7ff330a3e4d0>,
 <deriva.core.datapath._Resu

In [17]:
# EA.model.schemas['eye-ai'].tables['Diagnosis'].drop()
# EA.model.schemas['eye-ai'].tables['Image_Diagnosis'].alter(table_name='Diagnosis')

<deriva.core.ermrest_model.Table object 'eye-ai'.'Diagnosis' at 0x7fdcf3506e90>

In [None]:
# Read every row of the existing Image_Annotation table.
image_annot_table = EA.domain_schema_instance.tables['Image_Annotation']
image_annot_entities = pd.DataFrame(image_annot_table.path.entities())

# Drop the listed columns and tag each row with the feature name, as a new
# frame so the fetched data is left untouched.
image_annot_ingest = (
    image_annot_entities
    .drop(columns=['RID', 'RCT', 'RMT', 'RCB', 'RMB'])
    .assign(Feature_Name="Image_Annotation")
)

# One dict per row, with keys whose value is missing (None/NaN) left out.
ingest_list = [
    {k: v for k, v in record.items() if v is not None and not pd.isna(v)}
    for record in image_annot_ingest.to_dict(orient='records')
]

# NOTE(review): insert() sends nondefaults={'RCB'}, but RCB is dropped above,
# so no row carries an RCB value -- confirm that this is intended.
new_image_annot_feature = EA.domain_schema_instance.tables['Execution_Image_Image_Annotation']
insert(new_image_annot_feature, ingest_list, 5000)

In [None]:
# Replace the 'Image_Annotation' table with 'Execution_Image_Image_Annotation':
# the old table is dropped first, then the other table is renamed to the freed name.
# NOTE(review): destructive and order-dependent. It looks like this cannot be
# re-run safely: after one run 'Image_Annotation' names the renamed table, so a
# second run would drop that one instead -- confirm before executing.
table = EA.model.schemas['eye-ai'].tables['Image_Annotation']
table.drop()
EA.model.schemas['eye-ai'].tables['Execution_Image_Image_Annotation'].alter(table_name = 'Image_Annotation')

In [None]:
def update(table, mapping_list, num_up):
    """Update rows of ``table`` from ``mapping_list`` in consecutive batches.

    Each batch is sent as ``table.update(batch)``.
    NOTE(review): this relies on the deriva datapath defaults for ``update``
    (rows matched on ``RID``, remaining keys of the first row used as the
    columns to write) -- confirm against the deriva-py documentation.

    Args:
        table: Object exposing ``update(entities)``.
        mapping_list: Sequence of row dicts; it is sliced into batches.
        num_up: Maximum number of rows per ``table.update`` call. Must be a
            positive integer.

    Returns:
        list: The return value of each ``table.update`` call, in batch order.

    Raises:
        ValueError: If ``num_up`` is not positive.
    """
    if num_up <= 0:
        raise ValueError(f"num_up must be a positive batch size, got {num_up!r}")

    results = []
    n = len(mapping_list)
    for start in range(0, n, num_up):
        end = min(start + num_up, n)
        results.append(table.update(mapping_list[start:end]))
        logging.info(f"Updated indices: {start} to {end}")
    return results


# Read the current rows and keep only the key (RID) and the column to rewrite.
image_annot_table = EA.domain_schema_instance.tables['Image_Annotation']
image_annot_entities = pd.DataFrame(image_annot_table.path.entities())
# .assign returns a new frame, which avoids writing into a column subset of
# image_annot_entities (the cause of pandas' SettingWithCopyWarning).
image_annot_ingest = image_annot_entities[['RID', 'Feature_Name']].assign(Feature_Name="Annotation")

# One dict per row, with keys whose value is missing (None/NaN) left out.
ingest_list = [
    {k: v for k, v in record.items() if v is not None and not pd.isna(v)}
    for record in image_annot_ingest.to_dict(orient='records')
]

new_image_annot_feature = EA.domain_schema_instance.tables['Image_Annotation']
update(new_image_annot_feature, ingest_list, 5000)