In [1]:
repo_dir = "Repos"   # Set this to be where your github repos are located.
%load_ext autoreload
%autoreload 2

# Update the load path so python can find modules for the model
import sys
from pathlib import Path
sys.path.insert(0, str(Path.home() / repo_dir / "deriva-ml"))
sys.path.insert(0, str(Path.home() / repo_dir / "eye-ai-ml"))

In [2]:
# Prerequisites

import json
import os
from eye_ai.eye_ai import EyeAI
import pandas as pd
from pathlib import Path, PurePath
import logging
from deriva.chisel import Model, Schema, Table, Column, Key, ForeignKey, builtin_types
from deriva_ml.deriva_ml_base import DerivaML, DerivaMLException, FileUploadState, UploadState, ColumnDefinition, BuiltinTypes

# import torch

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)

In [3]:
from deriva.core.utils.globus_auth_utils import GlobusNativeLogin
# catalog_id = "362" #@param
catalog_id = "428" #@param
# 411 412 426
# host = 'www.eye-ai.org'
host = 'dev.eye-ai.org'


gnl = GlobusNativeLogin(host=host)
if gnl.is_logged_in([host]):
    print("You are already logged in.")
else:
    gnl.login([host], no_local_server=True, no_browser=True, refresh_tokens=True, update_bdbag_keychain=True)
    print("Login Successful")

2024-10-25 14:48:36,262 - INFO - Creating client of type <class 'globus_sdk.services.auth.client.native_client.NativeAppAuthClient'> for service "auth"
2024-10-25 14:48:36,263 - INFO - Finished initializing AuthLoginClient. client_id='8ef15ba9-2b4a-469c-a163-7fd910c9d111', type(authorizer)=<class 'globus_sdk.authorizers.base.NullAuthorizer'>


You are already logged in.


In [4]:
cache_dir = '/data'        # Directory in which to cache materialized BDBags for datasets
working_dir = '/data'    # Directory in which to place output files for later upload.
EA = EyeAI(hostname = host, catalog_id = catalog_id, 
           cache_dir= cache_dir, working_dir=working_dir)

2024-10-25 14:48:36,288 - INFO - Creating client of type <class 'globus_sdk.services.auth.client.native_client.NativeAppAuthClient'> for service "auth"
2024-10-25 14:48:36,289 - INFO - Finished initializing AuthLoginClient. client_id='8ef15ba9-2b4a-469c-a163-7fd910c9d111', type(authorizer)=<class 'globus_sdk.authorizers.base.NullAuthorizer'>


In [36]:
def insert(table, mapping_list, num_up):
    n = len(mapping_list)
    for i in range(n // num_up):
        table.insert(mapping_list[i * num_up: (i + 1) * num_up])
        logging.info(f"Updated indices: {i * num_up} to {(i + 1) * num_up}")
    remaining_start = (i + 1) * num_up if n >= num_up else 0
    if remaining_start < n:
        table.insert(mapping_list[remaining_start: n])
        logging.info(f"Updated indices: {remaining_start} to {n}")

In [34]:
column_cdratio = ColumnDefinition(
    name='Cup/Disk_Ratio',
    type=BuiltinTypes.float4,
    nullok=True
)

column_comemnts = ColumnDefinition(
    name='Comments',
    type=builtin_types.text,
    nullok=True
)
Process_table = EA.model.schemas['eye-ai'].tables['Process']

feature_diagnosis = EA.create_feature(feature_name = "Image_Diagnosis", 
                                      table = "Image", 
                                      terms = ["Diagnosis_Image", "Image_Quality", "Diagnosis_Tag", "Diagnosis_Status"], 
                                      metadata = [column_cdratio, column_comemnts, Process_table],
                                      comment="Image level diagnosis")

EA.model.schemas['eye-ai'].tables['Execution_Image_Image_Diagnosis'].alter(table_name='Image_Diagnosis')
table = EA.model.schemas['eye-ai'].tables['Image_Diagnosis']
process_column = table.column_definitions['Process']
process_column.alter(nullok=True)
execution_column = table.column_definitions['Execution']
execution_column.alter(nullok=True)
for col in ["Diagnosis_Image", "Image_Quality", "Diagnosis_Tag", "Diagnosis_Status"]:
    column_def = table.column_definitions[col]
    column_def.alter(nullok=True)

feature_image_annot = EA.create_feature(feature_name = "Image_Annotation", 
                                        table = "Image", 
                                        terms = ["Annotation_Function", "Annotation_Type"], 
                                        assets = ["Execution_Assets"],
                                        comment="Lateraliity of fundus images")
                                        
EA.model.schemas['eye-ai'].tables['Execution_Image_Image_Annotation'].alter(table_name='Image_Diagnosis')
table = EA.model.schemas['eye-ai'].tables['Execution_Image_Image_Annotation']
execution_column = table.column_definitions['Execution']
execution_column.alter(nullok=True)

In [17]:
image_annot_table = EA.domain_schema_instance.tables['Image_Annotation']
image_annot_entities = pd.DataFrame(image_annot_table.path.entities())
image_annot_ingest = image_annot_entities.drop(['RID', 'RID', 'RCT', 'RMT', 'RCB', 'RMB'], axis=1)
image_annot_ingest['Feature_Name'] = "Image_Annotation"

ingest_list = [
    {k: v for k, v in record.items() if v is not None and not pd.isna(v)}
    for record in image_annot_ingest.to_dict(orient='records')
]

new_image_annot_feature = EA.domain_schema_instance.tables['Execution_Image_Image_Annotation']
insert(new_image_annot_feature, ingest_list, 5000)

2024-10-25 14:51:31,508 - INFO - Updated indices: 0 to 5000
2024-10-25 14:51:33,787 - INFO - Updated indices: 5000 to 10000
2024-10-25 14:51:35,941 - INFO - Updated indices: 10000 to 15000
2024-10-25 14:51:36,004 - INFO - Updated indices: 15000 to 15111


In [37]:
diag_table = EA.domain_schema_instance.tables['Diagnosis']
diag_entities = pd.DataFrame(diag_table.path.entities())
diag_ingest = diag_entities.drop(['RID', 'RID', 'RCT', 'RMT', 'RCB', 'RMB'], axis=1)
diag_ingest['Feature_Name'] = 'Diagnosis'
ingest_list = diag_ingest.to_dict(orient='records')

ingest_list = [
    {k: v for k, v in record.items() if v is not None and not pd.isna(v)}
    for record in diag_ingest.to_dict(orient='records')
]
new_diag_feature = EA.domain_schema_instance.tables['Image_Diagnosis']
insert(new_diag_feature, ingest_list, 5000)

2024-10-25 16:09:39,735 - INFO - Updated indices: 0 to 5000
2024-10-25 16:09:42,380 - INFO - Updated indices: 5000 to 10000
2024-10-25 16:09:44,935 - INFO - Updated indices: 10000 to 15000
2024-10-25 16:09:47,736 - INFO - Updated indices: 15000 to 20000
2024-10-25 16:09:50,519 - INFO - Updated indices: 20000 to 25000
2024-10-25 16:09:53,646 - INFO - Updated indices: 25000 to 30000
2024-10-25 16:09:56,816 - INFO - Updated indices: 30000 to 35000
2024-10-25 16:09:59,555 - INFO - Updated indices: 35000 to 40000
2024-10-25 16:10:02,190 - INFO - Updated indices: 40000 to 45000
2024-10-25 16:10:04,908 - INFO - Updated indices: 45000 to 50000
2024-10-25 16:10:07,705 - INFO - Updated indices: 50000 to 55000
2024-10-25 16:10:10,579 - INFO - Updated indices: 55000 to 60000
2024-10-25 16:10:13,823 - INFO - Updated indices: 60000 to 65000
2024-10-25 16:10:16,876 - INFO - Updated indices: 65000 to 70000
2024-10-25 16:10:20,143 - INFO - Updated indices: 70000 to 75000
2024-10-25 16:10:23,039 - INFO 

In [19]:
table = EA.model.schemas['eye-ai'].tables['Image_Annotation']
table.drop()
EA.model.schemas['eye-ai'].tables['Execution_Image_Image_Annotation'].alter(table_name = 'Image_Annotation')

In [26]:
image_annot_table = EA.domain_schema_instance.tables['Image_Annotation']
image_annot_entities = pd.DataFrame(image_annot_table.path.entities())
image_annot_ingest = image_annot_entities[['RID', 'Feature_Name']]
image_annot_ingest['Feature_Name'] = "Annotation"

ingest_list = [
    {k: v for k, v in record.items() if v is not None and not pd.isna(v)}
    for record in image_annot_ingest.to_dict(orient='records')
]

new_image_annot_feature = EA.domain_schema_instance.tables['Image_Annotation']
update(new_image_annot_feature, ingest_list, 5000)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  image_annot_ingest['Feature_Name'] = "Annotation"
2024-10-25 15:37:48,022 - INFO - Updated indices: 0 to 5000
2024-10-25 15:37:50,160 - INFO - Updated indices: 5000 to 10000
2024-10-25 15:37:51,988 - INFO - Updated indices: 10000 to 15000
2024-10-25 15:37:52,052 - INFO - Updated indices: 15000 to 15111


In [29]:
diag_table = EA.domain_schema_instance.tables['Image_Diagnosis']
diag_entities = pd.DataFrame(diag_table.path.entities())
diag_ingest = diag_entities[['RID', 'Feature_Name']]
diag_ingest['Feature_Name'] = "Diagnosis"

ingest_list = [
    {k: v for k, v in record.items() if v is not None and not pd.isna(v)}
    for record in diag_ingest.to_dict(orient='records')
]

new_diag_feature = EA.domain_schema_instance.tables['Image_Diagnosis']
update(new_diag_feature, ingest_list, 5000)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  diag_ingest['Feature_Name'] = "Diagnosis"
2024-10-25 15:48:10,766 - INFO - Updated indices: 0 to 5000
2024-10-25 15:48:13,025 - INFO - Updated indices: 5000 to 10000
2024-10-25 15:48:15,514 - INFO - Updated indices: 10000 to 15000
2024-10-25 15:48:17,146 - INFO - Updated indices: 15000 to 20000
2024-10-25 15:48:18,670 - INFO - Updated indices: 20000 to 25000
2024-10-25 15:48:20,449 - INFO - Updated indices: 25000 to 30000
2024-10-25 15:48:22,454 - INFO - Updated indices: 30000 to 35000
2024-10-25 15:48:24,117 - INFO - Updated indices: 35000 to 40000
2024-10-25 15:48:26,159 - INFO - Updated indices: 40000 to 45000
2024-10-25 15:48:28,517 - INFO - Updated indices: 45000 to 50000
2024-10-25 15:48:30,566 - INFO - Updated indices:

In [38]:
EA.model.schemas['eye-ai'].tables['Diagnosis'].drop()
EA.model.schemas['eye-ai'].tables['Image_Diagnosis'].alter(table_name='Diagnosis')

<deriva.core.ermrest_model.Table object 'eye-ai'.'Diagnosis' at 0x7fbed2d6fe20>