# Connect Eye-AI and Load Libraries

In [1]:
repo_dir = "Repos"   # Set this to be where your github repos are located.
%load_ext autoreload
%autoreload 2

# # Update the load path so python can find modules for the model
import sys
from pathlib import Path
sys.path.insert(0, str(Path.home() / repo_dir / "eye-ai-ml"))
sys.path.insert(0, str(Path.home() / repo_dir / "eye-ai-exec"))

In [2]:
# Prerequisites
import json
import os

# EyeAI, Deriva, VGG19
from deriva_ml import DatasetSpec, DatasetBag, Workflow, ExecutionConfiguration, VersionPart
from deriva_ml import MLVocab as vc
from eye_ai.eye_ai import EyeAI
from models.vgg19 import vgg19_diagnosis_train

# ML Analytics
import pandas as pd
import numpy as np
from sklearn.calibration import calibration_curve
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt

# Other Utilities
from pathlib import Path, PurePath
import logging
from datetime import datetime

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)

2025-09-22 14:51:48.078512: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-09-22 14:51:48.078560: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-09-22 14:51:48.079635: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-09-22 14:51:48.086346: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Login
from deriva.core.utils.globus_auth_utils import GlobusNativeLogin
host = 'www.eye-ai.org'
#host = 'dev.eye-ai.org' #for dev testing
catalog_id = "eye-ai"

gnl = GlobusNativeLogin(host=host)
if gnl.is_logged_in([host]):
    print("You are already logged in.")
else:
    gnl.login([host], no_local_server=True, no_browser=True, refresh_tokens=True, update_bdbag_keychain=True)
    print("Login Successful")

2025-09-22 14:51:49,617 - INFO - Creating client of type <class 'globus_sdk.services.auth.client.native_client.NativeAppAuthClient'> for service "auth"
2025-09-22 14:51:49,618 - INFO - Finished initializing AuthLoginClient. client_id='8ef15ba9-2b4a-469c-a163-7fd910c9d111', type(authorizer)=<class 'globus_sdk.authorizers.base.NullAuthorizer'>


You are already logged in.


# Configuration

In [4]:
cache_dir = '/data'
working_dir = '/data'
EA = EyeAI(hostname = host, catalog_id = catalog_id, cache_dir= cache_dir, working_dir=working_dir)

2025-09-22 14:51:53,469 - INFO - Creating client of type <class 'globus_sdk.services.auth.client.native_client.NativeAppAuthClient'> for service "auth"
2025-09-22 14:51:53,470 - INFO - Finished initializing AuthLoginClient. client_id='8ef15ba9-2b4a-469c-a163-7fd910c9d111', type(authorizer)=<class 'globus_sdk.authorizers.base.NullAuthorizer'>


In [5]:

EA.increment_dataset_version(dataset_rid='4-Z6K8', component= VersionPart.patch, description='Update after annotations added')

DatasetVersion(major=0, minor=5, patch=3)

In [None]:
source_dataset = "4-Z6K8" # New LAC test (balanced)

asset_RID = ["4-MWQ6"]  # VGG19 cropped
crop = True

#asset_RID = ["4-MWQ8"]  # VGG19 uncropped
#crop = False

preds_workflow = EA.add_workflow( 
    Workflow(
        name="VGG Predictions by KB",
        url="https://github.com/informatics-isi-edu/eye-ai-exec/blob/main/notebooks/Sandbox_KB/VGG_Predict.ipynb",
        workflow_type="Test Workflow",
        )
    )

config = ExecutionConfiguration(
    datasets=[ DatasetSpec(rid=source_dataset, version=EA.dataset_version(source_dataset), materialize=True) ],
    assets=asset_RID,
    workflow=preds_workflow,
    description=f"Instance of creating VGG-19 predictions: Asset {asset_RID} on {source_dataset} with Crop = {crop}",
    )

execution = EA.create_execution(config)

2025-09-22 14:53:03,341 - INFO - Materialize bag 4-Z6K8... 
2025-09-22 14:53:03,623 - INFO - Creating client of type <class 'globus_sdk.services.auth.client.native_client.NativeAppAuthClient'> for service "auth"
2025-09-22 14:53:03,624 - INFO - Finished initializing AuthLoginClient. client_id='8ef15ba9-2b4a-469c-a163-7fd910c9d111', type(authorizer)=<class 'globus_sdk.authorizers.base.NullAuthorizer'>
2025-09-22 14:53:05,214 - INFO - Creating new MINID for dataset 4-Z6K8
2025-09-22 14:53:05,974 - INFO - Downloading dataset minid for catalog: 4-Z6K8@0.5.3
2025-09-22 14:53:05,975 - INFO - Creating client of type <class 'globus_sdk.services.auth.client.native_client.NativeAppAuthClient'> for service "auth"
2025-09-22 14:53:05,976 - INFO - Finished initializing AuthLoginClient. client_id='8ef15ba9-2b4a-469c-a163-7fd910c9d111', type(authorizer)=<class 'globus_sdk.authorizers.base.NullAuthorizer'>
2025-09-22 14:53:06,011 - INFO - Processing export config file: /tmp/tmphy1ow_2t/download_spec.j

In [7]:
print(execution)

caching_dir: /data
_working_dir: /data/kb_766/EyeAI_working
execution_rid: 5-50TW
workflow_rid: 4-YSP4
asset_paths: {'Execution_Asset': [AssetFilePath('/data/kb_766/EyeAI_working/deriva-ml/execution/5-50TW/downloaded-assets/Execution_Asset/VGG19_FULL_Images_Cropped_2-277G_Feb_14_2025.h5')]}
configuration: datasets=[DatasetSpec(rid='4-Z6K8', materialize=True, version=DatasetVersion(major=0, minor=5, patch=1))] assets=['4-MWQ6'] workflow='4-YSP4' parameters={} description='Instance of creating VGG19 predictions: VGG19 Uncropped on 4-YWKJ USC Test' argv=['/home/kb_766/.conda/envs/my-tensorflow-conda/lib/python3.10/site-packages/ipykernel_launcher.py', '-f', '/home/kb_766/.local/share/jupyter/runtime/kernel-7a3ec659-cea5-424a-a5a0-5c5a60ac5bf2.json']


In [10]:
output_dir = execution._working_dir / execution.execution_rid
output_dir.mkdir(parents=True, exist_ok=True)
output_dir

PosixPath('/data/kb_766/EyeAI_working/5-50TW')

# Get Pertinent Datasets

In [11]:
ds_bag_test = execution.datasets[0]

In [12]:
# FOR LAC DATA

test_image_path_cropped, test_csv_cropped = EA.create_cropped_images(ds_bag = ds_bag_test,
                                                                     output_dir = output_dir / "dataset" / "test",
                                                                     crop_to_eye = crop)

In [None]:
# FOR USC MULTIMODAL DATA
# Group files as glaucoma/not for RETFound evaluation

imageDF = ds_bag_test.get_table_as_dataframe('Image')
annotation_bounding_box =  pd.merge( ds_bag_test.get_table_as_dataframe('Annotation')[['Image', 'Fundus_Bounding_Box']], 
                                                ds_bag_test.get_table_as_dataframe('Fundus_Bounding_Box'), 
                                                left_on='Fundus_Bounding_Box', 
                                                right_on='RID')

data_path = output_dir / "dataset"
output_path =  data_path / "test"
output_path_no = output_path / "No_Glaucoma"
output_path_glaucoma = output_path / "Suspected_Glaucoma"

output_path.mkdir(parents=True, exist_ok=True)
output_path_no.mkdir(parents=True, exist_ok=True)
output_path_glaucoma.mkdir(parents=True, exist_ok=True)

for index, row in imageDF.iterrows():     
    src_path = row["Filename"]
    image_rid = row["RID"]
    dest_name = image_rid + ".jpg"
    if crop: dest_name = "Crop_" + dest_name

    label = image_rid.split( sep = "-" )[0]
    if label == "4":
        dest_path = os.path.join(output_path_no, dest_name)
    elif label == "2":
        dest_path = os.path.join(output_path_glaucoma, dest_name)
    else: 
        continue    
    
    if crop:
        image = Image.open(src_path)
        svg_path = annotation_bounding_box.loc[annotation_bounding_box['Image'] == image_rid, 'Filename'].values[0]
        svg_path = Path(svg_path)
        if not svg_path.exists():
          continue
        bbox = EA.get_bounding_box(svg_path)
        cropped_image = image.crop(bbox)
        cropped_image.save(dest_path)
    else:
        shutil.copy2(src_path, dest_path)

output_path

In [17]:
! ls '/data/kb_766/EyeAI_working/5-50TW/dataset/test/Suspected_Glaucoma'

Cropped_2-CXEE.JPG  Cropped_2-D070.JPG	Cropped_2-D47G.JPG  Cropped_2-D7YT.JPG
Cropped_2-CXEJ.JPG  Cropped_2-D072.JPG	Cropped_2-D48Y.JPG  Cropped_2-D7Z0.JPG
Cropped_2-CXFM.JPG  Cropped_2-D082.JPG	Cropped_2-D492.JPG  Cropped_2-D7Z4.JPG
Cropped_2-CXFT.JPG  Cropped_2-D086.JPG	Cropped_2-D49E.JPG  Cropped_2-D7ZA.JPG
Cropped_2-CXH4.JPG  Cropped_2-D08J.JPG	Cropped_2-D49G.JPG  Cropped_2-D8CC.JPG
Cropped_2-CXH6.JPG  Cropped_2-D08M.JPG	Cropped_2-D4D2.JPG  Cropped_2-D8CM.JPG
Cropped_2-CXK0.JPG  Cropped_2-D0A6.JPG	Cropped_2-D4D6.JPG  Cropped_2-D8DM.JPG
Cropped_2-CXK6.JPG  Cropped_2-D0A8.JPG	Cropped_2-D4DM.JPG  Cropped_2-D8DP.JPG
Cropped_2-CXM4.JPG  Cropped_2-D0EE.JPG	Cropped_2-D4DT.JPG  Cropped_2-D8F0.JPG
Cropped_2-CXM8.JPG  Cropped_2-D0EM.JPG	Cropped_2-D4GR.JPG  Cropped_2-D8F8.JPG
Cropped_2-CXS2.JPG  Cropped_2-D0GM.JPG	Cropped_2-D4GW.JPG  Cropped_2-D8FE.JPG
Cropped_2-CXSA.JPG  Cropped_2-D0GP.JPG	Cropped_2-D4H2.JPG  Cropped_2-D8FG.JPG
Cropped_2-CXT8.JPG  Cropped_2-D0GW.JPG	Cropped_2-D4H4.JPG  Cropp

In [18]:
asset_output_dir = execution._working_dir / execution.execution_rid / "asset"
asset_output_dir.mkdir( parents=True, exist_ok=True )

In [19]:
current_date = datetime.now().strftime("%b_%d_%Y") 

In [20]:
model_path = str(execution.asset_paths['Execution_Asset'][0])

In [21]:
test_image_path_cropped

PosixPath('/data/kb_766/EyeAI_working/5-50TW/dataset/test')

In [27]:

with execution.execute() as exec:
        predictions_results, metrics_summary = vgg19_diagnosis_train.evaluate_only(
            model_path = model_path, 
            model_name = f"VGG19_{crop}Crop_Model_{ds_bag_test.dataset_rid}_{current_date}", 
            test_path = test_image_path_cropped, 
            output_dir = asset_output_dir,
            classes = {'No_Glaucoma': 0, 'Suspected_Glaucoma': 1}
        )
        print("Execution Results:")
        print(predictions_results, metrics_summary)

2025-09-13 07:36:21,044 - INFO - Start execution  ...
2025-09-13 07:36:22,629 - INFO - Start execution  ...


Found 656 images belonging to 2 classes.


2025-09-13 07:36:35,545 - INFO - Predictions saved to VGG19_TrueCrop_Model_4-Z6K8_Sep_12_2025_predictions_results.csv
2025-09-13 07:36:35,545 - INFO - Metrics saved to VGG19_TrueCrop_Model_4-Z6K8_Sep_12_2025_metrics_summary.csv
2025-09-13 07:36:35,546 - INFO - Successfully run Ml.
2025-09-13 07:36:35,617 - INFO - Algorithm execution ended.


Execution Results:
/data/kb_766/EyeAI_working/5-50TW/asset/VGG19_TrueCrop_Model_4-Z6K8_Sep_12_2025_predictions_results.csv /data/kb_766/EyeAI_working/5-50TW/asset/VGG19_TrueCrop_Model_4-Z6K8_Sep_12_2025_metrics_summary.csv


In [29]:
pd.read_csv('/data/kb_766/EyeAI_working/5-50TW/asset/VGG19_TrueCrop_Model_4-Z6K8_Sep_12_2025_predictions_results.csv')

Unnamed: 0,Filename,True Label,Prediction,Probability Score
0,No_Glaucoma/Cropped_2-DCKR.JPG,0.0,1,0.513919
1,No_Glaucoma/Cropped_2-DCKY.JPG,0.0,0,0.414791
2,No_Glaucoma/Cropped_2-DCN0.JPG,0.0,1,0.501660
3,No_Glaucoma/Cropped_2-DCN2.JPG,0.0,1,0.778332
4,No_Glaucoma/Cropped_2-DCPP.JPG,0.0,0,0.039846
...,...,...,...,...
651,Suspected_Glaucoma/Cropped_2-DC70.JPG,1.0,1,0.948401
652,Suspected_Glaucoma/Cropped_2-DC8A.JPG,1.0,0,0.490659
653,Suspected_Glaucoma/Cropped_2-DC8C.JPG,1.0,0,0.132261
654,Suspected_Glaucoma/Cropped_2-DCE0.JPG,1.0,1,0.970269


In [30]:
pd.read_csv( '/data/kb_766/EyeAI_working/5-50TW/asset/VGG19_TrueCrop_Model_4-Z6K8_Sep_12_2025_metrics_summary.csv' )

Unnamed: 0,Metric,Value
0,F1 Score,0.756345
1,Precision,0.663697
2,Recall,0.879056
3,Accuracy,0.707317
4,ROC-AUC,0.817733


In [None]:
preds = pd.read_csv( predictions_results )
preds['Filename'].str.split( pat = "/", expand = True)[0].value_counts()

In [None]:
# Calibration curve
prob_true, prob_pred = calibration_curve( preds["True Label"], preds["Probability Score"], n_bins=10, strategy='uniform')
plt.plot(prob_pred, prob_true, marker='o', label='Model')
plt.plot([0, 1], [0, 1], linestyle='--', label='Perfectly calibrated')
plt.xlabel('Mean predicted probability')
plt.ylabel('Fraction of positives')
plt.title('Calibration curve')
plt.legend()
plt.show()

In [None]:
# Gather patient data

preds['Image'] = preds['Filename'].apply(lambda x: x.split("_")[2].split(".")[0])

# Link to diagnosis data
linkdDF = pd.merge( preds,
    ds_bag_test.get_table_as_dataframe('Image_Diagnosis')[['Image','Diagnosis_Image']],
    on = 'Image', how = 'left' 
)

# Link to image data
linkdDF = pd.merge( linkdDF,
                   ds_bag_test.get_table_as_dataframe('Image')[['RID', 'Observation']],
                   left_on = 'Image', right_on = 'RID', 
                   how = 'left')
linkdDF = linkdDF.drop('RID', axis = 1)

# Link to observation data
linkdDF = pd.merge( linkdDF,
                   ds_bag_test.get_table_as_dataframe('Observation')[['RID', 'Subject']],
                   left_on = 'Observation', right_on = 'RID', 
                   how = 'left')
linkdDF = linkdDF.drop('RID', axis = 1)

linkdDF = pd.merge( linkdDF,
                   ds_bag_test.get_table_as_dataframe('Subject')[['RID', 'Subject_Gender', 'Subject_Ethnicity']],
                   left_on = 'Subject', right_on = 'RID', 
                   how = 'left')
linkdDF = linkdDF.drop('RID', axis = 1)

linkdDF['Subject_Ethnicity'] = linkdDF['Subject_Ethnicity'].astype('category')


In [None]:
linkdDF['Subject_Gender'].value_counts()/656, linkdDF['Subject_Ethnicity'].value_counts()/656

In [None]:
# Explore performance metrics

# Confirm AUC
fpr, tpr, thresholds = roc_curve( preds["True Label"], preds["Probability Score"])
print( "Overall AUC" )
print( auc(fpr, tpr) )

# Check AUC by ethnicity
for e in pd.unique( linkdDF['Subject_Ethnicity'] ):
    tDF = linkdDF[ linkdDF['Subject_Ethnicity'] == e ]
    fpr, tpr, thresholds = roc_curve( tDF["True Label"], tDF["Probability Score"])
    print( e )
    print( auc(fpr, tpr) )

# Check AUC by gender
for g in pd.unique( linkdDF['Subject_Gender'] ):
    tDF = linkdDF[ linkdDF['Subject_Gender'] == g ]
    fpr, tpr, thresholds = roc_curve( tDF["True Label"], tDF["Probability Score"])
    print( g )
    print( auc(fpr, tpr) )

In [None]:
linkdDF[ (linkdDF[ 'Diagnosis_Image' ] == 'Suspected Glaucoma') & (linkdDF[ 'Prediction' ] == 0) ]

# Upload Results

In [33]:
# # crete asset path
asset_type_name = "Diagnosis_Analysis"
asset_path = execution.execution_asset_path(asset_type_name)

print(asset_path)

# # save assets to asset_path
# linkdDF.to_csv(asset_path/'ImagesToVGG19.csv', index=False)

# upload assets to catalog
# execution.upload_execution_outputs(clean_folder=True)

AttributeError: 'Execution' object has no attribute 'execution_asset_path'