# Connect Eye-AI and Load Libraries

In [1]:
repo_dir = "Repos"   # Folder under your home directory where your GitHub repos are checked out.
# autoreload mode 2: re-import edited modules before each cell runs, so local package edits apply without a kernel restart.
%load_ext autoreload
%autoreload 2

# Update the load path so Python can find the eye-ai modules.
# Both inserts target position 0, so eye-ai-exec (inserted last) ends up ahead of eye-ai-ml on sys.path.
import sys
from pathlib import Path
sys.path.insert(0, str(Path.home() / repo_dir / "eye-ai-ml"))
sys.path.insert(0, str(Path.home() / repo_dir / "eye-ai-exec"))

In [2]:
# Prerequisites
import json
import os
import shutil
from PIL import Image

# EyeAI, Deriva, VGG19
from deriva_ml import DatasetSpec, DatasetBag, Workflow, ExecutionConfiguration, VersionPart
from deriva_ml import MLVocab as vc
from eye_ai.eye_ai import EyeAI

# ML Analytics
import pandas as pd
import numpy as np
import sklearn.metrics as sk

# Other Utilities
from pathlib import Path, PurePath
import logging
from datetime import datetime

# Log INFO and above with a timestamped format. force=True removes any handlers already attached
# to the root logger first, so this configuration applies even if logging was set up earlier.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)

In [3]:
# Print the installed scikit-learn version so the results can be tied to a library version.
import sklearn
import sklearn.metrics as skm  # NOTE(review): same module as the earlier `sk` alias; `skm` is not used in the cells shown here.
print(sklearn.__version__)


1.7.1


In [4]:
# Login
from deriva.core.utils.globus_auth_utils import GlobusNativeLogin

# Default host; swap in the commented line below for dev testing.
host = 'www.eye-ai.org'
#host = 'dev.eye-ai.org' #for dev testing
catalog_id = "eye-ai"

gnl = GlobusNativeLogin(host=host)
if not gnl.is_logged_in([host]):
    # No active session for this host: run the login flow, then report success.
    gnl.login(
        [host],
        no_local_server=True,
        no_browser=True,
        refresh_tokens=True,
        update_bdbag_keychain=True,
    )
    print("Login Successful")
else:
    print("You are already logged in.")

2025-08-28 20:27:13,082 - INFO - Creating client of type <class 'globus_sdk.services.auth.client.native_client.NativeAppAuthClient'> for service "auth"
2025-08-28 20:27:13,083 - INFO - Finished initializing AuthLoginClient. client_id='8ef15ba9-2b4a-469c-a163-7fd910c9d111', type(authorizer)=<class 'globus_sdk.authorizers.base.NullAuthorizer'>


You are already logged in.


# Configuration

In [5]:
# Cached dataset bags and working files both live under /data.
cache_dir = '/data'
working_dir = '/data'

# Catalog client used by the rest of the notebook.
EA = EyeAI(
    hostname=host,
    catalog_id=catalog_id,
    cache_dir=cache_dir,
    working_dir=working_dir,
)

2025-08-28 20:27:14,669 - INFO - Creating client of type <class 'globus_sdk.services.auth.client.native_client.NativeAppAuthClient'> for service "auth"
2025-08-28 20:27:14,669 - INFO - Finished initializing AuthLoginClient. client_id='8ef15ba9-2b4a-469c-a163-7fd910c9d111', type(authorizer)=<class 'globus_sdk.authorizers.base.NullAuthorizer'>


In [6]:


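# Disabled one-off maintenance call, kept for reference (`ml_instance` is not defined in the cells shown here):
#ml_instance.increment_dataset_version(dataset_rid='2-N93J', component= VersionPart.patch, description='Update to latest deriva-ml schema')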

In [7]:
# Dataset RIDs attached to this execution (alternative RID kept for reference: "2-39FY").
datasets = ['4-411G', '2-C9PR']

# One spec per dataset, each pinned to the version returned by EA.dataset_version.
data_to_download = []
for dataset in datasets:
    ds_dict = dict(
        rid=dataset,
        materialize=False,
        version=EA.dataset_version(dataset_rid=dataset),
    )
    data_to_download.append(ds_dict)

# Register the workflow that this execution is recorded under.
dataset_workflow = EA.add_workflow(
    Workflow(
        name="Make Dataset by KB",
        url="https://github.com/informatics-isi-edu/eye-ai-exec/blob/main/notebooks/Sandbox_KB/Make_Dataset.ipynb",
        workflow_type="Test Workflow",
    )
)

config = ExecutionConfiguration(
    datasets=data_to_download,
    workflow=dataset_workflow,
    description="Test",
)

# Creating the execution loads the configured dataset bags (see the log output below).
execution = EA.create_execution(config)

2025-08-28 20:27:18,263 - INFO - Materialize bag 4-411G... 
2025-08-28 20:27:18,471 - INFO - Creating client of type <class 'globus_sdk.services.auth.client.native_client.NativeAppAuthClient'> for service "auth"
2025-08-28 20:27:18,472 - INFO - Finished initializing AuthLoginClient. client_id='8ef15ba9-2b4a-469c-a163-7fd910c9d111', type(authorizer)=<class 'globus_sdk.authorizers.base.NullAuthorizer'>
2025-08-28 20:27:20,077 - INFO - Using cached bag for  4-411G Version:2.7.0
2025-08-28 20:27:20,078 - INFO - Loading /data/4-411G_37ce9366c961c832931248ef462255860bc8595e0165040faa43113eba38ce45/Dataset_4-411G
2025-08-28 20:27:20,845 - INFO - Creating new database for dataset: 4-411G in /data/kb_766/EyeAI_working/4-411G@33N-3CNE-1XFP.db
2025-08-28 20:27:20,846 - INFO - Materialize bag 2-C9PR... 
2025-08-28 20:27:21,063 - INFO - Creating client of type <class 'globus_sdk.services.auth.client.native_client.NativeAppAuthClient'> for service "auth"
2025-08-28 20:27:21,064 - INFO - Finished ini

In [8]:
# Show the execution summary (directories, execution/workflow RIDs, resolved dataset specs).
print(execution)

caching_dir: /data
_working_dir: /data/kb_766/EyeAI_working
execution_rid: 5-4XCP
workflow_rid: 4-Z5Y4
asset_paths: {}
configuration: datasets=[DatasetSpec(rid='4-411G', materialize=False, version=DatasetVersion(major=2, minor=7, patch=0)), DatasetSpec(rid='2-C9PR', materialize=False, version=DatasetVersion(major=2, minor=7, patch=0))] assets=[] workflow='4-Z5Y4' parameters={} description='Test' argv=['/home/kb_766/.conda/envs/my-tensorflow-conda/lib/python3.10/site-packages/ipykernel_launcher.py', '-f', '/home/kb_766/.local/share/jupyter/runtime/kernel-2f5e238d-101c-4d36-b517-990edcf7b75a.json']


# Work with Data

In [9]:
# NOTE(review): assumes execution.datasets keeps the order of the configured RIDs
# ('4-411G' first, '2-C9PR' second) — confirm.
ds_img, ds_test = execution.datasets[0], execution.datasets[1]

# My Grading (KB)

In [21]:
# Get disc-only grading ME
disc_grades = pd.read_csv('Multimodal test set grading - disc only - KB 8-12-25.csv')
disc_grades = disc_grades.drop(['Random_Ind','Comments'], axis=1)
disc_grades = disc_grades.dropna(subset=['Glaucoma','GS+Mild_Mod+Severe'])
disc_grades = disc_grades.rename(columns={'Glaucoma':'Disc_Grade', 'GS+Mild_Mod+Severe':'Disc_Severity'})
disc_grades = disc_grades[['RID_Subject', 'Eye', 'Disc_Grade', 'Disc_Severity']]

In [22]:
# Get full testing grading
full_grades = pd.read_csv('testset_compiled_graded_adjudicated.csv')
full_grades = full_grades[['RID_Subject', 'Side', 'Grade_grader2', 'Grader 2']]
full_grades = full_grades.rename(columns={'Grade_grader2':'Full_Grade', 'Grader 2':'Grader', 'Side':'Eye'})
full_grades = full_grades.dropna(subset=['Full_Grade'])

In [23]:
# Pair each disc-only grade with KB's full-testing grade for the same subject and eye.
mergeDF = disc_grades.merge(
    full_grades[full_grades['Grader'] == 'KB'],
    how='inner',
    on=['RID_Subject', 'Eye'],
)

# Attach the chart label per subject/eye; the left join keeps eyes that have no label row.
mergeDF = mergeDF.merge(
    ds_img.get_table_as_dataframe('Execution_Subject_Chart_Label')[['Subject', 'Image_Side', 'Condition_Label']],
    how='left',
    left_on=['RID_Subject', 'Eye'],
    right_on=['Subject', 'Image_Side'],
)
mergeDF = mergeDF[['RID_Subject', 'Eye', 'Disc_Grade', 'Disc_Severity', 'Full_Grade', 'Condition_Label']]

# Attach the MD value per subject/eye from the multimodal table.
mergeDF = mergeDF.merge(
    EA.multimodal_wide(ds_test)[['RID_Subject', 'Image_Side', 'MD']],
    how='left',
    left_on=['RID_Subject', 'Eye'],
    right_on=['RID_Subject', 'Image_Side'],
)
mergeDF = mergeDF[['RID_Subject', 'Eye', 'Disc_Grade', 'Disc_Severity', 'Full_Grade', 'Condition_Label', 'MD']]

# Encode every categorical grade/label as 0/1; values missing from a mapping become NaN.
mergeDF = mergeDF.rename(columns={'Condition_Label': 'Chart_Label'}).assign(
    Binary_Disc_Grade=lambda frame: frame['Disc_Grade'].map({'0-suspect': 0, '1-glaucoma': 1}),
    Binary_Disc_Severity=lambda frame: frame['Disc_Severity'].map({'0-GS/Mild': 0, '1-Mod/Severe': 1}),
    Binary_Full_Grade=lambda frame: frame['Full_Grade'].map({'GS': 0, 'Glaucoma': 1}),
    Binary_Label=lambda frame: frame['Chart_Label'].map({'GS': 0, 'POAG': 1, 'PACG': 1}),
)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hvf_clean.loc[:, 'priority'] = hvf_clean['Field_Size'].map(priority)


In [24]:
import ipywidgets as widgets
from IPython.display import display
import matplotlib.pyplot as plt

def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float('nan')


def prep_stats(y_true=None, y_pred=None, cm=None):
    """Print accuracy, sensitivity and specificity for a binary confusion matrix.

    Parameters
    ----------
    y_true, y_pred : array-like, optional
        Reference and predicted binary labels (0/1 or False/True). Only used when
        ``cm`` is not given, in which case both are required.
    cm : array-like of shape (2, 2), optional
        Precomputed confusion matrix laid out as ``[[tn, fp], [fn, tp]]``.
        Takes precedence over ``y_true``/``y_pred``.

    Returns
    -------
    numpy.ndarray
        The 2x2 confusion matrix the statistics were computed from.

    Raises
    ------
    ValueError
        If neither ``cm`` nor both label vectors are supplied, or if the
        matrix is not 2x2.
    """
    if cm is None:
        if y_true is None or y_pred is None:
            raise ValueError("Provide either `cm` or both `y_true` and `y_pred`.")
        # Pin the label set so the matrix is 2x2 even when one class is absent
        # from the data; otherwise the 4-way unpack below would fail.
        cm = sk.confusion_matrix(y_true, y_pred, labels=[0, 1])

    # Accept list-like matrices as well; an ndarray passes through unchanged.
    cm = np.asarray(cm)
    if cm.shape != (2, 2):
        raise ValueError(f"Expected a 2x2 confusion matrix, got shape {cm.shape}.")

    tn, fp, fn, tp = cm.ravel()
    accuracy = _safe_ratio(tp + tn, tp + tn + fp + fn)
    sensitivity = _safe_ratio(tp, tp + fn)       # Recall, True Positive Rate
    specificity = _safe_ratio(tn, tn + fp)       # True Negative Rate

    print(f"Accuracy   : {accuracy:.3f}")
    print(f"Sensitivity: {sensitivity:.3f}")
    print(f"Specificity: {specificity:.3f}")

    return cm

In [25]:
# GS vs. Glaucoma

# One output widget per panel so the two reports can sit next to each other.
left_out, right_out = widgets.Output(), widgets.Output()

print("KB: GS vs. Glaucoma  ---------------------")

# Left panel: disc-only grade scored against the chart label.
with left_out:
    print("Disc Grade")
    cm_KB_G_D = prep_stats(
        y_true=mergeDF['Binary_Label'],
        y_pred=mergeDF['Binary_Disc_Grade'],
    )
    sk.ConfusionMatrixDisplay(cm_KB_G_D).plot(cmap='Blues')
    plt.show()

# Right panel: full-testing grade scored against the chart label.
with right_out:
    print("Full Testing Grade")
    cm_KB_G_F = prep_stats(
        y_true=mergeDF['Binary_Label'],
        y_pred=mergeDF['Binary_Full_Grade'],
    )
    sk.ConfusionMatrixDisplay(cm_KB_G_F).plot(cmap='Blues')
    plt.show()

# Display side-by-side
box = widgets.HBox([left_out, right_out])
display(box)

KB: GS vs. Glaucoma  ---------------------


HBox(children=(Output(), Output()))

In [26]:
# GS+Mild vs. Mod+Severe Glaucoma

# Reference label for both panels: chart label is glaucoma AND MD is below -6.
# NOTE(review): rows with a missing MD presumably compare as False here and so fall
# into the GS+Mild group — confirm that is intended.
y_true = (mergeDF['Binary_Label'] == 1) & (mergeDF['MD'] < -6)

left_out = widgets.Output()
right_out = widgets.Output()

print("KB: GS+Mild vs. Mod+Severe Glaucoma  ---------------------")

# Fill the left output: the grader's own disc-only severity call.
with left_out:
    print("Disc Grade")
    cm_KB_S_D = prep_stats(
        y_true=y_true,
        y_pred=mergeDF['Binary_Disc_Severity']
    )
    sk.ConfusionMatrixDisplay(cm_KB_S_D).plot(cmap='Blues')
    plt.show()

# Fill the right output: full-testing glaucoma call gated on the MD cutoff.
# NOTE(review): this prediction uses the same MD cutoff as the reference label, unlike the
# disc-only panel, which uses the grader's severity call — confirm the asymmetry is intended.
with right_out:
    print("Full Testing Grade")
    cm_KB_S_F = prep_stats(
        y_true=y_true,
        y_pred=(mergeDF['Binary_Full_Grade'] == 1) & (mergeDF['MD'] < -6)
    )
    sk.ConfusionMatrixDisplay(cm_KB_S_F).plot(cmap='Blues')
    plt.show()

# Display side-by-side
box = widgets.HBox([left_out, right_out])
display(box)

KB: GS+Mild vs. Mod+Severe Glaucoma  ---------------------


HBox(children=(Output(), Output()))

# Van's Grading (VN)

In [27]:
# Get disc-only grading VAN
disc_grades = pd.read_csv('Multimodal test set grading - disc only - VN 8-15-25.csv')
disc_grades = disc_grades.drop(['Random_Ind'], axis=1)
disc_grades = disc_grades.dropna(subset=['Glaucoma','GS+Mild_Mod+Severe'])
disc_grades = disc_grades.rename(columns={'Glaucoma':'Disc_Grade', 'GS+Mild_Mod+Severe':'Disc_Severity'})
disc_grades = disc_grades[['RID_Subject', 'Eye', 'Disc_Grade', 'Disc_Severity']]

In [28]:
# Pair each disc-only grade with VN's full-testing grade for the same subject and eye.
mergeDF = disc_grades.merge(
    full_grades[full_grades['Grader'] == 'VN'],
    how='inner',
    on=['RID_Subject', 'Eye'],
)

# Attach the chart label per subject/eye; the left join keeps eyes that have no label row.
mergeDF = mergeDF.merge(
    ds_img.get_table_as_dataframe('Execution_Subject_Chart_Label')[['Subject', 'Image_Side', 'Condition_Label']],
    how='left',
    left_on=['RID_Subject', 'Eye'],
    right_on=['Subject', 'Image_Side'],
)
mergeDF = mergeDF[['RID_Subject', 'Eye', 'Disc_Grade', 'Disc_Severity', 'Full_Grade', 'Condition_Label']]

# Attach the MD value per subject/eye from the multimodal table.
mergeDF = mergeDF.merge(
    EA.multimodal_wide(ds_test)[['RID_Subject', 'Image_Side', 'MD']],
    how='left',
    left_on=['RID_Subject', 'Eye'],
    right_on=['RID_Subject', 'Image_Side'],
)
mergeDF = mergeDF[['RID_Subject', 'Eye', 'Disc_Grade', 'Disc_Severity', 'Full_Grade', 'Condition_Label', 'MD']]

# Encode every categorical grade/label as 0/1; values missing from a mapping become NaN.
mergeDF = mergeDF.rename(columns={'Condition_Label': 'Chart_Label'}).assign(
    Binary_Disc_Grade=lambda frame: frame['Disc_Grade'].map({'0-suspect': 0, '1-glaucoma': 1}),
    Binary_Disc_Severity=lambda frame: frame['Disc_Severity'].map({'0-GS/Mild': 0, '1-Mod/Severe': 1}),
    Binary_Full_Grade=lambda frame: frame['Full_Grade'].map({'GS': 0, 'Glaucoma': 1}),
    Binary_Label=lambda frame: frame['Chart_Label'].map({'GS': 0, 'POAG': 1, 'PACG': 1}),
)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hvf_clean.loc[:, 'priority'] = hvf_clean['Field_Size'].map(priority)


In [29]:
# GS vs. Glaucoma

# One output widget per panel so the two reports can sit next to each other.
left_out, right_out = widgets.Output(), widgets.Output()

print("VN: GS vs. Glaucoma  ---------------------")

# Left panel: disc-only grade scored against the chart label.
with left_out:
    print("Disc Grade")
    cm_VN_G_D = prep_stats(
        y_true=mergeDF['Binary_Label'],
        y_pred=mergeDF['Binary_Disc_Grade'],
    )
    sk.ConfusionMatrixDisplay(cm_VN_G_D).plot(cmap='Blues')
    plt.show()

# Right panel: full-testing grade scored against the chart label.
with right_out:
    print("Full Testing Grade")
    cm_VN_G_F = prep_stats(
        y_true=mergeDF['Binary_Label'],
        y_pred=mergeDF['Binary_Full_Grade'],
    )
    sk.ConfusionMatrixDisplay(cm_VN_G_F).plot(cmap='Blues')
    plt.show()

# Display side-by-side
box = widgets.HBox([left_out, right_out])
display(box)

VN: GS vs. Glaucoma  ---------------------


HBox(children=(Output(), Output()))

In [30]:
# GS+Mild vs. Mod+Severe Glaucoma

# Reference label for both panels: chart label is glaucoma AND MD is below -6.
# NOTE(review): rows with a missing MD presumably compare as False here and so fall
# into the GS+Mild group — confirm that is intended.
y_true = (mergeDF['Binary_Label'] == 1) & (mergeDF['MD'] < -6)

left_out = widgets.Output()
right_out = widgets.Output()

print("VN: GS+Mild vs. Mod+Severe Glaucoma  ---------------------")

# Fill the left output: the grader's own disc-only severity call.
with left_out:
    print("Disc Grade")
    cm_VN_S_D = prep_stats(
        y_true=y_true,
        y_pred=mergeDF['Binary_Disc_Severity']
    )
    sk.ConfusionMatrixDisplay(cm_VN_S_D).plot(cmap='Blues')
    plt.show()

# Fill the right output: full-testing glaucoma call gated on the MD cutoff.
# NOTE(review): this prediction uses the same MD cutoff as the reference label, unlike the
# disc-only panel, which uses the grader's severity call — confirm the asymmetry is intended.
with right_out:
    print("Full Testing Grade")
    cm_VN_S_F = prep_stats(
        y_true=y_true,
        y_pred=(mergeDF['Binary_Full_Grade'] == 1) & (mergeDF['MD'] < -6)
    )
    sk.ConfusionMatrixDisplay(cm_VN_S_F).plot(cmap='Blues')
    plt.show()

# Display side-by-side
box = widgets.HBox([left_out, right_out])
display(box)

VN: GS+Mild vs. Mod+Severe Glaucoma  ---------------------


HBox(children=(Output(), Output()))

# Combined Graders

In [199]:
# GS vs. Glaucoma

# Pool the two graders by adding their confusion-matrix counts; an eye graded by both
# graders therefore contributes to the pooled counts twice.
cm_KBVN_D = cm_KB_G_D + cm_VN_G_D
cm_KBVN_F = cm_KB_G_F + cm_VN_G_F

left_out, right_out = widgets.Output(), widgets.Output()

print("GS vs. Glaucoma  ---------------------")

# Render each pooled matrix into its own widget: disc-only on the left, full testing on the right.
for panel_out, panel_title, panel_cm in (
    (left_out, "Disc Grade", cm_KBVN_D),
    (right_out, "Full Testing Grade", cm_KBVN_F),
):
    with panel_out:
        print(panel_title)
        prep_stats(cm=panel_cm)
        sk.ConfusionMatrixDisplay(panel_cm).plot(cmap='Blues')
        plt.show()

# Display side-by-side
box = widgets.HBox([left_out, right_out])
display(box)

GS vs. Glaucoma  ---------------------


HBox(children=(Output(), Output()))

In [201]:
# GS+Mild vs. Mod+Severe Glaucoma

# Pool the two graders by adding their confusion-matrix counts; an eye graded by both
# graders therefore contributes to the pooled counts twice.
cm_KBVN_D = cm_KB_S_D + cm_VN_S_D
cm_KBVN_F = cm_KB_S_F + cm_VN_S_F

left_out, right_out = widgets.Output(), widgets.Output()

print("GS+Mild vs. Mod+Severe Glaucoma  ---------------------")

# Render each pooled matrix into its own widget: disc-only on the left, full testing on the right.
for panel_out, panel_title, panel_cm in (
    (left_out, "Disc Grade", cm_KBVN_D),
    (right_out, "Full Testing Grade", cm_KBVN_F),
):
    with panel_out:
        print(panel_title)
        prep_stats(cm=panel_cm)
        sk.ConfusionMatrixDisplay(panel_cm).plot(cmap='Blues')
        plt.show()

# Display side-by-side
box = widgets.HBox([left_out, right_out])
display(box)

GS+Mild vs. Mod+Severe Glaucoma  ---------------------


HBox(children=(Output(), Output()))

In [None]:
# Buffer