# Connect to Eye-AI and Load Libraries

In [None]:
# IPython magics: load the autoreload extension and select mode 2, so that
# edits to imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Update the load path so Python can find the modules for the model.
import sys
from pathlib import Path

# Each repository directory under the home folder is pushed to the front of
# sys.path in turn, so the one listed last ("eye-ai-exec") ends up first.
for repo_name in ("eye-ai-ml", "eye-ai-exec"):
    sys.path.insert(0, str(Path.home() / repo_name))

In [None]:
# Prerequisites
import json
import os

# EyeAI, Deriva, VGG19
from deriva_ml import DatasetSpec, DatasetBag, Workflow, ExecutionConfiguration, VersionPart
from deriva_ml import MLVocab as vc
from eye_ai.eye_ai import EyeAI
from models.vgg19 import vgg19_diagnosis_train

# ML Analytics
import pandas as pd
import numpy as np
from sklearn.calibration import calibration_curve
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt

# Other Utilities
from pathlib import Path, PurePath
import logging
from datetime import datetime

# Configure the root logger: INFO level, timestamped records.
# force=True makes this call replace any handlers already attached to the
# root logger instead of being silently ignored.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True,
)

In [None]:
# Login
from deriva.core.utils.globus_auth_utils import GlobusNativeLogin

# Target server and catalog; swap the host assignment to test against dev.
host = 'www.eye-ai.org'
#host = 'dev.eye-ai.org' #for dev testing
catalog_id = "eye-ai"

# Only start the login flow when is_logged_in reports no login for this host.
gnl = GlobusNativeLogin(host=host)
if not gnl.is_logged_in([host]):
    gnl.login(
        [host],
        no_local_server=True,
        no_browser=True,
        refresh_tokens=True,
        update_bdbag_keychain=True,
    )
    print("Login Successful")
else:
    print("You are already logged in.")

# Configuration

In [None]:
# Cache and working directories both point at /data.
cache_dir = '/data'
working_dir = '/data'

# EyeAI client bound to the host/catalog chosen in the login cell; every
# later cell goes through this object.
EA = EyeAI(
    hostname=host,
    catalog_id=catalog_id,
    cache_dir=cache_dir,
    working_dir=working_dir,
)

In [None]:

#EA.increment_dataset_version(dataset_rid='4-YWKJ', component= VersionPart.patch, description='Update after annotations added')

In [None]:
# Dataset and model selection for this prediction run.
source_dataset = "4-S42W" # New LAC test (balanced)

# Toggle between the two model assets by swapping which pair is commented out.
# asset_RID = ["4-MWQ6"]  # VGG19 cropped
# crop = True

asset_RID = ["4-MWQ8"]  # VGG19 uncropped
crop = False

# Label derived from `crop` so the recorded description always matches the
# model variant actually selected above.
model_variant = "Cropped" if crop else "Uncropped"

# Register the workflow that this execution is recorded under.
preds_workflow = EA.add_workflow(
    Workflow(
        name="VGG Predictions by KB",
        url="https://github.com/informatics-isi-edu/eye-ai-exec/blob/main/notebooks/Sandbox_KB/VGG_Predict.ipynb",
        workflow_type="Test Workflow",
    )
)

# The description is built from the configured dataset RID and model variant;
# the previous hard-coded text named a different dataset (4-YWKJ USC Test)
# than the one actually evaluated.
config = ExecutionConfiguration(
    datasets=[
        DatasetSpec(
            rid=source_dataset,
            version=EA.dataset_version(source_dataset),
            materialize=True,
        )
    ],
    assets=asset_RID,
    workflow=preds_workflow,
    description=f"Instance of creating VGG19 predictions: VGG19 {model_variant} on {source_dataset}",
)

execution = EA.create_execution(config)

In [None]:
# Show the execution object returned by EA.create_execution.
print(execution)

In [None]:
# Per-execution output directory: <execution working dir>/<execution RID>.
# NOTE(review): this reads the private attribute `_working_dir`; confirm
# whether the execution object exposes a public equivalent.
output_dir = execution._working_dir / execution.execution_rid
output_dir.mkdir(parents=True, exist_ok=True)
# Bare expression so the notebook displays the resolved path.
output_dir

In [None]:
# First entry of execution.datasets; presumably the bag for `source_dataset`,
# the only dataset listed in the execution configuration — TODO confirm.
ds_bag_test = execution.datasets[0]

In [None]:
# FOR LAC DATA
# Prepare the test images under <output_dir>/dataset/test. `crop` is forwarded
# as crop_to_eye; the call returns an image path and a CSV path.
test_image_path_cropped, test_csv_cropped = EA.create_cropped_images(
    ds_bag=ds_bag_test,
    output_dir=output_dir / "dataset" / "test",
    crop_to_eye=crop,
)

In [None]:
# Directory that receives the evaluation outputs:
# <execution working dir>/<execution RID>/asset.
asset_output_dir = execution._working_dir / execution.execution_rid / "asset"
asset_output_dir.mkdir( parents=True, exist_ok=True )

In [None]:
# Date stamp for today in "Mon_DD_YYYY" form (e.g. "Jan_05_2025").
current_date = f"{datetime.now():%b_%d_%Y}"

In [None]:
# First 'Execution_Asset' path held by the execution, as a string; presumably
# the model file downloaded for asset_RID — TODO confirm.
model_path = str(execution.asset_paths['Execution_Asset'][0])

In [None]:
# Bare expression so the notebook displays the test image path.
test_image_path_cropped

In [None]:
!ls /data/kb_766/EyeAI_working/4-YX6W/dataset/test/

In [None]:
# Inspect the image table written for the current execution. The path is
# built from output_dir (the same <output_dir>/dataset/test directory handed
# to create_cropped_images) instead of a hard-coded path from an earlier run.
pd.read_csv(output_dir / "dataset" / "test" / "Image.csv")

In [None]:

# Model label derived from `crop` so the model name stays correct when the
# cropped/uncropped configuration is toggled (identical to the previous
# hard-coded "Uncropped" for crop = False).
model_variant = "Cropped" if crop else "Uncropped"

# Run the evaluation inside the execution context.
# NOTE(review): `exec` shadows the builtin of the same name; the name is kept
# because the upload cell calls exec.upload_execution_outputs(...).
with execution.execute() as exec:
    # evaluate_only returns two values; both are later passed to pd.read_csv.
    predictions_results, metrics_summary = vgg19_diagnosis_train.evaluate_only(
        model_path=model_path,
        model_name=f"VGG19_{model_variant}_Model_{ds_bag_test.dataset_rid}_{current_date}",
        test_path=test_image_path_cropped,
        output_dir=asset_output_dir,
        classes={'No_Glaucoma': 0, 'Suspected_Glaucoma': 1},
    )
    print("Execution Results:")
    print(predictions_results, metrics_summary)

In [None]:
# Load and display the metrics summary CSV returned by evaluate_only.
pd.read_csv( metrics_summary )

In [None]:
# Load the per-image predictions CSV returned by evaluate_only.
preds = pd.read_csv( predictions_results )
# Count rows by the first "/"-separated component of 'Filename'
# (presumably the class sub-folder — TODO confirm).
preds['Filename'].str.split( pat = "/", expand = True)[0].value_counts()

In [None]:
# Calibration curve: 10 uniform-width probability bins, plotted against the
# y = x diagonal of a perfectly calibrated model.
prob_true, prob_pred = calibration_curve(
    preds["True Label"],
    preds["Probability Score"],
    n_bins=10,
    strategy='uniform',
)

fig, ax = plt.subplots()
ax.plot(prob_pred, prob_true, marker='o', label='Model')
ax.plot([0, 1], [0, 1], linestyle='--', label='Perfectly calibrated')
ax.set_xlabel('Mean predicted probability')
ax.set_ylabel('Fraction of positives')
ax.set_title('Calibration curve')
ax.legend()
plt.show()

In [None]:
# Gather patient data


def _image_key_from_filename(filename):
    """Return the third "_"-separated token of *filename*, cut at its first "."."""
    return filename.split("_")[2].split(".")[0]


preds['Image'] = preds['Filename'].apply(_image_key_from_filename)

# Column subsets pulled from the dataset bag, one per table in the chain
# Image_Diagnosis -> Image -> Observation -> Subject.
diagnosis_cols = ds_bag_test.get_table_as_dataframe('Image_Diagnosis')[['Image', 'Diagnosis_Image']]
image_cols = ds_bag_test.get_table_as_dataframe('Image')[['RID', 'Observation']]
observation_cols = ds_bag_test.get_table_as_dataframe('Observation')[['RID', 'Subject']]
subject_cols = ds_bag_test.get_table_as_dataframe('Subject')[['RID', 'Subject_Gender', 'Subject_Ethnicity']]

# Left-join each table in turn; the helper 'RID' key column is dropped after
# every join so the next join can bring in its own 'RID'.
linkdDF = (
    preds
    .merge(diagnosis_cols, on='Image', how='left')
    .merge(image_cols, left_on='Image', right_on='RID', how='left')
    .drop(columns='RID')
    .merge(observation_cols, left_on='Observation', right_on='RID', how='left')
    .drop(columns='RID')
    .merge(subject_cols, left_on='Subject', right_on='RID', how='left')
    .drop(columns='RID')
)

linkdDF['Subject_Ethnicity'] = linkdDF['Subject_Ethnicity'].astype('category')


In [None]:
# Share of rows per gender and per ethnicity. The denominator is the actual
# row count of linkdDF rather than a hard-coded 656, so the fractions stay
# correct when the dataset (or the join result) changes size.
n_rows = len(linkdDF)
linkdDF['Subject_Gender'].value_counts() / n_rows, linkdDF['Subject_Ethnicity'].value_counts() / n_rows

In [None]:
# Explore performance metrics


def _print_auc_by_group(frame, column):
    """Print each non-null value of *column* followed by the ROC AUC of its rows.

    Null group keys are skipped: the left joins that build the frame can
    leave them behind, and comparing against a null selects no rows, which
    roc_curve cannot score.
    """
    for group in frame[column].dropna().unique():
        subset = frame[frame[column] == group]
        group_fpr, group_tpr, _ = roc_curve(subset["True Label"], subset["Probability Score"])
        print(group)
        print(auc(group_fpr, group_tpr))


# Confirm AUC
fpr, tpr, thresholds = roc_curve(preds["True Label"], preds["Probability Score"])
print("Overall AUC")
print(auc(fpr, tpr))

# Check AUC by ethnicity
_print_auc_by_group(linkdDF, 'Subject_Ethnicity')

# Check AUC by gender
_print_auc_by_group(linkdDF, 'Subject_Gender')

In [None]:
# Rows diagnosed 'Suspected Glaucoma' for which the model predicted class 0.
# NOTE(review): the diagnosis value here uses a space, whereas the class key
# passed to evaluate_only is 'Suspected_Glaucoma' — confirm that
# 'Diagnosis_Image' really stores the spaced form.
linkdDF[ (linkdDF[ 'Diagnosis_Image' ] == 'Suspected Glaucoma') & (linkdDF[ 'Prediction' ] == 0) ]

# Upload Results

In [None]:
# # create asset path
# asset_type_name = "Diagnosis_Analysis"
# asset_path = exec.execution_asset_path(asset_type_name)

# # save assets to asset_path
# linkdDF.to_csv(asset_path/'ImagesToVGG19.csv', index=False)

# upload assets to catalog
# `exec` is the object bound by `with execution.execute() as exec:` in the
# evaluation cell, so that cell must have run first.
# NOTE(review): the name shadows the builtin exec(); consider renaming it in
# both cells together.
exec.upload_execution_outputs(clean_folder=True)