<a href="https://colab.research.google.com/github/informatics-isi-edu/eye-ai-exec/blob/main/notebooks/VGG19_Diagnosis_Predict.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# VGG19 Model Application

This notebook applies a pre-trained model to a dataset specified in the configuration file and uploads the labels to the catalog.  The ROC curve is also calculated and uploaded.


In [1]:
# Prerequisites to configure colab
import sys
IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    !pip install deriva
    !pip install bdbag
    !pip install --upgrade --force pydantic
    !pip install git+https://github.com/informatics-isi-edu/deriva-ml git+https://github.com/informatics-isi-edu/eye-ai-ml
    !pip install setuptools_git_versioning


In [2]:
repo_dir = "Repos"   # Set this to be where your github repos are located.
%load_ext autoreload
%autoreload 2

# Update the load path so python can find modules for the model
import sys
from pathlib import Path
sys.path.insert(0, str(Path.home() / repo_dir / "eye-ai-ml"))

In [3]:
# Prerequisites

import json
import os
from eye_ai.eye_ai import EyeAI
import pandas as pd
from pathlib import Path, PurePath
import logging
# import torch

# Emit INFO-and-above records as "<time> - <level> - <message>".
# force=True replaces any handlers already attached to the root logger.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    force=True,
)

In [4]:

from deriva.core.utils.globus_auth_utils import GlobusNativeLogin

# Server and catalog this notebook talks to.
host = 'www.eye-ai.org'
catalog_id = "eye-ai" #@param

# Log in to the host only when no login is already active.
gnl = GlobusNativeLogin(host=host)
if not gnl.is_logged_in([host]):
    gnl.login(
        [host],
        no_local_server=True,
        no_browser=True,
        refresh_tokens=True,
        update_bdbag_keychain=True,
    )
    print("Login Successful")
else:
    print("You are already logged in.")

2024-07-01 15:33:58,667 - INFO - Creating client of type <class 'globus_sdk.services.auth.client.native_client.NativeAppAuthClient'> for service "auth"
2024-07-01 15:33:58,668 - INFO - Finished initializing AuthLoginClient. client_id='8ef15ba9-2b4a-469c-a163-7fd910c9d111', type(authorizer)=<class 'globus_sdk.authorizers.base.NullAuthorizer'>


You are already logged in.


Connect to the Eye-AI catalog.  Configure the local cache and working directories used to store data.  Initialize Eye-AI for the pending execution based on the provided configuration file.

In [5]:
# Notebook-wide settings; edit these before running the remaining cells.

# Directory in which to cache materialized BDBags for datasets.
cache_dir = "/data"
# Directory in which to place output files for later upload.
working_dir = "/data"

# Configuration file for this run.  Needs to be changed for each execution.
configuration_rid = "2-C9F2"

In [6]:
# Create the EyeAI client for the configured host and catalog, using the
# local cache and working directories chosen above.
EA = EyeAI(
    hostname=host,
    catalog_id=catalog_id,
    cache_dir=cache_dir,
    working_dir=working_dir,
)

2024-07-01 15:33:59,761 - INFO - Creating client of type <class 'globus_sdk.services.auth.client.native_client.NativeAppAuthClient'> for service "auth"
2024-07-01 15:33:59,762 - INFO - Finished initializing AuthLoginClient. client_id='8ef15ba9-2b4a-469c-a163-7fd910c9d111', type(authorizer)=<class 'globus_sdk.authorizers.base.NullAuthorizer'>


In [7]:
# @title Initiate an Execution
# Initialize the execution for `configuration_rid`; per the log output of this
# cell, this downloads the configuration file and the execution assets.
configuration_records = EA.execution_init(configuration_rid=configuration_rid)
input_dataset = configuration_records.bag_paths[0] # Assumes that the configuration file only specifies one dataset.
# Display the configuration record as a plain dict.
configuration_records.model_dump()

# Sample output from an earlier run, kept for reference only; RIDs and asset
# names differ from run to run, so rely on the live output of this cell.
# {'caching_dir': PosixPath('/data'),
#  'working_dir': PosixPath('/data/sreenidhi/EyeAI_working'),
#  'vocabs': {'Workflow_Type': [{'name': 'VGG19_Catalog_Model_Prediction',
#     'rid': '2-C9AT'}],
#   'Execution_Asset_Type': [{'name': 'VGG19_Catalog_Model_Prediction',
#     'rid': '2-C9AW'}]},
#  'execution_rid': '2-C9B6',
#  'workflow_rid': '2-C9AY',
#  'bag_paths': [PosixPath('/data/2-277M_8c4b855c2752e098580a5bb0d1b63a8cedde4462805fe74cddc912a72fb39963/Dataset_2-277M')],
#  'assets_paths': [PosixPath('/data/sreenidhi/EyeAI_working/Execution_Assets/VGG19_Catalog_LAC_DHS_Cropped_Data_exlcuding_no_Optic_disc_fundus_Trained_model_June_24_2024.h5'),
#   PosixPath('/data/sreenidhi/EyeAI_working/Execution_Assets/best_hyperparameters_exluding_no_optic_disc_images_june_24_2024.json')],
#  'configuration_path': PosixPath('/data/sreenidhi/EyeAI_working/Execution_Metadata/Execution_Config-vgg19_catalog_model_prediction_more_metrics_sreenidhi_june_24_2024.json')}

2024-07-01 15:34:00,477 - INFO - File [/data/sreenidhi/EyeAI_working/Execution_Metadata/Execution_Config-vgg19_catalog_van_finetuned_model_prediction_more_metrics_sreenidhi_june_30_2024.json] transfer successful. 0.95 KB transferred. Elapsed time: 0:00:00.000065.
2024-07-01 15:34:00,477 - INFO - Verifying MD5 checksum for downloaded file [/data/sreenidhi/EyeAI_working/Execution_Metadata/Execution_Config-vgg19_catalog_van_finetuned_model_prediction_more_metrics_sreenidhi_june_30_2024.json]
2024-07-01 15:34:00,492 - INFO - Configuration validation successful!
2024-07-01 15:34:06,539 - INFO - File [/data/sreenidhi/EyeAI_working/Execution_Assets/VGG19_Catalog_LAC_DHS_Cropped_Data_exlcuding_no_Optic_disc_fundus_Trained_model_June_24_2024_Van_Fine_Tuned.h5] transfer successful. 232.71 MB transferred at 53.43 MB/second. Elapsed time: 0:00:04.355260.
2024-07-01 15:34:06,818 - INFO - File [/data/sreenidhi/EyeAI_working/Execution_Assets/best_hyperparameters_exluding_no_optic_disc_images_june_24_

{'caching_dir': PosixPath('/data'),
 'working_dir': PosixPath('/data/sreenidhi/EyeAI_working'),
 'vocabs': {'Workflow_Type': [{'name': 'VGG19_Catalog_Model_Van_FineTuned_Prediction',
    'rid': '2-C9F6'}],
  'Execution_Asset_Type': [{'name': 'VGG19_Catalog_Model_Van_FineTuned_Prediction',
    'rid': '2-C9F8'}]},
 'execution_rid': '2-C9FJ',
 'workflow_rid': '2-C9FA',
 'bag_paths': [PosixPath('/data/2-277M_8c4b855c2752e098580a5bb0d1b63a8cedde4462805fe74cddc912a72fb39963/Dataset_2-277M')],
 'assets_paths': [PosixPath('/data/sreenidhi/EyeAI_working/Execution_Assets/VGG19_Catalog_LAC_DHS_Cropped_Data_exlcuding_no_Optic_disc_fundus_Trained_model_June_24_2024_Van_Fine_Tuned.h5'),
  PosixPath('/data/sreenidhi/EyeAI_working/Execution_Assets/best_hyperparameters_exluding_no_optic_disc_images_june_24_2024.json')],
 'configuration_path': PosixPath('/data/sreenidhi/EyeAI_working/Execution_Metadata/Execution_Config-vgg19_catalog_van_finetuned_model_prediction_more_metrics_sreenidhi_june_30_2024.json

The algorithm was trained on cropped images, so take the raw images and bounding boxes, apply the crops, and store the results in the working directory.

In [8]:
# Show the configuration record returned by EA.execution_init.
configuration_records

ConfigurationRecord(caching_dir=PosixPath('/data'), working_dir=PosixPath('/data/sreenidhi/EyeAI_working'), vocabs={'Workflow_Type': [Term(name='VGG19_Catalog_Model_Van_FineTuned_Prediction', rid='2-C9F6')], 'Execution_Asset_Type': [Term(name='VGG19_Catalog_Model_Van_FineTuned_Prediction', rid='2-C9F8')]}, execution_rid='2-C9FJ', workflow_rid='2-C9FA', bag_paths=[PosixPath('/data/2-277M_8c4b855c2752e098580a5bb0d1b63a8cedde4462805fe74cddc912a72fb39963/Dataset_2-277M')], assets_paths=[PosixPath('/data/sreenidhi/EyeAI_working/Execution_Assets/VGG19_Catalog_LAC_DHS_Cropped_Data_exlcuding_no_Optic_disc_fundus_Trained_model_June_24_2024_Van_Fine_Tuned.h5'), PosixPath('/data/sreenidhi/EyeAI_working/Execution_Assets/best_hyperparameters_exluding_no_optic_disc_images_june_24_2024.json')], configuration_path=PosixPath('/data/sreenidhi/EyeAI_working/Execution_Metadata/Execution_Config-vgg19_catalog_van_finetuned_model_prediction_more_metrics_sreenidhi_june_30_2024.json'))

In [9]:
# Show the working directory EyeAI uses for this run, as a string.
str(EA.working_dir)

'/data/sreenidhi/EyeAI_working'

In [10]:
# @title Get Cropped Images
# Produce cropped images from the input dataset bag, writing them under the
# working directory.  `input_dataset` is the first (only) bag path of the
# configuration record.
cropped_image_path, cropped_csv = EA.create_cropped_images(
    str(input_dataset),
    output_dir=str(EA.working_dir),
    crop_to_eye=True,
)

In [11]:
import os

def count_files(directory):
    """Return the number of regular files located directly inside ``directory``.

    Sub-directories are neither counted nor descended into.
    """
    total = 0
    for entry_name in os.listdir(directory):
        if os.path.isfile(os.path.join(directory, entry_name)):
            total += 1
    return total

def analyze_graded_test_dataset(base_path, main_folders=("Image_cropped",)):
    """Print how many files each sub-folder of the dataset folders holds.

    For every folder named in ``main_folders`` that exists under
    ``base_path``, each immediate sub-directory is listed together with the
    number of files directly inside it, followed by the total for that folder.
    Files lying directly in the folder itself are not counted.

    Args:
        base_path: Directory that contains the folders to inspect.
        main_folders: Names of the folders, relative to ``base_path``, to
            analyze.  Defaults to the single ``Image_cropped`` folder.

    Returns:
        None.  The report is written to standard output.
    """
    for main_folder in main_folders:
        main_folder_path = os.path.join(base_path, main_folder)
        # isdir (not exists): a plain file carrying the folder's name is
        # reported as missing instead of making os.listdir raise.
        if not os.path.isdir(main_folder_path):
            print(f"{main_folder} folder not found")
            continue

        print(f"\nAnalyzing {main_folder} folder:")

        total_files = 0
        # Sorted so the report order does not depend on the file system.
        for angle_folder in sorted(os.listdir(main_folder_path)):
            angle_folder_path = os.path.join(main_folder_path, angle_folder)
            if os.path.isdir(angle_folder_path):
                file_count = count_files(angle_folder_path)
                print(f"  {angle_folder}: {file_count} images")
                total_files += file_count

        print(f"Total images in {main_folder}: {total_files}")

# Usage: report on the folders under this run's working directory instead of a
# hard-coded, user-specific path, so the cell works for any user/configuration.
base_path = str(EA.working_dir)
analyze_graded_test_dataset(base_path)



Analyzing Image_cropped folder:
  2SKC_No_Glaucoma: 526 images
  2SKA_Suspected_Glaucoma: 568 images
Total images in Image_cropped: 1094


In [12]:
# Path of the hyper-parameter JSON asset, as a string.
# NOTE(review): relies on the JSON being the second entry of assets_paths —
# confirm the configuration file keeps that order.
best_hyper_parameters_json_path = str(configuration_records.assets_paths[1])
best_hyper_parameters_json_path

'/data/sreenidhi/EyeAI_working/Execution_Assets/best_hyperparameters_exluding_no_optic_disc_images_june_24_2024.json'

In [13]:
# Path of the trained model asset.
# NOTE(review): relies on the model file being the first entry of assets_paths —
# confirm the configuration file keeps that order.
model_path = configuration_records.assets_paths[0]
model_path

PosixPath('/data/sreenidhi/EyeAI_working/Execution_Assets/VGG19_Catalog_LAC_DHS_Cropped_Data_exlcuding_no_Optic_disc_fundus_Trained_model_June_24_2024_Van_Fine_Tuned.h5')

In [14]:
# Show the cropped-image directory returned by EA.create_cropped_images.
cropped_image_path

PosixPath('/data/sreenidhi/EyeAI_working/Image_cropped')

In [15]:

# Folder for this run's outputs: <working_dir>/Execution_Assets/<name of the
# first Execution_Asset_Type term of the configuration>.
output_path = os.path.join(
    str(EA.working_dir),
    "Execution_Assets",
    configuration_records.vocabs['Execution_Asset_Type'][0].name,
)
# makedirs(exist_ok=True) keeps the cell re-runnable (os.mkdir raised
# FileExistsError on a second run) and creates missing parent folders.
# NOTE(review): files left by an earlier run stay in the folder — clear it
# first if stale outputs must not be uploaded.
os.makedirs(output_path, exist_ok=True)

output_path

'/data/sreenidhi/EyeAI_working/Execution_Assets/VGG19_Catalog_Model_Van_FineTuned_Prediction'

Import the actual model code and then run it against the input dataset specified in the configuration file.

In [16]:
# @title Execute Process algorithm (Test model)
from eye_ai.models.vgg19_diagnosis_predict_more_metrics import prediction

# Run the model inside the context manager for this execution RID.
# NOTE(review): the name `exec` shadows Python's builtin exec() for the rest
# of the notebook.
with EA.execution(execution_rid=configuration_records.execution_rid) as exec:
    prediction(
        model_path=model_path,
        cropped_image_path=cropped_image_path,
        output_dir=output_path,
        best_hyperparameters_json_path=best_hyper_parameters_json_path,
    )

2024-07-01 15:35:28.370261: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-01 15:35:28.370305: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-01 15:35:28.371239: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-01 15:35:28.377890: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-07-01 15:35:29.962876: I external/local_xla/xla/

Found 1094 images belonging to 2 classes.


2024-07-01 15:35:31.567324: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8907




2024-07-01 15:35:39,718 - INFO - 
Metrics for VGG19_Catalog_LAC_DHS_Cropped_Data_exlcuding_no_Optic_disc_fundus_Trained_model_June_24_2024_Van_Fine_Tuned:
2024-07-01 15:35:39,719 - INFO - Accuracy: 0.8180987202925045
2024-07-01 15:35:39,720 - INFO - Precision: 0.8265486725663717
2024-07-01 15:35:39,720 - INFO - Recall: 0.8221830985915493
2024-07-01 15:35:39,721 - INFO - F1 Score: 0.824360105913504
2024-07-01 15:35:39,721 - INFO - ROC AUC: 0.8902359021046431
2024-07-01 15:35:39,742 - INFO - Classification Report for VGG19_Catalog_LAC_DHS_Cropped_Data_exlcuding_no_Optic_disc_fundus_Trained_model_June_24_2024_Van_Fine_Tuned:
2024-07-01 15:35:39,743 - INFO - {
  "2SKC_No_Glaucoma": {
    "precision": 0.8090737240075614,
    "recall": 0.8136882129277566,
    "f1-score": 0.8113744075829384,
    "support": 526.0
  },
  "2SKA_Suspected_Glaucoma": {
    "precision": 0.8265486725663717,
    "recall": 0.8221830985915493,
    "f1-score": 0.824360105913504,
    "support": 568.0
  },
  "accuracy": 0


Metrics for VGG19_Catalog_LAC_DHS_Cropped_Data_exlcuding_no_Optic_disc_fundus_Trained_model_June_24_2024_Van_Fine_Tuned:
Accuracy: 0.8180987202925045
Precision: 0.8265486725663717
Recall: 0.8221830985915493
F1 Score: 0.824360105913504
ROC AUC: 0.8902359021046431

Classification Report for VGG19_Catalog_LAC_DHS_Cropped_Data_exlcuding_no_Optic_disc_fundus_Trained_model_June_24_2024_Van_Fine_Tuned:
                         precision    recall  f1-score   support

       2SKC_No_Glaucoma       0.81      0.81      0.81       526
2SKA_Suspected_Glaucoma       0.83      0.82      0.82       568

               accuracy                           0.82      1094
              macro avg       0.82      0.82      0.82      1094
           weighted avg       0.82      0.82      0.82      1094



2024-07-01 15:35:40,591 - INFO - Confusion matrix saved as VGG19_Catalog_LAC_DHS_Cropped_Data_exlcuding_no_Optic_disc_fundus_Trained_model_June_24_2024_Van_Fine_Tuned_confusion_matrix.png (300 DPI)
2024-07-01 15:35:41,006 - INFO - ROC curve saved as VGG19_Catalog_LAC_DHS_Cropped_Data_exlcuding_no_Optic_disc_fundus_Trained_model_June_24_2024_Van_Fine_Tuned_roc_curve.png (300 DPI)
2024-07-01 15:35:41,009 - INFO - Data saved to VGG19_Catalog_LAC_DHS_Cropped_Data_exlcuding_no_Optic_disc_fundus_Trained_model_June_24_2024_Van_Fine_Tuned_predictions_results.csv
2024-07-01 15:35:41,010 - INFO - Metrics saved to VGG19_Catalog_LAC_DHS_Cropped_Data_exlcuding_no_Optic_disc_fundus_Trained_model_June_24_2024_Van_Fine_Tuned_metrics.json


In [17]:
# @title Save Execution Assets (model) and Metadata
# Upload the files produced by this execution; the result is kept in
# `uploaded_assets`.
# NOTE(review): the meaning of the positional `True` is not visible here —
# confirm against EyeAI.execution_upload and prefer passing it by keyword.
uploaded_assets = EA.execution_upload(configuration_records.execution_rid, True)


2024-07-01 15:37:39,201 - INFO - Initializing uploader: GenericUploader v1.7.1 [Python 3.10.13, Linux-5.10.210-201.852.amzn2.x86_64-x86_64-with-glibc2.26]
2024-07-01 15:37:39,202 - INFO - Creating client of type <class 'globus_sdk.services.auth.client.native_client.NativeAppAuthClient'> for service "auth"
2024-07-01 15:37:39,202 - INFO - Finished initializing AuthLoginClient. client_id='8ef15ba9-2b4a-469c-a163-7fd910c9d111', type(authorizer)=<class 'globus_sdk.authorizers.base.NullAuthorizer'>
2024-07-01 15:37:39,241 - INFO - Checking for updated configuration...
2024-07-01 15:37:39,369 - INFO - Updated configuration found.
2024-07-01 15:37:39,371 - INFO - Scanning files in directory [/data/sreenidhi/EyeAI_working/Execution_Assets/VGG19_Catalog_Model_Van_FineTuned_Prediction]...
2024-07-01 15:37:39,374 - INFO - Including file: [/data/sreenidhi/EyeAI_working/Execution_Assets/VGG19_Catalog_Model_Van_FineTuned_Prediction/VGG19_Catalog_LAC_DHS_Cropped_Data_exlcuding_no_Optic_disc_fundus_Tr