In [None]:
repo_dir = "Repos"   # Set this to be where your github repos are located.
%load_ext autoreload
%autoreload 2

# Update the load path so python can find modules for the model
import sys
from pathlib import Path
sys.path.insert(0, str(Path.home() / repo_dir / "eye-ai-ml"))
sys.path.append('Repos/eye-ai-exec/models/vgg19')

In [None]:
# Prerequisites
import json
import os
from eye_ai.eye_ai import EyeAI

import pandas as pd
from pathlib import Path, PurePath
import logging

from deriva_ml import DatasetBag, Workflow, ExecutionConfiguration, DatasetVersion
from deriva_ml import MLVocab as vc
# Root logger: INFO and above, timestamped records. force=True re-applies this
# configuration even when a handler is already installed on the root logger.
log_format = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(format=log_format, level=logging.INFO, force=True)

In [None]:
from deriva.core.utils.globus_auth_utils import GlobusNativeLogin

# Catalog to work against and the server that hosts it.
catalog_id = "eye-ai" #@param
host = 'www.eye-ai.org'

# Authenticate against the host, skipping the login flow when a session
# for it already exists.
gnl = GlobusNativeLogin(host=host)
if not gnl.is_logged_in([host]):
    gnl.login(
        [host],
        no_local_server=True,
        no_browser=True,
        refresh_tokens=True,
        update_bdbag_keychain=True,
    )
    print("Login Successful")
else:
    print("You are already logged in.")

In [None]:
# The same local directory serves as both the cache and the working area.
cache_dir = working_dir = '/data'

# Client object used by every later cell to talk to the catalog.
EA = EyeAI(
    hostname=host,
    catalog_id=catalog_id,
    cache_dir=cache_dir,
    working_dir=working_dir,
)

In [None]:
# RIDs of the source datasets. Later cells pick the bags out of
# execution.datasets by position (0 = training, 1 = validation, 2 = test),
# so keep this order stable.
datasets = [
    '4-PV06',
    '2-39FY',
    '2-277M',
]

# Set to False if you only need the metadata from the bag, and not the assets.
# The flag drives each spec's `materialize` field, so the bags are still
# attached to the execution (and execution.datasets stays populated) when the
# assets are skipped.
download_assets = True

# One download spec per dataset, pinned to the version the catalog reports now.
to_be_download = [
    {
        'rid': dataset,
        'materialize': download_assets,
        'version': EA.dataset_version(dataset_rid=dataset),
    }
    for dataset in datasets
]

# NOTE(review): the "VGG19 Model Train" workflow type presumably has to exist
# in the catalog vocabulary already; if it does not, create it once with:
# EA.add_term(vc.workflow_type, "VGG19 Model Train", description="A workflow to train VGG19 model")

# Workflow instance
workflow_rid = EA.add_workflow(Workflow(
    name="VGG19 Model train - 3000 images",
    url="https://github.com/informatics-isi-edu/eye-ai-exec/blob/main/notebooks/VGG19_Huy/VGG19_TRAIN_3000.ipynb",
    workflow_type="VGG19 Model Train",
    )
)

config = ExecutionConfiguration(
    datasets=to_be_download,
    workflow=workflow_rid,
    description="Instance of training VGG19 model - 3000 images",
)

# Initialize execution
execution = EA.create_execution(config)

In [None]:
# Show the string form of the execution that was just created, as a sanity check.
print(execution)

In [None]:
# Pick the three downloaded bags by position: training, validation, test.
# NOTE(review): assumes execution.datasets keeps the order of the dataset
# specs given in the execution configuration — confirm.
ds_bag_0, ds_bag_val, ds_bag_test = (
    execution.datasets[position] for position in range(3)
)

In [None]:
# Bags the training cell iterates over; only the first downloaded bag is used here.
ds_bag_list = [ds_bag_0]

In [None]:
def _load_id_column(csv_name, column):
    """Read ``csv_name`` and return the frame together with ``column`` as a plain list."""
    frame = pd.read_csv(csv_name)
    return frame, frame[column].tolist()


# IDs later passed as exclude_list when preparing the validation images.
val_excluded_df, val_excluded = _load_id_column(
    "valid_no_optic_disc_image_ids.csv", "ID"
)

# IDs later passed as exclude_list when preparing the training images.
train_excluded_df, train_excluded = _load_id_column(
    "train_no_optic_disc_image_ids.csv", "ID"
)

# Values later passed as include_only_list when preparing the test images.
test_included_df, test_included = _load_id_column(
    "Graded_Test_Dataset_2-277M_With_Demographics_CDR_Diagnosis_Image_Quality_Model_Diagnosis_Predicitons_with_Jiun_Do_June8_2024_with_Catalog_model_predictions.csv",
    "Image_cd",
)

In [None]:
# Use the execution's working directory as output_dir for every call below.
output_dir = execution._working_dir

# Arguments shared by the cropped and uncropped variant of each split.
val_filter = {"output_dir": output_dir, "exclude_list": val_excluded}
test_filter = {"output_dir": output_dir, "include_only_list": test_included}

# Validation split: cropped to the eye, then uncropped.
validation_image_path_cropped, validation_csv_cropped = EA.create_cropped_images(
    ds_bag_val, crop_to_eye=True, **val_filter
)
validation_image_path_uncropped, validation_csv_uncropped = EA.create_cropped_images(
    ds_bag_val, crop_to_eye=False, **val_filter
)

# Test split: cropped to the eye, then uncropped.
test_image_path_cropped, test_csv_cropped = EA.create_cropped_images(
    ds_bag_test, crop_to_eye=True, **test_filter
)
test_image_path_uncropped, test_csv_uncropped = EA.create_cropped_images(
    ds_bag_test, crop_to_eye=False, **test_filter
)

In [None]:
# JSON file handed to train_and_evaluate as best_hyperparameters_json_path.
# The path is relative, so it is resolved against the kernel's current directory.
best_hyper_parameters_json_path = "best_hyperparameters_exluding_no_optic_disc_images_june_24_2024.json"
# Echo the path so it is recorded in the cell output.
best_hyper_parameters_json_path

In [None]:
# Create the asset paths, one per asset type. They are handed to
# train_and_evaluate as model_path, eval_path and log_path respectively.
asset_path_models, asset_path_output, asset_path_logs = (
    execution.execution_asset_path(asset_type)
    for asset_type in ("Diagnosis_Model", "Model_Prediction", "Training_Log")
)

In [None]:
# Display the path that train_and_evaluate receives as eval_path.
asset_path_output

In [None]:
from datetime import datetime

# Date stamp (e.g. "Jun_24_2024") embedded in the saved model names.
run_started = datetime.now()
current_date = f"{run_started:%b_%d_%Y}"
print(current_date)

In [None]:
from vgg19_diagnosis_train import train_and_evaluate

# Arguments that are identical for every training run below.
shared_train_args = dict(
    model_path=asset_path_models,
    log_path=asset_path_logs,
    eval_path=asset_path_output,
    best_hyperparameters_json_path=best_hyper_parameters_json_path,
)

# The context manager's value is deliberately not bound to a name: it is
# unused, and binding it to `exec` would shadow the builtin in the kernel.
with execution.execute():
    for ds_bag in ds_bag_list:
        # Prepare the training images twice: cropped to the eye and uncropped.
        image_path_ds_bag_path_cropped, csv_ds_bag_cropped = EA.create_cropped_images(
            ds_bag,
            output_dir,
            crop_to_eye=True,
            exclude_list=train_excluded,
        )
        image_path_ds_bag_path_uncropped, csv_ds_bag_uncropped = EA.create_cropped_images(
            ds_bag,
            output_dir,
            crop_to_eye=False,
            exclude_list=train_excluded,
        )
        print("Dataset: ", ds_bag.dataset_rid)

        # Train and evaluate on the cropped images.
        (
            predictions_results_cropped,
            metrics_summary_cropped,
            model_save_path_cropped,
            training_history_csv_cropped,
        ) = train_and_evaluate(
            train_path=image_path_ds_bag_path_cropped,
            valid_path=validation_image_path_cropped,
            test_path=test_image_path_cropped,
            model_name=f"VGG19_3000_Images_Cropped_{ds_bag.dataset_rid}_{current_date}",
            **shared_train_args,
        )

        # Train and evaluate on the uncropped images.
        (
            predictions_results,
            metrics_summary,
            model_save_path,
            training_history_csv,
        ) = train_and_evaluate(
            train_path=image_path_ds_bag_path_uncropped,
            valid_path=validation_image_path_uncropped,
            test_path=test_image_path_uncropped,
            model_name=f"VGG19_3000_Images_Uncropped_{ds_bag.dataset_rid}_{current_date}",
            **shared_train_args,
        )

        print("Uncropped")
        print(predictions_results, metrics_summary, model_save_path, training_history_csv)
        print("Cropped")
        print(predictions_results_cropped, metrics_summary_cropped, model_save_path_cropped, training_history_csv_cropped)

In [None]:
# Upload the outputs produced by this execution.
# NOTE(review): clean_folder=True presumably clears the local output folder
# after the upload — confirm against the deriva-ml documentation.
execution.upload_execution_outputs(clean_folder=True)