## Initial Setup:
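This section prepares the notebook: it adds the local `eye-ai-ml` and `eye-ai-exec` checkouts to the Python path, imports the required packages, logs in to the catalog host, and creates the `EyeAI` client.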

In [None]:
repo_dir = "Repos"   # Set this to be where your github repos are located.
%load_ext autoreload
%autoreload 2

# Update the load path so python can find modules for the model
import sys
from pathlib import Path
sys.path.insert(0, str(Path.home() / repo_dir / "eye-ai-ml"))
sys.path.insert(0, str(Path.home() / repo_dir / "eye-ai-exec" / "models" / "vgg19"))

In [None]:
# Prerequisites
import json
import os
from eye_ai.eye_ai import EyeAI

import pandas as pd
from pathlib import Path, PurePath
import logging
from datetime import datetime

from deriva_ml import DatasetBag, Workflow, ExecutionConfiguration, DatasetVersion
from deriva_ml import MLVocab as vc
# Configure the root logger at INFO level with a timestamped format.
# force=True replaces any handlers that were already installed on the root logger.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
    force=True,
)

In [None]:
from deriva.core.utils.globus_auth_utils import GlobusNativeLogin

# Catalog to open and the server that hosts it.
catalog_id = "eye-ai" #@param
host = "www.eye-ai.org"

# Start a login flow only when no login for the host is already active.
gnl = GlobusNativeLogin(host=host)
if not gnl.is_logged_in([host]):
    gnl.login(
        [host],
        no_local_server=True,
        no_browser=True,
        refresh_tokens=True,
        update_bdbag_keychain=True,
    )
    print("Login Successful")
else:
    print("You are already logged in.")

In [None]:
# The cache and the working files share the same directory.
cache_dir = working_dir = "/data"

# Client object used for every catalog operation in the rest of the notebook.
EA = EyeAI(
    hostname=host,
    catalog_id=catalog_id,
    cache_dir=cache_dir,
    working_dir=working_dir,
)

## Downloading Dataset:
Download the datasets. We work with three datasets: 2-A5T0 (train), 2-A5T2 (validation), and 2-A5T4 (test). The downloaded datasets are always returned in the order of the list passed at download time, so `execution.datasets[0]` is the training set, `[1]` the validation set, and `[2]` the test set. Additionally, this code always downloads the latest version of each dataset.

In [None]:
# RIDs of the source datasets, listed in the order train, validation, test.
datasets = ["2-A5T0", "2-A5T2", "2-A5T4"]

# One download spec per dataset, each carrying the version that
# EA.dataset_version reports for that RID.
to_be_download = [
    {
        "rid": rid,
        "materialize": True,
        "version": EA.dataset_version(dataset_rid=rid),
    }
    for rid in datasets
]

# Register the workflow-type term, then create a workflow of that type.
EA.add_term(
    vc.workflow_type,
    "VGG19 Model Train",
    description="A workflow to train VGG19 model",
)
workflow_rid = EA.create_workflow(name="VGG19 Model train", workflow_type="VGG19 Model Train")

# Set to False to create the execution without attaching any datasets.
download_assets = True

config = ExecutionConfiguration(
    description="Instance of training VGG19 model",
    workflow=workflow_rid,
    datasets=to_be_download if download_assets else [],
)

execution = EA.create_execution(config)

In [None]:
# Show the execution object as a quick check that it was created.
print(execution)

In [None]:
# execution.datasets is indexed in the order the RIDs were listed for
# download: train first, then validation, then test.
ds_bag_train, ds_bag_val, ds_bag_test = (
    execution.datasets[0],
    execution.datasets[1],
    execution.datasets[2],
)

In [None]:
# Per-execution output folder: the execution's working directory joined with its RID.
# NOTE(review): `_working_dir` is a private attribute of the execution object —
# confirm whether a public accessor should be used instead.
output_dir = execution._working_dir / execution.execution_rid

## Preprocessing:
Crop the images and move them to the designated folder for training, validation, and testing.

In [None]:
# Crop each split into its own sub-folder of output_dir. Each call returns two
# values; only the first (the image path) is used later in this notebook.
train_image_path_cropped, csv_ds_bag_cropped = EA.create_cropped_images(
    ds_bag=ds_bag_train,
    output_dir=output_dir / "train",
    crop_to_eye=True,
)

validation_image_path_cropped, validation_csv_cropped = EA.create_cropped_images(
    ds_bag=ds_bag_val,
    output_dir=output_dir / "val",
    crop_to_eye=True,
)

test_image_path_cropped, test_csv_cropped = EA.create_cropped_images(
    ds_bag=ds_bag_test,
    output_dir=output_dir / "test",
    crop_to_eye=True,
)

In [None]:
# Ask the execution for one asset path per output kind: the trained model,
# the prediction files, and the training logs (requested in that order).
asset_path_models, asset_path_output, asset_path_logs = (
    execution.execution_asset_path(asset_type)
    for asset_type in ("Diagnosis_Model", "Model_Prediction", "Training_Log")
)

In [None]:
# Date stamp (abbreviated month, day, year joined by underscores) used in model names.
current_date = f"{datetime.now():%b_%d_%Y}"
print(current_date)

## Train and Evaluate:

In [None]:
from vgg19_diagnosis_train import train_and_evaluate

# Train on the cropped training images and evaluate on the validation and test
# images, all inside the execution context. The model, logs and evaluation
# output are directed to the asset paths obtained from the execution.
# The context object is deliberately left unbound: it is never used, and naming
# it `exec` would shadow the built-in exec() for the rest of the session.
with execution.execute():
    predictions_results, metrics_summary, model_save_path, training_history_csv = train_and_evaluate(
        train_path=train_image_path_cropped,
        valid_path=validation_image_path_cropped,
        test_path=test_image_path_cropped,
        model_path=asset_path_models,
        log_path=asset_path_logs,
        eval_path=asset_path_output,
        # The model name records the training dataset RID and the run date.
        model_name=f"VGG19_Model_{ds_bag_train.dataset_rid}_{current_date}",
    )
    print("Execution Results:")
    print(predictions_results, metrics_summary, model_save_path, training_history_csv)


## Evaluate Only:
If you already have a VGG19 model, provide its path here to evaluate it directly.

In [None]:
# Path to an existing VGG19 model file; replace this placeholder before running
# the evaluation cell.
model_path = "path/to/your/model.h5"

In [None]:
from vgg19_diagnosis_train import evaluate_only

# Evaluate the model at model_path on the cropped test images, inside the
# execution context, writing results to the prediction asset path.
# The context object is deliberately left unbound: it is never used, and naming
# it `exec` would shadow the built-in exec() for the rest of the session.
# NOTE(review): this enters the same execution object as the training cell —
# presumably only one of the two cells is run per execution; confirm.
with execution.execute():
    predictions_results, metrics_summary = evaluate_only(
        model_path=model_path,
        model_name=f"VGG19_Model_{ds_bag_train.dataset_rid}_{current_date}",
        test_path=test_image_path_cropped,
        output_dir=asset_path_output,
    )
    print("Execution Results:")
    print(predictions_results, metrics_summary)

## Upload results:

In [None]:
# Upload the outputs produced by this execution.
# NOTE(review): clean_folder=True presumably removes the local output folder
# after the upload — confirm against the deriva-ml documentation.
execution.upload_execution_outputs(clean_folder=True)