<a href="https://colab.research.google.com/github/informatics-isi-edu/eye-ai-exec/blob/main/notebooks/VGG19_Diagnosis_Train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# VGG19 Training

This notebook trains a VGG19 model for glaucoma diagnosis.

In [None]:
# import sys
# IN_COLAB = 'google.colab' in sys.modules

# if IN_COLAB:
#     !pip install deriva
#     !pip install bdbag
#     !pip install --upgrade --force pydantic
#     !pip install git+https://github.com/informatics-isi-edu/deriva-ml git+https://github.com/informatics-isi-edu/eye-ai-ml

In [None]:
repo_dir = "Repos"   # Set this to be where your github repos are located.
%load_ext autoreload
%autoreload 2

# Update the load path so python can find modules for the model
import sys
from pathlib import Path
sys.path.insert(0, str(Path.home() / repo_dir / "eye-ai-ml"))

In [None]:
# Prerequisites

import json
import os
from eye_ai.eye_ai import EyeAI
import pandas as pd
from pathlib import Path, PurePath
import logging
# import torch

# Emit INFO-and-above records prefixed with a timestamp and level name.
# force=True replaces any handlers already attached to the root logger, so
# re-running this cell takes effect instead of being silently ignored.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)

In [None]:

from deriva.core.utils.globus_auth_utils import GlobusNativeLogin

# Catalog and server used by the rest of the notebook.
catalog_id = "eye-ai" #@param
host = 'www.eye-ai.org'

# Only start a new login flow when there is no existing login for this host.
gnl = GlobusNativeLogin(host=host)
if not gnl.is_logged_in([host]):
    gnl.login(
        [host],
        no_local_server=True,
        no_browser=True,
        refresh_tokens=True,
        update_bdbag_keychain=True,
    )
    print("Login Successful")
else:
    print("You are already logged in.")

Connect to the Eye-AI catalog.  Configure the local cache and working directories used to store data.  Initialize Eye-AI for the pending execution based on the provided configuration file.

In [None]:
# Variables to configure the rest of the notebook.

cache_dir = '/data'        # Directory in which to cache materialized BDBags for datasets
working_dir = '/data'    # Directory in which to place output files for later upload.

configuration_rid="2-C3QM" # RID of the configuration record used to initialize the execution
# In the configuration file, set bag_url=["minid: train", "minid: valid", "minid: test"].
# NOTE(review): later cells read bag_paths[0], [1], [2] as train, validation and test,
# so bag_url presumably must be listed in that order — confirm.


In [None]:
# Eye-AI handle for the chosen host and catalog, using the cache and working
# directories configured earlier in the notebook.
EA = EyeAI(
    hostname=host,
    catalog_id=catalog_id,
    cache_dir=cache_dir,
    working_dir=working_dir,
)

In [None]:
# @title Initiate an Execution
# Initialize the execution from the configuration record identified by configuration_rid.
configuration_records = EA.execution_init(configuration_rid=configuration_rid)
# Show the returned record as the cell output.
# NOTE(review): model_dump() suggests a pydantic model — confirm.
configuration_records.model_dump()

In [None]:
# Display the configuration record object itself (its repr) as the cell output.
configuration_records

In [None]:
# @title Data Preprocessing (Filtering Image.csv for just Field_2 Images)
def _crop_split(bag_dir, split_name):
    """Create cropped images for one dataset bag under ``<working_dir>/<split_name>``.

    Calls ``EA.create_cropped_images`` with ``crop_to_eye=True`` and returns its
    result unchanged; the callers below unpack it into an image path and a CSV value.
    """
    return EA.create_cropped_images(
        str(bag_dir),
        output_dir=str(EA.working_dir) + '/' + split_name,
        crop_to_eye=True,
    )


# Raw dataset bags, read positionally from the configuration record.
train_dir = configuration_records.bag_paths[0]
validation_dir = configuration_records.bag_paths[1]
test_dir = configuration_records.bag_paths[2]

# One cropped-image directory (and accompanying CSV value) per split.
train_cropped_image_path, train_cropped_csv = _crop_split(train_dir, 'train')
validation_cropped_image_path, validation_cropped_csv = _crop_split(validation_dir, 'valid')
test_cropped_image_path, test_cropped_csv = _crop_split(test_dir, 'test')


In [None]:

# Output directory for this execution: a sub-directory of the working directory
# named after the first 'Execution_Asset_Type' vocabulary term.
output_path = os.path.join(str(EA.working_dir), configuration_records.vocabs['Execution_Asset_Type'][0].name)
# exist_ok=True keeps this cell re-runnable: a plain os.mkdir raises
# FileExistsError when the directory is left over from a previous run.
os.makedirs(output_path, exist_ok=True)

In [None]:
# Display the output directory path as the cell output.
output_path

In [None]:
# String form of the first entry in configuration_records.assets_paths; passed to
# the training entry point as the best-hyperparameters JSON path.
best_hyper_parameters_json_path = str(configuration_records.assets_paths[0])

In [None]:
# Display the hyperparameter JSON path as the cell output.
best_hyper_parameters_json_path

In [None]:
# @title Execute Training algorithm
from eye_ai.models.vgg19_diagnosis_train import main

# Run training inside the execution context for this execution RID.
# The context object is bound to `execution` rather than `exec`, so the builtin
# exec() is not shadowed in the kernel namespace for the rest of the session.
with EA.execution(execution_rid=configuration_records.execution_rid) as execution:
    main(
        train_path=train_cropped_image_path,
        valid_path=validation_cropped_image_path,
        test_path=test_cropped_image_path,
        output_path=output_path,
        best_hyperparameters_json_path=best_hyper_parameters_json_path,
    )
                    


In [None]:
# @title Save Execution Assets (model) and Metadata
# Upload the results of this execution and keep the call's return value.
# NOTE(review): the meaning of the positional False is not visible here —
# confirm against execution_upload's signature and consider passing it by keyword.
uploaded_assets = EA.execution_upload(configuration_records.execution_rid, False)