## Initial Setup:
This step initializes the necessary configuration.

In [None]:
repo_dir = "Repos"   # Set this to be where your github repos are located.
%load_ext autoreload
%autoreload 2

# Update the load path so python can find modules for the model
import sys
from pathlib import Path
sys.path.insert(0, str(Path.home() / repo_dir / "eye-ai-ml"))
sys.path.insert(0, str(Path.home() / repo_dir / "RETFound_MAE"))

In [None]:
# Prerequisites
import json
import os
from eye_ai.eye_ai import EyeAI

import pandas as pd
from pathlib import Path, PurePath
import logging
import torch

from deriva_ml import DatasetBag, Workflow, ExecutionConfiguration, DatasetVersion
from deriva_ml import MLVocab as vc
# Log at INFO level with a timestamped format. force=True replaces any
# handlers already attached to the root logger, so re-running the cell
# takes effect.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    force=True,
)

In [None]:
from deriva.core.utils.globus_auth_utils import GlobusNativeLogin
catalog_id = "eye-ai" #@param
host = 'www.eye-ai.org'

# Start a login flow for the catalog host only when no session exists yet.
gnl = GlobusNativeLogin(host=host)
if not gnl.is_logged_in([host]):
    gnl.login(
        [host],
        no_local_server=True,
        no_browser=True,
        refresh_tokens=True,
        update_bdbag_keychain=True,
    )
    print("Login Successful")
else:
    print("You are already logged in.")

In [None]:
# The cache and the working area share the same root directory.
cache_dir = working_dir = '/data'

# Client object used for every catalog operation in the rest of the notebook.
EA = EyeAI(
    hostname=host,
    catalog_id=catalog_id,
    cache_dir=cache_dir,
    working_dir=working_dir,
)

## Downloading Dataset:
Download the datasets. We work with three datasets: 2-A5T0 (train), 2-A5T2 (val), and 2-A5T4 (test). The downloaded datasets are returned in the same order as the list provided below, so keep that order (train, val, test). This code always downloads the latest version of each dataset.

In [None]:
# Dataset RIDs, in the order train, validation, test.
datasets = ['2-A5T0', '2-A5T2', '2-A5T4']

# One download spec per dataset, each pinned to the version that
# EA.dataset_version reports for it.
to_be_download = [
    {
        'rid': dataset_rid,
        'materialize': True,
        'version': EA.dataset_version(dataset_rid=dataset_rid),
    }
    for dataset_rid in datasets
]

# Register the workflow this notebook implements.
retfound_workflow = Workflow(
    name="RETFound Model train",
    url="https://github.com/informatics-isi-edu/eye-ai-exec/blob/main/notebooks/RETFound_Huy/RETFOUND_DATA_TEMPLATE.ipynb",
    workflow_type="RETFound Model Train",
)
workflow_instance = EA.add_workflow(retfound_workflow)

# Despite its name, this flag decides whether the *datasets* are requested;
# the asset list below is always passed. Set it to False to skip the datasets.
download_assets = True

config = ExecutionConfiguration(
    datasets=to_be_download if download_assets else [],
    # RETFound pre-trained weight. Always keep at least one asset when training.
    # Alternative weight asset: 4-S3KP
    assets=['4-S3KR'],
    workflow=workflow_instance,
    description="Instance of training RETFound model",
)

# Initialize execution
execution = EA.create_execution(config)

In [None]:
# Show the execution object created in the previous cell as a quick sanity check.
print(execution)

## Preprocessing:
Crop the images and move them to the designated folder for training, validation, and testing.          

In [None]:
# The datasets come back in the order they were requested: train, val, test.
ds_bag_train, ds_bag_val, ds_bag_test = (
    execution.datasets[position] for position in range(3)
)

# Path of the first requested asset (the RETFound pre-trained weight).
retfound_pretrained_weight = execution.asset_paths[0]

In [None]:
# Root folder handed to EA.create_retfound_image_directory as its output_dir.
# NOTE(review): `_working_dir` is a private attribute of the execution object —
# confirm whether a public accessor should be used instead.
output_dir = execution._working_dir

In [None]:
# Wrap each dataset bag in a one-entry dict under the "ds_bag" key, the form
# passed to EA.create_retfound_image_directory.
ds_bag_train_dict = dict(ds_bag=ds_bag_train)
ds_bag_val_dict = dict(ds_bag=ds_bag_val)
ds_bag_test_dict = dict(ds_bag=ds_bag_test)

In [None]:
"""
If the following function returns an error, it means that it has not been updated in Eye-AI.
Instead, your dataset directory should follow the format below for the pipeline to work.

├── data folder
    ├──train
        ├──class_a
        ├──class_b
        ├──class_c
    ├──val
        ├──class_a
        ├──class_b
        ├──class_c
    ├──test
        ├──class_a
        ├──class_b
        ├──class_c
"""
# Build the train/val/test image folders (cropped to the eye) under output_dir.
image_dirs = EA.create_retfound_image_directory(
    ds_bag_train_dict=ds_bag_train_dict,
    ds_bag_val_dict=ds_bag_val_dict,
    ds_bag_test_dict=ds_bag_test_dict,
    output_dir=output_dir,
    crop_to_eye=True,
)

# The first element of the result is the directory later passed as --data_path.
dataset_dir = image_dirs[0]

In [None]:
# Ask the execution for one output location per asset type, in this order:
# trained models, model predictions, training logs.
asset_path_models, asset_path_output, asset_path_logs = (
    execution.execution_asset_path(asset_type)
    for asset_type in ("Diagnosis_Model", "Model_Prediction", "Training_Log")
)

In [None]:
from datetime import datetime
# Date stamp in the form <abbreviated month>_<day>_<year>.
current_date = f"{datetime.now():%b_%d_%Y}"
print(current_date)

In [None]:
# Local scratch folder for RETFound output, relative to the current working
# directory; created together with any missing parent folders.
RETFound_output = "./RETFound_output/task"
Path(RETFound_output).mkdir(parents=True, exist_ok=True)

Here are all the possible parameters that you can use for your training. You can find them in main_finetune.py.

# Train parameters
--batch_size: Batch size per GPU (default: 128)
--epochs: Number of training epochs (default: 50)
--accum_iter: Accumulate gradient iterations (default: 1)

# Model parameters
--model: Name of model to train (default: 'RETFound_mae')
--input_size: Image input size (default: 256)
--drop_path: Drop path rate (default: 0.2)

# Optimizer parameters
--clip_grad: Clip gradient norm (default: None)
--weight_decay: Weight decay (default: 0.05)
--lr: Learning rate (absolute lr) (default: None)
--blr: Base learning rate (default: 5e-3)
--layer_decay: Layer-wise learning rate decay (default: 0.65)
--min_lr: Lower bound for cyclic schedulers (default: 1e-6)
--warmup_epochs: Number of warmup epochs (default: 10)

# Augmentation parameters
--color_jitter: Color jitter factor (default: None)
--aa: AutoAugment policy (default: 'rand-m9-mstd0.5-inc1')
--smoothing: Label smoothing (default: 0.1)

# Random Erase parameters
--reprob: Random erase probability (default: 0.25)
--remode: Random erase mode (default: 'pixel')
--recount: Random erase count (default: 1)
--resplit: Do not random erase first augmentation split (default: False)

# Mixup parameters
--mixup: Mixup alpha (default: 0, mixup enabled if > 0)
--cutmix: CutMix alpha (default: 0, cutmix enabled if > 0)
--cutmix_minmax: CutMix min/max ratio (default: None)
--mixup_prob: Probability of performing Mixup or CutMix (default: 1.0)
--mixup_switch_prob: Probability of switching to CutMix (default: 0.5)
--mixup_mode: Mode of applying Mixup/CutMix (default: 'batch')

# Finetuning parameters
--finetune: Finetune from checkpoint (default: '')
--task: Task type for finetuning (default: '')
--global_pool: Use global pooling (default: True)
--cls_token: Use class token instead of global pool (default: False)

# Dataset parameters
--data_path: Dataset path (default: './data/')
--nb_classes: Number of classification categories (default: 8)
--output_dir: Path to save output (default: './output_dir')
--log_dir: Path for TensorBoard logs (default: './output_logs')
--device: Device to use for training/testing (default: 'cuda')
--seed: Random seed (default: 0)
--resume: Resume from checkpoint (default: '')
--start_epoch: Start epoch number (default: 0)
--eval: Perform evaluation only (default: False)
--dist_eval: Enable distributed evaluation (default: False)
--num_workers: Number of DataLoader workers (default: 10)
--pin_mem: Pin CPU memory in DataLoader for efficient GPU transfer (default: True)

# Distributed training parameters
--world_size: Number of distributed processes (default: 1)
--local_rank: Local rank for distributed training (default: -1)
--dist_on_itp: Enable distributed training on ITP (default: False)
--dist_url: URL for distributed training setup (default: 'env://')

# Additional fine-tuning parameters
--savemodel: Save the trained model (default: True)
--norm: Normalization method (default: 'IMAGENET')
--enhance: Use enhanced data (default: False)
--datasets_seed: Dataset random seed (default: 2026)

## Train and Evaluate:

In [None]:
from main_finetune import main, get_args_parser

# Fine-tune RETFound inside the execution context.
# The context is bound to `exe` rather than `exec` so that the Python builtin
# `exec` is not shadowed in the notebook namespace.
with execution.execute() as exe:
    args_list = [
        "--model", "RETFound_mae", # If you are using the 4-S3KP asset, this would be RETFound_dinov2, which is worth a look.
        "--savemodel",
        "--global_pool",
        "--batch_size", "16",
        "--world_size", "1",
        "--epochs", "100",
        "--blr", "5e-3", "--layer_decay", "0.65",
        "--weight_decay", "0.05", "--drop_path", "0.2",
        "--nb_classes", "2",
        "--data_path", str(dataset_dir),
        "--input_size", "224",
        # NOTE(review): --task is set to the same path as --output_dir. The
        # original note said the content of the task folder must be moved to
        # asset_path_output for upload — confirm how main_finetune combines
        # --task and --output_dir before relying on this.
        "--task", str(asset_path_output),
        "--output_dir", str(asset_path_output),
        # Start from the downloaded pre-trained weight.
        "--finetune", str(retfound_pretrained_weight),
    ]

    args = get_args_parser().parse_args(args_list)

    # Make sure the output folder exists before training starts writing to it.
    if args.output_dir:
        Path(args.output_dir).mkdir(parents=True, exist_ok=True)

    main(args)

## Evaluate Only:
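If you already have a trained RETFound model, set `path_to_model` in the cell below to its checkpoint file to evaluate it directly. The value in the cell is only a placeholder and must be replaced before running.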

In [None]:
from main_finetune import main, get_args_parser

# Evaluate an existing checkpoint inside the execution context.
# The context is bound to `exe` rather than `exec` so that the Python builtin
# `exec` is not shadowed in the notebook namespace.
with execution.execute() as exe:
    # Placeholder: replace with the path of the checkpoint to evaluate.
    path_to_model = "path/to/model.pth"
    args_list = [
        "--model", "RETFound_mae",
        "--eval",  # evaluation only, no training
        "--savemodel",
        "--global_pool",
        "--batch_size", "16",
        "--world_size", "1",
        "--epochs", "100",
        "--blr", "5e-3", "--layer_decay", "0.65",
        "--weight_decay", "0.05", "--drop_path", "0.2",
        "--nb_classes", "2",
        "--data_path", str(dataset_dir),
        "--input_size", "224",
        "--task", str(asset_path_output),
        "--output_dir", str(asset_path_output),
        # Checkpoint to load for evaluation.
        "--resume", path_to_model,
    ]

    args = get_args_parser().parse_args(args_list)

    # Make sure the output folder exists before results are written to it.
    if args.output_dir:
        Path(args.output_dir).mkdir(parents=True, exist_ok=True)

    main(args)

## Upload results:

In [None]:
# Upload the outputs produced by this execution.
# NOTE(review): clean_folder=True presumably removes the local output folder
# after the upload — confirm before relying on local copies.
execution.upload_execution_outputs(clean_folder=True)