In [None]:
!conda install pytorch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 pytorch-cuda=12.1 -c pytorch -c nvidia -y
!cd ~/Repos && git clone --branch eye-ai-compatible https://github.com/huynguyentran/RETFound_MAE.git 
!cd ~/Repos/RETFound_MAE && git pull
!cd ~/Repos/RETFound_MAE && pip install -r requirements.txt

In [None]:
import torch
# Sanity-check the installed PyTorch build and GPU visibility before doing any work.
# The expected CUDA version matches the pytorch-cuda pin in the setup cell.
print(torch.version.cuda)  # Should print "12.1"
print(torch.cuda.is_available())  # Should return True
print(torch.cuda.get_device_name(0))  # Should print "NVIDIA A10G"

In [None]:
repo_dir = "Repos"   # Set this to be where your github repos are located.
%load_ext autoreload
%autoreload 2

# Update the load path so python can find modules for the model
import sys
from pathlib import Path
sys.path.insert(0, str(Path.home() / repo_dir / "eye-ai-ml"))
sys.path.insert(0, str(Path.home() / repo_dir / "eye-ai-exec" / "models" / "vgg19"))
sys.path.insert(0, str(Path.home() / repo_dir / "RETFound_MAE"))

In [None]:
# Prerequisites
import json
import os
from eye_ai.eye_ai import EyeAI
import pandas as pd
from pathlib import Path, PurePath
import logging

from deriva_ml import DatasetBag, Workflow, ExecutionConfiguration, DatasetVersion
from deriva_ml import MLVocab as vc
# Timestamped INFO-level logging. force=True replaces any handlers already attached to the
# root logger, so this format applies even if logging was configured earlier in the kernel.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)

In [None]:
from deriva.core.utils.globus_auth_utils import GlobusNativeLogin
catalog_id = "eye-ai" #@param
host = 'www.eye-ai.org'

# Authenticate against the catalog host with Globus; an existing session is reused.
gnl = GlobusNativeLogin(host=host)
if not gnl.is_logged_in([host]):
    gnl.login(
        [host],
        no_local_server=True,
        no_browser=True,
        refresh_tokens=True,
        update_bdbag_keychain=True,
    )
    print("Login Successful")
else:
    print("You are already logged in.")

In [None]:
# Catalog client. Both the cache and the working area are rooted at /data.
# NOTE(review): `working_dir` is rebound further down to a per-execution Path; the string
# assigned here is only used to construct EyeAI.
cache_dir = '/data'
working_dir = '/data'
EA = EyeAI(hostname = host, catalog_id = catalog_id, cache_dir= cache_dir, working_dir=working_dir)

In [None]:
# Dataset RIDs needed by this execution. The order matters: the bags are later read
# back from execution.datasets by position.
datasets = [
    '4-4116', # Selected images for training
    '4-411G', # Selected images for testing
    '2-7P5P', # Full multimodal dataset
    ]

# One download spec per dataset, each pinned to the version returned by EA.dataset_version.
to_be_download = [
    {
        'rid': dataset_rid,
        'materialize': True,
        'version': EA.dataset_version(dataset_rid=dataset_rid),
    }
    for dataset_rid in datasets
]

workflow_instance = EA.create_workflow(
    name="Multimodal workflow",
    workflow_type="Multimodal workflow"
)

# `assets` lists the RID of the asset fetched for this execution; it is read back
# below through execution.asset_paths.
config = ExecutionConfiguration(
    datasets=to_be_download,
    assets = ['4-S4TJ',],
    workflow=workflow_instance,
    description="Instance of applying CV models to multimodal data. We are attempting to increase the accuracy of prediction by including table values into images prediction.")

execution = EA.create_execution(config)

In [None]:
# Show the execution's string representation as a quick check that it was created.
print(execution)

In [None]:
# Downloaded dataset bags, read back by position.
# NOTE(review): assumes execution.datasets keeps the order of the `datasets` list
# (train, test, full multimodal) - confirm.
training_ds_bag = execution.datasets[0]
testing_ds_bag = execution.datasets[1]

multimodal_full_ds_bag = execution.datasets[2]
# Path of the single asset requested in the configuration; passed to RETFound below
# via --finetune / --resume.
retfound_pretrained_weight = execution.asset_paths[0]

In [None]:
def get_dataframe_from_bag(ds_bag: DatasetBag, multimodal_full_ds_bag: DatasetBag):
    """Join the images of ``ds_bag`` with the multimodal wide table.

    Args:
        ds_bag: Bag providing the 'Observation', 'Image' and
            'Execution_Image_Fundus_Laterality' tables.
        multimodal_full_ds_bag: Bag handed to ``EA.multimodal_wide`` to build the wide table.

    Returns:
        A DataFrame produced by inner-joining the wide table with the image rows on
        (RID_Subject == Subject, Image_Side). Every join is an inner join, so rows
        without a match on either side are dropped.
    """
    observations = ds_bag.get_table_as_dataframe('Observation')
    images = ds_bag.get_table_as_dataframe('Image')
    lateralities = ds_bag.get_table_as_dataframe('Execution_Image_Fundus_Laterality')

    # Image -> eye side, keyed by the image RID.
    images_with_side = (
        images[['RID', 'Filename', 'Observation']]
        .rename(columns={'RID': 'RID_Image'})
        .merge(
            lateralities[['Image', 'Image_Side']].rename(columns={'Image': 'RID_Image'}),
            left_on='RID_Image',
            right_on='RID_Image',
            how='inner',
        )
    )

    # Attach the subject through the observation each image belongs to.
    images_with_subject = images_with_side.merge(
        observations[['RID', 'Subject']].rename(columns={'RID': 'RID_Observation'}),
        left_on='Observation',
        right_on='RID_Observation',
        how='inner',
    )

    wide = EA.multimodal_wide(multimodal_full_ds_bag)

    # Match wide-table rows to images of the same subject and the same eye side.
    return pd.merge(
        wide,
        images_with_subject,
        left_on=['RID_Subject', 'Image_Side'],
        right_on=['Subject', 'Image_Side'],
        how='inner',
    )

In [None]:
# Build the joined image/multimodal frame for each split; both splits use the same
# full multimodal bag as the source of the wide table.
train_df = get_dataframe_from_bag(training_ds_bag, multimodal_full_ds_bag)
test_df= get_dataframe_from_bag(testing_ds_bag, multimodal_full_ds_bag)

In [None]:
# Keep only the image identifier, file path and diagnosis columns for the training split;
# these are the columns create_dataset_folder reads (plus Condition_Display for inspection).
filtered_train_df = train_df[['RID_Image', 'Filename','Condition_Label', 'Condition_Display']]
filtered_train_df

In [None]:
# Same column selection as for the training split, applied to the test split.
filtered_test_df = test_df[['RID_Image', 'Filename','Condition_Label', 'Condition_Display']]
filtered_test_df 

In [None]:
# Per-execution scratch directory: <execution working dir>/<execution RID>.
# NOTE(review): this reads the private attribute `execution._working_dir` and rebinds
# `working_dir` (previously the '/data' string passed to EyeAI) to a Path.
working_dir = execution._working_dir / execution.execution_rid
working_dir.mkdir(parents=True, exist_ok=True)
working_dir 

In [None]:
# Class-folder name -> integer label. The keys are the same names as the sub-folders
# created by create_dataset_folder; the mapping is passed to train_and_evaluate.
classes = {
    "0_Glaucoma_Suspect": 0,
    "1_Glaucoma": 1, 
}

In [None]:
import numpy as np
import random

# Seed NumPy's global RNG and Python's `random` module. The validation split below
# draws from `random` (random.sample), so this must run before that cell.
seed_value = 42
np.random.seed(seed_value)
random.seed(seed_value)

In [None]:
import shutil 

def create_dataset_folder(df, output_path, output_name):
    """Copy labelled images into per-class folders under ``output_path / output_name``.

    Args:
        df: DataFrame with 'Filename' (source file path), 'RID_Image' and
            'Condition_Label' columns.
        output_path: Parent directory; must support the ``/`` operator (e.g. a Path).
        output_name: Name of the split folder to create, e.g. "train".

    Returns:
        The split folder ``output_path / output_name``.

    Rows labelled "GS" are copied to ``0_Glaucoma_Suspect``; rows labelled "POAG" or
    "PACG" are copied to ``1_Glaucoma``; rows with any other label are skipped. Each
    copy is named ``<RID_Image>.jpg``.
    """
    split_root = output_path / output_name
    split_root.mkdir(parents=True, exist_ok=True)

    suspect_dir = split_root / "0_Glaucoma_Suspect"
    glaucoma_dir = split_root / "1_Glaucoma"
    for class_dir in (suspect_dir, glaucoma_dir):
        class_dir.mkdir(parents=True, exist_ok=True)

    for _, record in df.iterrows():
        source_file = record["Filename"]
        target_name = record["RID_Image"] + ".jpg"
        diagnosis = record['Condition_Label']

        if diagnosis == "GS":
            target_dir = suspect_dir
        elif diagnosis in ("POAG", "PACG"):
            target_dir = glaucoma_dir
        else:
            # Not one of the diagnoses this task classifies.
            continue

        shutil.copy2(source_file, os.path.join(target_dir, target_name))

    return split_root

# Lay the training and test images out as <working_dir>/<split>/<class>/<RID>.jpg.
train_dir = create_dataset_folder(filtered_train_df, working_dir, "train")
test_dir = create_dataset_folder(filtered_test_df, working_dir, "test")

In [None]:
# Display the two split folders that were just created.
train_dir, test_dir

In [None]:
def create_validation_set(train_dir, val_dir, split_ratio=0.15):
    """Move a random fraction of each class's training images into a validation folder.

    For every class sub-directory of ``train_dir``, ``int(n_images * split_ratio)``
    files are chosen with ``random.sample`` and moved into the sub-directory of the
    same name under ``val_dir`` (created if missing).

    Args:
        train_dir: Directory containing one sub-directory per class.
        val_dir: Destination directory for the validation split.
        split_ratio: Fraction of each class to move; the count is rounded down.

    Note:
        Files are moved, not copied, so each call shrinks the training set further.
        Class names and file names are sorted before sampling because ``os.listdir``
        returns entries in arbitrary order; without sorting, a seeded ``random``
        would not reproduce the same split.
    """
    os.makedirs(val_dir, exist_ok=True)

    for class_name in sorted(os.listdir(train_dir)):
        class_train_path = os.path.join(train_dir, class_name)
        if not os.path.isdir(class_train_path):
            continue

        class_val_path = os.path.join(val_dir, class_name)
        os.makedirs(class_val_path, exist_ok=True)

        images = sorted(
            f for f in os.listdir(class_train_path)
            if os.path.isfile(os.path.join(class_train_path, f))
        )
        num_val = int(len(images) * split_ratio)

        for img in random.sample(images, num_val):
            shutil.move(os.path.join(class_train_path, img), os.path.join(class_val_path, img))

# Carve the validation split out of the training folder (20% per class, overriding the
# function's 0.15 default). The files are moved, so re-running this cell takes a further
# 20% of whatever is left in train.
val_dir = working_dir / "val"
create_validation_set(train_dir, val_dir, split_ratio=0.2)

In [None]:
def count_images_per_class(directory):
    """Count the regular files in each class sub-directory of ``directory``.

    Args:
        directory: Folder whose immediate sub-directories are class folders.

    Returns:
        dict mapping each sub-directory name to the number of regular files directly
        inside it. Non-directory entries of ``directory`` are ignored, and nested
        folders are not counted.
    """
    def _file_count(folder):
        return sum(
            1 for entry in os.listdir(folder)
            if os.path.isfile(os.path.join(folder, entry))
        )

    return {
        name: _file_count(os.path.join(directory, name))
        for name in os.listdir(directory)
        if os.path.isdir(os.path.join(directory, name))
    }


train_counts = count_images_per_class(train_dir)
test_counts = count_images_per_class(test_dir)
val_counts = count_images_per_class(val_dir)

# Report the per-class image counts for each split.
print("Training Set:")
for class_name, count in train_counts.items():
    print(f"  {class_name}: {count} images")

# The validation section must read val_counts; it previously printed the test counts.
print("\nValidation Set:")
for class_name, count in val_counts.items():
    print(f"  {class_name}: {count} images")

print("\nTest Set:")
for class_name, count in test_counts.items():
    print(f"  {class_name}: {count} images")


In [None]:
import os

def get_rid_images_from_folder(folder_path, df):
    """Return the rows of ``df`` whose 'RID_Image' matches a file name in ``folder_path``.

    Args:
        folder_path: Directory to list.
        df: DataFrame with an 'RID_Image' column.

    Returns:
        The subset of ``df`` (original index kept) whose 'RID_Image' equals the name,
        minus its extension, of some entry in the folder.
    """
    # Entry names with the extension stripped, e.g. "ABC.jpg" -> "ABC".
    stems = [os.path.splitext(entry)[0] for entry in os.listdir(folder_path)]
    present = df['RID_Image'].isin(stems)
    return df[present]

# Record which image RIDs ended up in each split/class folder.
# The class folder names must match the directories created by create_dataset_folder
# ("0_Glaucoma_Suspect" / "1_Glaucoma"); the un-prefixed names do not exist on disk and
# make os.listdir raise FileNotFoundError.

# For train directory
train_glaucoma_suspect_folder = os.path.join(train_dir, "0_Glaucoma_Suspect")
train_glaucoma_folder = os.path.join(train_dir, "1_Glaucoma")

train_glaucoma_suspect_rid_images = get_rid_images_from_folder(train_glaucoma_suspect_folder, filtered_train_df)
train_glaucoma_rid_images = get_rid_images_from_folder(train_glaucoma_folder, filtered_train_df)

# For validation directory (validation images were moved out of train, so they are
# looked up in the training frame)
val_glaucoma_suspect_folder = os.path.join(val_dir, "0_Glaucoma_Suspect")
val_glaucoma_folder = os.path.join(val_dir, "1_Glaucoma")

val_glaucoma_suspect_rid_images = get_rid_images_from_folder(val_glaucoma_suspect_folder, filtered_train_df)
val_glaucoma_rid_images = get_rid_images_from_folder(val_glaucoma_folder, filtered_train_df)

# For test directory
test_glaucoma_suspect_folder = os.path.join(test_dir, "0_Glaucoma_Suspect")
test_glaucoma_folder = os.path.join(test_dir, "1_Glaucoma")

test_glaucoma_suspect_rid_images = get_rid_images_from_folder(test_glaucoma_suspect_folder, filtered_test_df)
test_glaucoma_rid_images = get_rid_images_from_folder(test_glaucoma_folder, filtered_test_df)

# Convert 'RID_Image' column to lists
train_glaucoma_suspect_rid_images_list = train_glaucoma_suspect_rid_images['RID_Image'].tolist()
train_glaucoma_rid_images_list = train_glaucoma_rid_images['RID_Image'].tolist()

val_glaucoma_suspect_rid_images_list = val_glaucoma_suspect_rid_images['RID_Image'].tolist()
val_glaucoma_rid_images_list = val_glaucoma_rid_images['RID_Image'].tolist()

test_glaucoma_suspect_rid_images_list = test_glaucoma_suspect_rid_images['RID_Image'].tolist()
test_glaucoma_rid_images_list = test_glaucoma_rid_images['RID_Image'].tolist()


def save_to_text_file(file_path, data_list):
    """Write every item of ``data_list`` to ``file_path``, one item per line.

    The file is opened in 'w' mode, so existing content is replaced. Items are
    formatted with ``str.format`` semantics (f-string) and each is followed by a newline.
    """
    with open(file_path, 'w') as handle:
        handle.writelines(f"{entry}\n" for entry in data_list)

# Save 'RID_Image' lists to text files. The names are relative, so the files are
# written to the notebook's current working directory.
for _file_name, _rid_list in (
    ("train_glaucoma_suspect_rid_images.txt", train_glaucoma_suspect_rid_images_list),
    ("train_glaucoma_rid_images.txt", train_glaucoma_rid_images_list),
    ("val_glaucoma_suspect_rid_images.txt", val_glaucoma_suspect_rid_images_list),
    ("val_glaucoma_rid_images.txt", val_glaucoma_rid_images_list),
    ("test_glaucoma_suspect_rid_images.txt", test_glaucoma_suspect_rid_images_list),
    ("test_glaucoma_rid_images.txt", test_glaucoma_rid_images_list),
):
    save_to_text_file(_file_name, _rid_list)

In [None]:
# Output locations obtained from the execution, one per asset type name. They are passed
# to the training calls below as the model, prediction/output and log destinations.
asset_path_models = execution.execution_asset_path("Diagnosis_Model")
asset_path_output = execution.execution_asset_path("Model_Prediction")
asset_path_logs = execution.execution_asset_path("Training_Log")

In [None]:
from datetime import datetime

# Date stamp in "<abbreviated month>_<day>_<year>" form, used below in the VGG19 model name.
current_date = datetime.now().strftime("%b_%d_%Y") 
print(current_date)

In [None]:
# Display the three split folders used for training and evaluation.
train_dir, val_dir, test_dir

In [None]:
from main_finetune import main, get_args_parser
import torch

# Train: fine-tune RETFound starting from the downloaded pretrained weights.
# The context variable is named `exe` because `exec` would shadow the Python builtin.
with execution.execute() as exe:
    args_list = [
        "--model", "RETFound_mae",
        "--savemodel",
        "--global_pool",
        "--batch_size", "16",
        "--world_size", "1",
        "--epochs", "100",
        "--blr", "5e-3", "--layer_decay", "0.65",
        "--weight_decay", "0.05", "--drop_path", "0.2",
        "--nb_classes", "2",
        # Folder holding the train/ val/ test/ splits built above for THIS execution
        # (with a trailing separator), instead of a hard-coded user-specific path.
        # NOTE(review): presumably main_finetune looks for the split folders under
        # data_path - confirm against the fork.
        "--data_path", os.path.join(working_dir, ""),
        "--input_size", "224",
        "--task", str(asset_path_output),
        "--output_dir", str(asset_path_output),
        "--finetune", str(retfound_pretrained_weight),
    ]

    args = get_args_parser().parse_args(args_list)
    criterion = torch.nn.CrossEntropyLoss()
    if args.output_dir:
        Path(args.output_dir).mkdir(parents=True, exist_ok=True)

    main(args, criterion)

In [None]:
from main_finetune import main, get_args_parser
import torch

# Eval: run RETFound in evaluation mode (--eval) on the same data folders.
# The context variable is named `exe` because `exec` would shadow the Python builtin.
with execution.execute() as exe:
    args_list = [
        "--model", "RETFound_mae",
        "--savemodel",
        "--eval",
        "--global_pool",
        "--batch_size", "16",
        "--world_size", "1",
        "--epochs", "100",
        "--blr", "5e-3", "--layer_decay", "0.65",
        "--weight_decay", "0.05", "--drop_path", "0.2",
        "--nb_classes", "2",
        # Folder holding the train/ val/ test/ splits built above for THIS execution
        # (with a trailing separator), instead of a hard-coded user-specific path.
        # NOTE(review): presumably main_finetune looks for the split folders under
        # data_path - confirm against the fork.
        "--data_path", os.path.join(working_dir, ""),
        "--input_size", "224",
        "--task", str(asset_path_output),
        "--output_dir", str(asset_path_output),
        # NOTE(review): this resumes from the downloaded pretrained weights, not from a
        # checkpoint written by the training cell - confirm which checkpoint should be
        # evaluated.
        "--resume", str(retfound_pretrained_weight),
    ]

    args = get_args_parser().parse_args(args_list)
    criterion = torch.nn.CrossEntropyLoss()
    if args.output_dir:
        Path(args.output_dir).mkdir(parents=True, exist_ok=True)

    main(args, criterion)

In [None]:
# Show where the RETFound run above was told to write its outputs.
print(str(asset_path_output))

In [None]:
from vgg19_diagnosis_train import train_and_evaluate

# Train and evaluate the VGG19 model on the same train/val/test folders.
# The context variable is named `exe` because `exec` would shadow the Python builtin.
with execution.execute() as exe:
    predictions_results, metrics_summary, model_save_path, training_history_csv = train_and_evaluate(
        train_path=train_dir,
        valid_path=val_dir,
        test_path=test_dir,
        model_path=asset_path_models,
        log_path=asset_path_logs,
        eval_path=asset_path_output,
        model_name = f"VGG19_Multimodal_{current_date}",
        classes = classes,
    )
    print("Execution Results:")
    print(predictions_results, metrics_summary, model_save_path, training_history_csv)

In [None]:
# Re-print the four values returned by train_and_evaluate in the previous cell.
print(predictions_results, metrics_summary, model_save_path, training_history_csv)

In [None]:
# Upload the execution's outputs.
# NOTE(review): clean_folder=True presumably removes the local output files afterwards - confirm.
execution.upload_execution_outputs(clean_folder=True)