In [None]:
# import sys
# IN_COLAB = 'google.colab' in sys.modules

# if IN_COLAB:
#     !pip install deriva
#     !pip install bdbag
#     !pip install --upgrade --force pydantic
#     !pip install git+https://github.com/informatics-isi-edu/deriva-ml git+https://github.com/informatics-isi-edu/eye-ai-ml

In [None]:
repo_dir = "Repos"   # Name of the folder under your home directory that holds your GitHub checkouts.
# Enable IPython's autoreload extension so edited modules are reloaded before each cell runs.
%load_ext autoreload
%autoreload 2

# Prepend ~/<repo_dir>/eye-ai-ml to the module search path so Python can find the
# model modules in the local checkout.
import sys
from pathlib import Path
sys.path.insert(0, str(Path.home() / repo_dir / "eye-ai-ml"))

In [None]:
# Prerequisites

import json
import os
from eye_ai.eye_ai import EyeAI
import pandas as pd
from pathlib import Path, PurePath
import logging
# import torch

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)

In [None]:

from deriva.core.utils.globus_auth_utils import GlobusNativeLogin

# Catalog and server used by every later cell.
catalog_id = "eye-ai" #@param
host = 'www.eye-ai.org'

# Authenticate against the host only when no valid login is already present.
gnl = GlobusNativeLogin(host=host)
if not gnl.is_logged_in([host]):
    gnl.login(
        [host],
        no_local_server=True,
        no_browser=True,
        refresh_tokens=True,
        update_bdbag_keychain=True,
    )
    print("Login Successful")
else:
    print("You are already logged in.")

In [None]:
# Variables to configure the rest of the notebook.

cache_dir = '/data'        # Directory in which to cache materialized BDBags for datasets
working_dir = '/data'    # Directory in which to place output files for later upload.

# RID of the execution configuration that is passed to EA.execution_init.
# The commented-out "2-C8ZG" appears to be an alternative configuration RID.
configuration_rid = "2-C988" #"2-C8ZG" # rid
# Change the config file so that bag_url=["minid: train", "minid: Valid", "minid: test"]



In [None]:
# Client object used for all catalog operations in this notebook.
EA = EyeAI(
    hostname=host,
    catalog_id=catalog_id,
    cache_dir=cache_dir,
    working_dir=working_dir,
)

In [None]:
# @title Initiate an Execution
configuration_records = EA.execution_init(configuration_rid=configuration_rid)
configuration_records.model_dump()

In [None]:
configuration_records

In [None]:
# Path of the first dataset bag listed in the execution configuration.
test_dir = str(configuration_records.bag_paths[0])
print(test_dir)

# Preview the bag's Image.csv, keeping only rows whose Image_Angle_Vocab is '2SK6'.
subject_csv_path = os.path.join(test_dir, 'data', 'Image.csv')
subject_df = pd.read_csv(subject_csv_path)
subject_df = subject_df.loc[subject_df['Image_Angle_Vocab'] == '2SK6']
subject_df



In [None]:
# @title Configure
dataset_rid = "2-277M" # @param {type:"string"}
diagnosis_tag_rid = "2-35RM" # @param {type:"string"}
compare_value = "Image_Quality" #@param {type:"string"}["Diagnosis", "Image_Quality", "Cup/Disk_Ratio"]
initial_diagnosis_tag_rid = "C1T4"
angle_two_rid = '2SK6'
grader_name = 'Van Nguyen'   # Only rows whose Full_Name equals this value are kept below.

# @title Example of Graded Test Dataset

# Fetch the two diagnosis tables for the dataset and reshape them around `compare_value`.
Graded_test_initial_diag = EA.image_tall(dataset_rid, initial_diagnosis_tag_rid)
Graded_test_grader_diag = EA.image_tall(dataset_rid, diagnosis_tag_rid)
# Graded_test_gold = EA.image_tall(dataset_rid, "2-5GXP")
long, wide = EA.reshape_table([Graded_test_initial_diag, Graded_test_grader_diag], compare_value)

# Filter and re-index in one chained expression. This yields the same frame as
# reset_index(inplace=True) (the old index is kept as an 'index' column) without
# mutating a filtered slice in place.
long = long[long.Full_Name == grader_name].reset_index()
long

In [None]:
import os
import pandas as pd
import cv2
import numpy as np
from pathlib import Path, PurePath
from tqdm import tqdm

def crop_to_eye(im):
    """
    Crop an image to the bounding box of its largest bright region.

    The image is converted to grayscale and thresholded at intensity 10. The external
    contour with the largest area is located and the image is sliced to that contour's
    bounding rectangle.

    Parameters:
    - im: Image array that cv2.cvtColor can convert with COLOR_BGR2GRAY
      (the notebook passes the result of cv2.imread).

    Returns:
    - The cropped image, or `im` unchanged when the thresholded mask contains no contour.
    """
    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    _, mask = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if len(contours) == 0:
        # No pixel passed the threshold (e.g. an all-dark image). max() over an empty
        # sequence would raise ValueError, so hand the image back uncropped.
        return im
    max_contour = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(max_contour)
    return im[y:y + h, x:x + w]

def create_LACDHS_quality_test_dataset(test_dir: str, output_dir: str, long_df: pd.DataFrame,
                                       angle_rid: str = "2SK6", class_mapping: dict = None) -> str:
    """
    Creates a test dataset for LACDHS image quality classification by organizing images into folders
    based on their Image_Quality from the provided long DataFrame, filtered by image angle and cropped to the eye.

    Parameters:
    - test_dir (str): Path to the raw test dataset bag. Must contain 'data/Image.csv' and
      the image files under 'data/assets/Image'.
    - output_dir (str): Path to the output directory where the organized dataset will be created.
    - long_df (pd.DataFrame): DataFrame with at least the columns 'Image' (matched against
      the 'RID' column of Image.csv) and 'Image_Quality'.
    - angle_rid (str): Value of 'Image_Angle_Vocab' to keep. Defaults to "2SK6" (Field 2 images).
    - class_mapping (dict): Maps Image_Quality labels to output folder names.
      Defaults to {'Good': '690J', 'Bad': '692J'}. Rows whose label is not a key are dropped.

    Returns:
    - str: The path to the test directory (<output_dir>/test). It is created even when no
      image is written.
    """
    if class_mapping is None:
        class_mapping = {'Good': '690J', 'Bad': '692J'}

    # Read the Image CSV and keep only the requested angle
    image_csv_path = os.path.join(test_dir, 'data', 'Image.csv')
    image_df = pd.read_csv(image_csv_path)
    image_df = image_df[image_df['Image_Angle_Vocab'] == angle_rid]

    # Merge dataframes, keeping only the rows that match with long_df
    merged_df = image_df.merge(long_df[['Image', 'Image_Quality']], left_on='RID', right_on='Image', how='inner')

    # Map quality labels to their folder codes and drop rows whose label is not in the mapping
    merged_df['Image_Quality_Code'] = merged_df['Image_Quality'].map(class_mapping)
    merged_df = merged_df.dropna(subset=['Image_Quality_Code'])

    image_root_path = os.path.join(test_dir, 'data', 'assets', 'Image')

    # Create the test directory up front so the returned path always exists.
    test_path = os.path.join(output_dir, 'test')
    os.makedirs(test_path, exist_ok=True)

    failed = 0
    for _, row in tqdm(merged_df.iterrows(), total=len(merged_df), desc="Processing test dataset"):
        quality_code = row['Image_Quality_Code']
        filename = row['Filename']
        src_path = os.path.join(image_root_path, filename)

        # cv2.imread returns None instead of raising when the file is missing or unreadable
        img = cv2.imread(src_path)
        if img is None:
            print(f"Warning: Could not read image {src_path}")
            failed += 1
            continue

        # Crop the image to the eye
        cropped_img = crop_to_eye(img)

        dst_dir = os.path.join(test_path, quality_code)
        os.makedirs(dst_dir, exist_ok=True)
        dst_path = os.path.join(dst_dir, filename)

        # cv2.imwrite reports failure through its return value; do not ignore it
        if not cv2.imwrite(dst_path, cropped_img):
            print(f"Warning: Could not write image {dst_path}")
            failed += 1

    # Print the count of labelled rows in each category
    print("\nImage count for test dataset:")
    print(merged_df['Image_Quality_Code'].value_counts())
    if failed:
        # The counts above are per CSV row, so report how many of them produced no file
        print(f"Warning: {failed} of {len(merged_df)} images could not be read or written")

    return test_path



In [None]:
configuration_records.working_dir

In [None]:
# @title Data Preprocessing (Filtering Image.csv for just Field_2 Images)

# Source bag: the first dataset bag listed in the execution configuration.
test_dir = str(configuration_records.bag_paths[0])

# Build the cropped, per-class test folders inside the execution working directory.
test_path = create_LACDHS_quality_test_dataset(
    test_dir=test_dir,
    output_dir=str(configuration_records.working_dir),
    long_df=long,
)

# Show where the dataset was written.
print("Test dataset path:", test_path)

In [None]:
import os

def count_files(directory):
    """
    Count the regular files located directly inside a directory.

    Parameters:
    - directory: Path of the directory to inspect. Subdirectories are not descended into.

    Returns:
    - int: Number of entries in `directory` for which os.path.isfile is true.
    """
    file_total = 0
    for entry_name in os.listdir(directory):
        if os.path.isfile(os.path.join(directory, entry_name)):
            file_total += 1
    return file_total

def analyze_lacdhs_angle_dataset(base_path, main_folders=('test',)):
    """
    Print the number of files in each class subfolder of a dataset split.

    Parameters:
    - base_path: Directory that contains the split folders.
    - main_folders: Names of the split folders under `base_path` to analyze.
      Defaults to ('test',).

    Returns:
    - None. For each split it prints one line per subfolder plus a total, or a
      "<split> folder not found" message when the split directory is missing.
    """
    for main_folder in main_folders:
        main_folder_path = os.path.join(base_path, main_folder)
        # isdir (rather than exists) also rejects a plain file, which os.listdir cannot list
        if not os.path.isdir(main_folder_path):
            print(f"{main_folder} folder not found")
            continue

        print(f"\nAnalyzing {main_folder} folder:")

        total_files = 0
        # os.listdir returns entries in arbitrary order; sort so the report is stable
        for angle_folder in sorted(os.listdir(main_folder_path)):
            angle_folder_path = os.path.join(main_folder_path, angle_folder)
            if os.path.isdir(angle_folder_path):
                file_count = count_files(angle_folder_path)
                print(f"  {angle_folder}: {file_count} images")
                total_files += file_count

        print(f"Total images in {main_folder}: {total_files}")

# Usage
# Analyze the directory the test dataset was written to (the execution working
# directory) instead of a hard-coded, user-specific absolute path.
base_path = str(configuration_records.working_dir)
analyze_lacdhs_angle_dataset(base_path)



In [None]:
import os
import random
import matplotlib.pyplot as plt
from PIL import Image

def visualize_lacdhs_quality_dataset(base_path, samples_per_angle=6, main_folders=('test',)):
    """
    Show a grid of randomly chosen sample images for each class subfolder of a dataset split.

    One figure is drawn per split, with one row per subfolder and `samples_per_angle`
    columns. Rows with fewer images than columns leave the remaining cells blank.

    Parameters:
    - base_path: Directory that contains the split folders.
    - samples_per_angle (int): Number of images to sample from each subfolder.
    - main_folders: Names of the split folders under `base_path` to visualize.
      Defaults to ('test',).

    Returns:
    - None. Figures are shown with plt.show() and the subfolder names are printed.
    """
    for main_folder in main_folders:
        main_folder_path = os.path.join(base_path, main_folder)
        if not os.path.isdir(main_folder_path):
            print(f"{main_folder} folder not found")
            continue

        print(f"\nVisualizing samples from {main_folder} folder:")

        # Sorted so that the row order is stable between runs
        angle_folders = sorted(
            f for f in os.listdir(main_folder_path)
            if os.path.isdir(os.path.join(main_folder_path, f))
        )
        if not angle_folders:
            # plt.subplots cannot build a grid with zero rows
            print(f"No subfolders found in {main_folder}")
            continue

        # Calculate grid size
        n_cols = samples_per_angle
        n_rows = len(angle_folders)

        # squeeze=False keeps `axes` two-dimensional even with a single row or column,
        # so axes[i, j] indexing below always works
        fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols*3, n_rows*3.5), squeeze=False)
        fig.suptitle(f'Sample Images from {main_folder.capitalize()} Set', fontsize=16)

        for i, angle_folder in enumerate(angle_folders):
            angle_folder_path = os.path.join(main_folder_path, angle_folder)
            image_files = [f for f in os.listdir(angle_folder_path) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]

            if len(image_files) < samples_per_angle:
                print(f"Warning: Not enough images in {angle_folder}. Using all available images.")
                selected_files = image_files
            else:
                selected_files = random.sample(image_files, samples_per_angle)

            for j, image_file in enumerate(selected_files):
                img_path = os.path.join(angle_folder_path, image_file)
                # Close the file handle as soon as the pixels have been handed to matplotlib
                with Image.open(img_path) as img:
                    axes[i, j].imshow(img)

                # Add image filename as title for each subplot
                axes[i, j].set_title(image_file, fontsize=8)

            # Label the row with its folder name on the first column
            axes[i, 0].set_ylabel(angle_folder, rotation=0, labelpad=40, va='center', fontsize=10)

        # Hide ticks and frames on every cell (including unused ones). axis('off') is
        # avoided because it would also hide the row labels set above.
        for ax in axes.flat:
            ax.set_xticks([])
            ax.set_yticks([])
            for spine in ax.spines.values():
                spine.set_visible(False)

        plt.tight_layout()
        plt.subplots_adjust(top=0.95, bottom=0.05, left=0.2, right=0.98)
        plt.show()

        # Print confirmation of angles
        print(f"Angles in {main_folder} set:")
        for angle in angle_folders:
            print(f"  - {angle}")

# Usage
# Point at the execution working directory (where the test dataset is written)
# instead of a hard-coded, user-specific absolute path.
base_path = str(configuration_records.working_dir)
# visualize_lacdhs_quality_dataset(base_path)

In [None]:

# Directory that receives this execution's output assets.
output_path = str(EA.working_dir) + "/Execution_Assets/" + configuration_records.vocabs['Execution_Asset_Type'][0].name
# makedirs with exist_ok=True also creates the intermediate "Execution_Assets" folder
# and lets this cell be re-run; os.mkdir raised when the folder already existed or
# its parent was missing.
os.makedirs(output_path, exist_ok=True)

In [None]:
output_path

In [None]:
best_hyper_parameters_json_path = str(configuration_records.assets_paths[0])

In [None]:
best_hyper_parameters_json_path

In [None]:
model_path = str(configuration_records.assets_paths[1])

In [None]:
model_path

In [None]:
import json

# Load the JSON file at best_hyper_parameters_json_path
# (the first asset path of the execution configuration).
with open(best_hyper_parameters_json_path, 'r') as file:
    data = json.load(file)

# Pretty-print the loaded contents with 4-space indentation for inspection.
print(json.dumps(data, indent=4))

In [None]:

# @title Execute Evaluation algorithm

from eye_ai.models.vgg19_lacdhs_quality_predict import predict_and_evaluate
# Run the evaluation inside the execution context for this execution RID.
# NOTE(review): the `as exec` target shadows Python's builtin exec() for the rest of
# the notebook and is not used inside the block; consider renaming it.
with EA.execution(execution_rid=configuration_records.execution_rid) as exec:
    predict_and_evaluate(
        model_path=model_path,                 # second asset path of the configuration
        image_path=test_path,                  # folder returned by create_LACDHS_quality_test_dataset
        output_dir=output_path,                # directory created earlier for execution assets
        best_hyperparameters_json_path=best_hyper_parameters_json_path
    )
    


In [None]:
# # @title Save Execution Assets (model) and Metadata
# Upload the outputs of this execution and keep the call's return value.
# NOTE(review): the meaning of the positional True is not visible in this notebook;
# confirm it against EA.execution_upload's signature.
uploaded_assets = EA.execution_upload(configuration_records.execution_rid, True)