In [None]:
repo_dir = "Repos"  
%load_ext autoreload
%autoreload 2

import sys
from pathlib import Path
sys.path.insert(0, str(Path.home() / repo_dir / "eye-ai-ml"))
sys.path.insert(0, str(Path.home() / repo_dir / "eye-ai-vgg19" / "src" / "vgg19"))
sys.path.insert(0, str(Path.home() / repo_dir / "RETFound_MAE"))
sys.path.insert(0, str(Path.home() / repo_dir / "eye-ai-exec" / "models" / "vgg19")) 

In [None]:
# Prerequisites
import json
import os
from eye_ai.eye_ai import EyeAI
import pandas as pd
from pathlib import Path, PurePath
import logging

from deriva_ml import DatasetBag, Workflow, ExecutionConfiguration, DatasetVersion
from deriva_ml import MLVocab as vc
# Root logger: INFO and above, timestamped. force=True replaces any handlers
# that were installed earlier in this kernel, so re-running the cell is safe.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True,
)

In [None]:
from deriva.core.utils.globus_auth_utils import GlobusNativeLogin
catalog_id = "eye-ai" #@param
host = 'www.eye-ai.org'

# Authenticate against the catalog host only when no valid login is present.
gnl = GlobusNativeLogin(host=host)
if not gnl.is_logged_in([host]):
    gnl.login(
        [host],
        no_local_server=True,
        no_browser=True,
        refresh_tokens=True,
        update_bdbag_keychain=True,
    )
    print("Login Successful")
else:
    print("You are already logged in.")

In [None]:
# Cache and working directories handed to the EyeAI client (both '/data' here).
cache_dir = '/data'
working_dir = '/data'

# Catalog client used by the rest of the notebook.
EA = EyeAI(
    hostname=host,
    catalog_id=catalog_id,
    cache_dir=cache_dir,
    working_dir=working_dir,
)

In [None]:
datasets = [
    '4-4116', # Selected images for training
    '4-411G', # Selected images for testing
    '2-7P5P', # Full multimodal dataset
    ]

# One download spec per dataset RID, in the same order as `datasets`; the
# version recorded is whatever EA.dataset_version reports for that RID.
to_be_download = [
    {
        'rid': dataset_rid,
        'materialize': True,
        'version': EA.dataset_version(dataset_rid=dataset_rid),
    }
    for dataset_rid in datasets
]

workflow_instance = EA.create_workflow(
    name="Multimodal workflow",
    workflow_type="Multimodal workflow",
)

# Execution inputs: the three datasets above plus one asset RID.
config = ExecutionConfiguration(
    datasets=to_be_download,
    assets=['2-4JR6'],
    workflow=workflow_instance,
    description="Instance of cropping multimodal images.",
)

execution = EA.create_execution(config)

In [None]:
# Show the execution object created by EA.create_execution (plain-text form via print).
print(execution)

In [None]:
# Pick the three dataset bags by position.
# NOTE(review): presumably the order matches the `datasets` list passed to
# ExecutionConfiguration (training, testing, full multimodal) — confirm.
downloaded_bags = execution.datasets
training_ds_bag, testing_ds_bag, multimodal_full_ds_bag = (
    downloaded_bags[0],
    downloaded_bags[1],
    downloaded_bags[2],
)

# First (and only requested) execution asset; later passed to preprocess_and_crop.
crop_image_model = execution.asset_paths[0]

In [None]:
def get_dataframe_from_bag(ds_bag: DatasetBag, multimodal_full_ds_bag: DatasetBag):
    """Join the images of one dataset bag onto the wide multimodal table.

    The ``Observation``, ``Image`` and ``Execution_Image_Fundus_Laterality``
    tables of ``ds_bag`` are reduced to the columns needed for linking, then
    inner-joined so each image row carries its ``Image_Side`` and the
    ``Subject`` of its observation. That per-image frame is inner-joined with
    ``EA.multimodal_wide(multimodal_full_ds_bag)`` on subject and side.

    Uses the notebook-level ``EA`` client and ``pd``.

    Args:
        ds_bag: Bag providing the image, laterality and observation tables.
        multimodal_full_ds_bag: Bag passed to ``EA.multimodal_wide``.

    Returns:
        The merged ``pandas.DataFrame``; rows without a match on either side
        of any join are dropped (all joins are inner joins).
    """
    observations = (
        ds_bag.get_table_as_dataframe('Observation')[['RID', 'Subject']]
        .rename(columns={'RID': 'RID_Observation'})
    )
    images = (
        ds_bag.get_table_as_dataframe('Image')[['RID', 'Filename', 'Observation']]
        .rename(columns={'RID': 'RID_Image'})
    )
    lateralities = (
        ds_bag.get_table_as_dataframe('Execution_Image_Fundus_Laterality')[['Image', 'Image_Side']]
        .rename(columns={'Image': 'RID_Image'})
    )

    # image -> laterality -> observation (which carries the subject)
    per_image = (
        images
        .merge(lateralities, on='RID_Image', how='inner')
        .merge(observations, left_on='Observation', right_on='RID_Observation', how='inner')
    )

    return pd.merge(
        EA.multimodal_wide(multimodal_full_ds_bag),
        per_image,
        left_on=['RID_Subject', 'Image_Side'],
        right_on=['Subject', 'Image_Side'],
        how='inner',
    )

In [None]:
# Build the training and testing frames against the same full multimodal bag
# (training first, then testing).
train_df, test_df = (
    get_dataframe_from_bag(split_bag, multimodal_full_ds_bag)
    for split_bag in (training_ds_bag, testing_ds_bag)
)

In [None]:
from pathlib import Path

# Scratch directory for the CSVs exchanged with the cropping step.
# pathlib does not expand "~" by itself, so resolve it explicitly, and create
# the directory because DataFrame.to_csv will not create missing parents.
# NOTE: this rebinds `working_dir`, which an earlier cell set to '/data' for
# the EyeAI client; from here on it refers to this scratch directory.
working_dir = Path("~/working_dir").expanduser()
working_dir.mkdir(parents=True, exist_ok=True)

# Save DataFrames as CSV files
train_csv_path = working_dir / "train.csv"
test_csv_path = working_dir / "test.csv"

train_df.to_csv(train_csv_path, index=False)
test_df.to_csv(test_csv_path, index=False)

In [None]:
# Table whose features are inspected; also reused by the feature_record_class call in the next cell.
table_name = 'Image'
# Bare last expression: the notebook displays whatever find_features returns for this table.
EA.find_features(table_name)

In [None]:
# Feature of interest on `table_name`; `feature_name` is reused later when the record class is requested again.
feature_name = 'Annotation'
# Record class for (table_name, feature_name); its `.feature.feature_columns` are inspected in the following cells.
Feature = EA.feature_record_class(table_name, feature_name)

In [None]:
from IPython.display import Markdown, display
# One line per feature column: its name and whether a value is required
# (required == the column is not nullable).
feature_column_summaries = [
    f'Name: {c.name}, Required: {not c.nullok}'
    for c in Feature.feature.feature_columns
]
display(Markdown('### Feature Name'), feature_column_summaries)
         

In [None]:
# Materialise the feature columns so they can be indexed, then show the
# interactive help for the type of the first one.
cl = list(Feature.feature.feature_columns)
help(cl[0].type)

In [None]:
# asset_path_models = execution.execution_asset_path("Diagnosis_Model")
# Location the execution provides for "Image_Annotation" assets. Later cells pass it to
# preprocess_and_crop as a string and list its contents to collect image RIDs.
asset_path_output = execution.execution_asset_path("Image_Annotation")
# Bare expression so the notebook displays the path.
asset_path_output

In [None]:
# feature_paths returns a pair for the Annotation feature on the Image table.
# NOTE(review): presumably (path for the feature values file, asset paths for the feature) — confirm against deriva-ml.
bb_csv_path, bb_asset_paths = execution.feature_paths('Image', 'Annotation')
# Bare expression so the notebook displays the second element.
bb_asset_paths

In [None]:
import shutil
from pathlib import Path

# Move every file from the execution's "Image_Annotation" asset directory into
# the Fundus_Bounding_Box asset directory of the Image/Annotation feature.
#
# Both directories are derived from the live execution instead of being
# hardcoded to one user's home and one execution RID, so the cell keeps
# working when the notebook is re-run (each run gets a new execution).
# Layout assumed (taken from the previously hardcoded paths):
#   <execution root>/execution-asset/Image_Annotation              <- source
#   <execution root>/feature/eye-ai/Image/Annotation/asset/Fundus_Bounding_Box
# NOTE(review): `bb_asset_paths` from execution.feature_paths may already hold
# the destination directory — confirm and prefer it if so.
src_dir = Path(asset_path_output)
execution_root = src_dir.parents[1]
dst_dir = (
    execution_root
    / "feature" / "eye-ai" / "Image" / "Annotation" / "asset" / "Fundus_Bounding_Box"
)

dst_dir.mkdir(parents=True, exist_ok=True)

for file in src_dir.iterdir():
    # Sub-directories are left where they are; only regular files are moved.
    if file.is_file():
        shutil.move(str(file), dst_dir / file.name)


In [None]:
from vgg19_disk_crop_predict import preprocess_and_crop

# Resolve "~" up front so preprocess_and_crop never receives a literal tilde
# (whether it expands "~" itself is not visible from this notebook), and make
# sure the directory for the output CSVs exists before anything is written.
crop_work_dir = Path("~/working_dir").expanduser()
crop_output_dir = crop_work_dir / "output"
crop_output_dir.mkdir(parents=True, exist_ok=True)

# The context manager is bound to a name other than `exec` so the Python
# builtin of that name is not shadowed for the rest of the kernel session.
with execution.execute() as crop_execution:
    # Same call for both splits; only the input/output CSV names differ.
    # The remaining positional arguments are forwarded unchanged.
    for split in ("train", "test"):
        preprocess_and_crop(
            multimodal_full_ds_bag,
            str(crop_work_dir / f"{split}.csv"),
            str(crop_output_dir / f"output_{split}.csv"),
            'template.jpg',
            str(asset_path_output),
            crop_image_model,
            "2-NK8E",
            "Optic Nerve",
            False,
        )

In [None]:
# Progress marker printed after the cropping cell; it performs no computation.
print("checkpoint test 1")

In [None]:
# Record class for feature `feature_name` on the "Image" table; instances are built from it when the feature list is assembled.
# NOTE(review): with table_name == 'Image' this is the same call as the earlier `Feature = EA.feature_record_class(table_name, feature_name)` — presumably the same class; confirm.
ImageBoundingboxFeature = EA.feature_record_class("Image", feature_name)
# Bare expression so the notebook displays the class.
ImageBoundingboxFeature

In [None]:
# Collect the image RIDs present in the output directory. File names are
# expected to look like "<prefix>_<RID>.<ext>"; the RID is the text between
# the first underscore and the following dot.
#  - names without an underscore are skipped (previously an IndexError),
#  - each RID is kept once even if several files share it, so no duplicate
#    feature records are produced downstream,
#  - the result is sorted because os.listdir order is arbitrary.
image_rids = sorted({
    file_name.split("_")[1].split('.')[0]
    for file_name in os.listdir(asset_path_output)
    if "_" in file_name
})

In [None]:
csv_train = Path("~/working_dir/output/output_train.csv")
df = pd.read_csv(csv_train)

# Row-wise lookup: value of "Image RID" -> value of "Worked Image Cropping Function".
# If an Image RID occurs more than once, the last row wins.
cropping_func_map_train = {
    rid: cropping_func
    for rid, cropping_func in zip(df["Image RID"], df["Worked Image Cropping Function"])
}

In [None]:
csv_test = Path("~/working_dir/output/output_test.csv")
df = pd.read_csv(csv_test)

# Row-wise lookup: value of "Image RID" -> value of "Worked Image Cropping Function".
# If an Image RID occurs more than once, the last row wins.
cropping_func_map_test = {
    rid: cropping_func
    for rid, cropping_func in zip(df["Image RID"], df["Worked Image Cropping Function"])
}

In [None]:
# Build one feature record per (image, split map) pair: an image gets a record
# for every lookup map (train first, then test) that contains its RID, and
# only if its "Cropped_<RID>.svg" file exists in the output directory.
image_bounding_box_feature_list = []
for image_rid in image_rids:
    svg_path = asset_path_output / f"Cropped_{image_rid}.svg"
    if not svg_path.exists():
        continue
    for cropping_func_map in (cropping_func_map_train, cropping_func_map_test):
        if image_rid not in cropping_func_map:
            continue
        image_bounding_box_feature_list.append(
            ImageBoundingboxFeature(
                Image=image_rid,
                Execution=execution.execution_rid,
                Fundus_Bounding_Box=svg_path,
                Annotation_Function=cropping_func_map[image_rid],
                Annotation_Type='Optic Nerve',
            )
        )
        

In [None]:
# Bare expression: display the assembled feature records for a visual check.
image_bounding_box_feature_list

In [None]:
# execution.write_feature_file(image_bounding_box_feature_list)
# execution.upload_execution_outputs()