In [None]:
repo_dir = "Repos"   # Folder under your home directory that holds your GitHub repo checkouts.
# Load IPython's autoreload extension in mode 2, so imported modules are re-imported
# before each cell runs and edits to local source are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

# Put <home>/<repo_dir>/eye-ai-ml at the front of sys.path so modules in that checkout
# are found first (presumably the `eye_ai` package imported in the next cell).
import sys
from pathlib import Path
sys.path.insert(0, str(Path.home() / repo_dir / "eye-ai-ml"))

In [None]:
# Prerequisites
import json
import os
from eye_ai.eye_ai import EyeAI

import pandas as pd
from pathlib import Path, PurePath
import logging

from deriva_ml import DatasetBag, Workflow, ExecutionConfiguration, DatasetVersion
from deriva_ml import MLVocab as vc
# Configure the root logger: INFO and above, rendered as "timestamp - level - message".
# force=True replaces any handlers already attached to the root logger, so re-running
# this cell re-applies the configuration instead of being silently ignored.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    force=True,
)

In [None]:
from deriva.core.utils.globus_auth_utils import GlobusNativeLogin
catalog_id = "eye-ai" #@param
host = 'www.eye-ai.org'


# Start a Globus login for `host` only when no session is reported for it yet.
gnl = GlobusNativeLogin(host=host)
if not gnl.is_logged_in([host]):
    gnl.login(
        [host],
        no_local_server=True,
        no_browser=True,
        refresh_tokens=True,
        update_bdbag_keychain=True,
    )
    print("Login Successful")
else:
    print("You are already logged in.")

In [None]:
# cache_dir and working_dir both point at /data; change them here if your storage differs.
cache_dir = working_dir = '/data'
EA = EyeAI(
    hostname=host,
    catalog_id=catalog_id,
    cache_dir=cache_dir,
    working_dir=working_dir,
)

In [None]:
# Dataset RIDs for this execution. Later cells read execution.datasets by position,
# so keep this order: training, testing, full multimodal.
datasets = [
    '4-4116',  # Selected images for training
    '4-411G',  # Selected images for testing
    '2-7P5P',  # Full multimodal dataset
]

# One spec per RID: request materialization and pin the version that
# EA.dataset_version returns for that RID.
to_be_download = [
    {
        'rid': dataset_rid,
        'materialize': True,
        'version': EA.dataset_version(dataset_rid=dataset_rid),
    }
    for dataset_rid in datasets
]

# Register the workflow that this notebook represents.
workflow_instance = EA.add_workflow(
    Workflow(
        name="Multimodal workflow",
        url="https://github.com/informatics-isi-edu/eye-ai-exec/blob/main/notebooks/Multimodal_Huy/multimodal_extract_images_template.ipynb",
        workflow_type="Multimodal workflow",
    )
)

# Bundle the dataset specs and the workflow into a configuration, then create the execution.
config = ExecutionConfiguration(
    datasets=to_be_download,
    workflow=workflow_instance,
    description="Instance of getting images in multimodal dataset.",
)

execution = EA.create_execution(config)

In [None]:
# Print the execution object's string form as a quick check of what create_execution returned.
print(execution)

In [None]:
# Bind each entry of execution.datasets to a descriptive name by position.
# NOTE(review): presumably execution.datasets keeps the order of the `datasets` list used to
# build the configuration (training, testing, full multimodal) — confirm against deriva_ml.
training_ds_bag = execution.datasets[0]
testing_ds_bag = execution.datasets[1]

multimodal_full_ds_bag = execution.datasets[2]

In [None]:
def get_dataframe_from_bag(ds_bag: DatasetBag, multimodal_full_ds_bag: DatasetBag):
    """Join the image records of ``ds_bag`` with the wide multimodal table.

    Reads the 'Observation', 'Image' and 'Execution_Image_Fundus_Laterality'
    tables from ``ds_bag`` and inner-joins them: image to laterality on the
    image RID, then to observation on the observation RID. The result is
    inner-joined with ``EA.multimodal_wide(multimodal_full_ds_bag)``, matching
    the wide table's ('RID_Subject', 'Image_Side') against the image side's
    ('Subject', 'Image_Side').

    Uses the notebook-level ``EA`` object.

    Args:
        ds_bag: Bag that supplies the Observation, Image and laterality tables.
        multimodal_full_ds_bag: Bag handed to ``EA.multimodal_wide``.

    Returns:
        The DataFrame produced by the final inner merge. Rows without a match
        on either side of any of the joins are dropped.
    """
    # Load the three source tables from the bag.
    observations = ds_bag.get_table_as_dataframe('Observation')
    images = ds_bag.get_table_as_dataframe('Image')
    lateralities = ds_bag.get_table_as_dataframe('Execution_Image_Fundus_Laterality')

    # Image <-> laterality, keyed on the image RID (renamed to 'RID_Image' on both sides).
    image_cols = images[['RID', 'Filename', 'Observation']].rename(columns={'RID': 'RID_Image'})
    side_cols = lateralities[['Image', 'Image_Side']].rename(columns={'Image': 'RID_Image'})
    images_with_side = image_cols.merge(side_cols, on='RID_Image', how='inner')

    # Attach the subject via the observation each image refers to.
    subject_cols = observations[['RID', 'Subject']].rename(columns={'RID': 'RID_Observation'})
    images_with_subject = images_with_side.merge(
        subject_cols,
        left_on='Observation',
        right_on='RID_Observation',
        how='inner',
    )

    wide = EA.multimodal_wide(multimodal_full_ds_bag)

    # Final join: one wide-table row per (subject, side) paired with its matching images.
    return pd.merge(
        wide,
        images_with_subject,
        left_on=['RID_Subject', 'Image_Side'],
        right_on=['Subject', 'Image_Side'],
        how='inner',
    )

In [None]:
# Build the training and testing frames; both are joined against the same full multimodal bag.
train_df = get_dataframe_from_bag(
    ds_bag=training_ds_bag,
    multimodal_full_ds_bag=multimodal_full_ds_bag,
)
test_df = get_dataframe_from_bag(
    ds_bag=testing_ds_bag,
    multimodal_full_ds_bag=multimodal_full_ds_bag,
)

In [None]:
# List every column name of the merged training frame so you can decide which ones to filter on.
# A bare `train_df` renders the whole frame, and pandas elides columns beyond its
# display.max_columns limit, so the full column set would not be visible; it would also
# embed row values in the saved notebook output.
train_df.columns.tolist()