In [None]:
# Prerequisites
import json
import os
from eye_ai.eye_ai import EyeAI

import pandas as pd
from pathlib import Path, PurePath
import logging

from deriva_ml import DatasetSpec, DatasetBag, Workflow, ExecutionConfiguration, VersionPart
from deriva_ml import MLVocab as vc
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)

In [None]:
# Login
from deriva.core.utils.globus_auth_utils import GlobusNativeLogin
# host = 'www.eye-ai.org'
host = 'dev.eye-ai.org'
catalog_id = "eye-ai"

gnl = GlobusNativeLogin(host=host)
if gnl.is_logged_in([host]):
    print("You are already logged in.")
else:
    gnl.login([host], no_local_server=True, no_browser=True, refresh_tokens=True, update_bdbag_keychain=True)
    print("Login Successful")

In [None]:
cache_dir = '/data'
working_dir = '/data'
EA = EyeAI(hostname = host, catalog_id = catalog_id, cache_dir= cache_dir, working_dir=working_dir)

In [None]:
# RID of source dataset, if any.
source_dataset = '6-ACEJ' # Prod '4-YZPA' test'5-2GGR' Dev '5-YSK2' test'6-ACAJ'

EA.add_term(vc.workflow_type, "Test Workflow", description="A test Workflow for new DM")
# Workflow instance
workflow_instance = EA.create_workflow(
    name="AI-Readi data template",
    workflow_type="Test Workflow",
)

# Configuration instance.
config = ExecutionConfiguration(
    datasets=[DatasetSpec(rid=source_dataset, version=EA.dataset_version(source_dataset), materialize=True)],
    # Materialize set to False if you only need the metadata from the bag, and not the assets,
    workflow=workflow_instance,
    description="Template instance of an AI-Readi data workflow")

# Initialize execution
execution = EA.create_execution(config)


In [None]:
ds_bag = execution.datasets[0]

In [44]:
sys_col = ['RMB', 'RCB', 'RMT', 'RCT']
subject = ds_bag.get_table_as_dataframe('Subject').drop(columns=sys_col).rename(columns={'RID': 'Subject'})
subject
observation = ds_bag.get_table_as_dataframe('Observation').drop(columns=sys_col).rename(columns={'RID': 'Observation',
                                                                                                'Date_of_Encoutner': 'Date_of_Encoutner_Observation'})
image = ds_bag.get_table_as_dataframe('Image').drop(columns=sys_col).rename(columns={'RID': 'Image',
                                                                                    'Date_of_Encoutner': 'Date_of_Encoutner_Image'})
device_detail = ds_bag.get_table_as_dataframe('Device').drop(columns=sys_col).rename(columns={'RID': 'Device'})
clinic = ds_bag.get_table_as_dataframe('AIREADI_Clinical_Records').drop(columns=sys_col).rename(columns={'RID': 'Clinic',
                                                                                                        'Date_of_Encounter': 'Date_of_Encoutner_Clinic'})

In [45]:
clinic

Unnamed: 0,Clinic,Observation,Subject,Date_of_Encoutner_Clinic,Clinical_Collection
0,6-8Y5A,5-YNGC,5-YKD4,2024-02-23 00:00:00,"[""{\""px280901_metadata\"": {\""clinical_id\"": \""..."
1,6-8Y5C,5-YNGC,5-YKD4,2024-03-19 00:00:00,"[""{\""viaomthd\"": {\""clinical_id\"": \""obs_21561..."
2,6-8Y5E,5-YNGC,5-YKD4,2024-04-09 00:00:00,"[""{\""eos_dsstdat\"": {\""clinical_id\"": \""obs_21..."
3,6-8Y6G,5-YNGP,5-YKD2,2023-08-30 00:00:00,"[""{\""Potassium (mEq/L)\"": {\""clinical_id\"": \""..."
4,6-8Y88,5-YNHA,5-YKDA,2024-03-15 00:00:00,"[""{\""BUN/Creatinine ratio\"": {\""clinical_id\"":..."
...,...,...,...,...,...
108,6-985Y,5-YSGE,5-YKDJ,2023-12-08 00:00:00,"[""{\""INSULIN (ng/mL)\"": {\""clinical_id\"": \""me..."
109,6-98AW,5-YSJJ,5-YKCR,2024-05-11 00:00:00,"[""{\""mhoccur_circ\"": {\""clinical_id\"": \""obs_1..."
110,6-98AY,5-YSJJ,5-YKCR,2024-05-12 00:00:00,"[""{\""pxne14\"": {\""clinical_id\"": \""obs_115368\..."
111,6-98B0,5-YSJJ,5-YKCR,2024-06-11 00:00:00,"[""{\""cm_slp\"": {\""clinical_id\"": \""obs_115443\..."


In [46]:
wide = subject.merge(observation, on='Subject', how='left')
wide = wide.merge(image, on='Observation', how='left')
wide = wide.merge(clinic, on='Observation', how='left')
# visit_occurance -> dateofencouter
# visit_id -> Observation ID
wide

Unnamed: 0,Subject_x,Subject_ID,Subject_Gender,Subject_Ethnicity,Observation,Observation_ID,hba1c,glaucoma_hx,visual_acuity_right,visual_acuity_left,...,Image_Angle,Image_Tag,Date_of_Encounter,Model_Device,Manufacturer,Device,Clinic,Subject_y,Date_of_Encoutner_Clinic,Clinical_Collection
0,5-YKCR,AIREADI_4220,unknown,unknown,5-YRKW,AIREADI_908,,,,,...,,,,,,,6-961A,5-YKCR,2024-06-11 00:00:00,"[""{\""Chloride (mEq/L)\"": {\""clinical_id\"": \""m..."
1,5-YKCR,AIREADI_4220,unknown,unknown,5-YSJJ,AIREADI_907,,,,,...,,,2024-06-11 00:00:00,EidonFA,CenterVue,,6-98AW,5-YKCR,2024-05-11 00:00:00,"[""{\""mhoccur_circ\"": {\""clinical_id\"": \""obs_1..."
2,5-YKCR,AIREADI_4220,unknown,unknown,5-YSJJ,AIREADI_907,,,,,...,,,2024-06-11 00:00:00,EidonFA,CenterVue,,6-98AY,5-YKCR,2024-05-12 00:00:00,"[""{\""pxne14\"": {\""clinical_id\"": \""obs_115368\..."
3,5-YKCR,AIREADI_4220,unknown,unknown,5-YSJJ,AIREADI_907,,,,,...,,,2024-06-11 00:00:00,EidonFA,CenterVue,,6-98B0,5-YKCR,2024-06-11 00:00:00,"[""{\""cm_slp\"": {\""clinical_id\"": \""obs_115443\..."
4,5-YKCR,AIREADI_4220,unknown,unknown,5-YSJJ,AIREADI_907,,,,,...,,,2024-06-11 00:00:00,EidonFA,CenterVue,,6-98B2,5-YKCR,2024-06-28 00:00:00,"[""{\""eos_ds\"": {\""clinical_id\"": \""obs_115453\..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3538,5-YKE8,AIREADI_7320,unknown,unknown,5-YPKP,AIREADI_734,,,,,...,,,2024-06-06 00:00:00,Spectralis,HeidelbergEngineering,,6-90WW,5-YKE8,2024-05-07 00:00:00,"[""{\""diet2\"": {\""clinical_id\"": \""obs_92702\"",..."
3539,5-YKE8,AIREADI_7320,unknown,unknown,5-YPKP,AIREADI_734,,,,,...,,,2024-06-06 00:00:00,Spectralis,HeidelbergEngineering,,6-90WY,5-YKE8,2024-06-06 00:00:00,"[""{\""cm_slp\"": {\""clinical_id\"": \""obs_92823\""..."
3540,5-YKE8,AIREADI_7320,unknown,unknown,5-YPKP,AIREADI_734,,,,,...,,,2024-06-06 00:00:00,Spectralis,HeidelbergEngineering,,6-90X0,5-YKE8,2024-06-10 00:00:00,"[""{\""lbdattim1\"": {\""clinical_id\"": \""obs_9283..."
3541,5-YKE8,AIREADI_7320,unknown,unknown,5-YPKP,AIREADI_734,,,,,...,,,2024-06-06 00:00:00,Spectralis,HeidelbergEngineering,,6-90X2,5-YKE8,2024-06-18 00:00:00,"[""{\""eos_ds\"": {\""clinical_id\"": \""obs_92833\""..."


In [53]:
file_path = execution.asset_file_path(
    asset_name="Execution_Asset",
    file_name="aireadi_wide_table.csv",
    asset_types="wide_table"
)
wide.to_csv(file_path)

In [54]:
execution.upload_execution_outputs(clean_folder=True)

2025-08-05 18:50:22,979 - INFO - Uploading execution files...
2025-08-05 18:50:23,335 - INFO - Initializing uploader: GenericUploader v1.7.10 [Python 3.10.13, Linux-5.10.210-201.852.amzn2.x86_64-x86_64-with-glibc2.26]
2025-08-05 18:50:24,584 - INFO - Scanning files in directory [/data/lizhiwei/EyeAI_working/deriva-ml/execution/6-ACH0/asset]...
2025-08-05 18:50:24,585 - INFO - Including file: [/data/lizhiwei/EyeAI_working/deriva-ml/execution/6-ACH0/asset/deriva-ml/Execution_Asset/aireadi_wide_table.csv].
2025-08-05 18:50:24,586 - INFO - Processing: [/data/lizhiwei/EyeAI_working/deriva-ml/execution/6-ACH0/asset/deriva-ml/Execution_Asset/aireadi_wide_table.csv]
2025-08-05 18:50:24,587 - INFO - Computed metadata for: [/data/lizhiwei/EyeAI_working/deriva-ml/execution/6-ACH0/asset/deriva-ml/Execution_Asset/aireadi_wide_table.csv].
2025-08-05 18:50:24,587 - INFO - Computing checksums for file: [/data/lizhiwei/EyeAI_working/deriva-ml/execution/6-ACH0/asset/deriva-ml/Execution_Asset/aireadi_wid

{'deriva-ml/Execution_Asset': [AssetFilePath('/data/lizhiwei/EyeAI_working/deriva-ml/execution/6-ACH0/asset/deriva-ml/Execution_Asset/aireadi_wide_table.csv')]}