In [1]:
import os
import sys

# Add the repository root to the Python path
repo_root = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))
sys.path.insert(0, repo_root)

# Set the working directory to the repository root
os.chdir(repo_root)

In [2]:
# Prerequisites
import json
import os
from eye_ai.eye_ai import EyeAI

import pandas as pd
from pathlib import Path, PurePath
import logging

from deriva_ml import DatasetSpec, ExecutionConfiguration, DerivaML, Workflow
from deriva_ml import MLVocab as vc
from deriva_ml.deriva_definitions import ColumnDefinition, BuiltinTypes
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)

In [3]:
# Login
from deriva.core.utils.globus_auth_utils import GlobusNativeLogin
host = 'dev.eye-ai.org'
# host = 'www.eye-ai.org'
catalog_id = "eye-ai"

gnl = GlobusNativeLogin(host=host)
if gnl.is_logged_in([host]):
    print("You are already logged in.")
else:
    gnl.login([host], no_local_server=True, no_browser=True, refresh_tokens=True, update_bdbag_keychain=True)
    print("Login Successful")

You are already logged in.


In [4]:
cache_dir = '/Users/vivi/Desktop/eye_ai/execution_cache'
working_dir = '/Users/vivi/Desktop/eye_ai/execution_cache'
EA = EyeAI(hostname = host, catalog_id = catalog_id, cache_dir= cache_dir, working_dir=working_dir)

In [7]:
source_dataset = '2-7P5P'

EA.add_term("Workflow_Type", term_name="Feature_Creation", 
            description="Workflow for feature creation")

workflow_instance = Workflow(
    name="Create Condition_Label feature",
    workflow_type="Feature_Creation",
    url='https://github.com/informatics-isi-edu/eye-ai-exec/blob/401b5e35129da24017277a00b0ac429c421396bb--/notebooks/feature/create_condition_label_feature.ipynb',
    is_notebook=True
)

config = ExecutionConfiguration(
    datasets=[DatasetSpec(rid=source_dataset, version=EA.dataset_version(source_dataset), materialize=False)],
    workflow= workflow_instance, # dev'5-SG9W'
    description="Create Condition_Label for multimodal data")

# Initialize execution
execution = EA.create_execution(config)

Execution RID: https://dev.eye-ai.org/id/eye-ai/5-Y0V2@33D-D3Y7-61V0

2025-06-03 17:58:20,154 - INFO - Materialize bag 2-7P5P... 
2025-06-03 17:58:25,559 - INFO - Wrote configuration file: /Users/vivi/.bdbag/bdbag.json
2025-06-03 17:58:25,562 - INFO - Attempting GET from URL: https://eye-ai-shared.s3.amazonaws.com/6656e08709b2f7c0bbed8284fdfb550d/2025-06-03_17.44.04/Dataset_2-7P5P.zip
2025-06-03 17:58:29,341 - INFO - File [/Users/vivi/Desktop/eye_ai/repos/eye-ai-exec/Dataset_2-7P5P.zip] transfer complete. 6.373 MB transferred at 2.14 MB/second. Elapsed time: 0:00:02.985160.
2025-06-03 17:58:29,345 - INFO - Extracting ZIP archived file: /Users/vivi/Desktop/eye_ai/repos/eye-ai-exec/Dataset_2-7P5P.zip
2025-06-03 17:58:29,504 - INFO - File /Users/vivi/Desktop/eye_ai/repos/eye-ai-exec/Dataset_2-7P5P.zip was successfully extracted to directory /Users/vivi/Desktop/eye_ai/execution_cache/2-7P5P_c927e5b390530adf1e9987a518dd6d740631eea5b3956d1bd697d7b4c4438842/Dataset_2-7P5P
2025-06-03 17:58:29,505 - INFO - Validating bag structure: /Users/vivi/Desktop/eye_ai/exec

In [8]:
print(execution)

caching_dir: /Users/vivi/Desktop/eye_ai/execution_cache
_working_dir: /Users/vivi/Desktop/eye_ai/execution_cache/vivi/EyeAI_working
execution_rid: 5-Y0V2
workflow_rid: 5-Y0TW
asset_paths: {}
configuration: datasets=[DatasetSpec(rid='2-7P5P', materialize=False, version=DatasetVersion(major=4, minor=1, patch=2))] assets=[] workflow=Workflow(name='Create Condition_Label feature', url='https://github.com/informatics-isi-edu/eye-ai-exec/blob/401b5e35129da24017277a00b0ac429c421396bb--/notebooks/feature/create_condition_label_feature.ipynb', workflow_type='Feature_Creation', version=None, description=None, rid=None, checksum=None, is_notebook=True) parameters={} description='Create Condition_Label for multimodal data' argv=['/opt/anaconda3/envs/eye_ai/lib/python3.12/site-packages/ipykernel_launcher.py', '--f=/Users/vivi/Library/Jupyter/runtime/kernel-v35a9dde05e98689cce3b83bd75c4c7fc0cbf79fe3.json']


# Create Feature

In [28]:
severity_cv = EA.create_vocabulary(vocab_name='Severity_Label', schema='eye-ai')
severity_feature = EA.create_feature(target_table='Clinical_Records', feature_name='Glaucoma_Severity', 
                  terms=[severity_cv])

# Compute Feature
## Retrived data

In [13]:
ds_bag = execution.datasets[0]
tables = ds_bag.list_tables()
# tables

['deriva-ml:Asset_Role',
 'deriva-ml:Asset_Type',
 'deriva-ml:Dataset',
 'deriva-ml:Dataset_Dataset',
 'deriva-ml:Dataset_Dataset_Type',
 'deriva-ml:Dataset_Execution',
 'deriva-ml:Dataset_Type',
 'deriva-ml:Dataset_Version',
 'deriva-ml:Execution',
 'deriva-ml:Execution_Asset',
 'deriva-ml:Execution_Asset_Asset_Type',
 'deriva-ml:Execution_Asset_Execution',
 'deriva-ml:Execution_Asset_Type',
 'deriva-ml:Execution_Metadata',
 'deriva-ml:Execution_Metadata_Asset_Type',
 'deriva-ml:Execution_Metadata_Execution',
 'deriva-ml:Execution_Metadata_Type',
 'deriva-ml:Feature_Name',
 'deriva-ml:File',
 'deriva-ml:File_File_Type',
 'deriva-ml:File_Type',
 'deriva-ml:Workflow',
 'deriva-ml:Workflow_Type',
 'eye-ai:Annotation',
 'eye-ai:Annotation_Function',
 'eye-ai:Annotation_Type',
 'eye-ai:Clinical_Records',
 'eye-ai:Clinical_Records_ICD10_Eye',
 'eye-ai:Clinical_Records_Observation',
 'eye-ai:Condition_Label',
 'eye-ai:Diagnosis_Image',
 'eye-ai:Diagnosis_Observation',
 'eye-ai:Diagnosis_Stat

In [44]:
clinical_records = ds_bag.get_table_as_dataframe('Clinical_Records')
icd_mapping = ds_bag.get_table_as_dataframe('Clinical_Records_ICD10_Eye')
icd_codes = ds_bag.get_table_as_dataframe('ICD10_Eye')[['Name']]

## Attatch label to the icd discription

In [45]:
# GS, Mild Glaucoma, Moderate Glaucoma, Severe Glaucoma, Unspecified/Indeterminate Glaucoma
glaucoma_codes = icd_codes[icd_codes['Name'].str.startswith('H40')]
glaucoma_codes.loc[glaucoma_codes['Name'].str.startswith('H40.0'),'Label'] = 'GS'
glaucoma_codes.loc[glaucoma_codes['Name'].str[7] == '0', 'Label'] = 'Unspecified/Indeterminate'
glaucoma_codes.loc[glaucoma_codes['Name'].str[7] == '1', 'Label'] = 'Mild'
glaucoma_codes.loc[glaucoma_codes['Name'].str[7] == '2', 'Label'] = 'Moderate'
glaucoma_codes.loc[glaucoma_codes['Name'].str[7] == '3', 'Label'] = 'Severe'
glaucoma_codes.loc[glaucoma_codes['Name'].str[7] == '4', 'Label'] = 'Unspecified/Indeterminate'
glaucoma_codes.dropna(subset=['Label'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  glaucoma_codes.loc[glaucoma_codes['Name'].str.startswith('H40.0'),'Label'] = 'GS'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  glaucoma_codes.dropna(subset=['Label'], inplace=True)


## Attatch label to clinical records

In [47]:
mapping = icd_mapping[['Clinical_Records', 
                       'ICD10_Eye']].merge(glaucoma_codes, 
                                           left_on='ICD10_Eye', 
                                           right_on='Name', 
                                           how='left')
# Define severity order
severity_order = {
    'Severe': 4,
    'Moderate': 3,
    'Mild': 2,
    'Unspecified/Indeterminate': 1,
    'GS': 0
}

# Create a function to get the most severe label
def get_most_severe(labels):
    if labels.isna().all():
        return pd.NA
    valid_labels = labels.dropna()
    if len(valid_labels) == 0:
        return pd.NA
    return max(valid_labels, key=lambda x: severity_order.get(x, -1))

# Group by clinical_records and apply the function
result = mapping.groupby('Clinical_Records')['Label'].agg(get_most_severe).reset_index()
result.dropna(subset=['Label'], inplace=True)
result

Unnamed: 0,Clinical_Records,Label
0,5-Q50Y,Moderate
1,5-Q510,Moderate
2,5-Q512,Moderate
3,5-Q514,Moderate
4,5-Q516,Moderate
...,...,...
3719,5-QCAG,GS
3720,5-QCAJ,GS
3721,5-QCAM,GS
3722,5-QCAP,GS


# Feature ingestion

In [49]:
EA.find_features('Clinical_Records')

[Feature(target_table=Clinical_Records, feature_name=Glaucoma_Severity, feature_table=Execution_Clinical_Records_Glaucoma_Severity)]

In [50]:
feature_name = 'Glaucoma_Severity'
Feature = EA.feature_record_class('Clinical_Records', feature_name)
from IPython.display import Markdown, display
display(
    Markdown('### Feature Name'),
    [ f'Name: {c.name}, Required: {not c.nullok}' for c in Feature.feature.feature_columns]
)

In [76]:
EA.add_term("Severity_Label", term_name="Moderate", description="Moderate stage")
EA.add_term("Severity_Label", term_name="Severe", description="Severe stage")
EA.add_term("Severity_Label", term_name="Mild", description="Mild stage")
EA.add_term("Severity_Label", term_name="Unspecified/Indeterminate", 
            description="Indeterminate stage or stage unspecified")
EA.add_term("Severity_Label", term_name="GS", description="Glaucoma Suspect")

severe_feature_list = [Feature(
    Execution=execution.execution_rid,
    Clinical_Records=row['Clinical_Records'],
    Severity_Label=row['Label'],
    Feature_Name=feature_name,
    ) for index, row in result.iterrows()]

In [89]:
execution.add_features(severe_feature_list)

In [90]:
execution.upload_execution_outputs()

2025-06-03 23:22:03,804 - INFO - Uploading execution files...
2025-06-03 23:22:04,088 - INFO - Initializing uploader: GenericUploader v1.7.8 [Python 3.12.9, macOS-15.5-arm64-arm-64bit]
2025-06-03 23:22:04,251 - INFO - Attempting to acquire a dependent lock in [/Users/vivi/Desktop/eye_ai/execution_cache/vivi/EyeAI_working/deriva-ml/execution/5-Y0V2/asset]
2025-06-03 23:22:04,253 - INFO - Scanning files in directory [/Users/vivi/Desktop/eye_ai/execution_cache/vivi/EyeAI_working/deriva-ml/execution/5-Y0V2/asset]...
2025-06-03 23:22:04,254 - INFO - Skipping file: [/Users/vivi/Desktop/eye_ai/execution_cache/vivi/EyeAI_working/deriva-ml/execution/5-Y0V2/asset/.DS_Store] -- Invalid file type or directory location.
2025-06-03 23:22:04,255 - INFO - Skipping file: [/Users/vivi/Desktop/eye_ai/execution_cache/vivi/EyeAI_working/deriva-ml/execution/5-Y0V2/asset/deriva-ml/.DS_Store] -- Invalid file type or directory location.
2025-06-03 23:22:04,255 - INFO - Including file: [/Users/vivi/Desktop/eye_

{'deriva-ml/Execution_Metadata': [AssetFilePath('/Users/vivi/Desktop/eye_ai/execution_cache/vivi/EyeAI_working/deriva-ml/execution/5-Y0V2/asset/deriva-ml/Execution_Metadata/configuration.json'),
  AssetFilePath('/Users/vivi/Desktop/eye_ai/execution_cache/vivi/EyeAI_working/deriva-ml/execution/5-Y0V2/asset/deriva-ml/Execution_Metadata/environment_snapshot_20250603_175832.txt')]}