In [1]:
import os
import sys

# Add the repository root to the Python path.
# Resolve it only once per kernel: this cell also changes the working directory,
# so recomputing it from os.getcwd() on a re-run would climb two more levels up.
if 'repo_root' not in globals():
    repo_root = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))

# Keep the repository root first on sys.path without stacking duplicates on re-runs.
if not sys.path or sys.path[0] != repo_root:
    sys.path.insert(0, repo_root)

# Set the working directory to the repository root
os.chdir(repo_root)

In [2]:
# Prerequisites
import json
import os
from eye_ai.eye_ai import EyeAI

import pandas as pd
from pathlib import Path, PurePath
import logging

from deriva_ml import DatasetSpec, ExecutionConfiguration, DerivaML, Workflow
from deriva_ml import MLVocab as vc
from deriva_ml.deriva_definitions import ColumnDefinition, BuiltinTypes
# Route log records at INFO and above through a timestamped format;
# force=True replaces any handlers already installed on the root logger.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    force=True,
)

In [None]:
# Login
from deriva.core.utils.globus_auth_utils import GlobusNativeLogin

catalog_id = "eye-ai"
host = 'dev.eye-ai.org'
# host = 'www.eye-ai.org'

# Only start the login flow when there is no existing login for the host.
gnl = GlobusNativeLogin(host=host)
if not gnl.is_logged_in([host]):
    gnl.login(
        [host],
        no_local_server=True,
        no_browser=True,
        refresh_tokens=True,
        update_bdbag_keychain=True,
    )
    print("Login Successful")
else:
    print("You are already logged in.")

In [4]:
# Cache and working directories handed to EyeAI.
# The default lives under the current user's home directory, so the notebook is
# not tied to one machine; set EYE_AI_CACHE_DIR / EYE_AI_WORKING_DIR to override.
# The working directory defaults to the cache directory.
default_cache_dir = Path.home() / 'Desktop' / 'eye_ai' / 'execution_cache'
cache_dir = os.environ.get('EYE_AI_CACHE_DIR', str(default_cache_dir))
working_dir = os.environ.get('EYE_AI_WORKING_DIR', cache_dir)

# Catalog client used by every cell below.
EA = EyeAI(hostname=host, catalog_id=catalog_id, cache_dir=cache_dir, working_dir=working_dir)

In [None]:
source_dataset = '2-7P5P'

# Add the workflow-type term that the workflow below refers to.
EA.add_term(
    "Workflow_Type",
    term_name="Feature_Creation",
    description="Workflow for feature creation",
)

# Describe this notebook as the workflow behind the execution.
workflow_instance = Workflow(
    name="Create Condition_Label feature",
    workflow_type="Feature_Creation",
    url='https://github.com/informatics-isi-edu/eye-ai-exec/blob/401b5e35129da24017277a00b0ac429c421396bb--/notebooks/feature/create_condition_label_feature.ipynb',
    is_notebook=True,
)

# The source dataset is referenced at the version reported by the catalog,
# with materialize=False.
source_spec = DatasetSpec(
    rid=source_dataset,
    version=EA.dataset_version(source_dataset),
    materialize=False,
)
config = ExecutionConfiguration(
    datasets=[source_spec],
    workflow=workflow_instance,  # dev'5-SG9W'
    description="Create Condition_Label for multimodal data",
)

# Initialize execution
execution = EA.create_execution(config)

In [None]:
# Print the execution object's string form as a quick check of what was created.
print(execution)

# Create Feature

In [28]:
# Vocabulary 'Severity_Label' in the 'eye-ai' schema; its terms are the feature's values.
severity_cv = EA.create_vocabulary(schema='eye-ai', vocab_name='Severity_Label')

# Feature 'Glaucoma_Severity' on Clinical_Records, backed by that vocabulary.
severity_feature = EA.create_feature(
    target_table='Clinical_Records',
    feature_name='Glaucoma_Severity',
    terms=[severity_cv],
)

# Compute Feature
## Retrieved data

In [None]:
# Work with the first dataset attached to the execution.
ds_bag = execution.datasets[0]
# Result of list_tables() for that dataset (presumably the available table names — verify);
# uncomment the last line to inspect it.
tables = ds_bag.list_tables()
# tables

In [44]:
# Pull the three tables needed for labelling out of the dataset as DataFrames.
load_table = ds_bag.get_table_as_dataframe

clinical_records = load_table('Clinical_Records')
icd_mapping = load_table('Clinical_Records_ICD10_Eye')

# Only the 'Name' column of the ICD-10 table is used downstream.
icd_table = load_table('ICD10_Eye')
icd_codes = icd_table[['Name']]

## Attach label to the ICD description

In [None]:
# Labels: GS, Mild Glaucoma, Moderate Glaucoma, Severe Glaucoma, Unspecified/Indeterminate Glaucoma
# Maps the character at string index 7 of the code name to its severity label.
stage_labels = {
    '0': 'Unspecified/Indeterminate',
    '1': 'Mild',
    '2': 'Moderate',
    '3': 'Severe',
    '4': 'Unspecified/Indeterminate',
}

# Copy the filtered rows so the assignments below write to an independent frame
# rather than a slice of icd_codes (avoids SettingWithCopyWarning).
# na=False treats a missing name as "not a match" instead of breaking the mask.
glaucoma_codes = icd_codes[icd_codes['Name'].str.startswith('H40', na=False)].copy()

# Names starting with 'H40.0' default to 'GS'; a recognised stage character
# at index 7 overrides that default in the loop below.
glaucoma_codes.loc[glaucoma_codes['Name'].str.startswith('H40.0'), 'Label'] = 'GS'
stage_char = glaucoma_codes['Name'].str[7]
for stage, label in stage_labels.items():
    glaucoma_codes.loc[stage_char == stage, 'Label'] = label

# Rows with neither the 'H40.0' prefix nor a recognised stage character stay unlabelled; drop them.
glaucoma_codes = glaucoma_codes.dropna(subset=['Label'])

## Attach label to clinical records

In [None]:
# Pair every (clinical record, ICD code) link with the label of that code.
# A left join keeps links whose code has no glaucoma label; their Label is missing.
record_code_links = icd_mapping[['Clinical_Records', 'ICD10_Eye']]
mapping = record_code_links.merge(
    glaucoma_codes,
    how='left',
    left_on='ICD10_Eye',
    right_on='Name',
)
# Define severity order (a higher rank wins when a record has several labels)
severity_order = {
    'Severe': 4,
    'Moderate': 3,
    'Mild': 2,
    'Unspecified/Indeterminate': 1,
    'GS': 0
}


def get_most_severe(labels: pd.Series):
    """Return the most severe label in ``labels``.

    Args:
        labels: Series of severity labels; missing entries are ignored.

    Returns:
        The label with the highest rank in ``severity_order``, or ``pd.NA``
        when ``labels`` is empty or holds only missing values. Labels absent
        from ``severity_order`` rank below every known label (-1). On ties the
        first label in series order is returned.
    """
    valid_labels = labels.dropna()
    # One emptiness check covers both the empty and the all-missing series.
    if valid_labels.empty:
        return pd.NA
    return max(valid_labels, key=lambda label: severity_order.get(label, -1))

# Reduce each clinical record to its most severe label, then keep only the
# records that ended up with a label.
result = (
    mapping
    .groupby('Clinical_Records')['Label']
    .agg(get_most_severe)
    .reset_index()
    .dropna(subset=['Label'])
)
result

# Feature ingestion

In [None]:
# Display what find_features returns for the Clinical_Records table
# (presumably the features defined on it — verify against the deriva_ml API).
EA.find_features('Clinical_Records')

In [50]:
from IPython.display import Markdown, display

feature_name = 'Glaucoma_Severity'

# Record class used to build one feature value per clinical record.
Feature = EA.feature_record_class('Clinical_Records', feature_name)

# Show each feature column together with whether a value is required (not nullable).
column_summaries = [
    f'Name: {column.name}, Required: {not column.nullok}'
    for column in Feature.feature.feature_columns
]
display(Markdown('### Feature Name'), column_summaries)

In [76]:
# Terms of the Severity_Label vocabulary, as (term name, description) pairs.
severity_terms = [
    ("Moderate", "Moderate stage"),
    ("Severe", "Severe stage"),
    ("Mild", "Mild stage"),
    ("Unspecified/Indeterminate", "Indeterminate stage or stage unspecified"),
    ("GS", "Glaucoma Suspect"),
]
for term, term_description in severity_terms:
    EA.add_term("Severity_Label", term_name=term, description=term_description)

# One feature record per labelled clinical record, tied to the current execution.
severe_feature_list = []
for _, record in result.iterrows():
    severe_feature_list.append(
        Feature(
            Execution=execution.execution_rid,
            Clinical_Records=record['Clinical_Records'],
            Severity_Label=record['Label'],
            Feature_Name=feature_name,
        )
    )

In [89]:
# Hand the list of feature records to the execution.
execution.add_features(severe_feature_list)

In [None]:
# Upload the execution's outputs (presumably including the feature values added
# to it — confirm against the deriva_ml documentation).
execution.upload_execution_outputs()