In [None]:
repo_dir = "Repos"   # Folder under your home directory that holds your GitHub checkouts; edit to match your machine.
# IPython magics: load the autoreload extension and select mode 2.
%load_ext autoreload
%autoreload 2

# Put ~/<repo_dir>/eye-ai-ml at the front of sys.path so later imports look there first
# (presumably this checkout provides the eye_ai package imported below -- confirm).
import sys
from pathlib import Path
sys.path.insert(0, str(Path.home() / repo_dir / "eye-ai-ml"))

In [None]:
# Prerequisites

import json
import os
from eye_ai.eye_ai import EyeAI
import pandas as pd
from pathlib import Path, PurePath
import logging
# import torch

# Root logger: INFO and above, rendered as "<time> - <level> - <message>".
# force=True makes the call take effect even if logging was configured earlier
# (e.g. when the cell is re-run).
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
    force=True,
)

In [None]:
from deriva.core.utils.globus_auth_utils import GlobusNativeLogin

# Catalog and server used by the rest of the notebook.
catalog_id = "eye-ai" #@param
host = "www.eye-ai.org"
# host = 'dev.eye-ai.org'

# Only start the login flow when there is no valid session for this host yet.
gnl = GlobusNativeLogin(host=host)
if not gnl.is_logged_in([host]):
    gnl.login(
        [host],
        no_local_server=True,
        no_browser=True,
        refresh_tokens=True,
        update_bdbag_keychain=True,
    )
    print("Login Successful")
else:
    print("You are already logged in.")

In [None]:
# Settings consumed by the cells below.

configuration_rid = "2-C62W"  # rid dev:2-BHAA prod:2-C62W

cache_dir = "/data"    # Directory in which to cache materialized BDBags for datasets
working_dir = "/data"  # Directory in which to place output files for later upload.


In [None]:
# Create the EyeAI client from the host, catalog and directories set in the earlier cells.
EA = EyeAI(
    hostname=host,
    catalog_id=catalog_id,
    cache_dir=cache_dir,
    working_dir=working_dir,
)

In [None]:
# @title Initiate an Execution
# Initialise an execution for the configured RID. Later cells read bag_paths,
# working_dir and execution_rid from the returned object.
configuration_records = EA.execution_init(configuration_rid=configuration_rid)
# Last expression of the cell, so its value is displayed
# (presumably a plain-dict dump of the record -- confirm against the eye_ai API).
configuration_records.model_dump()

In [None]:
# Extract the four modalities from the first bag of the multimodal dataset
modality_df = EA.extract_modality(configuration_records.bag_paths[0])
modality_df # returns the paths to four CSV files, one per modality (Clinical data, HVF reports, RNFL reports, fundus image)

In [None]:
# Extract and join the four modalities from the first bag of the multimodal dataset.
# Later cells treat `wide` as a DataFrame (groupby / apply / to_csv).
wide =  EA.multimodal_wide(configuration_records.bag_paths[0])

In [None]:
# Display the joined multimodal table as the cell output.
wide

In [None]:
def compare_sides_severity(group, value_col, new_col, smaller=True):
    """Label which eye of one subject is more severe according to one measurement.

    Intended for ``DataFrame.groupby(...).apply``: ``group`` holds the rows of a
    single subject. A label is produced only when the group has exactly two
    rows, one with ``Side == 'Left'`` and one with ``Side == 'Right'``, and both
    measurements are present.

    Args:
        group: DataFrame with a ``'Side'`` column and the ``value_col`` column.
        value_col: Name of the measurement column to compare between the eyes.
        new_col: Name of the column that receives the label.
        smaller: If truthy, the smaller value is the more severe one;
            otherwise the larger value is.

    Returns:
        A copy of ``group`` in which ``new_col`` is ``'Left'``, ``'Right'`` or
        ``'Left/Right'`` (equal values) on both rows, or ``group`` itself,
        untouched, when no comparison is possible.
    """
    # Need exactly one row per eye to compare.
    if len(group) != 2:
        return group
    left = group.loc[group['Side'] == 'Left', value_col]
    right = group.loc[group['Side'] == 'Right', value_col]
    if left.empty or right.empty:
        return group

    left_value = left.iloc[0]
    right_value = right.iloc[0]
    # Comparisons against NaN are always False, which would otherwise fall
    # through to the 'Right' branch; leave such pairs unlabelled instead.
    if pd.isna(left_value) or pd.isna(right_value):
        return group

    if left_value == right_value:
        label = 'Left/Right'
    elif (left_value < right_value) == bool(smaller):
        # Left holds the more severe value for the chosen direction.
        label = 'Left'
    else:
        label = 'Right'

    # groupby.apply does not support functions that mutate the frame they are
    # given, so the label is written to a copy. Both rows get the same label.
    labelled = group.copy()
    labelled[new_col] = label
    return labelled

In [None]:
import numpy as np

# One entry per modality: (label column, measurement column, smaller-is-more-severe).
# NOTE(review): every measurement uses smaller=True, as the original cell did.
# compare_sides_severity also supports smaller=False ("larger value means more
# severe") -- confirm that smaller=True is really intended for CDR.
severity_specs = [
    ('RNFL_severe', 'Average_RNFL_Thickness(μm)', True),
    ('HVF_severe', 'MD', True),
    ('CDR_severe', 'CDR', True),
]

for new_col, value_col, smaller in severity_specs:
    # Start from an all-missing column so subjects that cannot be compared stay NaN.
    wide[new_col] = np.nan
    # Label each subject's rows, then flatten the per-subject index added by apply.
    wide = (
        wide.groupby('RID_Subject')
        .apply(compare_sides_severity, value_col=value_col, new_col=new_col, smaller=smaller)
        .reset_index(drop=True)
    )

In [None]:
# Reset the CDR-based label column and recompute it per subject.
# NOTE(review): this repeats the CDR_severe step of the preceding cell with the same
# arguments -- confirm whether this cell is still needed.
col = 'CDR_severe'
wide[col] = np.nan
wide = wide.groupby('RID_Subject').apply(compare_sides_severity, value_col='CDR', new_col=col, smaller=True).reset_index(drop=True)

In [None]:
def check_severity(row):
    """Tell whether the three severity labels of one row disagree.

    Only labels that are present are compared. Missing labels (NaN/None) are
    ignored, because ``nan != nan`` is True and a row with no labels at all
    would otherwise be reported as a mismatch.

    Args:
        row: Mapping or Series with the keys ``'RNFL_severe'``,
            ``'HVF_severe'`` and ``'CDR_severe'``.

    Returns:
        True when at least two different labels are present, False otherwise
        (including when fewer than two labels are available).
    """
    present_labels = {
        row[label_col]
        for label_col in ('RNFL_severe', 'HVF_severe', 'CDR_severe')
        if pd.notna(row[label_col])
    }
    return len(present_labels) > 1

# Evaluate check_severity on every row (axis=1) and store the result in a new column.
wide['Severity_Mismatch'] = wide.apply(check_severity, axis=1)


In [None]:
# Columns kept for the severity comparison table: subject identifiers and
# demographics, each modality's encounter date and measurement, the three
# per-modality labels, and the mismatch flag.
severity_columns = [
    'RID_Subject', 'Subject_ID', 'Gender', 'Ethnicity', 'Side',
    'Date_of_Encounter_Fundus', 'CDR',
    'Date_of_Encounter_HVF', 'MD',
    'Date_of_Encounter_RNFL', 'Average_RNFL_Thickness(μm)',
    'RNFL_severe', 'HVF_severe', 'CDR_severe', 'Severity_Mismatch',
]
severity_compare = wide[severity_columns]

In [None]:
# Save results
# Subset of columns written to the severity comparison CSV.
severity_compare = wide[['RID_Subject', 'Subject_ID', 'Gender', 'Ethnicity', 'Side', 'Date_of_Encounter_Fundus', 'CDR', 'Date_of_Encounter_HVF', 'MD', 
                         'Date_of_Encounter_RNFL', 'Average_RNFL_Thickness(μm)', 'RNFL_severe', 'HVF_severe', 'CDR_severe', 'Severity_Mismatch']]

# Create the output folder together with any missing parents. exist_ok=True keeps
# the cell re-runnable (os.mkdir raised when the folder already existed or when
# 'Execution_Assets' was missing).
output_dir = configuration_records.working_dir/'Execution_Assets/Multimodal_Analysis/'
os.makedirs(output_dir, exist_ok=True)

# Write the full wide table and the comparison subset, without the index column.
wide_path = output_dir/'wide_multimodal_full.csv'
severe_path = output_dir/'severity_compare.csv'
wide.to_csv(wide_path, index=False)
severity_compare.to_csv(severe_path, index=False)

In [None]:
# Upload the outputs of this execution and keep whatever the call returns.
# NOTE(review): the meaning of the positional False is not visible here -- check
# the signature of EyeAI.execution_upload.
uploaded_assets = EA.execution_upload(configuration_records.execution_rid, False)
