In [1]:
from pathlib import Path
import sys

# Define the base repository directory
repo_dir = Path.home() / "Desktop" / "eye_ai" / "Github"  # Update to your GitHub repo location

# Update the load path so Python can find modules for the model
sys.path.insert(0, str(repo_dir / "deriva-ml"))
sys.path.insert(0, str(repo_dir / "eye-ai-ml"))

# Reload extensions if needed
%load_ext autoreload
%autoreload 2


In [2]:
# Prerequisites
import json
import os
from eye_ai.eye_ai import EyeAI
from deriva_ml.dataset_bag import DatasetBag
import pandas as pd
from pathlib import Path, PurePath
import logging

from deriva_ml.deriva_ml_base import MLVocab as vc
from deriva_ml.execution_configuration import ExecutionConfiguration, Workflow, Execution

# Configure the root logger: INFO level, timestamped messages. force=True makes
# this call take effect even when logging has already been configured in the kernel.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    force=True,
)

In [3]:
from deriva.core.utils.globus_auth_utils import GlobusNativeLogin

# Server and catalog used by the rest of the notebook.
# Alternatives kept for reference:
#   host = 'dev.eye-ai.org' with catalog_id = "428"
#   catalog_id = "eye-ai"
host = 'www.eye-ai.org'
catalog_id = "21"

# Authenticate against the host only when no valid login is present yet.
gnl = GlobusNativeLogin(host=host)
if not gnl.is_logged_in([host]):
    gnl.login(
        [host],
        no_local_server=True,
        no_browser=True,
        refresh_tokens=True,
        update_bdbag_keychain=True,
    )
    print("Login Successful")
else:
    print("You are already logged in.")

2024-11-20 17:44:00,063 - INFO - Creating client of type <class 'globus_sdk.services.auth.client.native_client.NativeAppAuthClient'> for service "auth"
2024-11-20 17:44:00,065 - INFO - Finished initializing AuthLoginClient. client_id='8ef15ba9-2b4a-469c-a163-7fd910c9d111', type(authorizer)=<class 'globus_sdk.authorizers.base.NullAuthorizer'>


You are already logged in.


In [4]:
# Notebook-wide settings: the cache and the working area both live in the same folder.
cache_dir = Path.home().joinpath('Desktop/test_cache')
working_dir = Path.home().joinpath('Desktop/test_cache')

# Catalog client used by every later cell.
EA = EyeAI(
    hostname=host,
    catalog_id=catalog_id,
    cache_dir=cache_dir,
    working_dir=working_dir,
)

2024-11-20 17:44:00,100 - INFO - Creating client of type <class 'globus_sdk.services.auth.client.native_client.NativeAppAuthClient'> for service "auth"
2024-11-20 17:44:00,101 - INFO - Finished initializing AuthLoginClient. client_id='8ef15ba9-2b4a-469c-a163-7fd910c9d111', type(authorizer)=<class 'globus_sdk.authorizers.base.NullAuthorizer'>
2024-11-20 17:44:00,619 - INFO - Loading dirty model.  Consider commiting and tagging: 1.1.0.post75+git.b118dc1b.dirty


# 1. Configuration

In [5]:
# Register the workflow-type vocabulary term that the workflow below refers to.
EA.add_term(
    vc.workflow_type,
    "Test Workflow",
    description="A test Workflow for new DM",
)

# Workflow description: name, the notebook it points at, and its type term.
test_workflow = Workflow(
    name="Test New Workflow",
    url="https://github.com/informatics-isi-edu/eye-ai-exec/blob/main/notebooks/schema_changes/test_eye-ai_changes.ipynb",
    workflow_type="Test Workflow",
)

# Inputs of the run: one dataset bag, one model asset, the execution and the workflow.
config = ExecutionConfiguration(
    bdbags=['2-7K8W'],  # minid:1JiKxaIcjAIaO
    models=['2-4JR6'],
    execution=Execution(description="Sample Test Execution"),
    workflow=test_workflow,
    description="Our Test Workflow instance",
)

# Initialize the execution and keep the RID reported on the returned record.
configuration_record = EA.initialize_execution(config)
execution_rid = configuration_record.execution_rid


2024-11-20 17:44:01,137 - INFO - Configuration validation successful!
2024-11-20 17:44:03,158 - INFO - Initializing downloader: GenericDownloader v1.7.4 [Python 3.10.15, macOS-10.16-x86_64-i386-64bit]
2024-11-20 17:44:03,161 - INFO - Creating client of type <class 'globus_sdk.services.auth.client.native_client.NativeAppAuthClient'> for service "auth"
2024-11-20 17:44:03,162 - INFO - Finished initializing AuthLoginClient. client_id='8ef15ba9-2b4a-469c-a163-7fd910c9d111', type(authorizer)=<class 'globus_sdk.authorizers.base.NullAuthorizer'>
2024-11-20 17:44:03,165 - INFO - Validating credentials for host: www.eye-ai.org
2024-11-20 17:44:03,359 - INFO - Creating bag directory: /var/folders/f8/85_209cx72l8mnt3km_8dx540000gn/T/tmpfsaa52p4/Dataset_2-7K8W
2024-11-20 17:44:03,362 - INFO - Creating bag for directory /var/folders/f8/85_209cx72l8mnt3km_8dx540000gn/T/tmpfsaa52p4/Dataset_2-7K8W
2024-11-20 17:44:03,363 - INFO - Creating data directory
2024-11-20 17:44:03,364 - INFO - Moving /private

In [7]:
# Print the whole configuration record, then display the path of its first dataset bag.
print(configuration_record)
configuration_record.bag_paths[0]

caching_dir: /Users/zhiweili/Desktop/test_cache
working_dir: /Users/zhiweili/Desktop/test_cache/zhiweili/EyeAI_working
execution_rid: 4-3QTR
workflow_rid: 4-3QNE
bag_paths: [PosixPath('/Users/zhiweili/Desktop/test_cache/2-7K8W_5a248389652eb78e4a3ee6d8e67a90d86c38416ae234005006d1290ba61e0e1e/Dataset_2-7K8W')]
asset_paths: [PosixPath('/Users/zhiweili/Desktop/test_cache/zhiweili/EyeAI_working/4-3QTR/models/optic_disk_crop_model.hdf5')]
configuration: bdbags=['2-7K8W'] models=['2-4JR6'] workflow=Workflow(name='Test New Workflow', url='https://github.com/informatics-isi-edu/eye-ai-exec/blob/main/notebooks/schema_changes/test_eye-ai_changes.ipynb', workflow_type='Test Workflow', version=None, description=None) execution=Execution(description='Sample Test Execution') description='Our Test Workflow instance'


PosixPath('/Users/zhiweili/Desktop/test_cache/2-7K8W_5a248389652eb78e4a3ee6d8e67a90d86c38416ae234005006d1290ba61e0e1e/Dataset_2-7K8W')

In [8]:
# Wrap the first bag directory in a DatasetBag so its tables can be read as DataFrames.
ds_bag = DatasetBag(configuration_record.bag_paths[0])

In [10]:
# Load the bag's Diagnosis table and list the distinct diagnosis tags it contains.
diag = ds_bag.get_table_as_dataframe('Diagnosis')
diag['Diagnosis_Tag'].unique() # NOTE: the bag has no records tagged 'AI_glaucomasuspect_test' (see the output)

array(['CNN_Prediction', 'Initial Diagnosis'], dtype=object)

# Test the image extraction and reshaping method

In [11]:
# Call EA.image_tall on the bag once per diagnosis tag
# (presumably it returns the diagnoses for that tag in tall form — confirm against EyeAI).
# NOTE(review): the Diagnosis table in this bag has no 'AI_glaucomasuspect_test' records,
# so grader_diag is not expected to hold grader data — confirm.
init_diag = EA.image_tall(ds_bag, 'Initial Diagnosis')
grader_diag = EA.image_tall(ds_bag, 'AI_glaucomasuspect_test')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop(index, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop(index, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop(index, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop(index, inplace=True)
A value is trying to be set on a copy of a slice fro

In [12]:
# Check which diagnosis tags ended up in grader_diag.
# NOTE(review): the recorded output lists 'CNN_Prediction' and 'Initial Diagnosis' rather than
# 'AI_glaucomasuspect_test', so image_tall does not appear to restrict rows to the requested tag — confirm.
grader_diag['Diagnosis_Tag'].unique()

array(['CNN_Prediction', 'Initial Diagnosis'], dtype=object)

In [ ]:
# Run a trivial step inside the execution context and write a sample file under
# the execution assets directory.
# Uses EA, the only catalog client created in this notebook; the previous name
# `ml_instance` was never defined and raised NameError on a fresh kernel.
# The context variable is no longer called `exec`, which shadowed the builtin.
with EA.execution(configuration=configuration_record) as test_execution:
    output_dir = EA.execution_assets_path / "Feature1"
    output_dir.mkdir(parents=True, exist_ok=True)
    with open(output_dir / "test.txt", "w+") as f:
        f.write("Hello there\n")

In [None]:
# Upload the results of this execution (per the method name).
# Fixed the undefined name `configuration_records`; the record is `configuration_record`.
# NOTE(review): the meaning of the second positional argument is not visible in this notebook.
uploaded_assets = EA.execution_upload(configuration_record.execution_rid, True)

In [None]:
# Display the RID of the execution created above.
# Fixed the undefined name `configuration_records`; the record is `configuration_record`.
configuration_record.execution_rid