In [None]:
# repo_dir = "Repos"   # Set this to be where your github repos are located.
# %load_ext autoreload
# %autoreload 2

# # Update the load path so python can find modules for the model
# import sys
# from pathlib import Path
# sys.path.insert(0, str(Path.home() / repo_dir / "eye-ai-ml"))
# sys.path.insert(0, str(Path.home() / repo_dir / "deriva-ml"))

In [None]:
# Prerequisites
import json
import os
from eye_ai.eye_ai import EyeAI

import pandas as pd
from pathlib import Path, PurePath
import logging

from deriva_ml import DatasetSpec, DatasetBag, Workflow, ExecutionConfiguration, VersionPart
from deriva_ml import MLVocab as vc
# Configure the root logger: INFO and above, timestamp - level - message.
# force=True removes handlers already attached to the root logger, so
# re-running this cell re-applies the configuration.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    force=True,
)

In [None]:
# Login
# Make sure there is a Globus login session for `host` before talking to the catalog.
from deriva.core.utils.globus_auth_utils import GlobusNativeLogin

# host = 'www.eye-ai.org'   # alternative host, left disabled
host = 'dev.eye-ai.org'
catalog_id = "eye-ai"

gnl = GlobusNativeLogin(host=host)
if not gnl.is_logged_in([host]):
    # No session for this host yet: run the login flow with the local server
    # and browser both disabled, as requested by the flags below.
    gnl.login(
        [host],
        no_local_server=True,
        no_browser=True,
        refresh_tokens=True,
        update_bdbag_keychain=True,
    )
    print("Login Successful")
else:
    print("You are already logged in.")

In [None]:
# The cache directory and the working directory both point at /data.
cache_dir = '/data'
working_dir = '/data'

# Catalog client used by every cell below.
EA = EyeAI(
    hostname=host,
    catalog_id=catalog_id,
    cache_dir=cache_dir,
    working_dir=working_dir,
)

# 1. Configuration

In [None]:
# RID of source dataset, if any.
# Alternative RIDs left disabled by the author: '4-SB3E', '4-SS8W', '5-STDM', '2-7KA2'
source_dataset = '5-XW4J'

# Add the "Test Workflow" term under the workflow-type vocabulary, then create
# a workflow instance of that type.
EA.add_term(
    vc.workflow_type,
    "Test Workflow",
    description="A test Workflow for new DM",
)
workflow_instance = EA.create_workflow(
    name="LAC data template",
    workflow_type="Test Workflow",
)

# Dataset to fetch, using the version reported by EA.dataset_version().
# Materialize set to False if you only need the metadata from the bag, and not the assets.
source_spec = DatasetSpec(
    rid=source_dataset,
    version=EA.dataset_version(source_dataset),
    materialize=True,
)

# Configuration instance.
config = ExecutionConfiguration(
    datasets=[source_spec],
    assets=['2-4JR6'],
    workflow=workflow_instance,
    description="Template instance of a feature creation workflow",
)

# Initialize execution
execution = EA.create_execution(config)


In [None]:
# Print the execution object returned by EA.create_execution(config).
print(execution)

# 2. Get access to the data

In [None]:
# Find all the dataset bags that were downloaded. (In this example there is only one dataset.)
print(execution.datasets)

# Retrieve the first dataset bag and display it as the cell output.
ds_bag = execution.datasets[0]
ds_bag

## 2.1 Check all the tables in the dataset bags

In [None]:
# Show the tables contained in the dataset bag.
ds_bag.list_tables()

## 2.2 Load a table into a DataFrame
Use the method `ds_bag.get_table_as_dataframe(<table_name>)` to load one table from the bag.\
Examples:

In [None]:
# Load the Subject, Observation and Image tables from the bag, in that order,
# each through ds_bag.get_table_as_dataframe().
subject_table, observation_table, image_table = (
    ds_bag.get_table_as_dataframe(table_name)
    for table_name in ('Subject', 'Observation', 'Image')
)

In [None]:
# Display the frame loaded from the 'Subject' table.
subject_table

## 2.3 Handling nested datasets
This section assumes `source_dataset` in Section 1 is set to **2-7KA2**, so that this dataset has already been downloaded (the configuration cell currently uses `5-XW4J`; adjust one or the other). Dataset **2-7KA2** contains **15** subjects; we call it the full set.\
The dataset **2-7KA2** has a **subset, 2-7K8W**, which contains **10** subjects from the full set.\
The following cells show how to access the data in a nested dataset.


In [None]:
# List the members of the nested dataset 2-7K8W.
# Only the 10 subjects from the subset will be listed here.
subset_2_7K8W_members = EA.list_dataset_members('2-7K8W')
subset_2_7K8W_members

In [None]:
# Create a dataframe of all the subjects in the subset.
# (A bare `subject_2_7K8W` line used to follow; it was removed because only the
# last expression of a cell is rendered, so it displayed nothing.)
subject_2_7K8W = pd.DataFrame(subset_2_7K8W_members['Subject'])

# Use joins to keep only the Observation and Image rows that belong to the subset.
# Subject and Observation both carry a 'RID' column, so pandas' default suffixes
# rename them to 'RID_x' (Subject side) and 'RID_y' (Observation side).
observation_2_7K8W = pd.merge(subject_2_7K8W, observation_table, left_on='RID', right_on='Subject')

# 'RID_y' is the Observation RID, matched against the Image table's 'Observation' column.
image_2_7K8W = pd.merge(observation_2_7K8W, image_table, left_on='RID_y', right_on='Observation')
image_2_7K8W

# 3. Get access to the downloaded assets

In [None]:
# Print the paths of the assets attached to this execution
# (the configuration requested assets=['2-4JR6']).
print(execution.asset_paths)

# 4. Methods for LAC data

### Filter on angle 2 image

In [None]:
# Apply EA.filter_angle_2 to the dataset bag and keep the result.
# NOTE(review): presumably this keeps only the angle-2 images — confirm against EyeAI.filter_angle_2.
angle2_image = EA.filter_angle_2(ds_bag)

### Diagnosis manipulation

In [None]:
# Build one table per label passed to EA.image_tall
# ('Initial Diagnosis' and 'AI_glaucomasuspect_test').
# NOTE(review): the labels look like diagnosis tags — confirm against EyeAI.image_tall.
init_diag = EA.image_tall(ds_bag, 'Initial Diagnosis')
grader_diag = EA.image_tall(ds_bag, 'AI_glaucomasuspect_test')
# reshape_table returns two results, unpacked here as `long` and `wide`;
# presumably long- and wide-format views of 'Diagnosis_Image' — TODO confirm.
long, wide = EA.reshape_table([init_diag, grader_diag], 'Diagnosis_Image')

In [None]:
# Display the result of EA.image_tall(ds_bag, 'Initial Diagnosis').
init_diag

### Crop Image by bounding box


In [None]:
# Show the first entry of execution.dataset_paths (passed to create_cropped_images below).
execution.dataset_paths[0]

In [None]:
# Crop the images using the execution's working directory as the output directory.
# NOTE(review): crop_to_eye=True presumably crops to the eye bounding box — confirm.
output_dir = execution.working_dir
bag_path = execution.dataset_paths[0]
image_path, cropped_csv = EA.create_cropped_images(
    bag_path,
    ds_bag,
    output_dir,
    crop_to_eye=True,
)

# Execution of ML

In [None]:
# Run the ML step inside the context manager returned by execution.execute().
# The target is named `exe` rather than `exec` so the builtin exec() is not
# rebound in the notebook's global namespace for every later cell.
with execution.execute() as exe:
    print("An ML Execution.")

# Upload results

In [None]:
# Create the asset path for this execution's "Training_Log" outputs.
asset_path = execution.execution_asset_path("Training_Log")

# Placeholder output so this cell runs on a fresh kernel (the original referenced
# an undefined `df`). Replace `training_log` with the DataFrame your workflow produces.
training_log = pd.DataFrame({"message": ["An ML Execution."]})

# Save assets under asset_path. Path(...) / name inserts the path separator and
# works whether execution_asset_path() returns a str or a Path — TODO confirm its type.
training_log.to_csv(Path(asset_path) / 'filename.csv', index=False)

# The upload to the catalog is done once, by the
# execution.upload_execution_outputs(clean_folder=True) cell that follows.

In [None]:
# Upload the execution's saved outputs to the catalog.
# NOTE(review): clean_folder=True presumably clears the local output folder afterwards — confirm.
execution.upload_execution_outputs(clean_folder=True)