In [None]:
# Prerequisites
import itertools
import json
import logging
import os
import random
from pathlib import Path, PurePath

import pandas as pd

from eye_ai.eye_ai import EyeAI
from deriva_ml.dataset_bag import DatasetBag
from deriva_ml import DatasetBag, Workflow, ExecutionConfiguration
from deriva_ml import MLVocab as vc
from deriva_ml.deriva_definitions import ColumnDefinition, BuiltinTypes
# Send INFO-and-above records to the root logger with a timestamped format.
# force=True replaces any handlers already attached to the root logger, so this
# configuration takes effect even when logging was configured earlier.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True,
)

In [None]:
# Login
from deriva.core.utils.globus_auth_utils import GlobusNativeLogin

# Server and catalog used by the rest of the notebook.
# Swap the two `host` lines to target the other server.
# host = 'www.eye-ai.org'
host = 'dev.eye-ai.org'
catalog_id = "eye-ai"

gnl = GlobusNativeLogin(host=host)
if not gnl.is_logged_in([host]):
    # Not logged in to this host yet: run the login flow without a local
    # server or browser, requesting refresh tokens and a bdbag keychain update.
    gnl.login(
        [host],
        no_local_server=True,
        no_browser=True,
        refresh_tokens=True,
        update_bdbag_keychain=True,
    )
    print("Login Successful")
else:
    print("You are already logged in.")

In [None]:
# Cache and working directories handed to EyeAI (both point at /data).
cache_dir = '/data'
working_dir = '/data'

# EyeAI instance bound to the host and catalog chosen in the login cell.
EA = EyeAI(
    hostname=host,
    catalog_id=catalog_id,
    cache_dir=cache_dir,
    working_dir=working_dir,
)

# Configuration

In [None]:
# Add the workflow-type term that the workflow below refers to.
EA.add_term(
    vc.workflow_type,
    "Test Workflow",
    description="A test Workflow for new DM",
)

workflow_instance = EA.create_workflow(name="Feature creation template", workflow_type="Test Workflow")

config = ExecutionConfiguration(
    # Optional inputs, disabled by default. Uncomment the lines below to attach
    # datasets / assets (DatasetSpec is not imported in the prerequisites cell
    # and has to be imported first).
    # datasets=[DatasetSpec(rid='2-277M', version=EA.dataset_version('2-277M'), materialize=False)],
    # assets=[],
    workflow=workflow_instance,
    description="Template instance of a feature creation workflow.",
)

# Execution object used by the later cells of this notebook.
execution = EA.create_execution(config)

In [None]:
# Show the execution object returned by EA.create_execution.
print(execution)

# Create Feature

In [None]:
# Template -- copy, uncomment and replace every placeholder before running.
# It is kept as a comment because the placeholder strings below are not real
# table / vocabulary / column definitions, so executing it unchanged is not
# expected to succeed and would stop the cell before the examples run.
#
# new_feature = EA.create_feature(
#     target_table="The table name that feature attached to",
#     feature_name="The Name of the feature",
#     terms=["Table name of the cv columns"],
#     metadata=["List of other value types that are associated with the feature"],
#     comment="Any comments",
# )

# E.g. Feature table with predicted probabilities of each category:
# one float4 column per category, named '1', '2' and '3'.
column_angle1 = ColumnDefinition(name='1', type=BuiltinTypes.float4)
column_angle2 = ColumnDefinition(name='2', type=BuiltinTypes.float4)
column_angle3 = ColumnDefinition(name='3', type=BuiltinTypes.float4)
feature_image_angle = EA.create_feature(
    target_table="Image",
    feature_name="Fundus_Angle",
    metadata=[column_angle1, column_angle2, column_angle3],
    comment="Angle of fundus images",
)

# E.g. Feature table with predicted cv terms:
# values are drawn from the Image_Side_Vocab vocabulary table.
feature_image_laterality = EA.create_feature(
    target_table="Image",
    feature_name="Fundus_Laterality",
    terms=["Image_Side_Vocab"],
    comment="Laterality of fundus images",
)

# Retrieve a feature

In [None]:
# Find all the features that are attached to a table.
# The bare call on the last line lets the notebook display its result.
table_name = "Image"
EA.find_features(table_name)

In [None]:
# Get the record class for the Fundus_Angle feature of `table_name`;
# later cells read its columns via Feature.feature.feature_columns.
feature_name = 'Fundus_Angle'
Feature = EA.feature_record_class(table_name, feature_name)

In [None]:
from IPython.display import Markdown, display

In [None]:
# Show every column of the feature table, with whether a value is required
# (a column is required when it does not accept nulls).
display(
    Markdown('### Feature Name'),
    [
        f'Name: {column.name}, Required: {not column.nullok}'
        for column in Feature.feature.feature_columns
    ],
)

In [None]:
# Materialise the feature columns and print the help text for the type of the first one.
cl = list(Feature.feature.feature_columns)
help(cl[0].type)


# Ingest new values into a feature table

## Feature with only values

In [None]:
# Format ingestion values.
#
# Template -- build one feature record per target row. Copy, uncomment and
# fill in the placeholders: `Feature` is the class returned by
# EA.feature_record_class(...), `col1` / `col2` stand for the feature's own
# columns (list them with Feature.feature.feature_columns) and
# `image_rid_list` is the list of Image RIDs to annotate.
#
# diag_feature_list = [Feature(
#     Execution=execution.execution_rid,
#     Image=image_rid,
#     col1=<value>,
#     col2=<value>,
#     Feature_Name=feature_name,
#     ) for image_rid in image_rid_list]

# E.g. Diagnosis
# Record class for the "Diagnosis" feature of the Image table.
# NOTE(review): assumes this feature already exists in the catalog -- confirm.
DiagnosisFeature = EA.feature_record_class("Image", "Diagnosis")

# Each record is tied to the execution created by EA.create_execution above.
# Diagnosis and image quality are picked at random, purely as demo values.
diag_feature_list = [
    DiagnosisFeature(
        Execution=execution.execution_rid,
        Image=image_rid,
        Diagnosis_Image=random.choice(["No Glaucoma", "Suspected Glaucoma", "Unknown"]),
        Image_Quality=random.choice(["Good", "Bad", "Unknown"]),
        Feature_Name="Diagnosis",
        Cup_Disk_Ratio=0.9000,
        Diagnosis_Tag="test_feature",
    )
    for image_rid in ['2-BDAM', '2-BDAP', '2-BDAR']
]

# Ingest new values to the feature table
EA.add_features(diag_feature_list)

## Feature with assets

In [None]:
# Record class for the BoundingBox feature of the Image table (the same
# table / feature pair that is passed to feature_paths below).
ImageBoundingboxFeature = EA.feature_record_class("Image", "BoundingBox")

# get the dir for saving feature assets and feature file.
bb_csv_path, bb_asset_paths = execution.feature_paths('Image', 'BoundingBox')

# Paths of the feature asset files inside the feature asset dir.
# NOTE: only the paths are built here -- write the real asset content to
# these files before the upload at the end of this cell.
bounding_box_files = [bb_asset_paths['BoundingBox'] / f"box{i}.txt" for i in range(10)]

# Images that receive a bounding box. Example RIDs -- replace with your own.
image_rids = ['2-BDAM', '2-BDAP', '2-BDAR']

# One record per image; itertools.cycle repeats the asset files if there are
# more images than files.
image_bounding_box_feature_list = [
    ImageBoundingboxFeature(
        Image=image_rid,
        Execution=execution.execution_rid,
        BoundingBox=asset_path,
        Annotation_Function='Optic Nerve',
        # ....
        # Find all the columns for a feature from Feature.feature.feature_columns
        # (see section "Retrieve a feature").
    )
    for image_rid, asset_path in zip(image_rids, itertools.cycle(bounding_box_files))
]

# Write the feature records for this execution, then upload its outputs.
execution.write_feature_file(image_bounding_box_feature_list)
execution.upload_execution_outputs()