In [None]:
repo_dir = "Repos"   # Set this to be where your github repos are located.
%load_ext autoreload
%autoreload 2

# Update the load path so python can find modules for the model
import sys
from pathlib import Path
sys.path.insert(0, str(Path.home() / repo_dir / "deriva-ml"))
sys.path.insert(0, str(Path.home() / repo_dir / "eye-ai-ml"))

In [None]:
# Prerequisites

import json
import os
from eye_ai.eye_ai import EyeAI
import pandas as pd
from pathlib import Path, PurePath
import logging
from deriva.chisel import Model, Schema, Table, Column, Key, ForeignKey, builtin_types
from deriva_ml.deriva_ml_base import DerivaML, DerivaMLException, FileUploadState, UploadState, ColumnDefinition, BuiltinTypes

# import torch

# Emit INFO-and-above records with a timestamped format. force=True removes any
# handlers already attached to the root logger, so this configuration still
# applies when logging was set up earlier or the cell is re-run.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    force=True,
)

In [None]:
from deriva.core.utils.globus_auth_utils import GlobusNativeLogin

# Alternative target, left disabled:
# host = 'dev.eye-ai.org'
# catalog_id = "428"

# Server and catalog used by the rest of the notebook.
host = 'www.eye-ai.org'
catalog_id = "21"

# Only start a new login when there is no existing session for this host.
gnl = GlobusNativeLogin(host=host)
if not gnl.is_logged_in([host]):
    gnl.login([host], no_local_server=True, no_browser=True, refresh_tokens=True, update_bdbag_keychain=True)
    print("Login Successful")
else:
    print("You are already logged in.")

In [None]:
# Directory in which to cache materialized BDBags for datasets.
cache_dir = '/data'
# Directory in which to place output files for later upload.
working_dir = '/data'

# Client object the remaining cells use to reach the catalog selected above.
EA = EyeAI(
    hostname=host,
    catalog_id=catalog_id,
    cache_dir=cache_dir,
    working_dir=working_dir,
)

In [None]:
def insert(table, mapping_list, num_up):
    """Insert records into ``table`` in consecutive batches.

    Args:
        table: Object exposing ``insert(records)``; it is called once per batch.
        mapping_list: Sequence of records supporting ``len()`` and slicing.
        num_up: Maximum number of records sent per ``table.insert`` call.
            Must be a positive integer.

    Raises:
        ValueError: If ``num_up`` is zero or negative.
    """
    # Fail early with a clear message instead of a ZeroDivisionError or an
    # unbound loop variable further down.
    if num_up <= 0:
        raise ValueError(f"num_up must be a positive integer, got {num_up!r}")

    n = len(mapping_list)
    # Stepping by num_up covers the full batches and the final, shorter batch
    # with the same code path; an empty list performs no inserts.
    for start in range(0, n, num_up):
        end = min(start + num_up, n)
        table.insert(mapping_list[start:end])
        logging.info(f"Updated indices: {start} to {end}")

In [None]:
# Create the "Annotation" feature on the Image table, with two term columns
# and the Fundus_Bounding_Box asset.
feature_image_annot = EA.create_feature(
    feature_name="Annotation",
    table="Image",
    terms=["Annotation_Function", "Annotation_Type"],
    assets=["Fundus_Bounding_Box"],
    comment="Bounding box SVG on fundus images",
)

# table = EA.model.schemas['eye-ai'].tables['Execution_Image_Annotation']
# execution_column = table.column_definitions['Execution']
# execution_column.alter(nullok=True)

In [None]:
# Execution assets whose type is 'Image_Annotation', minus the RCT/RMT/RCB/RMB columns.
exec_asset = EA.ml_schema_instance.tables['Execution_Assets']
bounding_box = pd.DataFrame(
    exec_asset.path.filter(exec_asset.Execution_Asset_Type == 'Image_Annotation').entities()
).drop(columns=['RCT', 'RMT', 'RCB', 'RMB'])

# Asset-to-execution association, reduced to the two linking columns.
exec_asset_exec = EA.ml_schema_instance.tables['Execution_Assets_Execution']
exec_asset_exec_data = pd.DataFrame(exec_asset_exec.path.entities())[['Execution_Assets', 'Execution']]

# Current Image_Annotation rows, minus RID and the RCT/RMT/RCB/RMB columns.
image_annot = EA.domain_schema_instance.tables['Image_Annotation']
image_annot_data = pd.DataFrame(image_annot.path.entities()).drop(
    columns=['RID', 'RCT', 'RMT', 'RCB', 'RMB']
)

# Left-join asset -> execution -> annotation, then discard the join keys.
feature_complete = (
    bounding_box
    .merge(exec_asset_exec_data, how='left', left_on='RID', right_on='Execution_Assets')
    .merge(image_annot_data, how='left', on='Execution_Assets')
    .drop(columns=['RID', 'Execution_Assets'])
)
feature_complete


In [None]:
# Columns carried over for each bounding-box file.
bounding_box = feature_complete[['URL', 'Filename', 'Description', 'Length', 'MD5']]

# One dict per row; keys whose value is missing (None/NaN) are left out.
ingest_list = [
    {column: value for column, value in row.items() if pd.notna(value)}
    for row in bounding_box.to_dict(orient='records')
]

bb_table = EA.domain_schema_instance.tables['Fundus_Bounding_Box']
# insert(bb_table, ingest_list, 5000)

In [None]:
# RID/Filename pairs currently stored in Fundus_Bounding_Box.
bbx = pd.DataFrame(
    EA.domain_schema_instance.tables['Fundus_Bounding_Box'].path.entities()
)[['RID', 'Filename']]

# Swap each row's Filename for the RID of the matching Fundus_Bounding_Box
# record and tag the row with the feature name.
annot_feature = (
    feature_complete[['Execution', 'Image', 'Annotation_Function', 'Annotation_Type', 'Filename']]
    .merge(bbx, how='left', on='Filename')
    .drop(columns=['Filename'])
    .assign(Feature_Name='Annotation')
    .rename(columns={'RID': 'Fundus_Bounding_Box'})
)


In [None]:
# One dict per feature row; keys whose value is missing (None/NaN) are left out.
ingest_list = [
    {column: value for column, value in row.items() if pd.notna(value)}
    for row in annot_feature.to_dict(orient='records')
]

# Send the rows to Execution_Image_Annotation, at most 5000 per insert call.
exec_image_annnot = EA.domain_schema_instance.tables['Execution_Image_Annotation']
insert(exec_image_annnot, ingest_list, 5000)


In [None]:
# Schema change on the catalog EA is connected to; the two statements must run in this order.
# Step 1: drop the existing 'Image_Annotation' table from the 'eye-ai' schema.
EA.model.schemas['eye-ai'].tables['Image_Annotation'].drop()
# Step 2: alter 'Execution_Image_Annotation' so that its table name becomes 'Image_Annotation'.
# NOTE(review): step 1 targets the name that step 2 assigns, so re-running this cell
# presumably drops the renamed table or fails — confirm before executing it again.
EA.model.schemas['eye-ai'].tables['Execution_Image_Annotation'].alter(table_name='Image_Annotation')

In [None]:
# Drop the first entry of the foreign_keys collection on 'Dataset' in the 'deriva-ml' schema.
# NOTE(review): index 0 selects by position, not by constraint name — verify which
# foreign key is first before running; a re-run would presumably target a different key.
EA.model.schemas['deriva-ml'].tables['Dataset'].foreign_keys[0].drop()

In [None]:

# Inspection only: display the foreign keys on 'Dataset_Dataset_Type' in the 'deriva-ml' schema.
EA.model.schemas['deriva-ml'].tables['Dataset_Dataset_Type'].foreign_keys


In [None]:
# Inspection only: display the foreign keys on 'Dataset_Type' in the 'deriva-ml' schema.
EA.model.schemas['deriva-ml'].tables['Dataset_Type'].foreign_keys