DerivaML is a class library built on the Deriva scientific asset management system. It is designed to simplify many of the basic operations involved in building and testing ML libraries based on common toolkits such as TensorFlow. This notebook reviews the basic features of the DerivaML library.

In [None]:
# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell executes, so edits to library code take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
from deriva.core.utils.globus_auth_utils import GlobusNativeLogin
from deriva_ml.schema_setup.test_catalog import create_test_catalog, DemoML

Set the hostname and domain schema for the catalog we want, and authenticate to the server if we are not already logged in.

In [None]:
# Host and domain schema name used by the cells below.
hostname = 'dev.eye-ai.org'
domain_schema = 'demo-schema'

# Start a new Globus login only when is_logged_in() reports that there is no
# existing login for this host.
gnl = GlobusNativeLogin(host=hostname)
if not gnl.is_logged_in([hostname]):
    gnl.login(
        [hostname],
        no_local_server=True,
        no_browser=True,
        refresh_tokens=True,
        update_bdbag_keychain=True,
    )
    print("Login Successful")
else:
    print("You are already logged in.")


Create a test catalog and get an instance of the DemoML class.

In [None]:
# Create the test catalog on the configured host for the chosen domain schema.
test_catalog = create_test_catalog(hostname, domain_schema)
# Build the DemoML instance for that catalog, identified by its catalog_id;
# the remaining cells work through this object.
ml_instance = DemoML(hostname, test_catalog.catalog_id)

In [None]:
# Show, as the cell output, what chaise_url() returns for the "Subject" table
# (presumably a link for browsing that table in the Chaise web UI — confirm).
ml_instance.chaise_url("Subject")

In [None]:
# Print the dataset element type names, register the Subject and Image tables
# as element types, then print the names again to show the change.
print([element_type.name for element_type in ml_instance.list_dataset_element_types()])
for table_name in ("Subject", "Image"):
    ml_instance.add_dataset_element_type(table_name)
print([element_type.name for element_type in ml_instance.list_dataset_element_types()])

In [None]:
# Add the Dataset_Type terms that the datasets below are created with.
for term_name, term_description in [
    ("TestSet", "A test dataset"),
    ("Training", "Training dataset"),
    ("Validation", "Validation dataset"),
]:
    ml_instance.add_term("Dataset_Type", term_name, description=term_description)

# Create two datasets; the values returned by create_dataset() are used as
# dataset RIDs in the calls that follow.
subject_dataset = ml_instance.create_dataset(
    ["TestSet"],
    description="A subject dataset",
)
image_dataset = ml_instance.create_dataset(
    ["TestSet", "Training"],
    description="A image training dataset",
)

# Collect the RID of every entity fetched from the Subject and Image tables.
subject_rids = [
    entity['RID']
    for entity in ml_instance.domain_path.tables['Subject'].entities().fetch()
]
image_rids = [
    entity['RID']
    for entity in ml_instance.domain_path.tables['Image'].entities().fetch()
]

# All subjects go into the subject dataset; only the first two images go into
# the image dataset.
ml_instance.add_dataset_members(dataset_rid=subject_dataset, members=subject_rids)
ml_instance.add_dataset_members(dataset_rid=image_dataset, members=image_rids[:2])

In [None]:
# Print what list_dataset_members() returns for each of the two datasets.
subject_members = ml_instance.list_dataset_members(subject_dataset)
print(f"Subject dataset members {subject_members}")
image_members = ml_instance.list_dataset_members(image_dataset)
print(f"Image dataset members {image_members}")

In [None]:
def strip_system(d):
    """Return a copy of ``d`` without the RCT, RMT, RCB and RMB keys.

    Args:
        d: Mapping to filter; it is not modified.

    Returns:
        A new dict holding every other key/value pair of ``d``, in the
        original order.
    """
    hidden_columns = ('RCT', 'RMT', 'RCB', 'RMB')
    stripped = {}
    for key, value in d.items():
        if key in hidden_columns:
            continue
        stripped[key] = value
    return stripped

# Tabulate the records returned by find_datasets(), with the RCT/RMT/RCB/RMB
# keys removed from each one by strip_system(); the DataFrame is the cell output.
dataset_records = [strip_system(dataset) for dataset in ml_instance.find_datasets()]
pd.DataFrame(dataset_records)

In [None]:
# Clean up: delete the test catalog created earlier in this notebook.
# NOTE(review): really=True looks like an explicit confirmation flag for this
# destructive call — confirm against the deriva-py API.
test_catalog.delete_ermrest_catalog(really=True)