DerivaML is a class library built on the Deriva scientific asset management system. It is designed to simplify many of the basic operations involved in building and testing ML libraries based on common toolkits such as TensorFlow. This notebook reviews the basic features of the DerivaML library.

In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded automatically when their source changes.
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
from deriva.core import DerivaServer, ErmrestCatalog, get_credential
from deriva.core.utils.globus_auth_utils import GlobusNativeLogin
from deriva_ml.deriva_ml_base import DerivaML, DerivaMLException, ColumnDefinition, BuiltinTypes
from deriva_ml.schema_setup.create_schema import create_ml_schema
from deriva_ml.schema_setup.test_catalog import create_test_catalog
from deriva_ml.execution_configuration import ExecutionConfiguration

Set the details for the catalog we want and authenticate to the server if needed.

In [None]:
# Server to connect to and the domain schema name used for the test catalog.
hostname = 'dev.eye-ai.org'
domain_schema = 'demo-schema'

# Only start the Globus login flow when there is no active session for this
# host; the flow is run with no local server and no browser.
gnl = GlobusNativeLogin(host=hostname)
if not gnl.is_logged_in([hostname]):
    gnl.login(
        [hostname],
        no_local_server=True,
        no_browser=True,
        refresh_tokens=True,
        update_bdbag_keychain=True,
    )
    print("Login Successful")
else:
    print("You are already logged in.")


Create a test catalog and get an instance of the DerivaML class.

In [None]:
# Create a catalog on the server for this demo; it is deleted again in the
# final cell of the notebook.
test_catalog = create_test_catalog(hostname, domain_schema)
# NOTE(review): the trailing positional arguments (None, None, "1") are not
# explained in this notebook — confirm their meaning against the DerivaML
# constructor and consider passing them by keyword.
ml_instance = DerivaML(hostname, test_catalog.catalog_id, domain_schema, None, None, "1")

In [None]:
# Show the Chaise URL for the "Subject" table (the value is the cell's output).
ml_instance.chaise_url("Subject")

In [None]:
# Show the name of every feature that find_features() reports at this point,
# before the notebook defines one of its own.
[feature.name for feature in ml_instance.find_features()]

In [None]:
# Register the feature name, then create the vocabulary (with one term) and
# the asset table that the feature definition below refers to.
ml_instance.add_term("Feature_Name", "Feature1", description="A Feature Name")
ml_instance.create_vocabulary("FeatureValue", "A vocab")
ml_instance.add_term("FeatureValue", "V1", description="A Feature Vale")
a = ml_instance.create_asset("TestAsset", comment="A asset")

# Define "Feature1" for the Image table with one term column, one asset column
# and one int2 metadata column.
# NOTE(review): the arguments here are (feature name, table), whereas the
# feature_record_class/list_feature calls later in the notebook pass
# (table, feature name) — confirm against the create_feature signature.
ml_instance.create_feature(
    "Feature1",
    "Image",
    terms=["FeatureValue"],
    assets=[a],
    metadata=[ColumnDefinition(name='TestCol', type=BuiltinTypes.int2)],
)

# Display the names of the features now found for Image.
[f.name for f in ml_instance.find_features("Image")]

In [None]:
# Get the record class used to build values for the "Feature1" feature on Image.
TestFeature = ml_instance.feature_record_class("Image", "Feature1")

# Get some images to attach the feature value to, and insert one asset record
# for the feature values to reference.
image_rids = [i['RID'] for i in ml_instance.domain_path.tables['Image'].entities().fetch()]
asset_rid = ml_instance.domain_path.tables["TestAsset"].insert(
    [{'Name': "foo", 'URL': "foo/bar", 'Length': 2, 'MD5': 4}]
)[0]['RID']

# Get an execution RID: add a workflow type term, insert a workflow of that
# type, then insert an execution of that workflow.
ml_path = ml_instance.catalog.getPathBuilder().schemas['deriva-ml']
ml_instance.add_term("Workflow_Type", "TestWorkflow", description="A workflow")
workflow_rid = ml_path.tables['Workflow'].insert(
    [{'Name': "Test Workflow", 'Workflow_Type': "TestWorkflow"}]
)[0]['RID']
execution_rid = ml_path.tables['Execution'].insert(
    [{'Description': "Test execution", 'Workflow': workflow_rid}]
)[0]['RID']

# Build one feature record per image with the class returned by
# feature_record_class, then add them all in a single call.
feature_list = [
    TestFeature(
        Image=i,
        Execution=execution_rid,
        FeatureValue="V1",
        TestAsset=asset_rid,
        TestCol=23,
    )
    for i in image_rids
]
ml_instance.add_features(feature_list)

# Read the feature values back and print them.
features = ml_instance.list_feature("Image", "Feature1")
print(features)

In [None]:
# Add five terms, Term0..Term4, to "My termset", each with a lower-case and an
# upper-case synonym.
# NOTE(review): this notebook never calls create_vocabulary for "My termset" —
# presumably create_test_catalog provides it; confirm.
for i in range(5):
    ml_instance.add_term(
        "My termset",
        f"Term{i}",
        description=f"My term {i}",
        synonyms=[f"t{i}", f"T{i}"],
    )

In [None]:
# Tabulate each term in "My termset" with its description and synonyms.
pd.DataFrame(
    [
        {'Name': term.name, 'Description': term.description, 'Synonyms': term.synonyms}
        for term in ml_instance.list_vocabulary_terms("My termset")
    ]
)

In [None]:
# Look up a term through each of its two synonyms, printing each result in turn.
for synonym in ("T1", "t1"):
    print(ml_instance.lookup_term("My termset", synonym))

In [None]:
# Clean up: delete the test catalog that was created earlier in this notebook.
test_catalog.delete_ermrest_catalog(really=True)