In [1]:
# This notebook gives an example of how to access the model elements (e.g.,
# schemas, tables, columns) that are used when building data paths.

In [2]:
# Import deriva modules
from deriva_common import ErmrestCatalog, get_credential

In [3]:
# Connect to the deriva catalog. `protocol` and `hostname` identify the
# server, and `catalog_number` selects the catalog on that server.
protocol = 'https'
hostname = 'www.facebase.org'
catalog_number = 1
# No credential is supplied here (`None`).
credential = None
# If you need to authenticate, use the Deriva Auth agent and get the
# credential by uncommenting the following line:
# credential = get_credential(hostname)
catalog = ErmrestCatalog(protocol, hostname, catalog_number, credential)

In [4]:
# Get the path builder interface for this catalog. The `pb` object is the
# starting point for all of the model access shown in the following cells.
pb = catalog.getPathBuilder()

In [5]:
# The path builder gives you access to a representation of the catalog's
# data model, beginning with the catalog's schemas. The `.schemas` property
# acts like a Python dictionary (a mapping keyed by schema name). Use its
# `keys()` method to get a listing of the schema names.
pb.schemas.keys()

['load_temp', 'viz', 'legacy', 'vocabulary', 'metadata_raw', 'isa']

In [6]:
# Here we will get a handle to the `isa` schema by looking it up by name
# in the `schemas` dictionary.
isa = pb.schemas['isa']

# PROTIP: Jupyter Notebook supports <tab> completion. Press the <tab> key
# after typing the brackets of a dictionary to see the keys. Typing
# `pb.schemas[<tab>]` will give you a dropdown of schema names.

In [7]:
# An alternative way to get a handle to the same schema object is
# directly as a property of the path builder object itself. However,
# this only works for schema names that are _valid Python identifiers_.
# A valid Python identifier may start with `_` or a letter as its first
# character and have `_`, letters, or numbers for the rest of its
# characters.
#  * Valid: `dataset` `assay` `Molecule_Type` etc.
#  * Not Valid: `Sample 1 Type` `Control?` `# of reads` etc.

# **IMPORTANT**
# Similar access methods will be demonstrated for tables and columns
# below. Not all catalog model names are valid Python identifiers, so
# when you use this method you may not see your catalog's complete
# data model. However, the notation is more compact and ideal for
# cases where your model uses (all or mostly) valid Python identifiers
# in its model element names.

isa = pb.isa

In [8]:
# Similarly, a schema object has a `tables` property that gives you access
# to a representation of the tables in that schema. Again, use its `keys()`
# method to list the table names in the schema.
isa.tables.keys()

['dataset_mouse_genetic_background',
 'dataset_human_age',
 'dataset_mouse_theiler_stage',
 'dataset_investigators',
 'dataset_human_anatomic_source',
 'imaging_method',
 'project_publication',
 'icon',
 'dataset_mouse_age_stage',
 'dataset_experiment_type',
 'experiment_type',
 'sample',
 'human_gender',
 'zebrafish_mutation',
 'mouse_enhancer',
 'dataset_status',
 'alignment',
 'specimen',
 'library',
 'file',
 'publication',
 'human_age',
 'instrument',
 'assay',
 'dataset_human_gender',
 'zebrafish_age_stage',
 'dataset_zebrafish_anatomic_source',
 'project',
 'track_file',
 'dataset_mouse_gene',
 'clinical_assay',
 'dataset_organism',
 'external_reference',
 'mouse_gene',
 'dataset_data_type',
 'file_compact',
 'organism',
 'human_enhancer',
 'dataset_zebrafish_genotype',
 'human_age_stage',
 'tracks',
 'dataset_geo',
 'assay_file',
 'mouse_genetic_background',
 'data_type',
 'dataset_zebrafish_mutation',
 'thumbnail',
 'mouse_anatomic_source',
 'mouse_theiler_stage',
 'zebrafish_

In [9]:
# Similarly, we can get a table from the schema using either of the two
# access methods demonstrated above for schemas: by name through the
# `tables` dictionary...
dataset = isa.tables['dataset']
# ...or, equivalently, as a property of the schema object (possible here
# because `dataset` is a valid Python identifier).
dataset = isa.dataset

In [10]:
# A table has a `columns` dictionary. As with schemas and tables, use its
# `keys()` method to list the column names.
dataset.columns.keys()

dict_keys(['id', 'accession', 'title', 'project', 'funding', 'summary', 'description', 'view_gene_summary', 'view_related_datasets', 'mouse_genetic', 'human_anatomic', 'study_design', 'release_date', 'status', 'gene_summary', 'thumbnail', 'show_in_jbrowse', '_keywords'])

In [11]:
# Again, we have the same two methods to get handles to the table's
# column objects: by name through the `columns` dictionary...
accession = dataset.columns['accession']
# ...or, equivalently, as a property of the table object (possible here
# because `accession` is a valid Python identifier).
accession = dataset.accession

In [12]:
# FINAL THOUGHT: The model introspection provided in the datapath
# module (i.e., the PathBuilder) is intended for the narrowly scoped
# usage required for building paths and accessing data from ERMrest
# catalogs. It is _not_ intended for general introspection of catalogs
# and therefore _does not_ include details such as constraints, 
# annotations, ACLs, column data types, etc.