Skip to content

Commit

Permalink
add documentation for jupyter magic and integration api
Browse files Browse the repository at this point in the history
  • Loading branch information
jfischer committed Nov 29, 2019
1 parent 0bc8d67 commit 500ecf3
Show file tree
Hide file tree
Showing 4 changed files with 160 additions and 11 deletions.
22 changes: 17 additions & 5 deletions dataworkspaces/api.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Copyright 2018,2019 by MPI-SWS and Data-ken Research. Licensed under Apache 2.0. See LICENSE.txt.
"""
API for selected Data Workspaces management functions.
This is an API for selected Data Workspaces management functions.
"""
from typing import Optional, NamedTuple, List, Iterable, cast, Tuple
from os.path import join
Expand All @@ -20,14 +20,21 @@


def get_version():
    """Return the version string of the installed Data Workspaces package."""
    installed_version = __version__
    return installed_version

def get_api_version():
    """Return the API version.

    The API version is tracked separately from the overall DWS version
    and is intended to be the more stable of the two.
    """
    api_version = __api_version__
    return api_version



class ResourceInfo(NamedTuple):
"""Named tuple representing the
results from a call to :func:`~get_resource_info`.
"""
name : str
role : str
resource_type : str
Expand All @@ -49,6 +56,9 @@ def get_resource_info(workspace_uri_or_path:Optional[str]=None, verbose:bool=Fal


class SnapshotInfo(NamedTuple):
"""Named tuple representing the results from a call
to :func:`~get_snapshot_history`
"""
snapshot_number: int
hashval : int
tags : List[str]
Expand Down Expand Up @@ -97,10 +107,12 @@ def restore(tag_or_hash:str, workspace_uri_or_path:Optional[str]=None,
only:Optional[List[str]]=None, leave:Optional[List[str]]=None,
verbose:bool=False) -> int:
"""Restore to a previous snapshot, identified by either its hash
or its tag (if one was specified). :param only: is an optional list of
resources to store. If specified, all other resources will be left as-is.
:param leave: is an optional list of resource to leave as-is. Both
:param only: and :param leave: should not be specified together.
or its tag (if one was specified). Parameters:
* ``only`` - an optional list of resources to restore. If specified,
all other resources will be left as-is.
* ``leave`` - an optional list of resources to leave as-is. Both
``only`` and ``leave`` should not be specified together.
Returns the number of resources changed.
"""
Expand Down
24 changes: 19 additions & 5 deletions dataworkspaces/kits/jupyter.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
"""
Integration with Jupyter notebooks. This module provides a
:class:`~LineageBuilder` subclass to simplify Lineage for Notebooks.
It also provides a collection of IPython *magics* (macros) for working
in Jupyter notebooks.
"""
import os
import sys
Expand Down Expand Up @@ -309,6 +312,12 @@ async def _call_snapshot(self):

@line_magic
def dws_info(self, line):
parser = DwsMagicParseArgs("dws_info",
description="Print some information about this workspace")
try:
args = parser.parse_magic_line(line)
except DwsMagicArgParseExit:
return # user asked for help
print("Notebook name: %s" % self.dws_jupyter_info.notebook_name)
print("Notebook path: %s" % self.dws_jupyter_info.notebook_path)
print("Workspace directory: %s" % self.dws_jupyter_info.workspace_dir)
Expand All @@ -318,7 +327,8 @@ def dws_info(self, line):

@line_magic
def dws_snapshot(self, line):
parser = DwsMagicParseArgs("dws_snapshot")
parser = DwsMagicParseArgs("dws_snapshot",
description="Save the notebook and create a new snapshot")
parser.add_argument('-m', '--message', type=str, default=None,
help="Message describing the snapshot")
parser.add_argument('-t', '--tag', type=str, default=None,
Expand All @@ -341,7 +351,8 @@ def dws_snapshot(self, line):
@line_magic
def dws_history(self, line):
import pandas as pd # TODO: support case where pandas wasn't installed
parser = DwsMagicParseArgs("dws_history")
parser = DwsMagicParseArgs("dws_history",
description="Print a history of snapshots in this workspace")
parser.add_argument('--max-count', type=int, default=None,
help="Maximum number of snapshots to show")
parser.add_argument('--tail', default=False, action='store_true',
Expand Down Expand Up @@ -377,7 +388,8 @@ def dws_history(self, line):
@line_magic
def dws_lineage_table(self, line):
import pandas as pd # TODO: support case where pandas wasn't installed
parser = DwsMagicParseArgs("dws_lineage_table")
parser = DwsMagicParseArgs("dws_lineage_table",
description="Show a table of lineage for the workspace's resources")
parser.add_argument('--snapshot', default=None, type=str,
help="If specified, print lineage as of the specified snapshot hash or tag")
try:
Expand All @@ -389,7 +401,8 @@ def dws_lineage_table(self, line):

@line_magic
def dws_lineage_graph(self, line):
parser = DwsMagicParseArgs("dws_lineage_table")
parser = DwsMagicParseArgs("dws_lineage_table",
description="Show a graph of lineage for a resource")
parser.add_argument('--resource', default=None, type=str,
help="Graph lineage from this resource. Defaults to the results resource. Error if not specified and there is more than one.")
parser.add_argument('--snapshot', default=None, type=str,
Expand All @@ -407,7 +420,8 @@ def dws_lineage_graph(self, line):

@line_magic
def dws_results(self, line):
parser = DwsMagicParseArgs("dws_results")
parser = DwsMagicParseArgs("dws_results",
description="Show results from a run (results.json file)")
parser.add_argument('--resource', default=None, type=str,
help="Look for the results.json file in this resource. Otherwise, will look in all results resources and return the first match.")
parser.add_argument('--snapshot', default=None, type=str,
Expand Down
14 changes: 13 additions & 1 deletion docs/internals.rst
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ Source Data Sets
We want the ability to name source data sets and swap them in and out without
changing other parts of the workspace. This still needs to be implemented.

Intrermediate Data
Intermediate Data
~~~~~~~~~~~~~~~~~~
For intermediate data, we may want to delete it from the current state of
the workspace if it becomes out of date (e.g. a data source version is changed
Expand Down Expand Up @@ -238,6 +238,18 @@ resources into the leave set, as if specified in the ``--leave`` option.
If the user puts a results resource in the ``--only`` set, we will error
out for now.

.. _integration_api:

Integration API
---------------
The module ``dataworkspaces.api`` provides a simplified, high level programmatic
interface to Data Workspaces. It is for integration with third-party tooling.

.. automodule:: dataworkspaces.api
:no-undoc-members:
:members:


.. _workspace_api:

Core Workspace API
Expand Down
111 changes: 111 additions & 0 deletions docs/kits.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,117 @@ Jupyter
:no-undoc-members:
:members: NotebookLineageBuilder, is_notebook, get_step_name_for_notebook

Magics
~~~~~~
This module also provides a collection of IPython `magics <https://ipython.readthedocs.io/en/stable/interactive/magics.html>`_
(macros) to simplify interactions with your data workspace when developing in a Jupyter Notebook.

Limitations
...........
Currently these magics are only supported in interactive Jupyter Notebooks. They do not run properly
within JupyterLab (we are currently working on an extension specific to JupyterLab),
the `nbconvert` command, or if you run the entire notebook with "Run All Cells".

Loading the magics
..................
To load the magics, run the following in an interactive cell of your Jupyter Notebook::

import dataworkspaces.kits.jupyter
%load_ext dataworkspaces.kits.jupyter

If the load runs correctly, you should see output like this in your cell:

Ran DWS initialization. The following magic commands have been added to your notebook:

* ``%dws_info`` - print information about your dws environment
* ``%dws_history`` - print a history of snapshots in this workspace
* ``%dws_snapshot`` - save and create a new snapshot
* ``%dws_lineage_table`` - show a table of lineage for the workspace resources
* ``%dws_lineage_graph`` - show a graph of lineage for a resource
* ``%dws_results`` - show results from a run (results.json file)

Run any command with the ``--help`` option to see a list of options for that command.
The variable ``DWS_JUPYTER_NOTEBOOK`` has been added to your variables, for use in future DWS calls.

Magic Command reference
.......................
We now describe the command options for the individual magics.

**%dws_info**

usage: dws_info [-h]

Print some information about this workspace

optional arguments:
-h, --help show this help message and exit

**%dws_history**

usage: dws_history [-h] [--max-count MAX_COUNT] [--tail]

Print a history of snapshots in this workspace

optional arguments:
-h, --help show this help message and exit
--max-count MAX_COUNT
Maximum number of snapshots to show
--tail Just show the last 10 entries in reverse order

**%dws_snapshot**

usage: dws_snapshot [-h] [-m MESSAGE] [-t TAG]

Save the notebook and create a new snapshot

optional arguments:
-h, --help show this help message and exit
-m MESSAGE, --message MESSAGE
Message describing the snapshot
-t TAG, --tag TAG Tag for the snapshot. Note that a given tag can only
be used once (without deleting the old one).

**%dws_lineage_table**

usage: dws_lineage_table [-h] [--snapshot SNAPSHOT]

Show a table of lineage for the workspace's resources

optional arguments:
-h, --help show this help message and exit
--snapshot SNAPSHOT If specified, print lineage as of the specified
snapshot hash or tag

**%dws_lineage_graph**

usage: dws_lineage_graph [-h] [--resource RESOURCE] [--snapshot SNAPSHOT]

Show a graph of lineage for a resource

optional arguments:
-h, --help show this help message and exit
--resource RESOURCE Graph lineage from this resource. Defaults to the
results resource. Error if not specified and there is
more than one.
--snapshot SNAPSHOT If specified, graph lineage as of the specified
snapshot hash or tag

**%dws_results**

usage: dws_results [-h] [--resource RESOURCE] [--snapshot SNAPSHOT]

Show results from a run (results.json file)

optional arguments:
-h, --help show this help message and exit
--resource RESOURCE Look for the results.json file in this resource.
Otherwise, will look in all results resources and
return the first match.
--snapshot SNAPSHOT If specified, get results as of the specified snapshot
or tag. Otherwise, looks at current workspace and then
most recent snapshot.


Scikit-learn
------------

Expand Down

0 comments on commit 500ecf3

Please sign in to comment.