In [7]:
import datetime
import json
import os
import sys
import time
# Append the current directory to the module search path.
sys.path.append(".")
# Point the JUPYTER_PATH environment variable of this process at the current directory.
os.environ['JUPYTER_PATH'] = '.'
# Globus Auth client ID handed to the token and client helper functions in the cells below.
CLIENT_ID = "e6c75d97-532a-4c88-b031-8584a319fa3e"

from globus.automate.client import (get_access_token_for_scope, create_action_client, 
                                    create_flows_client)

## Important: A Note on Authentication and Authorization

All interactions between users and services with the Globus Automate Platform are governed by use of Authentication and Authorization using the Globus Auth system. 
* In particular, this means that consent must be given by the user for each interaction taking place on their part, including in this Notebook.
* The first time you interact with each service such as the Flow service, an Action, or even a Flow instance, you will be provided a link to perform the consent flow.
* You must click the link to complete the consent flow which will launch in a new tab.
* When complete, copy the code string, return to the Notebook tab, and paste the code into the input box that is presented below the link to begin the flow.

# Globus Automate: Flows and Actions

## Flow Definition

* Flows are composed of *Action* invocations
* Each Action invocation reads from and contributes back to the *Flow State* which is referenced in Flow steps using the `InputPath` and `ResultPath` properties of an Action.

In [6]:
# Both states invoke the same Transfer Action, so its URL and Globus Auth scope
# are spelled out once and shared by the two state definitions.
_TRANSFER_ACTION_URL = "https://actions.automate.globus.org/Transfer"
_TRANSFER_ACTION_SCOPE = (
    "https://auth.globus.org/scopes/"
    "helloworld.actions.automate.globus.org/globus_transfer_action_all"
)


def _transfer_state(comment, input_path, result_path, **transition):
    """Build one "Action" state of the flow that invokes the Transfer Action.

    ``input_path`` / ``result_path`` become the state's ``InputPath`` and
    ``ResultPath``; ``transition`` supplies the closing ``Next`` or ``End`` key.
    """
    state = {
        "Comment": comment,
        "Type": "Action",
        "ActionUrl": _TRANSFER_ACTION_URL,
        "ActionScope": _TRANSFER_ACTION_SCOPE,
        "InputPath": input_path,
        "ResultPath": result_path,
    }
    state.update(transition)
    return state


# Two-state flow: Transfer1 runs first and hands over to Transfer2, which ends the flow.
flow_definition = {
    "Comment": "Two step transfer",
    "StartAt": "Transfer1",
    "States": {
        "Transfer1": _transfer_state(
            "Initial Transfer from Campus to DTN in DMZ",
            "$.Transfer1Input",
            "$.Transfer1Result",
            Next="Transfer2",
        ),
        "Transfer2": _transfer_state(
            "Transfer from DMZ to dataset repository",
            "$.Transfer2Input",
            "$.Transfer2Result",
            End=True,
        ),
    },
}


* This flow composes two transfers into a single logical operation
  * Suitable, for example, for doing a two stage transfer between a local campus endpoint, a DMZ data transfer endpoint, and a dataset repository.
  * Each step in the Flow uses the same Action: Transfer which is referenced by URL
  * Globus Auth Scope information is required to authenticate operations to the Action.
  * Each Transfer state reads its input, including the source and destination information, from the Flow State location named by `InputPath`, and writes its result to the location named by `ResultPath`
    * Format of the input is Action dependent (see below)


In [7]:
# Create a client for the Flows service using this notebook's Globus Auth client ID.
flows_client = create_flows_client(CLIENT_ID)
# Deploy the flow definition built above; the response is indexed below for the flow's id and scope.
flow = flows_client.deploy_flow(flow_definition)
# Keep the id and scope string: the run and status calls in a later cell take both as arguments.
flow_id = flow['id']
flow_scope = flow['scope_string']
print(f'Newly created flow with id:\n{flow_id}\nand scope:\n{flow_scope}')

Newly created flow with id:
c70382b0-eede-417f-ac16-ae31f7c0f65d
and scope:
https://auth.globus.org/scopes/eec9b274-0c81-4334-bdc2-54e90e689b9a/flow_6ead1c8c_a6d9_4013_a713_ca0bff695f0e


* The newly created flow has an id, and a scope which will be used when running the flow.

* The first step in running the flow is to prepare an input document which provides the necessary data for running the flow.
  * In the flow definition, we see that the action at `ActionUrl` `https://actions.automate.globus.org/Transfer` will be invoked twice to perform the two transfer operations. The two invocations read their input from the `InputPath` values `$.Transfer1Input` and `$.Transfer2Input` respectively. So, the input document will require values for both `Transfer1Input` and `Transfer2Input`. But what is the format for that input?
  
## Action Introspection

We can get the input format for any action via introspection which will return, among other properties, the schema for input to the Action in JSON Schema format.

In [8]:
# Globus Auth scope of the Transfer Action (the same scope string the flow definition
# lists as the ActionScope of its Transfer states).
TRANSFER_SCOPE = 'https://auth.globus.org/scopes/helloworld.actions.automate.globus.org/globus_transfer_action_all'
# Obtain an access token for that scope on behalf of this notebook's client ID.
transfer_token = get_access_token_for_scope(TRANSFER_SCOPE, CLIENT_ID)
# Address the Action over https, matching the ActionUrl used in the flow definition,
# so the access token is never sent over an unencrypted connection.
transfer_client = create_action_client(
    "https://actions.automate.globus.org/Transfer", transfer_token)
# Introspection returns the Action's description; its 'input_schema' entry is the
# schema for the Action's input.
transfer_action_info = transfer_client.introspect()
transfer_action_schema = transfer_action_info.data['input_schema']
# Backward-compatible alias for the previously misspelled variable name.
transfer_action_schmea = transfer_action_schema
print(f'Schema for Transfer Action: {json.dumps(transfer_action_schema, indent=2)}')

Schema for Transfer Action: {
  "additionalProperties": false,
  "properties": {
    "destination_endpoint_id": {
      "type": "string"
    },
    "label": {
      "type": "string"
    },
    "source_endpoint_id": {
      "type": "string"
    },
    "sync_level": {
      "type": "string"
    },
    "transfer_items": {
      "items": {
        "additionalProperties": false,
        "properties": {
          "destination_path": {
            "type": "string"
          },
          "recursive": {
            "type": "boolean"
          },
          "source_path": {
            "type": "string"
          }
        },
        "required": [
          "source_path",
          "destination_path"
        ],
        "type": "object"
      },
      "type": "array"
    }
  },
  "required": [
    "source_endpoint_id",
    "destination_endpoint_id",
    "transfer_items"
  ],
  "type": "object"
}


* We see that the `required` properties of the input are `source_endpoint_id`, `destination_endpoint_id` and `transfer_items`.
  * `transfer_items` in turn, is a list of objects containing a `source_path` and `destination_path`.
* With this information, we can create the necessary input document for running our flow specifying the required parameters in both the `Transfer1Input` and `Transfer2Input` properties of our input document.

### Note: In the example below, we assume that the endpoints and folders have already been set up as follows:
1. On `go#ep1` in the home directory `/~/` there should be a folder called `campus_source` and another called `dataset_repository` which we use to represent the initial source of the data and the repository destination for the data.
2. Within `campus_source` on `go#ep1`, there should be a folder called `dataset1` which may contain any files which you may treat as the content of the dataset.
3. On `go#ep2` there should be a folder in the home directory `/~/` called `dmz_temp` which represents the intermediate location in the DMZ where the data will be staged.

If any of these requirements are not met, the flow will fail to execute. If you prefer to use alternative endpoints or folder names, the input document assigned to `flow_input` below can be updated as desired. 

In [8]:
def _transfer_request(source_endpoint, destination_endpoint, source_path, destination_path):
    """Build the input for one Transfer step: a single recursive transfer item
    copied from ``source_path`` on ``source_endpoint`` to ``destination_path``
    on ``destination_endpoint``.
    """
    item = {
        "source_path": source_path,
        "destination_path": destination_path,
        "recursive": True,
    }
    return {
        "source_endpoint_id": source_endpoint,
        "destination_endpoint_id": destination_endpoint,
        "transfer_items": [item],
    }


# Input document for the flow: one entry per Transfer state, keyed by the name each
# state's InputPath refers to.
flow_input = {
    # Stage 1: campus source on go#ep1 -> intermediate DMZ folder on go#ep2.
    "Transfer1Input": _transfer_request(
        "go#ep1", "go#ep2",
        "/~/campus_source/dataset1/", "/~/dmz_temp/dataset1",
    ),
    # Stage 2: intermediate DMZ folder on go#ep2 -> dataset repository on go#ep1.
    "Transfer2Input": _transfer_request(
        "go#ep2", "go#ep1",
        "/~/dmz_temp/dataset1/", "/~/dataset_repository/dataset1",
    ),
}

# Start a run of the deployed flow, passing the flow's id, its scope and the input document.
flow_action = flows_client.run_flow(flow_id, flow_scope, flow_input)
# The response is indexed for the run's id (needed for status polling) and its initial status.
flow_action_id = flow_action['id']
flow_status = flow_action['status']
print(f'Flow action started with id: {flow_action_id}')
# Poll every 2 seconds for as long as the reported status is exactly 'RUNNING';
# any other status value ends the loop.
# NOTE(review): there is no timeout, so this loops for as long as the service keeps reporting RUNNING.
while flow_status == 'RUNNING':
    time.sleep(2)
    flow_action = flows_client.flow_action_status(flow_id, flow_scope, flow_action_id)
    flow_status = flow_action['status']
    print(f'Flow status: {flow_status}')

Login Here:

https://auth.globus.org/v2/oauth2/authorize?client_id=e6c75d97-532a-4c88-b031-8584a319fa3e&redirect_uri=https%3A%2F%2Fauth.globus.org%2Fv2%2Fweb%2Fauth-code&scope=https%3A%2F%2Fauth.globus.org%2Fscopes%2Feec9b274-0c81-4334-bdc2-54e90e689b9a%2Fflow_6ead1c8c_a6d9_4013_a713_ca0bff695f0e&state=_default&response_type=code&code_challenge=0vNixDFsjkSTpfiibjv_AHr3Ite46VydJuwTtcoPlLY&code_challenge_method=S256&access_type=offline&prefill_named_grant=Globus+Automate+Client


Note that this link can only be used once! If login or a later step in the flow fails, you must restart it.
Enter resulting code:1kubnGKuI0BzSAhOX3Vfe76C8uI55y
Flow action started with id: 2ca0da46-ba51-4d4a-8583-36e25ee26d2e
Flow status: RUNNING
Flow status: RUNNING
Flow status: RUNNING
Flow status: RUNNING
Flow status: RUNNING
Flow status: RUNNING
Flow status: RUNNING
Flow status: SUCCEEDED


* In addition to receiving Notifications from Globus that the transfers have occurred, inspecting the various endpoints and folders set up above should verify that the data which originally resided in the `dataset1` folder has been replicated to both the intermediate/DMZ endpoint and the final data repository endpoint.
  * A more complete, and complex example may remove the dataset from the intermediate/DMZ endpoint when the second transfer is completed.


## All Actions are invoked with the same pattern via REST

This pattern is key to use of any Actions in Flows, but we may also execute Actions directly using the operations:

1. run - Starts an Action running
  * Input includes a request identifier to allow idempotent retry of run operations
  * Response provides an action identifier used for all subsequent operations
  * May be synchronous, returning the final result immediately, or asynchronous, requiring further polls to determine completion status
2. status - Poll for updates to Action's progress
  * Indicates whether Action is completed successfully, with failure, or is still running
  * Output includes Action-specific result of the Action
  * May be called repeatedly, even after the Action is complete
3. release - Indicate that Action status may be removed
  * Only permitted when the Action is in a final state either success or failure
  * May only be called once -- subsequent calls will error as if the Action (and action identifier) never existed
  * Will be performed implicitly after Action completion following an extended holding duration
4. cancel - Attempt to stop an Action during execution
  * Never guaranteed to be successful or allowed by the Action
  * Only valid while the Action is running; returns error if the Action is completed


* We can also run Actions directly to take advantage of their standard pattern

In [10]:
# Globus Auth scope used to request a token for the Identifiers Action.
IDENTIFIERS_SCOPE = "https://auth.globus.org/scopes/5fac2e64-c734-4e6b-90ea-ff12ddbf9653/identifiers_run_status_release"
# Request body submitted to the Identifiers Action's run operation.
identifiers_request = {
    "namespace_id": "A63CZStxzei6",
    "location": ["https://app.globus.org/file-manager?origin_id=ddb59aef-6d04-11e5-ba46-22000b92c6ec"],
    "visible_to": ["public"],
    "metadata": {
        "creator": "Steve User",
        "title": "An Important Dataset",
    },
}
identifiers_token = get_access_token_for_scope(IDENTIFIERS_SCOPE, CLIENT_ID)
# Address the Action over https so the access token is never sent over an
# unencrypted connection.
identifiers_client = create_action_client(
    "https://actions.automate.globus.org/Identifiers", identifiers_token)

# run: start the Action and remember its id for the subsequent status calls.
identifiers_action_status = identifiers_client.run(identifiers_request)
identifiers_action_id = identifiers_action_status["action_id"]
print(f'ActionId: {identifiers_action_id} Status: {identifiers_action_status["status"]}')
# status: poll every 2 seconds until the Action reports one of its final states.
while identifiers_action_status["status"] not in ("SUCCEEDED", "FAILED"):
    time.sleep(2)
    identifiers_action_status = identifiers_client.status(identifiers_action_id)
    print(f'ActionId: {identifiers_action_id} Status: {identifiers_action_status["status"]}')

# The loop also ends on FAILED, so only read the landing page from the details of a
# successful Action.
if identifiers_action_status["status"] == "SUCCEEDED":
    print(f"Link to new Identifier: {identifiers_action_status.data['details']['landing_page']}")
else:
    print(f'ActionId: {identifiers_action_id} did not succeed; no Identifier link is available')


ActionId: A4jg2iBH9qEB Status: SUCCEEDED
ActionID: A4jg2iBH9qEB


GlobusAPIError: (404, 'Error', '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">\n<title>404 Not Found</title>\n<h1>Not Found</h1>\n<p>The requested URL was not found on the server.  If you entered the URL manually please check your spelling and try again.</p>\n')

## Action Implementation Helper in Automate SDK

* Creation of new Actions allows the Automate platform to expand to new use cases.
* The required Action API methods have been wrapped in some helper classes for registering API routes with a Flask container and managing Action state in a Postgres database
  * Yields a relatively easy to implement set of methods as shown below in a complete Action implementation, supporting asynchrony, for a simple Hello World-style Action.

In [None]:
from globus.automate.common import AbstractActionProvider, ActionInstance, AuthState


class HelloWorldActionProvider(AbstractActionProvider):
    """Asynchronous Hello World-style Action provider.

    A request may carry an ``echo_string``, which is copied into the Action's
    details, and a ``sleep_time`` in seconds. The Action reports ``ACTIVE``
    until more than ``sleep_time`` seconds have passed since
    ``action.start_time`` and ``SUCCEEDED`` afterwards; without a
    ``sleep_time`` it succeeds immediately.
    """

    # Declare the format for inputs to this Action for use in Action Introspection
    # and for validation of input by the service
    request_body_schema = {
        "type": "object",
        "properties": {
            "echo_string": {"type": "string"},
            "sleep_time": {"type": "integer"},
        },
        "additionalProperties": False,
    }

    def __init__(self, *args, **kwargs):
        """Set the provider's URL prefix, Globus Auth properties and
        introspection properties, then initialise the base class.

        Each property read with ``kwargs.get`` below may be overridden by a
        keyword argument of the same name; all positional and keyword
        arguments are forwarded unchanged to the base class.
        """
        # Set the Actions REST API URL location on the server
        self.url_prefix = kwargs.get("url_prefix", "HelloWorld")

        # Define properties needed to do Authentication of requests via Globus Auth
        self.globus_auth_client_id = kwargs.get(
            "globus_auth_client_id", "5fac2e64-c734-4e6b-90ea-ff12ddbf9653"
        )
        self.globus_auth_client_name = kwargs.get(
            "globus_auth_client_name", "hello_world_action_provider"
        )
        # No default for the secret: it stays None unless the caller supplies it.
        self.globus_auth_client_secret = kwargs.get("globus_auth_client_secret")
        self.globus_auth_scope = kwargs.get(
            "globus_auth_scope",
            (
                "https://auth.globus.org/scopes/helloworld.actions.automate.globus.org/all"
            ),
        )

        # Set properties for Action Introspection
        self.title = "Hello World"
        self.subtitle = "An Action responding Hello to an input value"
        self.visible_to = kwargs.get("visible_to", ["public"])
        self.administered_by = kwargs.get("administered_by", ["foo@bar.com"])
        self.admin_contact = kwargs.get("admin_contact", "support@globus.org")
        self.synchronous = False
        self.log_supported = False
        self.runnable_by = kwargs.get("runnable_by", ["public"])
        self.input_schema = HelloWorldActionProvider.request_body_schema
        super().__init__(*args, **kwargs)

    def _action_done(self, action: ActionInstance) -> bool:
        """
        Helper for determining when a request with sleep_time in the request is
        completed.

        Returns True when the request has no ``sleep_time``, or when more than
        ``sleep_time`` seconds have elapsed since ``action.start_time``.
        """
        if "sleep_time" not in action.request_body:
            return True
        # NOTE(review): assumes action.start_time is a naive local datetime that is
        # already set when this is called -- confirm against ActionInstance.
        now = datetime.datetime.now()
        run_length = (now - action.start_time).total_seconds()
        return run_length > int(action.request_body["sleep_time"])

    def _set_status_from_progress(self, action: ActionInstance) -> ActionInstance:
        """Mark the action SUCCEEDED (stamping its completion time) once it is
        done, otherwise ACTIVE; shared by ``run_action`` and ``check_status``.
        """
        if self._action_done(action):
            action.status = "SUCCEEDED"
            action.completion_time = datetime.datetime.now()
        else:
            action.status = "ACTIVE"
        return action

    def run_action(self, action: ActionInstance, auth_state: AuthState) -> ActionInstance:
        """Callback for starting a new Action.

        Assigns a freshly generated action id, fills in the details and sets
        the initial status. ``auth_state`` is accepted but not used.
        """
        action.action_id = self.generate_actionid()
        action.details = {"Hello": "World"}
        if "echo_string" in action.request_body:
            # NOTE(review): the key is lowercase "hello", so the echo is stored next
            # to the "Hello" entry rather than replacing it -- confirm this is intended.
            action.details["hello"] = action.request_body["echo_string"]
        return self._set_status_from_progress(action)

    def check_status(self, action: ActionInstance, auth_state: AuthState) -> ActionInstance:
        """Callback for a user status check; re-evaluates whether the action is
        done and updates its status accordingly. ``auth_state`` is not used.
        """
        return self._set_status_from_progress(action)

    def cancel_action(self, action: ActionInstance, auth_state: AuthState) -> ActionInstance:
        """Callback for cancel; marks the action FAILED and stamps its
        completion time. ``auth_state`` is not used.
        """
        action.status = 'FAILED'
        action.completion_time = datetime.datetime.now()
        return action

# Create the flask app, sqlalchemy engine and configure the action on the flask app
# NOTE(review): `Flask`, `sqlalchemy` and `db_uri` are neither imported nor defined in
# the notebook as shown, so this cell raises NameError on a fresh kernel. Import Flask
# and sqlalchemy and set `db_uri` to the database URL before running it.
flask = Flask(__name__)
db_engine = sqlalchemy.create_engine(db_uri)
# Instantiate the provider with all of its default settings (no keyword overrides).
hello_world_provider = HelloWorldActionProvider()
# Hand the app and the engine to the provider so it can register itself on the app.
hello_world_provider.set_flask_routes(flask, db_engine)