# Solutions to SDK Exercises

## Setup

Set up an instance of `TransferClient` to use in the notebook:

In [None]:
from __future__ import print_function # for python 2
import globus_sdk

# Ids of the two tutorial endpoints that the cells below transfer between.
tutorial_endpoint_1 = "ddb59aef-6d04-11e5-ba46-22000b92c6ec"  # "Globus Tutorial Endpoint 1"
tutorial_endpoint_2 = "ddb59af0-6d04-11e5-ba46-22000b92c6ec"  # "Globus Tutorial Endpoint 2"

# Paste a Transfer API token here: copy it from the SDK notebook, or get one
# from https://tokens.globus.org
transfer_token = None

# Client object shared by every cell below.
tc = globus_sdk.TransferClient(token=transfer_token)

## Find the endpoint id for XSEDE Comet

In [None]:
endpoint_name = "XSEDE Comet"

# The search can return more than one candidate, so keep only the endpoint
# whose display_name matches exactly. Requesting just the two fields we read
# keeps the response small.
eps = tc.endpoint_search(endpoint_name, fields="id,display_name")
endpoint_id = next(
    (ep["id"] for ep in eps if ep["display_name"] == endpoint_name),
    None,
)

if endpoint_id is None:
    print("Error: endpoint with name '{}' not found".format(endpoint_name))
else:
    print("Id of endpoint with name '{}': {}".format(endpoint_name, endpoint_id))

## Set all the metadata fields on your shared endpoint

In [None]:
# Id of the shared endpoint to update. Copy it from the SDK notebook output, or
# look it up in the Web app starting at https://www.globus.org/app/endpoints?scope=shared-by-me
shared_endpoint_id = "d6905986-0701-11e6-a738-22000bf2d559"

# Endpoint document holding the metadata fields to set.
ep_update = dict(
    DATA_TYPE="endpoint",
    description="Better description for my share created from globus-jupyter-notebook",
    keywords="GlobusWorld 2016",
    organization="GlobusWorld Example Org",
    department="Example Dept",
    contact_email="youraddress@example.org",
    info_link="https://www.example.org/globusendpoints/",
)

update_result = tc.update_endpoint(shared_endpoint_id, ep_update)

# Report the result code and message carried by the response.
summary = "{}: {}".format(update_result["code"], update_result["message"])
print(summary)

## Modify mkdir so that an existing directory does not raise an exception, but all other errors do

In [None]:
endpoint_path = "/~/tutorial_dir"
endpoint_id = tutorial_endpoint_1
# Only referenced by the commented-out call inside the try block.
nonexistent_endpoint_id = "28ff8e5b-23f4-4572-b498-9dcdc792999a"

try:
    # Running this cell more than once produces an Exists error here.
    mkdir_result = tc.operation_mkdir(endpoint_id, path=endpoint_path)

    # Swap in this call to trigger an EndpointNotFound error, which the
    # handler below re-raises:
    #mkdir_result = tc.operation_mkdir(nonexistent_endpoint_id, path=endpoint_path)

    print(mkdir_result["message"])
except globus_sdk.GlobusAPIError as ex:
    # Only "directory already exists" is tolerated; every other API error
    # propagates unchanged.
    # Note that in a future API revision, this error will change to be just
    # "Exists", but a namespace change will keep existing clients working.
    if ex.code != "ExternalError.MkdirFailed.Exists":
        raise
    print("Directory already exists")

## Set access manager role on your shared endpoint, and query both roles and ACLs to see the result.

In [None]:
# Uses shared_endpoint_id from the previous solution; or uncomment and paste the id here
# shared_endpoint_id = "ID"

# Identity id of go@globusid.org. In practice, you would use the Globus Auth API to get the id of a
# known identity username or email.
go_identity_id = "c699d42e-d274-11e5-bf75-1fc5bf53bb24"

# Role document granting the access_manager role to that identity.
role = {
    "DATA_TYPE": "role",
    "principal_type": "identity",
    "principal": go_identity_id,
    "role": "access_manager",
}
create_result = tc.add_endpoint_role(shared_endpoint_id, role)
role_id = create_result["id"]

try:
    roles = tc.endpoint_role_list(shared_endpoint_id)
    print("Roles:")
    # A distinct loop name keeps the request document `role` above intact.
    for listed_role in roles:
        print(listed_role["id"], listed_role["role"],
              listed_role["principal_type"], listed_role["principal"])
    print()

    acls = tc.endpoint_acl_list(shared_endpoint_id)
    print("ACLs:")
    for acl in acls:
        print(acl["id"], acl["role_id"], acl["principal_type"], acl["principal"],
              acl["permissions"], acl["path"])
finally:
    # Clean up the role even if one of the listings above fails, so this cell
    # can always be re-run cleanly.
    tc.delete_endpoint_role(shared_endpoint_id, role_id)

## Perform an ls given a bookmark name.

In [None]:
bookmark_name = "My Tutorial Bookmark"
subpath = ""  # appended to the bookmark's path; must not start with slash

# Scan the bookmarks client side for the first one with a matching name. A user
# can have at most 100 bookmarks, so fetching them all is reasonable.
matching = (b for b in tc.bookmark_list() if b["name"] == bookmark_name)
found = next(matching, None)
endpoint_id = None if found is None else found["endpoint_id"]
bookmark_path = None if found is None else found["path"]

if endpoint_id is not None:
    # List the directory the bookmark points at, plus the optional subpath.
    path = bookmark_path + subpath
    print("path =", path)
    for entry in tc.operation_ls(endpoint_id, path=path):
        print("'{}' {} [{}]".format(entry["name"], entry["type"], entry["size"]))
else:
    print("Bookmark with name '{}' not found".format(bookmark_name))

## Perform a transfer akin to `rsync -av --delete`.

In [None]:
import time

source_id = tutorial_endpoint_1
dest_id = tutorial_endpoint_2
tc.endpoint_autoactivate(source_id)
tc.endpoint_autoactivate(dest_id)

# This does not exactly match -a, for example it cannot preserve permissions or ownership.
# delete_destination_extra plays the part of --delete.
tdata = globus_sdk.TransferData(tc, source_id, dest_id,
                                delete_destination_extra=True,
                                preserve_timestamp=True)
tdata.add_item("/share/godata/", "/~/gw16_notebook_godata/", recursive=True)

submit_result = tc.submit_transfer(tdata)
task_id = submit_result["task_id"]

# Wait until the transfer is complete, by polling every 15 seconds and giving
# up after max_wait seconds.
# Use the fields query parameter to get only the field we care about. This
# decreases the size of the response, and is supported by most API resources.
status = tc.get_task(task_id, fields="status")["status"]
poll_interval = 15  # in seconds
max_wait = 360      # in seconds
wait_time = 0
while status not in ("SUCCEEDED", "FAILED") and wait_time < max_wait:
    print("Task not yet complete (status {}), sleeping for {} seconds...".format(
            status, poll_interval))
    time.sleep(poll_interval)
    wait_time += poll_interval
    status = tc.get_task(task_id, fields="status")["status"]

# Only claim completion when the task really reached a terminal status; the
# loop above can also end because max_wait ran out.
if status in ("SUCCEEDED", "FAILED"):
    print("Task completed with status", status)
else:
    print("Task did not complete before max wait time")

# The -v requires using the successful_transfers API, which happens to be missing from the REST documentation
# as of day 1 at GW 16. It will be added shortly, in the Task Management section. It is almost identical to the
# version available to endpoint administrators, described here (although still short on detail):
#  https://docs.globus.org/api/transfer/advanced_endpoint_management/#get_task_successful_transfers_as_admin
# It also doesn't yet have an SDK helper that automatically takes care of paging, so we do it manually here.
if status == "SUCCEEDED":
    next_marker = None
    while True:
        # The marker must be sent to the server as a query parameter; without
        # it every request returns the first page again and the loop never
        # ends once there is more than one page.
        # NOTE(review): `params` is the keyword used by the SDK release this
        # notebook targets; newer globus_sdk releases name it `query_params` -
        # confirm against the installed version.
        params = {} if next_marker is None else {"marker": next_marker}
        transfers = tc.get("/task/{}/successful_transfers".format(task_id),
                           params=params)
        for t in transfers["DATA"]:
            print(t["source_path"], "->", t["destination_path"])
        next_marker = transfers["next_marker"]
        if next_marker is None:
            break

## Transfer all files in a directory named `*.txt` to another endpoint.

In [None]:
source_id = tutorial_endpoint_1
dest_id = tutorial_endpoint_2
for endpoint in (source_id, dest_id):
    tc.endpoint_autoactivate(endpoint)

source_path = "/share/godata/"
dest_path = "/~/gw16_notebook_txt_godata/"

tdata = globus_sdk.TransferData(tc, source_id, dest_id)

# Note that the filter is applied by the REST server, not by the GridFTP endpoint,
# so the server still has to request the entire directory contents from GridFTP.
# Filtering reduces network traffic between the REST client and server, but the
# listing may still time out for very large directories because of the size of
# the REST service to GridFTP data stream.
txt_listing = tc.operation_ls(source_id, path=source_path, filter="name:~*.txt")

# Queue each matching file under the same name in the destination directory.
for entry in txt_listing:
    file_name = entry["name"]
    tdata.add_item(source_path + file_name, dest_path + file_name)

submit_result = tc.submit_transfer(tdata)
print("Task ID:", submit_result["task_id"])

## Perform a transfer, monitor for completion, and monitor the event log. If a fault occurs, then cancel the job for some fault types (e.g., file not found), but not others (e.g., permission denied).

In [None]:
import time
from datetime import datetime

source_id = tutorial_endpoint_1
dest_id = tutorial_endpoint_2
tc.endpoint_autoactivate(source_id)
tc.endpoint_autoactivate(dest_id)

tdata = globus_sdk.TransferData(tc, source_id, dest_id)
tdata.add_item("/share/godata/", "/~/gw16_notebook_godata/", recursive=True)
# Presumably a path that does not exist ("dne"), added so the task logs a
# fault for the monitoring loop below to react to.
tdata.add_item("/share/godata/dne.txt", "/~/gw16_notebook_godata/dne.txt")

submit_result = tc.submit_transfer(tdata)
task_id = submit_result["task_id"]

# Error codes that should cancel the task; any other fault is left alone.
cancel_on_errors = {"FILE_NOT_FOUND"}

# Wait until the transfer is complete, by polling every 15 seconds and giving
# up after max_wait seconds.
# Use the fields query parameter to get only the field we care about. This
# decreases the size of the response, and is supported by most API resources.
status = tc.get_task(task_id, fields="status")["status"]
poll_interval = 15  # in seconds
max_wait = 360      # in seconds
wait_time = 0
last_error_dt = None  # time of the newest error seen in an earlier poll
cancel = False
while status not in ("SUCCEEDED", "FAILED") and wait_time < max_wait:
    # Search the most recent errors for anything that should trigger a cancel,
    # stopping once we reach errors older than the newest one seen in a
    # previous iteration of the wait loop (the event list is sorted newest first).
    newest_error_dt = None
    for error in tc.task_event_list(task_id, filter="is_error:1"):
        error_dt = datetime.strptime(error["time"], "%Y-%m-%d %H:%M:%S+00:00")
        # Strict comparison: events sharing the watermark's second are
        # re-checked, so a late event with the same timestamp is not missed.
        if last_error_dt is not None and error_dt < last_error_dt:
            break
        if newest_error_dt is None:
            newest_error_dt = error_dt
        if error["code"] in cancel_on_errors:
            cancel = True
            break

    if cancel:
        # Cancel right away instead of sleeping through another poll interval.
        break

    # Move the watermark only after the whole scan. Updating it inside the
    # loop would make the second event look "already seen" and cut the scan
    # short.
    if newest_error_dt is not None:
        last_error_dt = newest_error_dt

    print("Task not yet complete (status {}), sleeping for {} seconds...".format(
          status, poll_interval))
    time.sleep(poll_interval)
    wait_time += poll_interval
    status = tc.get_task(task_id, fields="status")["status"]

if cancel:
    print("Encountered bad error, canceling task")
    tc.cancel_task(task_id)
elif status not in ("SUCCEEDED", "FAILED"):
    print("Task did not complete before max wait time")
else:
    print("Task completed with status", status)