# Tomo funcX Flow

This example creates a Globus Automate flow that uses funcX to run a tomopy reconstruction on Theta.

In [None]:
import os
import sys
import time
import shortuuid

from funcx.sdk.client import FuncXClient
from fair_research_login import NativeClient
from globus_automate_client import create_flows_client
from globus_automate_client.token_management import CLIENT_ID

# Authenticate

Authenticate with the funcX and Globus Automate clients.

Note: You will still need to grant access to the flow later on in this notebook.

In [None]:
# funcX client; used below to register the container and the flow's functions.
fxc = FuncXClient()

In [None]:
# Globus Automate flows client; used below to deploy, run and monitor the flow.
flows_client = create_flows_client(CLIENT_ID)

In [None]:
# Log in through a Globus native-app client and build the HTTP headers that
# the download functions send with their HTTPS requests.
_https_scopes = ['https://auth.globus.org/scopes/56ceac29-e98a-440a-a594-b41e7a084b62/all']
client = NativeClient(client_id='7414f0b4-7d05-4bb6-bb00-076fa3f17cf5')  ##inverted
tokens = client.login(requested_scopes=_https_scopes)
auth_token = tokens["petrel_https_server"]['access_token']
headers = dict(Authorization=f'Bearer {auth_token}')

# Test Setup
## Edit only the values in this section

In [None]:
# Container file name.
# It should be the same in the container repository and on the compute resource.
container_name = "tomo_example.simg"

In [None]:
# funcX endpoint ids and working directories used when running on Theta
theta_conf = dict(
    endpoint='8f2f2eab-90d2-45ba-a771-b96e6d530cad',
    local_endpoint='8f2f2eab-90d2-45ba-a771-b96e6d530cad',
    data_dir='/projects/APSDataAnalysis/Braid/data/Tomo/',
    proc_dir='/projects/APSDataAnalysis/Braid/process/',
    cont_dir='/home/rvescovi/.funcx/containers/',
)

In [None]:
# funcX endpoint ids and working directories used when running on Midway
midway_conf = dict(
    endpoint='159eeda6-d2c3-4e87-b9b6-98711a938b48',
    local_endpoint='dede52af-cee9-4e44-b017-ceb0c8f584cb',
    data_dir='/home/ravescovi/workspace/Braid/data/Tomo/',
    proc_dir='/home/ravescovi/workspace/Braid/process/',
    cont_dir='/home/ravescovi/.funcx/containers/',
)

In [None]:
# Choose which resource to run on: `theta_conf` or `midway_conf`.
# Every later cell reads endpoints and directories from `conf`.
conf = theta_conf

In [None]:
# Register the container with funcX.
# The flow downloads the image itself, but funcX still needs an id for it;
# that id is attached to the reconstruction function when it is registered.
_container_location = os.path.join(conf['cont_dir'], container_name)
tomo_cont_id = fxc.register_container(
    location=_container_location,
    container_type='singularity',
)

# funcX setup
Register the functions for the flow.

In [None]:
def download_cont(data):
    """Download the container image over HTTPS unless it is already present.

    Imports live inside the function because it is registered with funcX
    and executed on the remote endpoint.

    Args:
        data: Flow input dictionary. Keys read here:
            ``container_server_url`` -- base URL serving the container.
            ``container_name`` -- file name of the container image.
            ``headers`` -- HTTP headers sent with the request.
            ``container_path`` -- optional target directory; defaults to
            ``~/.funcx/containers``.

    Returns:
        Full local path of the container file.

    Raises:
        KeyError: if ``headers`` is missing.
        NameError: if the server URL or the container name is missing/empty.
        requests.HTTPError: if the server answers with an error status.
        RuntimeError: if the server answers with a non-error status other
            than 200.
    """
    import os

    server_url = data.get('container_server_url')
    container_name = data.get('container_name')
    headers = data['headers']

    # Treat missing, None and empty values alike.
    if not server_url:
        raise NameError('No container `server URL` specified')
    if not container_name:
        raise NameError('No container `name` specified')

    # URLs always use "/" regardless of the platform's path separator.
    container_url = f"{server_url.rstrip('/')}/{container_name}"

    # The container directory must be consistent with the location used when
    # the container was registered with funcX.
    container_path = data.get('container_path') or os.path.join(
        os.path.expanduser("~"), '.funcx/containers')
    os.makedirs(container_path, exist_ok=True)

    container_full_name = os.path.join(container_path, container_name)

    # Skip the download when the image is already on disk.
    if not os.path.isfile(container_full_name):
        # Imported lazily: a cached container needs no HTTP client.
        import requests

        r = requests.get(container_url, headers=headers)
        r.raise_for_status()  # raises for 4xx/5xx responses
        if r.status_code != 200:
            raise RuntimeError(
                f'Unexpected HTTP status {r.status_code} for {container_url}')
        with open(container_full_name, 'wb') as container_file:
            container_file.write(r.content)

    return container_full_name

# Register the function with funcX; the returned id is passed to the flow
# input as `download_cont_fxid`.
download_cont_fxid = fxc.register_function(download_cont)

In [None]:
def download_data(data):
    """Download the dataset over HTTPS unless it is already present.

    Imports live inside the function because it is registered with funcX
    and executed on the remote endpoint.

    Args:
        data: Flow input dictionary. Keys read here:
            ``data_dir`` -- directory where the dataset is stored.
            ``headers`` -- HTTP headers sent with the request.
            ``dataset_server_url`` -- base URL serving the dataset.
            ``dataset_name`` -- file name of the dataset.

    Returns:
        Full local path of the dataset file.

    Raises:
        KeyError: if one of the keys above is missing.
        requests.HTTPError: if the server answers with an error status.
        RuntimeError: if the server answers with a non-error status other
            than 200.
    """
    import os

    data_dir = data['data_dir']
    headers = data['headers']
    dataset_server = data['dataset_server_url']
    dataset_name = data['dataset_name']

    os.makedirs(data_dir, exist_ok=True)

    dataset_path = os.path.join(data_dir, dataset_name)
    # URLs always use "/" regardless of the platform's path separator.
    dataset_url = f"{dataset_server.rstrip('/')}/{dataset_name}"

    # Skip the download when the dataset is already on disk.
    if not os.path.isfile(dataset_path):
        # Imported lazily: a cached dataset needs no HTTP client.
        import requests

        r = requests.get(dataset_url, headers=headers)
        r.raise_for_status()  # raises for 4xx/5xx responses
        if r.status_code != 200:
            raise RuntimeError(
                f'Unexpected HTTP status {r.status_code} for {dataset_url}')
        with open(dataset_path, 'wb') as dataset_file:
            dataset_file.write(r.content)

    return dataset_path

# Register the function with funcX; the returned id is passed to the flow
# input as `download_data_fxid`.
download_data_fxid = fxc.register_function(download_data)

In [None]:
def tomo_recon(data):
    """Run ``tomopy recon`` on the downloaded dataset.

    Imports live inside the function because it is registered with funcX
    and executed on the remote endpoint.

    Args:
        data: Flow input dictionary. Keys read here:
            ``data_dir`` / ``dataset_name`` -- location of the input file.
            ``proc_dir`` -- output folder; created (with parents) if missing.
            ``recon_type`` -- optional reconstruction type, default ``"full"``.

    Returns:
        The captured standard output of the ``tomopy`` command (bytes).

    Raises:
        KeyError: if a required key is missing.
        RuntimeError: if ``tomopy`` exits with a non-zero status; the message
            carries the captured standard error.
    """
    import os
    import subprocess

    dataset_path = os.path.join(data['data_dir'], data['dataset_name'])

    # proc_dir is usually a per-run sub-folder, so its parent may not exist yet.
    proc_dir = data['proc_dir']
    os.makedirs(proc_dir, exist_ok=True)

    recon_type = data.get("recon_type", "full")

    # Build the argument list directly so paths containing spaces stay intact.
    cmd = [
        "tomopy", "recon",
        "--file-name", dataset_path,
        "--output-folder", proc_dir,
        "--reconstruction-type", str(recon_type),
    ]
    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if result.returncode != 0:
        # Surface the failure instead of reporting the step as successful.
        stderr_text = result.stderr.decode(errors='replace')
        raise RuntimeError(
            f'tomopy recon exited with status {result.returncode}: {stderr_text}')
    return result.stdout

# Register the reconstruction function together with the container id obtained
# from `fxc.register_container`; the returned id is passed to the flow input
# as `recon_fxid`.
recon_fxid = fxc.register_function(tomo_recon, container_uuid=tomo_cont_id)

# Flow Check Functions

In [None]:
from IPython.display import clear_output
import json
from pprint import pprint
from datetime import datetime, timezone

def flow_check(flow_action, refresh=10):
    """Poll a flow run and print its progress until it succeeds or fails.

    Relies on the notebook globals ``flows_client``, ``flow_id`` and
    ``flow_scope``.

    Args:
        flow_action: Response returned when the flow was started; its
            ``action_id`` and ``start_time`` entries are read.
        refresh: Seconds to sleep between two status requests.
    """
    # The action id does not change between polls, so read it once.
    flow_action_id = flow_action['action_id']
    start = datetime.fromisoformat(flow_action['start_time'])

    while True:
        flow_action = flows_client.flow_action_status(flow_id, flow_scope, flow_action_id)
        flow_status = flow_action['status']

        print(f'Flow status: {flow_status}')

        if flow_status == 'ACTIVE':
            now = datetime.now(timezone.utc)
            print(f'Time elapsed: {now - start}')
            # json.dumps already formats the text; print it as-is rather than
            # pretty-printing the escaped repr of the string.
            print(json.dumps(flow_action.data, indent=2, sort_keys=True))
        elif flow_status in ('FAILED', 'SUCCEEDED'):
            complete = datetime.fromisoformat(flow_action['completion_time'])
            print(f'Time elapsed: {complete - start}')
            if flow_status == 'FAILED':
                # Show the final state so the cause of the failure is visible.
                print(json.dumps(flow_action.data, indent=2, sort_keys=True))
            break

        # wait=True postpones clearing until the next output is produced.
        clear_output(wait=True)
        time.sleep(refresh)



# Create the flow

In [None]:
def _funcx_action_state(comment, endpoint, func, result_path, wait_time, next_state=None):
    """Build one funcX Action state; the state ends the flow when `next_state` is None."""
    state = {
        "Comment": comment,
        "Type": "Action",
        "ActionUrl": "https://api.funcx.org/automate",
        "ActionScope": "https://auth.globus.org/scopes/facd7ccc-c5f4-42aa-916b-a0e270e2c2a9/all",
        "Parameters": {
            "tasks": [{
                "endpoint.$": endpoint,
                "func.$": func,
                "payload.$": "$.input",
            }],
        },
        "ResultPath": result_path,
        "WaitTime": wait_time,
    }
    if next_state is None:
        state["End"] = True
    else:
        state["Next"] = next_state
    return state


# Three sequential funcX steps: fetch the container, fetch the data, reconstruct.
# The two downloads run on the "local" endpoint, the reconstruction on the
# compute endpoint.
flow_definition = {
    "Comment": "Tomo Reconstruction",
    "StartAt": "Download Container",
    "States": {
        "Download Container": _funcx_action_state(
            "Download the container",
            endpoint="$.input.funcx_local_ep",
            func="$.input.download_cont_fxid",
            result_path="$.Exec1Result",
            wait_time=600,
            next_state="Download Data",
        ),
        "Download Data": _funcx_action_state(
            "Download the data",
            endpoint="$.input.funcx_local_ep",
            func="$.input.download_data_fxid",
            result_path="$.Exec2Result",
            wait_time=600,
            next_state="Tomopy Recon",
        ),
        "Tomopy Recon": _funcx_action_state(
            "Reconstruct full tomogram",
            endpoint="$.input.funcx_ep",
            func="$.input.recon_fxid",
            result_path="$.Exec3Result",
            wait_time=3600,
        ),
    },
}

In [None]:
# Deploy the definition, then keep the id and scope needed to run and poll it.
flow = flows_client.deploy_flow(
    flow_definition,
    title="Tomo Example flow",
)
flow_id, flow_scope = flow['id'], flow['globus_auth_scope']
print(f'Newly created flow with id:\n{flow_id}')

# Define input for the flow

The input to the flow needs to specify what data to process, where it is located, and where to put it for analysis. The flow also requires the ids of the funcX functions and endpoints to use.

In [None]:
# Name of the processing folder that receives this run's results:
# the experiment name plus a short unique suffix.
experiment_name = 'braid_tomo'
run_name = '_'.join((experiment_name, shortuuid.uuid()))

print(run_name)

In [None]:
# Input document for the flow. Every state passes the whole "input" object as
# the payload of its funcX function, so the keys below are the ones the
# registered functions read.
flow_input = {
    "input": {
        # Container download (read by download_cont)
        "container_server_url":"https://45a53408-c797-11e6-9c33-22000a1e3b52.e.globus.org/Braid/containers",
        "container_name": container_name,
        "container_path": conf['cont_dir'],  # same directory used when the container was registered
        # NOTE(review): `headers` carries the bearer token obtained at login, so
        # the token becomes part of the flow input -- confirm this is acceptable.
        "headers": headers,

        # Single-file dataset download (read by download_data)
        "dataset_server_url": "https://45a53408-c797-11e6-9c33-22000a1e3b52.e.globus.org/Braid/data/tomo_example",
        "dataset_name": "tooth.h5",
        "data_dir": conf['data_dir'],

        # Processing: per-run output folder passed to tomopy
        "proc_dir": os.path.join(conf['proc_dir'],run_name),

        # tomopy-specific variables
        "recon_type": "full",

        # tomopy funcX function id
        "recon_fxid": recon_fxid,

        # Utility funcX function ids
        "download_cont_fxid": download_cont_fxid,
        "download_data_fxid": download_data_fxid,

        # funcX endpoints: `funcx_ep` runs the reconstruction,
        # `funcx_local_ep` runs the two downloads
        "funcx_ep": conf['endpoint'],
        "funcx_local_ep": conf['local_endpoint'],
    }
}

# Run the flow

This will require you to authenticate and grant access to the flow to use Transfer and funcX on your behalf.

The flow should take a couple of minutes.

In [None]:

# Start a run of the deployed flow; the response is handed to `flow_check`.
flow_action = flows_client.run_flow(flow_id, flow_scope, flow_input)


In [None]:
# Poll the run, sleeping one second between checks, until it succeeds or fails.
flow_check(flow_action, refresh=1)