# Transfer (copy) datasets and create manifest

This notebook shows how to copy a dataset (folder) from one Globus guest collection to another guest collection and create a manifest of the files copied. The manifest is then uploaded to the destination collection.

In [None]:
import json
import requests
from os.path import relpath
import globus_sdk

In [None]:
# TODO: Build links to the web app pages for transfer status and results
# TODO: Show how to find the same things through the web app UI
# (same information, different interfaces)

### Log in to Globus and get access tokens

In [None]:
# Adapted from the Globus Transfer API Exercises example notebook
# https://github.com/globus/globus-jupyter-notebooks/blob/master/Transfer_API_Exercises.ipynb

# Client ID handed to NativeAppAuthClient for the login flow below.
CLIENT_ID = "3b1925c0-a87b-452b-a492-2c9921d3bd14"

cheapandfair_collection = "7352d991-b0a0-49a2-830c-e8fe8c968ca2"  # collection "Cheap and FAIR Tutorial Datasets"
# Placeholder value: replace it with the UUID of your own collection before running.
your_srdr_collection = "gggggggg-hhhh-iiii-jjjj-kkkkkkkkkkkk"  # collection "SRDR Tutorial Collection {n}"

# Two scopes are requested: the Transfer service scope (used to build the
# TransferClient) and the collection's own "https" scope (its token is later
# sent as the Bearer token on the HTTPS upload).
SCOPES = [globus_sdk.scopes.TransferScopes.all,
         f'https://auth.globus.org/scopes/{your_srdr_collection}/https']

# May need to be set to "login" below, if you need to authorize a specific identity for your collection
PROMPT = None

# As in the Platform_Introduction_Native_App_Auth notebook, do the Native App Grant Flow.
# The client is created exactly once, right before the flow it drives is started.
native_auth_client = globus_sdk.NativeAppAuthClient(CLIENT_ID)
native_auth_client.oauth2_start_flow(requested_scopes=SCOPES)
print(f"Login Here:\n\n{native_auth_client.oauth2_get_authorize_url(prompt=PROMPT)}")

In [None]:
# Replace the placeholder with the authorization code produced by the login flow.
auth_code = "your auth code here"

# Exchange the code for tokens, then index them by resource server so the
# following cells can look each token up by name.
token_response = native_auth_client.oauth2_exchange_code_for_tokens(auth_code)
tokens = token_response.by_resource_server

### Get the Tokens

In [None]:
# Pull out the two token sets requested at login: one for the Transfer
# service, one keyed by the collection ID for HTTPS access.
transfer_tokens = tokens['transfer.api.globus.org']
collection_tokens = tokens[your_srdr_collection]

# Transfer client authorized with the Transfer access token.
transfer_access_token = transfer_tokens['access_token']
transfer_authorizer = globus_sdk.AccessTokenAuthorizer(transfer_access_token)
tc = globus_sdk.TransferClient(authorizer=transfer_authorizer)

# Kept as a plain string; it is sent as a Bearer token on the HTTPS requests.
https_token = collection_tokens['access_token']

### Get the base URL of the SRDR collection

In [None]:
# Fetch the collection's endpoint document; its 'https_server' field is used
# as the base URL for every HTTPS link and upload built later on.
srdr_coll_info = tc.get_endpoint(your_srdr_collection)
srdr_base_url = srdr_coll_info['https_server']

# Fail here with a clear message instead of a confusing TypeError later, when
# the base URL is concatenated with file paths.
if not srdr_base_url:
    raise ValueError(
        f'Collection {your_srdr_collection} did not report an https_server URL; '
        'HTTPS access must be available on the collection to build file URLs '
        'and upload the manifest'
    )

### Transfer (copy) the dataset to the SRDR collection

In [None]:
# Copy from the tutorial datasets collection into your own collection.
source_id, dest_id = cheapandfair_collection, your_srdr_collection

# Folder to copy and where it should land; dest_path is reused later when the
# manifest entries and the manifest upload URL are built.
source_path, dest_path = '/public/datasets/cmb/', '/datasets/cmb/'

# This does not exactly match -a (presumably rsync's archive flag): for
# example it cannot preserve permissions or ownership, only timestamps.
tdata = globus_sdk.TransferData(
    tc,
    source_id,
    dest_id,
    preserve_timestamp=True,
)

# A single recursive item covers the whole folder; sha256 is requested as the
# checksum algorithm for it.
tdata.add_item(
    source_path,
    dest_path,
    recursive=True,
    checksum_algorithm='sha256',
)

# Submit the task; the returned document carries the task_id used below.
submit_result = tc.submit_transfer(tdata)

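Wait until this transfer completes before continuing: the manifest built below lists only the files that the task has already transferred successfully.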

### Build the Manifest

In [None]:
# Build one manifest entry per successfully transferred file.
task_id = submit_result['task_id']

# The successful_transfers listing only covers files that have already been
# copied, so block until the task is no longer active. task_wait() returns
# False each time its timeout expires while the task is still running.
while not tc.task_wait(task_id, timeout=60, polling_interval=10):
    print(f'Transfer task {task_id} is still running...')

# A task can stop without succeeding; the manifest is then only partial.
task_status = tc.get_task(task_id)['status']
if task_status != 'SUCCEEDED':
    print(f'WARNING: transfer task {task_id} ended with status {task_status}; '
          'the manifest will only list the files that were transferred successfully')

manifest = []
next_marker = None
while True:
    # The listing is paginated: send the marker only once the service has
    # returned one, and stop when it reports there is no further page.
    query_params = {} if next_marker is None else {'marker': next_marker}
    transfers = tc.get(f"/task/{task_id}/successful_transfers", query_params=query_params)
    next_marker = transfers['next_marker']
    for t in transfers['DATA']:
        file_entry = {
            # Path of the file relative to the dataset folder.
            'filename': relpath(t['destination_path'], dest_path),
            'length': t['size'],
            # HTTPS link to the file on the destination collection.
            'url': srdr_base_url + t['destination_path'],
            # Keyed by the lower-cased algorithm name reported for the file.
            t['checksum_algorithm'].lower(): t['checksum']
        }
        manifest.append(file_entry)
    if next_marker is None:
        break

In [None]:
# Show every manifest entry as "key: value" lines, with a blank line between entries.
for entry in manifest:
    for field, value in entry.items():
        print(field + ': ' + str(value))
    print()

### Upload the Manifest

In [None]:
# Target URL of the manifest inside the destination folder. Slashes are
# normalised at each join so the URL never contains a doubled '/' (dest_path
# already starts with one), matching the file URLs stored in the manifest.
manifest_path = dest_path.rstrip('/') + '/manifest.json'
put_url = srdr_base_url.rstrip('/') + '/' + manifest_path.lstrip('/')

In [None]:
# Bearer-token header built from the collection's HTTPS access token; reused
# for every HTTPS request sent to the collection.
bearer_value = 'Bearer ' + https_token
headers = {'Authorization': bearer_value}

In [None]:
# Upload the manifest as a JSON document. Redirects are not followed, so a
# redirect response is visible in resp instead of being silently chased.
# The timeout keeps an unresponsive server from hanging the notebook.
resp = requests.put(
    put_url,
    headers=headers,
    json=manifest,
    allow_redirects=False,
    timeout=60,
)

In [None]:
# Report the outcome of the upload. Success is decided by the HTTP status
# code: redirects are not followed, so only a 2xx response means the file was
# stored (an empty body alone does not prove that).
if 200 <= resp.status_code < 300:
    print(f'PUT to {put_url} status {resp.status_code}')
    # Ask the server for the stored file's metadata to confirm the upload.
    stat = requests.head(put_url, headers=headers, allow_redirects=False, timeout=60)
    print('File info (HEAD)')
    for h in 'Content-Length', 'Content-Type':
        # A missing header is reported rather than raising KeyError.
        v = stat.headers.get(h, '(not provided)')
        print(f'{h}: {v}')
else:
    print(f'FAILED PUT to {put_url}')
    # Surface the server's answer so the cause can be diagnosed.
    print(f'Status {resp.status_code}: {resp.text}')
    print(f'Check permissions on collection at https://app.globus.org/file-manager/collections/{your_srdr_collection}/sharing')
