# create_cmip6_globus_batch_files.ipynb
Create Globus batch files and scripts for ESGF CMIP6 data of interest.

B. Grandey, 2022.

In [1]:
# Shell out to `date` so the saved output records when this notebook was run
! date

Mon Apr 25 12:51:09 +08 2022


In [2]:
import datetime
import json
import pathlib
import re
import subprocess

import pandas as pd
import pyesgf
from pyesgf.search import SearchConnection

# Report the version of every package that exposes a __version__ attribute;
# packages without one are skipped silently
version_template = '{}.__version__ = {}'
for pkg in (json, pd, pyesgf, re):
    if hasattr(pkg, '__version__'):
        print(version_template.format(pkg.__name__, pkg.__version__))

json.__version__ = 2.0.9
pandas.__version__ = 1.3.5
pyesgf.__version__ = 0.3.0
re.__version__ = 2.2.1


## Base paths

In [3]:
# All paths are given relative to the current working directory and resolved to absolute paths.
# out_base: where batch files and scripts are saved
out_base = pathlib.Path('cmip6_globus_batch_files/').resolve()
# archive_base: where previously produced batch files and scripts are archived
archive_base = pathlib.Path('archive/').resolve()
# cache_dir: where the local cache for the search connection is saved
cache_dir = pathlib.Path('cache/').resolve()
# Create the archive and cache directories if they do not yet exist
for directory in (archive_base, cache_dir):
    directory.mkdir(exist_ok=True)

## Archive previous contents of out_base

In [4]:
# Move an existing out_base into a timestamped directory under archive_base
if out_base.exists():
    cwd = pathlib.Path.cwd()  # used only to report paths relative to the working directory
    now_str = datetime.datetime.now().strftime('%Y%m%d-%H%M')  # minute resolution
    archive_dir = archive_base / f'cmip6_globus_batch_files_{now_str}/'
    out_base.replace(archive_dir)
    print(f'Archived {out_base.relative_to(cwd)}/ to {archive_dir.relative_to(cwd)}/.')

Archived cmip6_globus_batch_files/ to archive/cmip6_globus_batch_files_20220425-1251/.


In [5]:
# Create out_base; exist_ok=True avoids an error if the directory already exists
out_base.mkdir(exist_ok=True)

## Establish search connection

In [6]:
# Establish search connection
search_node_url = 'https://esgf-node.llnl.gov/esg-search'
expire_after = datetime.timedelta(days=10)  # cache expiry
# Derive the cache location from cache_dir (defined with the other base paths)
# instead of repeating the directory name as a separate hard-coded string
cache_path = cache_dir.joinpath('pyesgf_cache')
conn = SearchConnection(search_node_url,
                        distrib=True,  # distributed search
                        cache=str(cache_path),  # enable local cache
                        expire_after=expire_after)
conn

<pyesgf.search.connection.SearchConnection at 0x7fa4b8f54670>

## Identify suitable sources (models) and members (ripf variants)
Do this by finding source-member pairs that fulfil the following requirements:
1. Monthly data are available for at least one of 'zostoga', 'zos', and 'tas' variables.
2. Data are available for 'piControl' experiment.
3. Data are available for at least one other experiment of interest ('abrupt-4xCO2', '1pctCO2', 'historical', 'ssp585', 'ssp370', 'ssp245', 'ssp126').
4. The member is an 'r1i1' variant (e.g. 'r1i1p1f1', 'r1i1p5f2').

In [7]:
%%time
# Create dictionary to hold available experiments (list) for each source-member pair (tuple)
source_member_experiment_dict = dict()
# Perform initial search for datasets matching requirements #1 & #3 above
ctx1 = conn.new_context(project='CMIP6',
                        variable=['zostoga', 'zos', 'tas'],
                        frequency='mon',
                        experiment_id=['1pctCO2', 'abrupt-4xCO2', 'historical',
                                       'ssp585', 'ssp370', 'ssp245', 'ssp126'])
# Loop over available sources
sources = sorted(ctx1.facet_counts['source_id'].keys())
for source_id in sources:
    # Constrain search to source, to identify available members
    ctx2 = ctx1.constrain(source_id=source_id)
    # Find r1i1 members (requirement #4)
    members = sorted(ctx2.facet_counts['member_id'].keys())
    members = [m for m in members if bool(re.match('r1i1', m))]
    # Loop over members
    for member_id in members:
        # Search for available experiments for this source-member pair
        ctx3 = conn.new_context(project='CMIP6',
                                variable=['zostoga', 'zos', 'tas'],
                                frequency='mon',
                                source_id=source_id,
                                member_id=member_id)
        experiments = sorted(ctx3.facet_counts['experiment_id'].keys())
        # Limit to experiments of interest
        experiments = [e for e in experiments if e in ['piControl', '1pctCO2', 'abrupt-4xCO2', 'historical',
                                                       'ssp585', 'ssp370', 'ssp245', 'ssp126']]
        # Are data available for both the historical and piControl experiments?
        if 'piControl' in experiments:
            # Save to dictionary
            source_member_experiment_dict[(source_id, member_id)] = experiments
            # Print
            print(f'{source_id} {member_id}: {experiments}')
# Summarise number of source-member pairs identified
print(f'{len(source_member_experiment_dict)} source-member pairs identified.')

ACCESS-CM2 r1i1p1f1: ['1pctCO2', 'abrupt-4xCO2', 'historical', 'piControl', 'ssp126', 'ssp245', 'ssp370', 'ssp585']
ACCESS-ESM1-5 r1i1p1f1: ['1pctCO2', 'abrupt-4xCO2', 'historical', 'piControl', 'ssp126', 'ssp245', 'ssp370', 'ssp585']
AWI-CM-1-1-MR r1i1p1f1: ['1pctCO2', 'abrupt-4xCO2', 'historical', 'piControl', 'ssp126', 'ssp245', 'ssp370', 'ssp585']
AWI-ESM-1-1-LR r1i1p1f1: ['1pctCO2', 'historical', 'piControl']
BCC-CSM2-MR r1i1p1f1: ['1pctCO2', 'abrupt-4xCO2', 'historical', 'piControl', 'ssp126', 'ssp245', 'ssp370', 'ssp585']
BCC-ESM1 r1i1p1f1: ['1pctCO2', 'abrupt-4xCO2', 'historical', 'piControl', 'ssp370']
CAMS-CSM1-0 r1i1p1f1: ['1pctCO2', 'abrupt-4xCO2', 'historical', 'piControl', 'ssp126', 'ssp245', 'ssp370', 'ssp585']
CAS-ESM2-0 r1i1p1f1: ['1pctCO2', 'abrupt-4xCO2', 'historical', 'piControl', 'ssp126', 'ssp245', 'ssp370', 'ssp585']
CESM2 r1i1p1f1: ['1pctCO2', 'abrupt-4xCO2', 'historical', 'piControl', 'ssp126', 'ssp245', 'ssp370', 'ssp585']
CESM2-FV2 r1i1p1f1: ['1pctCO2', 'abru

## Function to find Globus URLs and write Globus batch files for a source-member pair

If one wishes to find only one Globus URL for each unique NetCDF file, then it is faster to search for dataset results and then skip datasets with an instance_id that has already been processed.
However, NetCDF files may be missed in practice: some Globus URLs may be inaccessible due to problems with endpoint accessibility, non-existent paths etc.
Therefore, it makes sense to build in some redundancy by including every Globus URL.

In light of these considerations, the function below performs a file search.

In [8]:
def _get_globus_endpoint_name(globus_ep):
    """Return the display name of a Globus endpoint, or None if it cannot be determined.

    The name is looked up with the Globus CLI ('globus endpoint show -F json').
    The command is run without a shell, so the endpoint string (which originates from
    search results) is passed as a single argument and is never interpreted by a shell.
    """
    cmd = ['globus', 'endpoint', 'show', '-F', 'json', globus_ep]
    try:
        completed = subprocess.run(cmd, capture_output=True, text=True, check=True)
        return json.loads(completed.stdout).get('display_name')
    except (OSError, subprocess.CalledProcessError, ValueError) as err:
        # The name is only used for labelling output, so warn and carry on rather than abort
        # (json.JSONDecodeError is a subclass of ValueError)
        print(f'Could not get name of endpoint {globus_ep} ({err}).')
        return None


def write_globus_batch_files(source_id='ACCESS-CM2',
                             member_id='r1i1p1f1',
                             variables=['zostoga',],
                             frequency='mon',
                             experiments=['piControl', 'historical', 'ssp585', 'ssp370', 'ssp245', 'ssp126'],
                             conn=conn):
    """Find Globus URLs and write Globus batch files for a CMIP6 source-member pair.

    A file search is performed for each variable. Results marked as retracted are skipped.
    Every remaining result with a Globus URL is recorded (one record per unique URL), and one
    batch file per source endpoint is written to out_base/<endpoint>/. Progress is printed.

    Keyword arguments:
      source_id -- string: ESGF source_id / model (default 'ACCESS-CM2')
      member_id -- string; ESGF member_id / ripf variant (default 'r1i1p1f1')
      variables -- list: variables of interest (default ['zostoga',])
      frequency -- string: time frequency of variable (default 'mon')
      experiments -- list: experiment_id for experiments of interest
          (default ['piControl', 'historical', 'ssp585', 'ssp370', 'ssp245', 'ssp126'])
      conn -- pyesgf SearchConnection (default is a SearchConnection named 'conn')

    The list defaults are only read, never modified, by this function.

    Returns:
      batch_fn_ep_dict -- dict: names of the batch files written (keys) with corresponding endpoint (values)
    """
    print(f'---- {source_id} {member_id} ----')
    # Columns of the DataFrame that holds Globus info etc for search results
    columns = ['variable', 'table_id',  # table_id specific to realm (e.g. Omon)
               'filename',
               'globus_url',  # URL (suitably unique to also use as index)
               'globus_ep', 'globus_path',  # Globus source endpoint and path
               'dest_path']  # target path on destination endpoint
    # Records keyed by Globus URL; a repeated URL overwrites the earlier record but keeps its position.
    # The DataFrame is built once from these records instead of being enlarged one row at a time.
    url_records = dict()
    # Create set to hold unique filenames for all variables (used for calculating filename coverage)
    unique_fns_all = set()
    # Loop over variables
    for v in variables:
        # File search context
        ctx1 = conn.new_context(project='CMIP6',
                                source_id=source_id,
                                member_id=member_id,
                                variable=v,
                                frequency=frequency,
                                experiment_id=experiments,
                                latest=True,
                                search_type='File')
        # Create set to hold unique filenames for this variable
        unique_fns_v = set()
        # Perform search and loop over file results
        file_results = ctx1.search()
        print(f'{v}: {len(file_results)} file results to process.')
        for f in file_results:
            # Is result marked as retracted? If so, then skip.
            # (.get() treats a result without a 'retracted' field as not retracted.)
            if f.json.get('retracted'):
                continue
            # Add filename to unique_fns_v and unique_fns_all sets
            unique_fns_v.add(f.filename)
            unique_fns_all.add(f.filename)
            # Does Globus URL exist?
            globus_url = f.globus_url
            if not globus_url:
                continue
            # Identify endpoint
            globus_ep = globus_url.split('/')[0].replace('globus:', '')
            if len(globus_ep) != 36:
                print(f'globus_ep = "{globus_ep}" looks suspect. Skipping.')
                continue
            # Get table_id (e.g. Omon)
            table_id = f.json['table_id'][0]
            # Path on endpoint: everything after the first occurrence of the endpoint ID
            globus_path = globus_url.split(globus_ep, 1)[1]
            # Target path on local endpoint (relative to $GCP_EP_CMIP6 environment variable)
            instance_id = f.json['dataset_id'].split('|')[0]  # dataset's instance_id
            dest_path = f'{v}/{source_id}_{member_id}/{instance_id}/{f.filename}'
            # Save record
            url_records[globus_url] = {'variable': v, 'table_id': table_id,
                                       'filename': f.filename,
                                       'globus_url': globus_url,
                                       'globus_ep': globus_ep, 'globus_path': globus_path,
                                       'dest_path': dest_path}
        # Print number of URLs found
        n_urls = sum(rec['variable'] == v for rec in url_records.values())
        if n_urls:
            print(f'{v}: {n_urls} Globus URLs saved.')
        else:
            print(f'{v}: No Globus URLs saved.')
        # Print number of unique filenames found for this variable
        print(f'{v}: {len(unique_fns_v)} unique filenames found (including non-Globus results).')
    # Create DataFrame (indexed by Globus URL) from the records
    globus_df = pd.DataFrame(list(url_records.values()), index=list(url_records.keys()), columns=columns)
    # Dict to hold batch filenames (keys) and source endpoints (values)
    batch_fn_ep_dict = dict()
    # Loop over source endpoints
    for globus_ep in globus_df['globus_ep'].value_counts().index:
        # Select subset of data for this endpoint
        ep_df = globus_df[globus_df['globus_ep'] == globus_ep]
        # Calculate filename coverage for this endpoint
        coverage = ep_df['filename'].nunique() / len(unique_fns_all)
        # Get name of endpoint using Globus CLI (None if unavailable)
        ep_name = _get_globus_endpoint_name(globus_ep)
        print(f'{ep_name}: {len(ep_df)} files in batch ({coverage:.0%} coverage).')
        # Label for transfer
        if ep_df['table_id'].nunique() == 1:  # frequency label
            # .iloc gives the first row by position; the index holds URL strings, not integers
            freq_str = ep_df['table_id'].iloc[0]
        else:
            freq_str = frequency
        if len(variables) == 1:  # variable label
            var_str = variables[0]
        elif len(variables) == 2:
            var_str = '-'.join(variables)
        else:
            var_str = f'{len(variables)}vars-inc-{variables[0]}'
        if len(experiments) == 1:  # experiment label
            exp_str = experiments[0]
        else:
            exp_str = f'{len(experiments)}exps'
        batch_label = f'{source_id}_{member_id}_{freq_str}_{var_str}_{exp_str}_{globus_ep}'
        # Filename of batch file to write
        batch_fn = f'{batch_label}.txt'
        # Directory in which to write batch file (parents=True in case out_base does not exist yet)
        batch_dir = out_base.joinpath(globus_ep)
        batch_dir.mkdir(parents=True, exist_ok=True)
        # Write batch file
        with open(batch_dir.joinpath(batch_fn), 'w') as writer:
            writer.write(f'# Written by write_globus_batch_files() in create_cmip6_globus_batch_files.ipynb '
                         f'on {datetime.date.today()}.\n')
            writer.write(f'# Globus endpoint is {globus_ep} (Name: {ep_name}).\n')
            writer.write(f'# {len(ep_df)} files in batch ({coverage:.0%} coverage of filenames).\n')
            writer.write(f'# To activate source endpoint use Globus CLI:\n')
            writer.write(f'# globus endpoint activate --web {globus_ep}\n')
            writer.write(f'# To submit transfer use Globus CLI:\n')
            writer.write(f'# globus transfer {globus_ep} $GCP_EP_CMIP6 --batch {batch_fn} '
                         f'--preserve-mtime --fail-on-quota-errors --skip-source-errors --sync-level checksum '
                         f'--label "{batch_label}"\n')
            writer.write(f'# Replace $GCP_EP_CMIP6 with intended destination endpoint, including base path.\n')
            writer.write('\n')
            # One line per file: source path on the endpoint, then destination path
            for globus_path, dest_path in zip(ep_df['globus_path'], ep_df['dest_path']):
                writer.write(f'{globus_path} {dest_path}\n')
        # Report only after the file has been closed
        print(f'Written {batch_fn} ({len(ep_df)}, {coverage:.0%})')
        batch_fn_ep_dict[batch_fn] = globus_ep
    return batch_fn_ep_dict

## Write batch files for source-member pairs identified above

In [9]:
# Accumulates every batch_fn_ep_dict returned by write_globus_batch_files():
# batch filename (key) -> source endpoint (value)
main_batch_fn_ep_dict = {}

In [10]:
%%time
# Monthy Ocean data variables and experiments of interest
frequency = 'mon'
variables = ['zostoga', 'thetaoga',  # 1D ocean
             'zos', 'hfds', 'hfgeou', 'hfcorr']  # 2D ocean
experiments = ['piControl', '1pctCO2', 'abrupt-4xCO2', 'historical', 'ssp585', 'ssp370', 'ssp245', 'ssp126']
# Loop over source-member pairs
for source_id, member_id in source_member_experiment_dict.keys():
    temp_dict = write_globus_batch_files(source_id=source_id,
                                         member_id=member_id,
                                         variables=variables,
                                         frequency=frequency,
                                         experiments=experiments)
    main_batch_fn_ep_dict.update(temp_dict)  # update dictionary with new filenames and endpoints

---- ACCESS-CM2 r1i1p1f1 ----
zostoga: 36 file results to process.
zostoga: 36 Globus URLs saved.
zostoga: 10 unique filenames found (including non-Globus results).
thetaoga: 30 file results to process.
thetaoga: 30 Globus URLs saved.
thetaoga: 10 unique filenames found (including non-Globus results).
zos: 44 file results to process.
zos: 36 Globus URLs saved.
zos: 10 unique filenames found (including non-Globus results).
hfds: 34 file results to process.
hfds: 34 Globus URLs saved.
hfds: 10 unique filenames found (including non-Globus results).
hfgeou: 0 file results to process.
hfgeou: No Globus URLs saved.
hfgeou: 0 unique filenames found (including non-Globus results).
hfcorr: 0 file results to process.
hfcorr: No Globus URLs saved.
hfcorr: 0 unique filenames found (including non-Globus results).
LLNL ESGF: 40 files in batch (100% coverage).
Written ACCESS-CM2_r1i1p1f1_Omon_6vars-inc-zostoga_8exps_415a6320-e49c-11e5-9798-22000b9da45e.txt (40, 100%)
NCI ESGF: 40 files in batch (100%

NCI ESGF: 8 files in batch (67% coverage).
Written BCC-ESM1_r1i1p1f1_Omon_6vars-inc-zostoga_8exps_2058c7d6-a79f-11e6-9ad6-22000a1e3b52.txt (8, 67%)
None: 8 files in batch (67% coverage).
Written BCC-ESM1_r1i1p1f1_Omon_6vars-inc-zostoga_8exps_90282ada-ddac-11e8-8c90-0a1d4c5c824a.txt (8, 67%)
CEDA ESGF DN1: 8 files in batch (67% coverage).
Written BCC-ESM1_r1i1p1f1_Omon_6vars-inc-zostoga_8exps_ee3aa1a0-7e4c-11e6-afc4-22000b92c261.txt (8, 67%)
DKRZ ESGF CMIP6: 8 files in batch (67% coverage).
Written BCC-ESM1_r1i1p1f1_Omon_6vars-inc-zostoga_8exps_4981cd16-d651-11e6-9ccd-22000a1e3b52.txt (8, 67%)
---- CAMS-CSM1-0 r1i1p1f1 ----
zostoga: 0 file results to process.
zostoga: No Globus URLs saved.
zostoga: 0 unique filenames found (including non-Globus results).
thetaoga: 0 file results to process.
thetaoga: No Globus URLs saved.
thetaoga: 0 unique filenames found (including non-Globus results).
zos: 48 file results to process.
zos: 41 Globus URLs saved.
zos: 9 unique filenames found (including

thetaoga: 67 file results to process.
thetaoga: 67 Globus URLs saved.
thetaoga: 21 unique filenames found (including non-Globus results).
zos: 178 file results to process.
zos: 148 Globus URLs saved.
zos: 42 unique filenames found (including non-Globus results).
hfds: 143 file results to process.
hfds: 143 Globus URLs saved.
hfds: 42 unique filenames found (including non-Globus results).
hfgeou: 0 file results to process.
hfgeou: No Globus URLs saved.
hfgeou: 0 unique filenames found (including non-Globus results).
hfcorr: 0 file results to process.
hfcorr: No Globus URLs saved.
hfcorr: 0 unique filenames found (including non-Globus results).
NCAR ESGF Data Node: 105 files in batch (100% coverage).
Written CESM2-WACCM-FV2_r1i1p1f1_Omon_6vars-inc-zostoga_8exps_2bfe767c-a073-11e6-b0e1-22000b92c261.txt (105, 100%)
LLNL ESGF: 101 files in batch (96% coverage).
Written CESM2-WACCM-FV2_r1i1p1f1_Omon_6vars-inc-zostoga_8exps_415a6320-e49c-11e5-9798-22000b9da45e.txt (101, 96%)
CEDA ESGF DN1: 59

DKRZ ESGF CMIP6: 6 files in batch (18% coverage).
Written CMCC-ESM2_r1i1p1f1_Omon_6vars-inc-zostoga_8exps_4981cd16-d651-11e6-9ccd-22000a1e3b52.txt (6, 18%)
---- CNRM-CM6-1 r1i1p1f2 ----
zostoga: 36 file results to process.
zostoga: 28 Globus URLs saved.
zostoga: 8 unique filenames found (including non-Globus results).
thetaoga: 35 file results to process.
thetaoga: 27 Globus URLs saved.
thetaoga: 8 unique filenames found (including non-Globus results).
zos: 44 file results to process.
zos: 30 Globus URLs saved.
zos: 8 unique filenames found (including non-Globus results).
hfds: 40 file results to process.
hfds: 32 Globus URLs saved.
hfds: 8 unique filenames found (including non-Globus results).
hfgeou: 0 file results to process.
hfgeou: No Globus URLs saved.
hfgeou: 0 unique filenames found (including non-Globus results).
hfcorr: 0 file results to process.
hfcorr: No Globus URLs saved.
hfcorr: 0 unique filenames found (including non-Globus results).
LLNL ESGF: 32 files in batch (100% c

DKRZ ESGF CMIP6: 15 files in batch (50% coverage).
Written CanESM5-CanOE_r1i1p2f1_Omon_6vars-inc-zostoga_8exps_4981cd16-d651-11e6-9ccd-22000a1e3b52.txt (15, 50%)
CEDA ESGF DN1: 9 files in batch (30% coverage).
Written CanESM5-CanOE_r1i1p2f1_Omon_6vars-inc-zostoga_8exps_ee3aa1a0-7e4c-11e6-afc4-22000b92c261.txt (9, 30%)
NCI ESGF: 8 files in batch (27% coverage).
Written CanESM5-CanOE_r1i1p2f1_Omon_6vars-inc-zostoga_8exps_2058c7d6-a79f-11e6-9ad6-22000a1e3b52.txt (8, 27%)
---- E3SM-1-0 r1i1p1f1 ----
zostoga: 0 file results to process.
zostoga: No Globus URLs saved.
zostoga: 0 unique filenames found (including non-Globus results).
thetaoga: 615 file results to process.
thetaoga: 615 Globus URLs saved.
thetaoga: 193 unique filenames found (including non-Globus results).
zos: 698 file results to process.
zos: 565 Globus URLs saved.
zos: 193 unique filenames found (including non-Globus results).
hfds: 437 file results to process.
hfds: 437 Globus URLs saved.
hfds: 193 unique filenames found (i

hfds: 2760 file results to process.
hfds: 1588 Globus URLs saved.
hfds: 1172 unique filenames found (including non-Globus results).
hfgeou: 344 file results to process.
hfgeou: 172 Globus URLs saved.
hfgeou: 172 unique filenames found (including non-Globus results).
hfcorr: 2344 file results to process.
hfcorr: 1172 Globus URLs saved.
hfcorr: 1172 unique filenames found (including non-Globus results).
LLNL ESGF: 6032 files in batch (100% coverage).
Written EC-Earth3-CC_r1i1p1f1_Omon_6vars-inc-zostoga_8exps_415a6320-e49c-11e5-9798-22000b9da45e.txt (6032, 100%)
CEDA ESGF DN1: 3437 files in batch (57% coverage).
Written EC-Earth3-CC_r1i1p1f1_Omon_6vars-inc-zostoga_8exps_ee3aa1a0-7e4c-11e6-afc4-22000b92c261.txt (3437, 57%)
DKRZ ESGF CMIP6: 1172 files in batch (19% coverage).
Written EC-Earth3-CC_r1i1p1f1_Omon_6vars-inc-zostoga_8exps_4981cd16-d651-11e6-9ccd-22000a1e3b52.txt (1172, 19%)
---- EC-Earth3-Veg r1i1p1f1 ----
zostoga: 4420 file results to process.
zostoga: 3109 Globus URLs saved.
z

zostoga: 20 file results to process.
zostoga: 20 Globus URLs saved.
zostoga: 20 unique filenames found (including non-Globus results).
thetaoga: 250 file results to process.
thetaoga: 250 Globus URLs saved.
thetaoga: 60 unique filenames found (including non-Globus results).
zos: 287 file results to process.
zos: 224 Globus URLs saved.
zos: 85 unique filenames found (including non-Globus results).
hfds: 484 file results to process.
hfds: 484 Globus URLs saved.
hfds: 120 unique filenames found (including non-Globus results).
hfgeou: 0 file results to process.
hfgeou: No Globus URLs saved.
hfgeou: 0 unique filenames found (including non-Globus results).
hfcorr: 0 file results to process.
hfcorr: No Globus URLs saved.
hfcorr: 0 unique filenames found (including non-Globus results).
None: 275 files in batch (96% coverage).
Written GFDL-CM4_r1i1p1f1_Omon_6vars-inc-zostoga_8exps_9805b3ba-d9bf-11e5-976c-22000b9da45e.txt (275, 96%)
LLNL ESGF: 233 files in batch (82% coverage).
Written GFDL-CM4_

LLNL ESGF: 43 files in batch (34% coverage).
Written GISS-E2-1-G_r1i1p3f1_Omon_6vars-inc-zostoga_8exps_415a6320-e49c-11e5-9798-22000b9da45e.txt (43, 34%)
DKRZ ESGF CMIP6: 39 files in batch (31% coverage).
Written GISS-E2-1-G_r1i1p3f1_Omon_6vars-inc-zostoga_8exps_4981cd16-d651-11e6-9ccd-22000a1e3b52.txt (39, 31%)
NCI ESGF: 12 files in batch (10% coverage).
Written GISS-E2-1-G_r1i1p3f1_Omon_6vars-inc-zostoga_8exps_2058c7d6-a79f-11e6-9ad6-22000a1e3b52.txt (12, 10%)
---- GISS-E2-1-G r1i1p5f1 ----
zostoga: 23 file results to process.
zostoga: 13 Globus URLs saved.
zostoga: 10 unique filenames found (including non-Globus results).
thetaoga: 0 file results to process.
thetaoga: No Globus URLs saved.
thetaoga: 0 unique filenames found (including non-Globus results).
zos: 56 file results to process.
zos: 28 Globus URLs saved.
zos: 28 unique filenames found (including non-Globus results).
hfds: 65 file results to process.
hfds: 37 Globus URLs saved.
hfds: 28 unique filenames found (including non

zostoga: 12 file results to process.
zostoga: 8 Globus URLs saved.
zostoga: 4 unique filenames found (including non-Globus results).
thetaoga: 0 file results to process.
thetaoga: No Globus URLs saved.
thetaoga: 0 unique filenames found (including non-Globus results).
zos: 120 file results to process.
zos: 60 Globus URLs saved.
zos: 38 unique filenames found (including non-Globus results).
hfds: 65 file results to process.
hfds: 36 Globus URLs saved.
hfds: 29 unique filenames found (including non-Globus results).
hfgeou: 0 file results to process.
hfgeou: No Globus URLs saved.
hfgeou: 0 unique filenames found (including non-Globus results).
hfcorr: 0 file results to process.
hfcorr: No Globus URLs saved.
hfcorr: 0 unique filenames found (including non-Globus results).
LLNL ESGF: 71 files in batch (100% coverage).
Written GISS-E2-2-H_r1i1p1f1_Omon_6vars-inc-zostoga_8exps_415a6320-e49c-11e5-9798-22000b9da45e.txt (71, 100%)
CEDA ESGF DN1: 33 files in batch (46% coverage).
Written GISS-E2-

NCI ESGF: 40 files in batch (45% coverage).
Written IPSL-CM6A-LR_r1i1p1f1_Omon_6vars-inc-zostoga_8exps_2058c7d6-a79f-11e6-9ad6-22000a1e3b52.txt (40, 45%)
---- KACE-1-0-G r1i1p1f1 ----
zostoga: 0 file results to process.
zostoga: No Globus URLs saved.
zostoga: 0 unique filenames found (including non-Globus results).
thetaoga: 0 file results to process.
thetaoga: No Globus URLs saved.
thetaoga: 0 unique filenames found (including non-Globus results).
zos: 0 file results to process.
zos: No Globus URLs saved.
zos: 0 unique filenames found (including non-Globus results).
hfds: 0 file results to process.
hfds: No Globus URLs saved.
hfds: 0 unique filenames found (including non-Globus results).
hfgeou: 0 file results to process.
hfgeou: No Globus URLs saved.
hfgeou: 0 unique filenames found (including non-Globus results).
hfcorr: 0 file results to process.
hfcorr: No Globus URLs saved.
hfcorr: 0 unique filenames found (including non-Globus results).
---- KIOST-ESM r1i1p1f1 ----
zostoga: 0 fi

thetaoga: 0 file results to process.
thetaoga: No Globus URLs saved.
thetaoga: 0 unique filenames found (including non-Globus results).
zos: 94 file results to process.
zos: 75 Globus URLs saved.
zos: 19 unique filenames found (including non-Globus results).
hfds: 36 file results to process.
hfds: 23 Globus URLs saved.
hfds: 13 unique filenames found (including non-Globus results).
hfgeou: 0 file results to process.
hfgeou: No Globus URLs saved.
hfgeou: 0 unique filenames found (including non-Globus results).
hfcorr: 0 file results to process.
hfcorr: No Globus URLs saved.
hfcorr: 0 unique filenames found (including non-Globus results).
LLNL ESGF: 40 files in batch (100% coverage).
Written MIROC6_r1i1p1f1_Omon_6vars-inc-zostoga_8exps_415a6320-e49c-11e5-9798-22000b9da45e.txt (40, 100%)
CEDA ESGF DN1: 35 files in batch (88% coverage).
Written MIROC6_r1i1p1f1_Omon_6vars-inc-zostoga_8exps_ee3aa1a0-7e4c-11e6-afc4-22000b92c261.txt (35, 88%)
DKRZ ESGF CMIP6: 29 files in batch (72% coverage).


CEDA ESGF DN1: 15 files in batch (75% coverage).
Written NESM3_r1i1p1f1_Omon_6vars-inc-zostoga_8exps_ee3aa1a0-7e4c-11e6-afc4-22000b92c261.txt (15, 75%)
NCI ESGF: 5 files in batch (25% coverage).
Written NESM3_r1i1p1f1_Omon_6vars-inc-zostoga_8exps_2058c7d6-a79f-11e6-9ad6-22000a1e3b52.txt (5, 25%)
---- NorCPM1 r1i1p1f1 ----
zostoga: 8 file results to process.
zostoga: 6 Globus URLs saved.
zostoga: 2 unique filenames found (including non-Globus results).
thetaoga: 8 file results to process.
thetaoga: 6 Globus URLs saved.
thetaoga: 2 unique filenames found (including non-Globus results).
zos: 40 file results to process.
zos: 23 Globus URLs saved.
zos: 10 unique filenames found (including non-Globus results).
hfds: 34 file results to process.
hfds: 23 Globus URLs saved.
hfds: 10 unique filenames found (including non-Globus results).
hfgeou: 0 file results to process.
hfgeou: No Globus URLs saved.
hfgeou: 0 unique filenames found (including non-Globus results).
hfcorr: 0 file results to proc

CEDA ESGF DN1: 121 files in batch (100% coverage).
Written UKESM1-0-LL_r1i1p1f2_Omon_6vars-inc-zostoga_8exps_ee3aa1a0-7e4c-11e6-afc4-22000b92c261.txt (121, 100%)
DKRZ ESGF CMIP6: 66 files in batch (55% coverage).
Written UKESM1-0-LL_r1i1p1f2_Omon_6vars-inc-zostoga_8exps_4981cd16-d651-11e6-9ccd-22000a1e3b52.txt (66, 55%)
NCI ESGF: 29 files in batch (24% coverage).
Written UKESM1-0-LL_r1i1p1f2_Omon_6vars-inc-zostoga_8exps_2058c7d6-a79f-11e6-9ad6-22000a1e3b52.txt (29, 24%)
CPU times: user 5min 35s, sys: 24.7 s, total: 6min
Wall time: 2h 31min 10s


In [11]:
%%time
# Monthy atmospheric data variables and experiments of interest
frequency = 'mon'
variables = ['rlut', 'rsdt', 'rsut', 'tas']  # 2D atmos
experiments = ['piControl', '1pctCO2', 'abrupt-4xCO2', 'historical', 'ssp585', 'ssp370', 'ssp245', 'ssp126']
# Loop over source-member pairs
for source_id, member_id in source_member_experiment_dict.keys():
    temp_dict = write_globus_batch_files(source_id=source_id,
                                         member_id=member_id,
                                         variables=variables,
                                         frequency=frequency,
                                         experiments=experiments)
    main_batch_fn_ep_dict.update(temp_dict)  # update dictionary with new filenames and endpoints

---- ACCESS-CM2 r1i1p1f1 ----
rlut: 46 file results to process.
rlut: 36 Globus URLs saved.
rlut: 10 unique filenames found (including non-Globus results).
rsdt: 46 file results to process.
rsdt: 36 Globus URLs saved.
rsdt: 10 unique filenames found (including non-Globus results).
rsut: 46 file results to process.
rsut: 36 Globus URLs saved.
rsut: 10 unique filenames found (including non-Globus results).
tas: 46 file results to process.
tas: 36 Globus URLs saved.
tas: 10 unique filenames found (including non-Globus results).
LLNL ESGF: 40 files in batch (100% coverage).
Written ACCESS-CM2_r1i1p1f1_Amon_4vars-inc-rlut_8exps_415a6320-e49c-11e5-9798-22000b9da45e.txt (40, 100%)
CEDA ESGF DN1: 40 files in batch (100% coverage).
Written ACCESS-CM2_r1i1p1f1_Amon_4vars-inc-rlut_8exps_ee3aa1a0-7e4c-11e6-afc4-22000b92c261.txt (40, 100%)
NCI ESGF: 40 files in batch (100% coverage).
Written ACCESS-CM2_r1i1p1f1_Amon_4vars-inc-rlut_8exps_2058c7d6-a79f-11e6-9ad6-22000a1e3b52.txt (40, 100%)
DKRZ ESGF 

LLNL ESGF: 32 files in batch (89% coverage).
Written CAMS-CSM1-0_r1i1p1f1_Amon_4vars-inc-rlut_8exps_415a6320-e49c-11e5-9798-22000b9da45e.txt (32, 89%)
---- CAS-ESM2-0 r1i1p1f1 ----
rlut: 28 file results to process.
rlut: 12 Globus URLs saved.
rlut: 8 unique filenames found (including non-Globus results).
rsdt: 28 file results to process.
rsdt: 12 Globus URLs saved.
rsdt: 8 unique filenames found (including non-Globus results).
rsut: 28 file results to process.
rsut: 12 Globus URLs saved.
rsut: 8 unique filenames found (including non-Globus results).
tas: 28 file results to process.
tas: 12 Globus URLs saved.
tas: 8 unique filenames found (including non-Globus results).
CEDA ESGF DN1: 28 files in batch (88% coverage).
Written CAS-ESM2-0_r1i1p1f1_Amon_4vars-inc-rlut_8exps_ee3aa1a0-7e4c-11e6-afc4-22000b92c261.txt (28, 88%)
LLNL ESGF: 19 files in batch (59% coverage).
Written CAS-ESM2-0_r1i1p1f1_Amon_4vars-inc-rlut_8exps_415a6320-e49c-11e5-9798-22000b9da45e.txt (19, 59%)
NCI ESGF: 1 files 

globus_ep = "thudess" looks suspect. Skipping.
globus_ep = "thudess" looks suspect. Skipping.
globus_ep = "thudess" looks suspect. Skipping.
globus_ep = "thudess" looks suspect. Skipping.
globus_ep = "thudess" looks suspect. Skipping.
globus_ep = "thudess" looks suspect. Skipping.
rsut: 51 Globus URLs saved.
rsut: 16 unique filenames found (including non-Globus results).
tas: 83 file results to process.
globus_ep = "thudess" looks suspect. Skipping.
globus_ep = "thudess" looks suspect. Skipping.
globus_ep = "thudess" looks suspect. Skipping.
globus_ep = "thudess" looks suspect. Skipping.
globus_ep = "thudess" looks suspect. Skipping.
globus_ep = "thudess" looks suspect. Skipping.
globus_ep = "thudess" looks suspect. Skipping.
globus_ep = "thudess" looks suspect. Skipping.
globus_ep = "thudess" looks suspect. Skipping.
globus_ep = "thudess" looks suspect. Skipping.
globus_ep = "thudess" looks suspect. Skipping.
globus_ep = "thudess" looks suspect. Skipping.
globus_ep = "thudess" looks s

tas: 56 Globus URLs saved.
tas: 15 unique filenames found (including non-Globus results).
LLNL ESGF: 60 files in batch (100% coverage).
Written CanESM5_r1i1p1f1_Amon_4vars-inc-rlut_8exps_415a6320-e49c-11e5-9798-22000b9da45e.txt (60, 100%)
CEDA ESGF DN1: 60 files in batch (100% coverage).
Written CanESM5_r1i1p1f1_Amon_4vars-inc-rlut_8exps_ee3aa1a0-7e4c-11e6-afc4-22000b92c261.txt (60, 100%)
DKRZ ESGF CMIP6: 60 files in batch (100% coverage).
Written CanESM5_r1i1p1f1_Amon_4vars-inc-rlut_8exps_4981cd16-d651-11e6-9ccd-22000a1e3b52.txt (60, 100%)
NCI ESGF: 41 files in batch (68% coverage).
Written CanESM5_r1i1p1f1_Amon_4vars-inc-rlut_8exps_2058c7d6-a79f-11e6-9ad6-22000a1e3b52.txt (41, 68%)
---- CanESM5 r1i1p2f1 ----
rlut: 59 file results to process.
rlut: 46 Globus URLs saved.
rlut: 13 unique filenames found (including non-Globus results).
rsdt: 51 file results to process.
rsdt: 38 Globus URLs saved.
rsdt: 13 unique filenames found (including non-Globus results).
rsut: 64 file results to pro

Written EC-Earth3-AerChem_r1i1p1f1_Amon_4vars-inc-rlut_8exps_415a6320-e49c-11e5-9798-22000b9da45e.txt (3448, 100%)
CEDA ESGF DN1: 3448 files in batch (100% coverage).
Written EC-Earth3-AerChem_r1i1p1f1_Amon_4vars-inc-rlut_8exps_ee3aa1a0-7e4c-11e6-afc4-22000b92c261.txt (3448, 100%)
DKRZ ESGF CMIP6: 1937 files in batch (56% coverage).
Written EC-Earth3-AerChem_r1i1p1f1_Amon_4vars-inc-rlut_8exps_4981cd16-d651-11e6-9ccd-22000a1e3b52.txt (1937, 56%)
NCI ESGF: 230 files in batch (7% coverage).
Written EC-Earth3-AerChem_r1i1p1f1_Amon_4vars-inc-rlut_8exps_2058c7d6-a79f-11e6-9ad6-22000a1e3b52.txt (230, 7%)
---- EC-Earth3-CC r1i1p1f1 ----
rlut: 4520 file results to process.
rlut: 2176 Globus URLs saved.
rlut: 1172 unique filenames found (including non-Globus results).
rsdt: 4520 file results to process.
rsdt: 2176 Globus URLs saved.
rsdt: 1172 unique filenames found (including non-Globus results).
rsut: 4520 file results to process.
rsut: 2176 Globus URLs saved.
rsut: 1172 unique filenames found

rlut: 30 file results to process.
rlut: 24 Globus URLs saved.
rlut: 6 unique filenames found (including non-Globus results).
rsdt: 28 file results to process.
rsdt: 22 Globus URLs saved.
rsdt: 6 unique filenames found (including non-Globus results).
rsut: 27 file results to process.
rsut: 21 Globus URLs saved.
rsut: 6 unique filenames found (including non-Globus results).
tas: 30 file results to process.
tas: 24 Globus URLs saved.
tas: 6 unique filenames found (including non-Globus results).
None: 24 files in batch (100% coverage).
Written GFDL-ESM4_r1i1p1f1_Amon_4vars-inc-rlut_8exps_9805b3ba-d9bf-11e5-976c-22000b9da45e.txt (24, 100%)
LLNL ESGF: 24 files in batch (100% coverage).
Written GFDL-ESM4_r1i1p1f1_Amon_4vars-inc-rlut_8exps_415a6320-e49c-11e5-9798-22000b9da45e.txt (24, 100%)
CEDA ESGF DN1: 24 files in batch (100% coverage).
Written GFDL-ESM4_r1i1p1f1_Amon_4vars-inc-rlut_8exps_ee3aa1a0-7e4c-11e6-afc4-22000b92c261.txt (24, 100%)
NCI ESGF: 11 files in batch (46% coverage).
Written

tas: 127 file results to process.
tas: 75 Globus URLs saved.
tas: 26 unique filenames found (including non-Globus results).
DKRZ ESGF CMIP6: 104 files in batch (100% coverage).
Written GISS-E2-1-H_r1i1p1f1_Amon_4vars-inc-rlut_8exps_4981cd16-d651-11e6-9ccd-22000a1e3b52.txt (104, 100%)
NCI ESGF: 75 files in batch (72% coverage).
Written GISS-E2-1-H_r1i1p1f1_Amon_4vars-inc-rlut_8exps_2058c7d6-a79f-11e6-9ad6-22000a1e3b52.txt (75, 72%)
LLNL ESGF: 72 files in batch (69% coverage).
Written GISS-E2-1-H_r1i1p1f1_Amon_4vars-inc-rlut_8exps_415a6320-e49c-11e5-9798-22000b9da45e.txt (72, 69%)
CEDA ESGF DN1: 72 files in batch (69% coverage).
Written GISS-E2-1-H_r1i1p1f1_Amon_4vars-inc-rlut_8exps_ee3aa1a0-7e4c-11e6-afc4-22000b92c261.txt (72, 69%)
---- GISS-E2-1-H r1i1p1f2 ----
rlut: 147 file results to process.
rlut: 57 Globus URLs saved.
rlut: 45 unique filenames found (including non-Globus results).
rsdt: 143 file results to process.
rsdt: 53 Globus URLs saved.
rsdt: 45 unique filenames found (inclu

rlut: 125 file results to process.
rlut: 95 Globus URLs saved.
rlut: 28 unique filenames found (including non-Globus results).
rsdt: 123 file results to process.
rsdt: 93 Globus URLs saved.
rsdt: 28 unique filenames found (including non-Globus results).
rsut: 125 file results to process.
rsut: 95 Globus URLs saved.
rsut: 28 unique filenames found (including non-Globus results).
tas: 124 file results to process.
tas: 94 Globus URLs saved.
tas: 28 unique filenames found (including non-Globus results).
LLNL ESGF: 112 files in batch (100% coverage).
Written INM-CM5-0_r1i1p1f1_Amon_4vars-inc-rlut_8exps_415a6320-e49c-11e5-9798-22000b9da45e.txt (112, 100%)
CEDA ESGF DN1: 112 files in batch (100% coverage).
Written INM-CM5-0_r1i1p1f1_Amon_4vars-inc-rlut_8exps_ee3aa1a0-7e4c-11e6-afc4-22000b92c261.txt (112, 100%)
DKRZ ESGF CMIP6: 112 files in batch (100% coverage).
Written INM-CM5-0_r1i1p1f1_Amon_4vars-inc-rlut_8exps_4981cd16-d651-11e6-9ccd-22000a1e3b52.txt (112, 100%)
NCI ESGF: 41 files in batc

LLNL ESGF: 12 files in batch (100% coverage).
Written MIROC-ES2H_r1i1p2f2_Amon_4vars-inc-rlut_8exps_415a6320-e49c-11e5-9798-22000b9da45e.txt (12, 100%)
---- MIROC-ES2H r1i1p3f2 ----
rlut: 6 file results to process.
rlut: 3 Globus URLs saved.
rlut: 3 unique filenames found (including non-Globus results).
rsdt: 6 file results to process.
rsdt: 3 Globus URLs saved.
rsdt: 3 unique filenames found (including non-Globus results).
rsut: 6 file results to process.
rsut: 3 Globus URLs saved.
rsut: 3 unique filenames found (including non-Globus results).
tas: 7 file results to process.
tas: 4 Globus URLs saved.
tas: 3 unique filenames found (including non-Globus results).
LLNL ESGF: 12 files in batch (100% coverage).
Written MIROC-ES2H_r1i1p3f2_Amon_4vars-inc-rlut_8exps_415a6320-e49c-11e5-9798-22000b9da45e.txt (12, 100%)
DKRZ ESGF CMIP6: 1 files in batch (8% coverage).
Written MIROC-ES2H_r1i1p3f2_Amon_4vars-inc-rlut_8exps_4981cd16-d651-11e6-9ccd-22000a1e3b52.txt (1, 8%)
---- MIROC-ES2H r1i1p4f2 

DKRZ ESGF CMIP6: 40 files in batch (100% coverage).
Written MRI-ESM2-0_r1i1p1f1_Amon_4vars-inc-rlut_8exps_4981cd16-d651-11e6-9ccd-22000a1e3b52.txt (40, 100%)
LLNL ESGF: 26 files in batch (65% coverage).
Written MRI-ESM2-0_r1i1p1f1_Amon_4vars-inc-rlut_8exps_415a6320-e49c-11e5-9798-22000b9da45e.txt (26, 65%)
---- NESM3 r1i1p1f1 ----
rlut: 57 file results to process.
rlut: 35 Globus URLs saved.
rlut: 11 unique filenames found (including non-Globus results).
rsdt: 57 file results to process.
rsdt: 35 Globus URLs saved.
rsdt: 11 unique filenames found (including non-Globus results).
rsut: 57 file results to process.
rsut: 35 Globus URLs saved.
rsut: 11 unique filenames found (including non-Globus results).
tas: 55 file results to process.
tas: 33 Globus URLs saved.
tas: 11 unique filenames found (including non-Globus results).
CEDA ESGF DN1: 44 files in batch (100% coverage).
Written NESM3_r1i1p1f1_Amon_4vars-inc-rlut_8exps_ee3aa1a0-7e4c-11e6-afc4-22000b92c261.txt (44, 100%)
DKRZ ESGF CMIP6

rsut: 91 Globus URLs saved.
rsut: 34 unique filenames found (including non-Globus results).
tas: 153 file results to process.
tas: 118 Globus URLs saved.
tas: 35 unique filenames found (including non-Globus results).
CEDA ESGF DN1: 139 files in batch (100% coverage).
Written UKESM1-0-LL_r1i1p1f2_Amon_4vars-inc-rlut_8exps_ee3aa1a0-7e4c-11e6-afc4-22000b92c261.txt (139, 100%)
LLNL ESGF: 137 files in batch (99% coverage).
Written UKESM1-0-LL_r1i1p1f2_Amon_4vars-inc-rlut_8exps_415a6320-e49c-11e5-9798-22000b9da45e.txt (137, 99%)
DKRZ ESGF CMIP6: 77 files in batch (55% coverage).
Written UKESM1-0-LL_r1i1p1f2_Amon_4vars-inc-rlut_8exps_4981cd16-d651-11e6-9ccd-22000a1e3b52.txt (77, 55%)
NCI ESGF: 55 files in batch (40% coverage).
Written UKESM1-0-LL_r1i1p1f2_Amon_4vars-inc-rlut_8exps_2058c7d6-a79f-11e6-9ad6-22000a1e3b52.txt (55, 40%)
CPU times: user 8min 31s, sys: 32.7 s, total: 9min 4s
Wall time: 3h 41min 41s


## Write batch files for specific combinations of source-member pair, variables, frequency, and experiments
Specific custom combinations can be added in this section.

Caution: to reduce the risk of two transfers trying to write the same local file simultaneously, these custom combinations should not overlap with the batch files produced above. For example, if variable 'zostoga' is requested for all suitable source-member pairs above, then do not include 'zostoga' in the custom combinations below, unless it is for a source-member pair and/or experiment not covered above.

In [12]:
%%time
# List containing tuples of custom combinations (source_id, member_id, variables, frequency, experiments)
comb_list = [('ACCESS-CM2', 'r1i1p1f1', ['thetao',], 'mon', ['ssp585',],),  # test 3D variable
             ]
# Produce batch file for each custom combination
for comb in comb_list:
    source_id, member_id, variables, frequency, experiments = comb
    temp_dict = write_globus_batch_files(source_id=source_id,
                                         member_id=member_id,
                                         variables=variables,
                                         frequency=frequency,
                                         experiments=experiments)
    main_batch_fn_ep_dict.update(temp_dict)  # update dictionary with new filenames and endpoints

---- ACCESS-CM2 r1i1p1f1 ----
thetao: 65 file results to process.
thetao: 53 Globus URLs saved.
thetao: 29 unique filenames found (including non-Globus results).
NCI ESGF: 29 files in batch (100% coverage).
Written ACCESS-CM2_r1i1p1f1_Omon_thetao_ssp585_2058c7d6-a79f-11e6-9ad6-22000a1e3b52.txt (29, 100%)
LLNL ESGF: 12 files in batch (41% coverage).
Written ACCESS-CM2_r1i1p1f1_Omon_thetao_ssp585_415a6320-e49c-11e5-9798-22000b9da45e.txt (12, 41%)
CEDA ESGF DN1: 12 files in batch (41% coverage).
Written ACCESS-CM2_r1i1p1f1_Omon_thetao_ssp585_ee3aa1a0-7e4c-11e6-afc4-22000b92c261.txt (12, 41%)
CPU times: user 283 ms, sys: 87.6 ms, total: 371 ms
Wall time: 20.3 s


## Write main scripts for submission of Globus transfers
One main script for each source endpoint.

In [13]:
# Caution message advising users to wait until previously submitted Globus transfers have finished
caution_str = ' '.join([
    'Please do not run this script if previously submitted Globus transfers are still running',
    '(to reduce risk of two or more transfers trying to access same local file simultaneously,',
    'and in light of Globus limits).',
])

In [14]:
# Loop over source endpoints
eps_set = set(main_batch_fn_ep_dict.values())
for globus_ep in sorted(eps_set):
    # Get name of endpoint using Globus CLI
    ep_json = ! globus endpoint show -F json {globus_ep}
    ep_json = json.loads(''.join(ep_json))
    ep_name = ep_json['display_name']
    print(f'{globus_ep} (Name: {ep_name})')
    # Get list of batch filenames for this endpoint
    batch_fn_list = [fn for fn, ep in main_batch_fn_ep_dict.items() if ep==globus_ep]
    print(f'{len(batch_fn_list)} batch file(s).')
    # Script filename
    try:
        script_fn = f'0_main_{globus_ep}_{ep_name.replace(" ", "-")}.sh'
    except AttributeError:
        script_fn = f'0_main_{globus_ep}.sh'
    # Write script
    with open(out_base.joinpath(script_fn), 'w') as writer:
        writer.write(f'#!/usr/bin/zsh\n'  # use zsh
                     f'\n'
                     f'# Written by create_cmip6_globus_batch_files.ipynb on {datetime.date.today()}.\n'
                     f'# Globus endpoint is {globus_ep} (Name: {ep_name}).\n'
                     f'# {len(batch_fn_list)} batch file(s).\n'
                     f'# Caution:\n'
                     f'# {caution_str}\n'
                     f'# Environment variables:\n'
                     f'# $GCP_EP_CMIP6 should point to destination Globus endpoint, including desired path.\n'
                     f'# Usage:\n'
                     f'# zsh {script_fn}\n'
                     f'\n'
                     f'# Is endpoint activated?\n'
                     f'globus endpoint is-activated {globus_ep}\n'
                     f'if [ $? -ne 0 ]; then\n'
                     f'    echo "{globus_ep} is not activated. Please activate then re-run this script."\n'
                     f'    globus endpoint activate --web {globus_ep}\n'
                     f'    exit 1\n'
                     f'fi\n'
                     f'\n'
                     f'# Submit batch transfers\n'
                    )
        # Loop over batch files
        for batch_fn in batch_fn_list:
            batch_label = batch_fn.split('.')[0]
            writer.write(f'echo {batch_label}\n'
                         f'globus transfer {globus_ep} $GCP_EP_CMIP6 --batch {globus_ep}/{batch_fn} '
                         f'--preserve-mtime --fail-on-quota-errors --skip-source-errors --sync-level checksum '
                         f'--label "{batch_label}"\n'
                         f'sleep 1\n')
        writer.write('\n'
                     'exit 0\n')
        print(f'Written {script_fn}')

04cc625e-a79a-11e9-b595-0e56e8fd6d5a (Name: icscamscma#esg.camscma.cn)
2 batch file(s).
Written 0_main_04cc625e-a79a-11e9-b595-0e56e8fd6d5a_icscamscma#esg.camscma.cn.sh
2058c7d6-a79f-11e6-9ad6-22000a1e3b52 (Name: NCI ESGF)
114 batch file(s).
Written 0_main_2058c7d6-a79f-11e6-9ad6-22000a1e3b52_NCI-ESGF.sh
2bfe767c-a073-11e6-b0e1-22000b92c261 (Name: NCAR ESGF Data Node)
8 batch file(s).
Written 0_main_2bfe767c-a073-11e6-b0e1-22000b92c261_NCAR-ESGF-Data-Node.sh
4101e3a0-b7df-11eb-a16a-5fad80e6400b (Name: esgfcmcc#esgf-node2.cmcc.it)
4 batch file(s).
Written 0_main_4101e3a0-b7df-11eb-a16a-5fad80e6400b_esgfcmcc#esgf-node2.cmcc.it.sh
415a6320-e49c-11e5-9798-22000b9da45e (Name: LLNL ESGF)
134 batch file(s).
Written 0_main_415a6320-e49c-11e5-9798-22000b9da45e_LLNL-ESGF.sh
4981cd16-d651-11e6-9ccd-22000a1e3b52 (Name: DKRZ ESGF CMIP6)
124 batch file(s).
Written 0_main_4981cd16-d651-11e6-9ccd-22000a1e3b52_DKRZ-ESGF-CMIP6.sh
90282ada-ddac-11e8-8c90-0a1d4c5c824a (Name: None)
4 batch file(s).
Written

In [15]:
# Print the current date and time using the shell's date command
! date

Mon Apr 25 19:04:51 +08 2022
