In [1]:
# Testing the downloading data method from:
# https://claut.gitlab.io/man_ccia/lab2.html
# Import the required modules
from pyesgf.search import SearchConnection
import os
import pandas as pd
import requests
from tqdm import tqdm

# Set the os environment to on
os.environ['ESGF_PYCLIENT_NO_FACETS_STAR_WARNING'] = "on"

In [2]:
# Import the functions
from testing_download_functions import query_data_esgf, extract_file_context, \
                                        download_file, extract_file_context_multithread, \
                                        check_file_exists_jasmin

In [3]:
# set the search connection
# to the LLNL search node
conn = SearchConnection('https://esgf-node.llnl.gov/esg-search', distrib=True)

In [4]:
# Find the models which have data on the esgf node for the following constraints
# The constraints are:
experiment_id = 'hist-GHG'
latest = True
variable_id = 'tas'
project = 'CMIP6'
table_id = 'Amon'
activity_id = 'DAMIP'

# Set up the params for the query
params = {
    "latest": latest,
    "project": project,
    "experiment_id": experiment_id,
    "variable_id": variable_id,
    "activity_id": activity_id,
    "table_id": table_id
}

# Query the database
query = conn.new_context(**params)

# Get the results
results = query.search()

print(len(results))

# Python
# Form a list of the unique 'source_id' values from the results
# Python
# Form a set of the unique 'source_id' values from the results
source_id_set = set(id for result in results for id in result.json['source_id'])

# Print the set
print(source_id_set)

597
{'CNRM-CM6-1', 'MRI-ESM2-0', 'CanESM5', 'GFDL-ESM4', 'E3SM-2-0', 'FGOALS-g3', 'CESM2', 'NorESM2-LM', 'ACCESS-CM2', 'HadGEM3-GC31-LL', 'MIROC6', 'ACCESS-ESM1-5', 'BCC-CSM2-MR', 'GISS-E2-1-G', 'E3SM-1-0', 'IPSL-CM6A-LR'}


In [5]:
print(len(source_id_set))

16


In [6]:
# # Constrain the source_id_set to the first 1 model
# source_id_set = list(source_id_set)[0:1]

# # Print the set
# print(source_id_set)

# Initialize an empty dictionary to store the results
max_results = {'source_id': None, 'data_node': None, 'num_results': 0}

# Create a list for the max_results dictionaries
max_results_list = []

# Set up the max results per source dictionary
max_results_per_source = {}

# Loop through the source_id_set and query which nodes have data for each model
for source_id in source_id_set:
    print("trying to find valid nodes for model: {}".format(source_id))
    # Set the source_id constraint
    params['source_id'] = source_id
    print(params)
    # Query the database
    model_query = conn.new_context(**params)
    # Get the results
    model_results = model_query.search()
    # Print the number of results
    print(len(model_results))

    # if the len of the model results is not 0
    if len(model_results) != 0:
        # Print the first result
        print(model_results[0].json['id'])

    # Identify the unique nodes (data_node) which have data for the model
    data_node_set = set(result.json['data_node'] for result in model_results)

    # Print the set
    print(data_node_set)

    # Loop through the data_node_set and query how many files are available for each 
    # node
    for data_node in data_node_set:
        print("trying to find valid files for node: {}".format(data_node))
        
        # Set up the params for the query
        params_node = params.copy()
        
        # Set the data_node constraint
        params_node['data_node'] = data_node
        # Query the database
        node_query = conn.new_context(**params_node)
        # Get the results
        node_results = node_query.search()
        # Print the number of results
        print(len(node_results))

        # If this source_id is not in max_results_per_source or this data_node has more results, update the dictionary
        if source_id not in max_results_per_source or len(node_results) > max_results_per_source[source_id]:
            max_results = {'source_id': source_id, 'data_node': data_node, 'num_results': len(node_results)}
            max_results_per_source[source_id] = len(node_results)

            # Append the max_results dictionary to the list
            max_results_list.append(max_results)
        else:
            print("this data_node has less results than the previous one")
            continue

# Print the dictionary
print(max_results_list)





trying to find valid nodes for model: CNRM-CM6-1
{'latest': True, 'project': 'CMIP6', 'experiment_id': 'hist-GHG', 'variable_id': 'tas', 'activity_id': 'DAMIP', 'table_id': 'Amon', 'source_id': 'CNRM-CM6-1'}


49
CMIP6.DAMIP.CNRM-CERFACS.CNRM-CM6-1.hist-GHG.r10i1p1f2.Amon.tas.gr.v20190308|aims3.llnl.gov
{'aims3.llnl.gov', 'esgf-data04.diasjp.net', 'esgf-data1.llnl.gov', 'esg1.umr-cnrm.fr', 'esgf3.dkrz.de', 'esgf.ceda.ac.uk', 'esgf.nci.org.au'}
trying to find valid files for node: aims3.llnl.gov
9
trying to find valid files for node: esgf-data04.diasjp.net
1
this data_node has less results than the previous one
trying to find valid files for node: esgf-data1.llnl.gov
1
this data_node has less results than the previous one
trying to find valid files for node: esg1.umr-cnrm.fr
10
trying to find valid files for node: esgf3.dkrz.de
10
this data_node has less results than the previous one
trying to find valid files for node: esgf.ceda.ac.uk
9
this data_node has less results than the previous one
trying to find valid files for node: esgf.nci.org.au
9
this data_node has less results than the previous one
trying to find valid nodes for model: MRI-ESM2-0
{'latest': True, 'project': 'CMIP6', 'experimen

In [7]:
# Clean the max_results_list to remove duplicate source_id entries
# Keep the entry with the highest number of results (num_results)
# Initialize an empty list to store the unique source_id entries
unique_source_id_list = []

# Loop through the max_results_list and append the unique source_id entries
for result in max_results_list:
    if result['source_id'] not in unique_source_id_list:
        unique_source_id_list.append(result['source_id'])

# Print the list
print(unique_source_id_list)

# Initialize an empty list to store the unique max_results_list entries
unique_max_results_list = []

# Loop through the unique_source_id_list and only
# Append the max_results_list entries which match the source_id and have the highest num_results
for source_id in unique_source_id_list:
    print("source_id: {}".format(source_id))
    # Initialize an empty list to store the num_results
    num_results_list = []
    # Loop through the max_results_list and append the num_results to the list
    for result in max_results_list:
        if result['source_id'] == source_id:
            num_results_list.append(result['num_results'])
    # Get the max num_results
    max_num_results = max(num_results_list)
    # Loop through the max_results_list and append the entries which match the source_id and max_num_results
    for result in max_results_list:
        if result['source_id'] == source_id and result['num_results'] == max_num_results:
            unique_max_results_list.append(result)

['CNRM-CM6-1', 'MRI-ESM2-0', 'CanESM5', 'GFDL-ESM4', 'E3SM-2-0', 'FGOALS-g3', 'CESM2', 'NorESM2-LM', 'ACCESS-CM2', 'HadGEM3-GC31-LL', 'MIROC6', 'ACCESS-ESM1-5', 'BCC-CSM2-MR', 'GISS-E2-1-G', 'E3SM-1-0', 'IPSL-CM6A-LR']
source_id: CNRM-CM6-1
source_id: MRI-ESM2-0
source_id: CanESM5
source_id: GFDL-ESM4
source_id: E3SM-2-0
source_id: FGOALS-g3
source_id: CESM2
source_id: NorESM2-LM
source_id: ACCESS-CM2
source_id: HadGEM3-GC31-LL
source_id: MIROC6
source_id: ACCESS-ESM1-5
source_id: BCC-CSM2-MR
source_id: GISS-E2-1-G
source_id: E3SM-1-0
source_id: IPSL-CM6A-LR


In [9]:
# Convert the unique_max_results_list to a dataframe
unique_max_results_df = pd.DataFrame.from_dict(unique_max_results_list)

# Print the dataframe
unique_max_results_df

# save the dataframe
# save in current directory + save_data + filename
save_dir = os.path.join(os.getcwd(), 'save_data')
save_filename = 'unique_max_results_df_{}_{}_{}_{}_{}.csv'.format(experiment_id, variable_id, project, table_id, activity_id)

# Form the save path
save_path = os.path.join(save_dir, save_filename)

# Check if the save directory exists
if not os.path.exists(save_dir):
    # Make the directory
    os.makedirs(save_dir)

# Save the dataframe
unique_max_results_df.to_csv(save_path)

In [7]:
save_dir = os.path.join(os.getcwd(), 'save_data')
save_filename = 'unique_max_results_df_{}_{}_{}_{}_{}.csv'.format(experiment_id, variable_id, project, table_id, activity_id)

# Form the save path
save_path = os.path.join(save_dir, save_filename)

In [10]:
# Open the save_path as a dataframe
unique_max_results_df = pd.read_csv(save_path)

results_list = []

# Loop over the dataframe to create a list of result sets
for i in range(len(unique_max_results_df)):
    
    # Get the source_id and data_node
    source_id = unique_max_results_df.loc[i, 'source_id']
    data_node = unique_max_results_df.loc[i, 'data_node']

    # Print the source_id and data_node
    print("source_id: {}, data_node: {}".format(source_id, data_node))

    # Print the experiment_id, variable_id, activity_id, and data_node
    print("experiment_id: {}, variable_id: {}, activity_id: {}, data_node: {}".format(experiment_id, variable_id, activity_id, data_node))

    results = query_data_esgf(conn,
                            source_id = source_id,
                            experiment_id = experiment_id,
                            variable_id = variable_id,
                            activity_id = activity_id,
                            data_node = data_node,
    )

    print(len(results))

    # Append the results to the results_list
    results_list.append(results)

# Print the results_list
print(results_list)

source_id: CNRM-CM6-1, data_node: esg1.umr-cnrm.fr
experiment_id: hist-GHG, variable_id: tas, activity_id: DAMIP, data_node: esg1.umr-cnrm.fr
10
source_id: MRI-ESM2-0, data_node: esgf-data03.diasjp.net
experiment_id: hist-GHG, variable_id: tas, activity_id: DAMIP, data_node: esgf-data03.diasjp.net
5
source_id: CanESM5, data_node: crd-esgf-drc.ec.gc.ca
experiment_id: hist-GHG, variable_id: tas, activity_id: DAMIP, data_node: crd-esgf-drc.ec.gc.ca
50
source_id: GFDL-ESM4, data_node: esgdata.gfdl.noaa.gov
experiment_id: hist-GHG, variable_id: tas, activity_id: DAMIP, data_node: esgdata.gfdl.noaa.gov
1
source_id: E3SM-2-0, data_node: esgf-data2.llnl.gov
experiment_id: hist-GHG, variable_id: tas, activity_id: DAMIP, data_node: esgf-data2.llnl.gov
5
source_id: FGOALS-g3, data_node: esg.lasg.ac.cn
experiment_id: hist-GHG, variable_id: tas, activity_id: DAMIP, data_node: esg.lasg.ac.cn
3
source_id: CESM2, data_node: esgf-data.ucar.edu
experiment_id: hist-GHG, variable_id: tas, activity_id: DAM

In [11]:
# Set up a list for the file context
file_context_list = []

# Loop through the results_list and print ther number of results
for results in results_list:
    print(len(results))

    # Extract the file context from the results
    file_context = extract_file_context(results)

    # Append the file_context to the file_context_list
    file_context_list.append(file_context)

# Print the file_context_list
print(file_context_list)

10
Extracting file context for 10 datasets...
Processed 1 out of 10 results.
Processed 2 out of 10 results.
Processed 3 out of 10 results.
Processed 4 out of 10 results.
Processed 5 out of 10 results.
Processed 6 out of 10 results.
Processed 7 out of 10 results.
Processed 8 out of 10 results.
Processed 9 out of 10 results.
Processed 10 out of 10 results.
5
Extracting file context for 5 datasets...
Processed 1 out of 5 results.
Processed 2 out of 5 results.
Processed 3 out of 5 results.
Processed 4 out of 5 results.
Processed 5 out of 5 results.
50
Extracting file context for 50 datasets...
Processed 1 out of 50 results.
Processed 2 out of 50 results.
Processed 3 out of 50 results.
Processed 4 out of 50 results.
Processed 5 out of 50 results.
Processed 6 out of 50 results.
Processed 7 out of 50 results.
Processed 8 out of 50 results.
Processed 9 out of 50 results.
Processed 10 out of 50 results.
Processed 11 out of 50 results.
Processed 12 out of 50 results.
Processed 13 out of 50 resul

In [15]:
# Create an empty DataFrame
df = pd.DataFrame()

# Loop through the file_context_list
for file_context in file_context_list:
    # Convert the dictionary to a DataFrame
    temp_df = pd.DataFrame.from_dict(file_context)

    # Append the temp_df to the main df
    df = df.append(temp_df, ignore_index=True)

# Print the df
df

  df = df.append(temp_df, ignore_index=True)
  df = df.append(temp_df, ignore_index=True)
  df = df.append(temp_df, ignore_index=True)
  df = df.append(temp_df, ignore_index=True)
  df = df.append(temp_df, ignore_index=True)
  df = df.append(temp_df, ignore_index=True)
  df = df.append(temp_df, ignore_index=True)
  df = df.append(temp_df, ignore_index=True)
  df = df.append(temp_df, ignore_index=True)
  df = df.append(temp_df, ignore_index=True)
  df = df.append(temp_df, ignore_index=True)
  df = df.append(temp_df, ignore_index=True)
  df = df.append(temp_df, ignore_index=True)
  df = df.append(temp_df, ignore_index=True)
  df = df.append(temp_df, ignore_index=True)
  df = df.append(temp_df, ignore_index=True)


Unnamed: 0,filename,url
0,tas_Amon_CNRM-CM6-1_hist-GHG_r9i1p1f2_gr_18500...,http://esg1.umr-cnrm.fr/thredds/fileServer/CMI...
1,tas_Amon_CNRM-CM6-1_hist-GHG_r10i1p1f2_gr_1850...,http://esg1.umr-cnrm.fr/thredds/fileServer/CMI...
2,tas_Amon_CNRM-CM6-1_hist-GHG_r5i1p1f2_gr_18500...,http://esg1.umr-cnrm.fr/thredds/fileServer/CMI...
3,tas_Amon_CNRM-CM6-1_hist-GHG_r4i1p1f2_gr_18500...,http://esg1.umr-cnrm.fr/thredds/fileServer/CMI...
4,tas_Amon_CNRM-CM6-1_hist-GHG_r8i1p1f2_gr_18500...,http://esg1.umr-cnrm.fr/thredds/fileServer/CMI...
...,...,...
543,tas_Amon_IPSL-CM6A-LR_hist-GHG_r3i1p1f1_gr_185...,http://aims3.llnl.gov/thredds/fileServer/css03...
544,tas_Amon_IPSL-CM6A-LR_hist-GHG_r1i1p1f1_gr_185...,http://aims3.llnl.gov/thredds/fileServer/css03...
545,tas_Amon_IPSL-CM6A-LR_hist-GHG_r4i1p1f1_gr_185...,http://aims3.llnl.gov/thredds/fileServer/css03...
546,tas_Amon_IPSL-CM6A-LR_hist-GHG_r8i1p1f1_gr_185...,http://aims3.llnl.gov/thredds/fileServer/css03...


In [21]:
# reload for modules
import importlib
import sys

# import the functions
_ = importlib.reload(sys.modules['testing_download_functions'])

from testing_download_functions import query_data_esgf, extract_file_context, \
                                        download_file, extract_file_context_multithread, \
                                        check_file_exists_jasmin

In [22]:
# Check whether the files exist on JASMIN
import glob

# Set up the file_context_df
file_context_df = df.copy()

# We want to verify whether these files exist on JASMIN
damip_dir = "/badc/cmip6/data/CMIP6/DAMIP/"

# Test the function
files_df = check_file_exists_jasmin(file_context_df, damip_dir)

files_df

CNRM-CERFACS
File exists for tas_Amon_CNRM-CM6-1_hist-GHG_r9i1p1f2_gr_185001-202012.nc
CNRM-CERFACS
File exists for tas_Amon_CNRM-CM6-1_hist-GHG_r10i1p1f2_gr_185001-202012.nc
CNRM-CERFACS
File exists for tas_Amon_CNRM-CM6-1_hist-GHG_r5i1p1f2_gr_185001-202012.nc
CNRM-CERFACS
File exists for tas_Amon_CNRM-CM6-1_hist-GHG_r4i1p1f2_gr_185001-202012.nc
CNRM-CERFACS
File exists for tas_Amon_CNRM-CM6-1_hist-GHG_r8i1p1f2_gr_185001-202012.nc
CNRM-CERFACS
File exists for tas_Amon_CNRM-CM6-1_hist-GHG_r1i1p1f2_gr_185001-202012.nc
CNRM-CERFACS
File exists for tas_Amon_CNRM-CM6-1_hist-GHG_r6i1p1f2_gr_185001-202012.nc
CNRM-CERFACS
File exists for tas_Amon_CNRM-CM6-1_hist-GHG_r2i1p1f2_gr_185001-202012.nc
CNRM-CERFACS
File exists for tas_Amon_CNRM-CM6-1_hist-GHG_r7i1p1f2_gr_185001-202012.nc
CNRM-CERFACS
File does not exist for tas_Amon_CNRM-CM6-1_hist-GHG_r3i1p1f2_gr_185001-202012.nc
MRI
File exists for tas_Amon_MRI-ESM2-0_hist-GHG_r2i1p1f1_gn_185001-202012.nc
MRI
File exists for tas_Amon_MRI-ESM2-0_his

Unnamed: 0,filename,url,file_exists,filepath
0,tas_Amon_CNRM-CM6-1_hist-GHG_r9i1p1f2_gr_18500...,http://esg1.umr-cnrm.fr/thredds/fileServer/CMI...,True,/badc/cmip6/data/CMIP6/DAMIP/CNRM-CERFACS/CNRM...
1,tas_Amon_CNRM-CM6-1_hist-GHG_r10i1p1f2_gr_1850...,http://esg1.umr-cnrm.fr/thredds/fileServer/CMI...,True,/badc/cmip6/data/CMIP6/DAMIP/CNRM-CERFACS/CNRM...
2,tas_Amon_CNRM-CM6-1_hist-GHG_r5i1p1f2_gr_18500...,http://esg1.umr-cnrm.fr/thredds/fileServer/CMI...,True,/badc/cmip6/data/CMIP6/DAMIP/CNRM-CERFACS/CNRM...
3,tas_Amon_CNRM-CM6-1_hist-GHG_r4i1p1f2_gr_18500...,http://esg1.umr-cnrm.fr/thredds/fileServer/CMI...,True,/badc/cmip6/data/CMIP6/DAMIP/CNRM-CERFACS/CNRM...
4,tas_Amon_CNRM-CM6-1_hist-GHG_r8i1p1f2_gr_18500...,http://esg1.umr-cnrm.fr/thredds/fileServer/CMI...,True,/badc/cmip6/data/CMIP6/DAMIP/CNRM-CERFACS/CNRM...
...,...,...,...,...
543,tas_Amon_IPSL-CM6A-LR_hist-GHG_r3i1p1f1_gr_185...,http://aims3.llnl.gov/thredds/fileServer/css03...,True,/badc/cmip6/data/CMIP6/DAMIP/IPSL/IPSL-CM6A-LR...
544,tas_Amon_IPSL-CM6A-LR_hist-GHG_r1i1p1f1_gr_185...,http://aims3.llnl.gov/thredds/fileServer/css03...,True,/badc/cmip6/data/CMIP6/DAMIP/IPSL/IPSL-CM6A-LR...
545,tas_Amon_IPSL-CM6A-LR_hist-GHG_r4i1p1f1_gr_185...,http://aims3.llnl.gov/thredds/fileServer/css03...,True,/badc/cmip6/data/CMIP6/DAMIP/IPSL/IPSL-CM6A-LR...
546,tas_Amon_IPSL-CM6A-LR_hist-GHG_r8i1p1f1_gr_185...,http://aims3.llnl.gov/thredds/fileServer/css03...,True,/badc/cmip6/data/CMIP6/DAMIP/IPSL/IPSL-CM6A-LR...


In [23]:
# We only want to download the files which don't already exist on JASMIN
# Set up the download directory
download_dir = "/gws/nopw/j04/scenario/users/benhutch/"

# Loop through files_df and download the files
# First constrain the dataframe to only the files which don't already exist on JASMIN
files_df = files_df[files_df['file_exists'] == False]

# Reset the index
files_df.reset_index(drop=True, inplace=True)

# Loop through the files_df and download the files
for i in tqdm(range(len(files_df))):
    # Get the file_url
    file_url = files_df.loc[i, 'url']

    # Get the filename
    filename = files_df.loc[i, 'filename']

    # Split the filename and extract the variable name
    variable = filename.split('_')[0]

    # Split the filename to get the experiment name
    experiment = filename.split('_')[3]

    # Set up the model
    model = filename.split('_')[2]

    # Set up the download directory
    download_dir_loop = os.path.join(download_dir, experiment, variable, model)

    # If the download directory doesn't exist, make it
    if not os.path.exists(download_dir_loop):
        os.makedirs(download_dir_loop)

    # Set up the download path
    download_path = os.path.join(download_dir_loop, filename)

    # In the filepath column of the dataframe
    # replace the current file path with the download path
    files_df.loc[i, 'filepath'] = download_path

    # Set up the request
    r = requests.get(file_url, stream=True)

    # Set up the total size
    total_size = int(r.headers.get('content-length', 0))
    
    # Set up the block size
    block_size = 1024

    # Download the file
    with open(download_path, 'wb') as f:
        for data in tqdm(r.iter_content(block_size), 
                        total = total_size//block_size, 
                        unit = 'KiB', 
                        unit_scale = True):
            f.write(data)

        # If the total size is no 0
        if total_size != 0:
            print("File is not empty")
            print("Download complete - file saved to {}".format(download_path))


  0%|          | 0/266 [00:00<?, ?it/s]

145kKiB [00:16, 8.91kKiB/s]                         


File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CNRM-CM6-1/tas_Amon_CNRM-CM6-1_hist-GHG_r3i1p1f2_gr_185001-202012.nc


228kKiB [01:26, 2.62kKiB/s]                       
  1%|          | 2/266 [01:48<4:25:58, 60.45s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/MRI-ESM2-0/tas_Amon_MRI-ESM2-0_hist-GHG_r4i1p1f1_gn_185001-202012.nc


53.1kKiB [00:07, 7.56kKiB/s]                          
  1%|          | 3/266 [01:58<2:43:40, 37.34s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r10i1p2f1_gn_185001-202012.nc


53.1kKiB [00:07, 7.16kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r11i1p2f1_gn_185001-202012.nc


53.1kKiB [00:02, 26.1kKiB/s]                          
  2%|▏         | 5/266 [02:16<1:23:51, 19.28s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r12i1p1f1_gn_185001-202012.nc


53.1kKiB [00:02, 26.4kKiB/s]                          
  2%|▏         | 6/266 [02:19<59:19, 13.69s/it]  

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r13i1p1f1_gn_185001-202012.nc


53.1kKiB [00:05, 9.84kKiB/s]                          
  3%|▎         | 7/266 [02:25<48:30, 11.24s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r11i1p1f1_gn_185001-202012.nc


53.1kKiB [00:13, 4.01kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r12i1p2f1_gn_185001-202012.nc


53.1kKiB [00:06, 8.36kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r13i1p2f1_gn_185001-202012.nc


53.1kKiB [00:04, 12.0kKiB/s]                          
  4%|▍         | 10/266 [02:52<38:42,  9.07s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r14i1p1f1_gn_185001-202012.nc


53.1kKiB [00:08, 6.24kKiB/s]                          
  4%|▍         | 11/266 [03:01<38:41,  9.11s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r16i1p1f1_gn_185001-202012.nc


53.1kKiB [00:14, 3.70kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r14i1p2f1_gn_185001-202012.nc


53.1kKiB [00:03, 15.2kKiB/s]                          
  5%|▍         | 13/266 [03:21<37:39,  8.93s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r15i1p1f1_gn_185001-202012.nc


53.1kKiB [00:15, 3.38kKiB/s]                          
  5%|▌         | 14/266 [03:37<47:07, 11.22s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r15i1p2f1_gn_185001-202012.nc


53.1kKiB [00:07, 7.37kKiB/s]                          
  6%|▌         | 15/266 [03:45<42:53, 10.25s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r16i1p2f1_gn_185001-202012.nc


53.1kKiB [00:14, 3.78kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r17i1p1f1_gn_185001-202012.nc


53.1kKiB [00:05, 10.5kKiB/s]                          
  6%|▋         | 17/266 [04:07<41:49, 10.08s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r17i1p2f1_gn_185001-202012.nc


53.1kKiB [00:02, 25.1kKiB/s]                          
  7%|▋         | 18/266 [04:10<32:41,  7.91s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r18i1p1f1_gn_185001-202012.nc


53.1kKiB [00:29, 1.81kKiB/s]                          
  7%|▋         | 19/266 [04:40<1:00:00, 14.58s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r18i1p2f1_gn_185001-202012.nc


53.1kKiB [00:16, 3.32kKiB/s]                          
  8%|▊         | 20/266 [04:56<1:02:20, 15.21s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r19i1p1f1_gn_185001-202012.nc


53.1kKiB [00:07, 7.43kKiB/s]                          
  8%|▊         | 21/266 [05:04<53:01, 12.99s/it]  

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r19i1p2f1_gn_185001-202012.nc


53.1kKiB [00:06, 8.74kKiB/s]                          
  8%|▊         | 22/266 [05:11<45:19, 11.15s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r21i1p2f1_gn_185001-202012.nc


53.1kKiB [00:20, 2.55kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r23i1p1f1_gn_185001-202012.nc


53.1kKiB [00:09, 5.43kKiB/s]                          
  9%|▉         | 24/266 [05:44<53:37, 13.30s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r9i1p2f1_gn_185001-202012.nc


53.1kKiB [00:26, 2.00kKiB/s]                          
  9%|▉         | 25/266 [06:11<1:10:23, 17.53s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r22i1p1f1_gn_185001-202012.nc


53.1kKiB [00:02, 18.9kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r24i1p1f1_gn_185001-202012.nc


53.1kKiB [00:02, 26.3kKiB/s]                          
 10%|█         | 27/266 [06:18<40:34, 10.19s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r25i1p2f1_gn_185001-202012.nc


53.1kKiB [00:02, 25.8kKiB/s]                          
 11%|█         | 28/266 [06:20<31:32,  7.95s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r20i1p1f1_gn_185001-202012.nc


53.1kKiB [00:17, 3.02kKiB/s]                          
 11%|█         | 29/266 [06:39<43:47, 11.09s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r22i1p2f1_gn_185001-202012.nc


53.1kKiB [00:07, 7.22kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r5i1p2f1_gn_185001-202012.nc


53.1kKiB [00:24, 2.17kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r7i1p2f1_gn_185001-202012.nc


53.1kKiB [00:13, 4.05kKiB/s]                          
 12%|█▏        | 32/266 [07:27<57:16, 14.69s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r20i1p2f1_gn_185001-202012.nc


53.1kKiB [00:02, 24.9kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r24i1p2f1_gn_185001-202012.nc


53.1kKiB [00:22, 2.37kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r25i1p1f1_gn_185001-202012.nc


53.1kKiB [00:06, 8.57kKiB/s]54<57:27, 14.86s/it]
 13%|█▎        | 35/266 [08:01<47:56, 12.45s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r23i1p2f1_gn_185001-202012.nc


53.1kKiB [00:04, 12.6kKiB/s]                          
 14%|█▎        | 36/266 [08:06<39:10, 10.22s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r4i1p2f1_gn_185001-202012.nc


53.1kKiB [00:06, 8.01kKiB/s]                          
 14%|█▍        | 37/266 [08:13<35:46,  9.37s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r8i1p2f1_gn_185001-202012.nc


53.1kKiB [00:05, 9.80kKiB/s]                          
 14%|█▍        | 38/266 [08:19<31:56,  8.41s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r21i1p1f1_gn_185001-202012.nc


53.1kKiB [00:04, 13.2kKiB/s]                          
 15%|█▍        | 39/266 [08:24<27:44,  7.33s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/CanESM5/tas_Amon_CanESM5_hist-GHG_r6i1p2f1_gn_185001-202012.nc


108kKiB [00:11, 9.62kKiB/s]                         
 15%|█▌        | 40/266 [08:36<33:30,  8.90s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/E3SM-2-0/tas_Amon_E3SM-2-0_hist-GHG_r1i1p1f1_gr_185001-189912.nc


108kKiB [00:10, 10.3kKiB/s]                         


File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/E3SM-2-0/tas_Amon_E3SM-2-0_hist-GHG_r1i1p1f1_gr_190001-194912.nc


108kKiB [00:17, 6.23kKiB/s]                         
 16%|█▌        | 42/266 [09:10<48:12, 12.91s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/E3SM-2-0/tas_Amon_E3SM-2-0_hist-GHG_r1i1p1f1_gr_195001-199912.nc


32.5kKiB [00:10, 3.12kKiB/s]                          
 16%|█▌        | 43/266 [09:21<45:55, 12.36s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/E3SM-2-0/tas_Amon_E3SM-2-0_hist-GHG_r1i1p1f1_gr_200001-201412.nc


108kKiB [00:13, 8.26kKiB/s]                         
 17%|█▋        | 44/266 [09:34<47:12, 12.76s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/E3SM-2-0/tas_Amon_E3SM-2-0_hist-GHG_r2i1p1f1_gr_185001-189912.nc


108kKiB [00:11, 9.38kKiB/s]                         
 17%|█▋        | 45/266 [09:46<46:14, 12.56s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/E3SM-2-0/tas_Amon_E3SM-2-0_hist-GHG_r2i1p1f1_gr_190001-194912.nc


108kKiB [00:39, 2.74kKiB/s]                         


File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/E3SM-2-0/tas_Amon_E3SM-2-0_hist-GHG_r2i1p1f1_gr_195001-199912.nc


32.5kKiB [00:03, 9.46kKiB/s]                          
 18%|█▊        | 47/266 [10:33<59:09, 16.21s/it]  

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/E3SM-2-0/tas_Amon_E3SM-2-0_hist-GHG_r2i1p1f1_gr_200001-201412.nc


108kKiB [00:19, 5.52kKiB/s]                         
 18%|█▊        | 48/266 [10:53<1:03:19, 17.43s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/E3SM-2-0/tas_Amon_E3SM-2-0_hist-GHG_r5i1p1f1_gr_185001-189912.nc


108kKiB [00:11, 9.49kKiB/s]                         
 18%|█▊        | 49/266 [11:05<57:05, 15.79s/it]  

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/E3SM-2-0/tas_Amon_E3SM-2-0_hist-GHG_r5i1p1f1_gr_190001-194912.nc


108kKiB [00:17, 6.07kKiB/s]                         
 19%|█▉        | 50/266 [11:23<59:36, 16.56s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/E3SM-2-0/tas_Amon_E3SM-2-0_hist-GHG_r5i1p1f1_gr_195001-199912.nc


32.4kKiB [00:03, 10.8kKiB/s]                          
 19%|█▉        | 51/266 [11:27<45:31, 12.70s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/E3SM-2-0/tas_Amon_E3SM-2-0_hist-GHG_r5i1p1f1_gr_200001-201412.nc


108kKiB [00:10, 10.8kKiB/s]                         
 20%|█▉        | 52/266 [11:37<43:06, 12.09s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/E3SM-2-0/tas_Amon_E3SM-2-0_hist-GHG_r3i1p1f1_gr_185001-189912.nc


108kKiB [00:15, 7.20kKiB/s]                         


File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/E3SM-2-0/tas_Amon_E3SM-2-0_hist-GHG_r3i1p1f1_gr_190001-194912.nc


108kKiB [00:14, 7.44kKiB/s]                         
 20%|██        | 54/266 [12:09<49:06, 13.90s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/E3SM-2-0/tas_Amon_E3SM-2-0_hist-GHG_r3i1p1f1_gr_195001-199912.nc


32.5kKiB [00:02, 14.7kKiB/s]                          
 21%|██        | 55/266 [12:12<37:11, 10.58s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/E3SM-2-0/tas_Amon_E3SM-2-0_hist-GHG_r3i1p1f1_gr_200001-201412.nc


108kKiB [00:09, 11.6kKiB/s]                         
 21%|██        | 56/266 [12:22<36:18, 10.38s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/E3SM-2-0/tas_Amon_E3SM-2-0_hist-GHG_r4i1p1f1_gr_185001-189912.nc


108kKiB [00:27, 3.87kKiB/s]                         
 21%|██▏       | 57/266 [12:50<55:06, 15.82s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/E3SM-2-0/tas_Amon_E3SM-2-0_hist-GHG_r4i1p1f1_gr_190001-194912.nc


108kKiB [00:17, 6.07kKiB/s]                         
 22%|██▏       | 58/266 [13:09<57:29, 16.59s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/E3SM-2-0/tas_Amon_E3SM-2-0_hist-GHG_r4i1p1f1_gr_195001-199912.nc


32.5kKiB [00:05, 6.37kKiB/s]                          
 22%|██▏       | 59/266 [13:14<46:05, 13.36s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/E3SM-2-0/tas_Amon_E3SM-2-0_hist-GHG_r4i1p1f1_gr_200001-201412.nc


6.77kKiB [01:59, 56.8KiB/s]                          
 23%|██▎       | 60/266 [15:17<2:38:13, 46.08s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/FGOALS-g3/tas_Amon_FGOALS-g3_hist-GHG_r1i1p1f1_gn_188001-188912.nc


6.77kKiB [01:49, 61.7KiB/s]                          
 23%|██▎       | 61/266 [17:07<3:43:25, 65.39s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/FGOALS-g3/tas_Amon_FGOALS-g3_hist-GHG_r1i1p1f1_gn_196001-196912.nc


6.77kKiB [00:58, 115KiB/s]                           
 23%|██▎       | 62/266 [18:07<3:36:12, 63.59s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/FGOALS-g3/tas_Amon_FGOALS-g3_hist-GHG_r1i1p1f1_gn_197001-197912.nc


7.45kKiB [01:07, 110KiB/s]                          
 24%|██▎       | 63/266 [19:15<3:40:23, 65.14s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/FGOALS-g3/tas_Amon_FGOALS-g3_hist-GHG_r1i1p1f1_gn_201001-202012.nc


6.77kKiB [00:50, 135KiB/s]                           
 24%|██▍       | 64/266 [20:07<3:25:09, 60.94s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/FGOALS-g3/tas_Amon_FGOALS-g3_hist-GHG_r2i1p1f1_gn_193001-193912.nc


6.77kKiB [01:17, 87.8KiB/s]                          
 24%|██▍       | 65/266 [21:25<3:41:20, 66.07s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-GHG/tas/FGOALS-g3/tas_Amon_FGOALS-g3_hist-GHG_r2i1p1f1_gn_198001-198912.nc


 70%|███████   | 4.75k/6.77k [01:10<00:29, 67.4KiB/s]
 24%|██▍       | 65/266 [22:36<1:09:54, 20.87s/it]


KeyboardInterrupt: 

In [14]:
# Extract all of the file_context into dictionaries
# and then append them to a dataframe
# Initialize an empty list to store the dictionaries
# Create an empty dataframe
file_context_df = pd.DataFrame()


# For each file_context
for file_context in file_context_list:
    # Append the file_context to the dataframe
    file_context_df = file_context_df.append(file_context, ignore_index=True)

  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_conte

In [None]:
# Python
# Form a list of the unique 'source_id' values from the results
source_id_list = list(set([result.json['source_id'] for result in results]))

# Print the list
print(source_id_list)

In [5]:
# test the function for querying the database
results = query_data_esgf(conn,
                        source_id='E3SM-2-0',
                        experiment_id='hist-aer',
                        variable_id='tas',
                        table_id='Amon',
                        data_node='esgf-data2.llnl.gov',)

# print the len of the results
print(len(results))

# print the type of the results
print(type(results))

# print the results
print(results)

5
<class 'pyesgf.search.results.ResultSet'>
<pyesgf.search.results.ResultSet object at 0x7f87bf91f130>


In [6]:
# Print the details of the first result
print(results[0].json['id'])

CMIP6.DAMIP.E3SM-Project.E3SM-2-0.hist-aer.r2i1p1f1.Amon.tas.gr.v20220906|esgf-data2.llnl.gov


In [7]:
# Extract the file context
# files_list = extract_file_context(results)

# # # Turn the list into a dataframe
# # files_df = pd.DataFrame.from_dict(files_list)

# # files_df

In [8]:
files_list_mt = extract_file_context_multithread(results)

files_list_mt

Extracting file context for 5 datasets...


[{'filename': 'tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_185001-189912.nc',
  'url': 'https://esgf-data2.llnl.gov/thredds/fileServer/user_pub_work/CMIP6/DAMIP/E3SM-Project/E3SM-2-0/hist-aer/r2i1p1f1/Amon/tas/gr/v20220906/tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_185001-189912.nc'},
 {'filename': 'tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_190001-194912.nc',
  'url': 'https://esgf-data2.llnl.gov/thredds/fileServer/user_pub_work/CMIP6/DAMIP/E3SM-Project/E3SM-2-0/hist-aer/r2i1p1f1/Amon/tas/gr/v20220906/tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_190001-194912.nc'},
 {'filename': 'tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_195001-199912.nc',
  'url': 'https://esgf-data2.llnl.gov/thredds/fileServer/user_pub_work/CMIP6/DAMIP/E3SM-Project/E3SM-2-0/hist-aer/r2i1p1f1/Amon/tas/gr/v20220906/tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_195001-199912.nc'},
 {'filename': 'tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_200001-201412.nc',
  'url': 'https://esgf-data2.llnl.gov/thredds/fileServer/user_pub_work/CMIP6/DAMIP/E3SM-Project/

In [9]:
files_list = files_list_mt

In [11]:
# Print the type of the files list
print(type(files_list))

# Extract this into a dataframe
files_df = pd.DataFrame.from_dict(files_list)
files_df

# Assert that all filenames contrain the string "185001" and "202012"
# assert all(files_df['filename'].str.contains('185001')), "Not all filenames contain the string 185001"
# assert all(files_df['filename'].str.contains('202012')), "Not all filenames contain the string 202012"

files_df

<class 'list'>


Unnamed: 0,filename,url
0,tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_185001-...,https://esgf-data2.llnl.gov/thredds/fileServer...
1,tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_190001-...,https://esgf-data2.llnl.gov/thredds/fileServer...
2,tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_195001-...,https://esgf-data2.llnl.gov/thredds/fileServer...
3,tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_200001-...,https://esgf-data2.llnl.gov/thredds/fileServer...
4,tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_185001-...,https://esgf-data2.llnl.gov/thredds/fileServer...
5,tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_190001-...,https://esgf-data2.llnl.gov/thredds/fileServer...
6,tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_195001-...,https://esgf-data2.llnl.gov/thredds/fileServer...
7,tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_200001-...,https://esgf-data2.llnl.gov/thredds/fileServer...
8,tas_Amon_E3SM-2-0_hist-aer_r3i1p1f1_gr_185001-...,https://esgf-data2.llnl.gov/thredds/fileServer...
9,tas_Amon_E3SM-2-0_hist-aer_r3i1p1f1_gr_190001-...,https://esgf-data2.llnl.gov/thredds/fileServer...


In [12]:
import glob

# We want to verify whether these files exist on JASMIN
damip_dir = "/badc/cmip6/data/CMIP6/DAMIP/"

# Test the function
files_df = check_file_exists_jasmin(files_df, damip_dir)

files_df

E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_185001-189912.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_190001-194912.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_195001-199912.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_200001-201412.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_185001-189912.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_190001-194912.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_195001-199912.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_200001-201412.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r3i1p1f1_gr_185001-189912.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r3i1p1f1_gr_190001-194912.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r3i1p1

Unnamed: 0,filename,url,file_exists
0,tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_185001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False
1,tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_190001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False
2,tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_195001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False
3,tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_200001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False
4,tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_185001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False
5,tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_190001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False
6,tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_195001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False
7,tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_200001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False
8,tas_Amon_E3SM-2-0_hist-aer_r3i1p1f1_gr_185001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False
9,tas_Amon_E3SM-2-0_hist-aer_r3i1p1f1_gr_190001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False


In [10]:
# Set up the directory to download to
download_dir = "/gws/nopw/j04/scenario/users/benhutch/DAMIP"

# Set up the variable
variable = 'tas'

# Set up the experiment id
experiment_id = 'hist-aer'

# Set up the model
model = 'CanESM5'

# Set up the directory
download_path = os.path.join(download_dir, experiment_id, 
                             variable, model)

# Print the download path
print(download_path)

# Use the download function to download a single file
download_file(files_df['url'][0], 
              files_df['filename'][0], download_path)

/gws/nopw/j04/scenario/users/benhutch/DAMIP/hist-aer/tas/CanESM5
Downloading tas_Amon_CanESM5_hist-aer_r1i1p1f1_gn_185001-202012.nc from http://crd-esgf-drc.ec.gc.ca/thredds/fileServer/esgE_dataroot/AR6/CMIP6/DAMIP/CCCma/CanESM5/hist-aer/r1i1p1f1/Amon/tas/gn/v20190429/tas_Amon_CanESM5_hist-aer_r1i1p1f1_gn_185001-202012.nc
Saving to /gws/nopw/j04/scenario/users/benhutch/DAMIP/hist-aer/tas/CanESM5tas_Amon_CanESM5_hist-aer_r1i1p1f1_gn_185001-202012.nc


53.1kKiB [00:46, 1.13kKiB/s]                          


Downloaded size does not match expected size!
 FYI, the status code was  200


In [11]:
# Download all the files
for i in tqdm(range(len(files_df))):
    download_file(files_df['url'][i], 
                  files_df['filename'][i], download_path)

  0%|          | 0/5 [00:00<?, ?it/s]

Downloading tas_Amon_CanESM5_hist-aer_r1i1p1f1_gn_185001-202012.nc from http://crd-esgf-drc.ec.gc.ca/thredds/fileServer/esgE_dataroot/AR6/CMIP6/DAMIP/CCCma/CanESM5/hist-aer/r1i1p1f1/Amon/tas/gn/v20190429/tas_Amon_CanESM5_hist-aer_r1i1p1f1_gn_185001-202012.nc
Saving to /gws/nopw/j04/scenario/users/benhutch/DAMIP/hist-aer/tas/CanESM5tas_Amon_CanESM5_hist-aer_r1i1p1f1_gn_185001-202012.nc


53.1kKiB [00:05, 10.5kKiB/s]                          
 20%|██        | 1/5 [00:05<00:23,  5.86s/it]

Downloaded size does not match expected size!
 FYI, the status code was  200
Downloading tas_Amon_CanESM5_hist-aer_r4i1p1f1_gn_185001-202012.nc from http://crd-esgf-drc.ec.gc.ca/thredds/fileServer/esgE_dataroot/AR6/CMIP6/DAMIP/CCCma/CanESM5/hist-aer/r4i1p1f1/Amon/tas/gn/v20190429/tas_Amon_CanESM5_hist-aer_r4i1p1f1_gn_185001-202012.nc
Saving to /gws/nopw/j04/scenario/users/benhutch/DAMIP/hist-aer/tas/CanESM5tas_Amon_CanESM5_hist-aer_r4i1p1f1_gn_185001-202012.nc


53.1kKiB [00:02, 22.6kKiB/s]                          
 40%|████      | 2/5 [00:08<00:12,  4.25s/it]

Downloaded size does not match expected size!
 FYI, the status code was  200
Downloading tas_Amon_CanESM5_hist-aer_r2i1p1f1_gn_185001-202012.nc from http://crd-esgf-drc.ec.gc.ca/thredds/fileServer/esgE_dataroot/AR6/CMIP6/DAMIP/CCCma/CanESM5/hist-aer/r2i1p1f1/Amon/tas/gn/v20190429/tas_Amon_CanESM5_hist-aer_r2i1p1f1_gn_185001-202012.nc
Saving to /gws/nopw/j04/scenario/users/benhutch/DAMIP/hist-aer/tas/CanESM5tas_Amon_CanESM5_hist-aer_r2i1p1f1_gn_185001-202012.nc


53.1kKiB [00:19, 2.73kKiB/s]                          


Downloaded size does not match expected size!
 FYI, the status code was  200


 60%|██████    | 3/5 [00:29<00:23, 11.58s/it]

Downloading tas_Amon_CanESM5_hist-aer_r5i1p1f1_gn_185001-202012.nc from http://crd-esgf-drc.ec.gc.ca/thredds/fileServer/esgE_dataroot/AR6/CMIP6/DAMIP/CCCma/CanESM5/hist-aer/r5i1p1f1/Amon/tas/gn/v20190429/tas_Amon_CanESM5_hist-aer_r5i1p1f1_gn_185001-202012.nc
Saving to /gws/nopw/j04/scenario/users/benhutch/DAMIP/hist-aer/tas/CanESM5tas_Amon_CanESM5_hist-aer_r5i1p1f1_gn_185001-202012.nc


53.1kKiB [00:02, 23.0kKiB/s]                          
 80%|████████  | 4/5 [00:32<00:08,  8.18s/it]

Downloaded size does not match expected size!
 FYI, the status code was  200
Downloading tas_Amon_CanESM5_hist-aer_r3i1p1f1_gn_185001-202012.nc from http://crd-esgf-drc.ec.gc.ca/thredds/fileServer/esgE_dataroot/AR6/CMIP6/DAMIP/CCCma/CanESM5/hist-aer/r3i1p1f1/Amon/tas/gn/v20190429/tas_Amon_CanESM5_hist-aer_r3i1p1f1_gn_185001-202012.nc
Saving to /gws/nopw/j04/scenario/users/benhutch/DAMIP/hist-aer/tas/CanESM5tas_Amon_CanESM5_hist-aer_r3i1p1f1_gn_185001-202012.nc


53.1kKiB [00:13, 3.82kKiB/s]                          
100%|██████████| 5/5 [00:46<00:00,  9.39s/it]

Downloaded size does not match expected size!
 FYI, the status code was  200



