In [1]:
# Testing the downloading data method from:
# https://claut.gitlab.io/man_ccia/lab2.html
# Import the required modules
from pyesgf.search import SearchConnection
import os
import pandas as pd
import requests
from tqdm import tqdm

# Set the os environment to on
os.environ['ESGF_PYCLIENT_NO_FACETS_STAR_WARNING'] = "on"

In [2]:
# Import the functions
from testing_download_functions import query_data_esgf, extract_file_context, \
                                        download_file, extract_file_context_multithread, \
                                        check_file_exists_jasmin

In [3]:
# set the search connection
# to the LLNL search node
conn = SearchConnection('https://esgf-node.llnl.gov/esg-search', distrib=True)

In [4]:
# Find the models which have data on the esgf node for the following constraints
# The constraints are:
experiment_id = 'hist-aer'
latest = True
variable_id = 'tas'
project = 'CMIP6'
table_id = 'Amon'
activity_id = 'DAMIP'

# Set up the params for the query
params = {
    "latest": latest,
    "project": project,
    "experiment_id": experiment_id,
    "variable_id": variable_id,
    "activity_id": activity_id,
    "table_id": table_id
}

# Query the database
query = conn.new_context(**params)

# Get the results
results = query.search()

print(len(results))

# Python
# Form a list of the unique 'source_id' values from the results
# Python
# Form a set of the unique 'source_id' values from the results
source_id_set = set(id for result in results for id in result.json['source_id'])

# Print the set
print(source_id_set)

232
{'IPSL-CM6A-LR', 'CNRM-CM6-1', 'BCC-CSM2-MR', 'MRI-ESM2-0', 'HadGEM3-GC31-LL', 'GISS-E2-1-G', 'GFDL-ESM4', 'ACCESS-ESM1-5', 'NorESM2-LM', 'ACCESS-CM2', 'MIROC6', 'CanESM5', 'E3SM-2-0', 'FGOALS-g3', 'CESM2'}


In [5]:
print(len(source_id_set))

15


In [6]:
# # Constrain the source_id_set to the first 1 model
# source_id_set = list(source_id_set)[0:1]

# # Print the set
# print(source_id_set)

# Initialize an empty dictionary to store the results
max_results = {'source_id': None, 'data_node': None, 'num_results': 0}

# Create a list for the max_results dictionaries
max_results_list = []

# Set up the max results per source dictionary
max_results_per_source = {}

# Loop through the source_id_set and query which nodes have data for each model
for source_id in source_id_set:
    print("trying to find valid nodes for model: {}".format(source_id))
    # Set the source_id constraint
    params['source_id'] = source_id
    print(params)
    # Query the database
    model_query = conn.new_context(**params)
    # Get the results
    model_results = model_query.search()
    # Print the number of results
    print(len(model_results))

    # if the len of the model results is not 0
    if len(model_results) != 0:
        # Print the first result
        print(model_results[0].json['id'])

    # Identify the unique nodes (data_node) which have data for the model
    data_node_set = set(result.json['data_node'] for result in model_results)

    # Print the set
    print(data_node_set)

    # Loop through the data_node_set and query how many files are available for each 
    # node
    for data_node in data_node_set:
        print("trying to find valid files for node: {}".format(data_node))
        
        # Set up the params for the query
        params_node = params.copy()
        
        # Set the data_node constraint
        params_node['data_node'] = data_node
        # Query the database
        node_query = conn.new_context(**params_node)
        # Get the results
        node_results = node_query.search()
        # Print the number of results
        print(len(node_results))

        # If this source_id is not in max_results_per_source or this data_node has more results, update the dictionary
        if source_id not in max_results_per_source or len(node_results) > max_results_per_source[source_id]:
            max_results = {'source_id': source_id, 'data_node': data_node, 'num_results': len(node_results)}
            max_results_per_source[source_id] = len(node_results)

            # Append the max_results dictionary to the list
            max_results_list.append(max_results)
        else:
            print("this data_node has less results than the previous one")
            continue

# Print the dictionary
print(max_results_list)





trying to find valid nodes for model: IPSL-CM6A-LR
{'latest': True, 'project': 'CMIP6', 'experiment_id': 'hist-aer', 'variable_id': 'tas', 'activity_id': 'DAMIP', 'table_id': 'Amon', 'source_id': 'IPSL-CM6A-LR'}
11
CMIP6.DAMIP.IPSL.IPSL-CM6A-LR.hist-aer.r10i1p1f1.Amon.tas.gr.v20180914|aims3.llnl.gov
{'aims3.llnl.gov', 'esgf-data04.diasjp.net'}
trying to find valid files for node: aims3.llnl.gov
10
trying to find valid files for node: esgf-data04.diasjp.net
1
this data_node has less results than the previous one
trying to find valid nodes for model: CNRM-CM6-1
{'latest': True, 'project': 'CMIP6', 'experiment_id': 'hist-aer', 'variable_id': 'tas', 'activity_id': 'DAMIP', 'table_id': 'Amon', 'source_id': 'CNRM-CM6-1'}
11
CMIP6.DAMIP.CNRM-CERFACS.CNRM-CM6-1.hist-aer.r10i1p1f2.Amon.tas.gr.v20190308|aims3.llnl.gov
{'aims3.llnl.gov', 'esgf-data04.diasjp.net'}
trying to find valid files for node: aims3.llnl.gov
10
trying to find valid files for node: esgf-data04.diasjp.net
1
this data_node has

In [7]:
# Clean the max_results_list to remove duplicate source_id entries
# Keep the entry with the highest number of results (num_results)
# Initialize an empty list to store the unique source_id entries
unique_source_id_list = []

# Loop through the max_results_list and append the unique source_id entries
for result in max_results_list:
    if result['source_id'] not in unique_source_id_list:
        unique_source_id_list.append(result['source_id'])

# Print the list
print(unique_source_id_list)

# Initialize an empty list to store the unique max_results_list entries
unique_max_results_list = []

# Loop through the unique_source_id_list and only
# Append the max_results_list entries which match the source_id and have the highest num_results
for source_id in unique_source_id_list:
    print("source_id: {}".format(source_id))
    # Initialize an empty list to store the num_results
    num_results_list = []
    # Loop through the max_results_list and append the num_results to the list
    for result in max_results_list:
        if result['source_id'] == source_id:
            num_results_list.append(result['num_results'])
    # Get the max num_results
    max_num_results = max(num_results_list)
    # Loop through the max_results_list and append the entries which match the source_id and max_num_results
    for result in max_results_list:
        if result['source_id'] == source_id and result['num_results'] == max_num_results:
            unique_max_results_list.append(result)

['IPSL-CM6A-LR', 'CNRM-CM6-1', 'BCC-CSM2-MR', 'MRI-ESM2-0', 'HadGEM3-GC31-LL', 'GISS-E2-1-G', 'GFDL-ESM4', 'ACCESS-ESM1-5', 'NorESM2-LM', 'ACCESS-CM2', 'MIROC6', 'CanESM5', 'E3SM-2-0', 'FGOALS-g3', 'CESM2']
source_id: IPSL-CM6A-LR
source_id: CNRM-CM6-1
source_id: BCC-CSM2-MR
source_id: MRI-ESM2-0
source_id: HadGEM3-GC31-LL
source_id: GISS-E2-1-G
source_id: GFDL-ESM4
source_id: ACCESS-ESM1-5
source_id: NorESM2-LM
source_id: ACCESS-CM2
source_id: MIROC6
source_id: CanESM5
source_id: E3SM-2-0
source_id: FGOALS-g3
source_id: CESM2


In [8]:
# Convert the unique_max_results_list to a dataframe
unique_max_results_df = pd.DataFrame.from_dict(unique_max_results_list)

# Print the dataframe
unique_max_results_df

# save the dataframe
# save in current directory + save_data + filename
save_dir = os.path.join(os.getcwd(), 'save_data')
save_filename = 'unique_max_results_df_{}_{}_{}_{}_{}.csv'.format(experiment_id, variable_id, project, table_id, activity_id)

# Form the save path
save_path = os.path.join(save_dir, save_filename)

# Check if the save directory exists
if not os.path.exists(save_path):
    # Make the directory
    os.makedirs(save_dir)

# Save the dataframe
unique_max_results_df.to_csv(save_path)

In [7]:
save_dir = os.path.join(os.getcwd(), 'save_data')
save_filename = 'unique_max_results_df_{}_{}_{}_{}_{}.csv'.format(experiment_id, variable_id, project, table_id, activity_id)

# Form the save path
save_path = os.path.join(save_dir, save_filename)

In [9]:
# Open the save_path as a dataframe
unique_max_results_df = pd.read_csv(save_path)

results_list = []

# Loop over the dataframe to create a list of result sets
for i in range(len(unique_max_results_df)):
    
    # Get the source_id and data_node
    source_id = unique_max_results_df.loc[i, 'source_id']
    data_node = unique_max_results_df.loc[i, 'data_node']

    # Print the source_id and data_node
    print("source_id: {}, data_node: {}".format(source_id, data_node))

    # Print the experiment_id, variable_id, activity_id, and data_node
    print("experiment_id: {}, variable_id: {}, activity_id: {}, data_node: {}".format(experiment_id, variable_id, activity_id, data_node))

    results = query_data_esgf(conn,
                            source_id = source_id,
                            experiment_id = experiment_id,
                            variable_id = variable_id,
                            activity_id = activity_id,
                            data_node = data_node,
    )

    print(len(results))

    # Append the results to the results_list
    results_list.append(results)

# Print the results_list
print(results_list)

source_id: IPSL-CM6A-LR, data_node: aims3.llnl.gov
experiment_id: hist-aer, variable_id: tas, activity_id: DAMIP, data_node: aims3.llnl.gov
10
source_id: CNRM-CM6-1, data_node: aims3.llnl.gov
experiment_id: hist-aer, variable_id: tas, activity_id: DAMIP, data_node: aims3.llnl.gov
10
source_id: BCC-CSM2-MR, data_node: aims3.llnl.gov
experiment_id: hist-aer, variable_id: tas, activity_id: DAMIP, data_node: aims3.llnl.gov
3
source_id: MRI-ESM2-0, data_node: esgf-data03.diasjp.net
experiment_id: hist-aer, variable_id: tas, activity_id: DAMIP, data_node: esgf-data03.diasjp.net
5
source_id: HadGEM3-GC31-LL, data_node: esgf-data1.llnl.gov
experiment_id: hist-aer, variable_id: tas, activity_id: DAMIP, data_node: esgf-data1.llnl.gov
51
source_id: GISS-E2-1-G, data_node: dpesgf03.nccs.nasa.gov
experiment_id: hist-aer, variable_id: tas, activity_id: DAMIP, data_node: dpesgf03.nccs.nasa.gov
15
source_id: GFDL-ESM4, data_node: esgf-data1.llnl.gov
experiment_id: hist-aer, variable_id: tas, activity_

In [11]:
# Set up a list for the file context
file_context_list = []

# Loop through the results_list and print ther number of results
for results in results_list:
    print(len(results))

    # Extract the file context from the results
    file_context = extract_file_context_multithread(results)

    # Append the file_context to the file_context_list
    file_context_list.append(file_context)

# Print the file_context_list
print(file_context_list)

10
Extracting file context for 10 datasets...
Error: <pyesgf.search.results.DatasetResult object at 0x7f7697aeb1f0>
Error: <pyesgf.search.results.DatasetResult object at 0x7f7697cd55d0>
Error: <pyesgf.search.results.DatasetResult object at 0x7f7697cd6350>
Error: <pyesgf.search.results.DatasetResult object at 0x7f7697cd4c70>
Error: <pyesgf.search.results.DatasetResult object at 0x7f78a4ef5300>
Error: <pyesgf.search.results.DatasetResult object at 0x7f7697cd5690>
10
Extracting file context for 10 datasets...
Error: <pyesgf.search.results.DatasetResult object at 0x7f7697e77790>
Error: <pyesgf.search.results.DatasetResult object at 0x7f7697ae9750>
Error: <pyesgf.search.results.DatasetResult object at 0x7f7697cd45e0>
Error: <pyesgf.search.results.DatasetResult object at 0x7f7697cd50c0>
Error: <pyesgf.search.results.DatasetResult object at 0x7f7697cd7370>
3
Extracting file context for 3 datasets...
5
Extracting file context for 5 datasets...
Error: <pyesgf.search.results.DatasetResult object

In [16]:
# Check whether the files exist on JASMIN
import glob

# We want to verify whether these files exist on JASMIN
damip_dir = "/badc/cmip6/data/CMIP6/DAMIP/"

# Test the function
files_df = check_file_exists_jasmin(file_context_df, damip_dir)

files_df

IPSL
File exists for tas_Amon_IPSL-CM6A-LR_hist-aer_r10i1p1f1_gr_185001-202012.nc
IPSL
File exists for tas_Amon_IPSL-CM6A-LR_hist-aer_r7i1p1f1_gr_185001-202012.nc
IPSL
File exists for tas_Amon_IPSL-CM6A-LR_hist-aer_r3i1p1f1_gr_185001-202012.nc
IPSL
File exists for tas_Amon_IPSL-CM6A-LR_hist-aer_r4i1p1f1_gr_185001-202012.nc
CNRM-CERFACS
File exists for tas_Amon_CNRM-CM6-1_hist-aer_r2i1p1f2_gr_185001-202012.nc
CNRM-CERFACS
File exists for tas_Amon_CNRM-CM6-1_hist-aer_r3i1p1f2_gr_185001-202012.nc
CNRM-CERFACS
File exists for tas_Amon_CNRM-CM6-1_hist-aer_r5i1p1f2_gr_185001-202012.nc
CNRM-CERFACS
File exists for tas_Amon_CNRM-CM6-1_hist-aer_r4i1p1f2_gr_185001-202012.nc
CNRM-CERFACS
File exists for tas_Amon_CNRM-CM6-1_hist-aer_r7i1p1f2_gr_185001-202012.nc
BCC
File exists for tas_Amon_BCC-CSM2-MR_hist-aer_r3i1p1f1_gn_185001-202012.nc
BCC
File exists for tas_Amon_BCC-CSM2-MR_hist-aer_r2i1p1f1_gn_185001-202012.nc
BCC
File exists for tas_Amon_BCC-CSM2-MR_hist-aer_r1i1p1f1_gn_185001-202012.nc
MRI

Unnamed: 0,filename,url,file_exists
0,tas_Amon_IPSL-CM6A-LR_hist-aer_r10i1p1f1_gr_18...,http://aims3.llnl.gov/thredds/fileServer/css03...,True
1,tas_Amon_IPSL-CM6A-LR_hist-aer_r7i1p1f1_gr_185...,http://aims3.llnl.gov/thredds/fileServer/css03...,True
2,tas_Amon_IPSL-CM6A-LR_hist-aer_r3i1p1f1_gr_185...,http://aims3.llnl.gov/thredds/fileServer/css03...,True
3,tas_Amon_IPSL-CM6A-LR_hist-aer_r4i1p1f1_gr_185...,http://aims3.llnl.gov/thredds/fileServer/css03...,True
4,tas_Amon_CNRM-CM6-1_hist-aer_r2i1p1f2_gr_18500...,http://aims3.llnl.gov/thredds/fileServer/css03...,True
...,...,...,...
212,tas_Amon_CESM2_hist-aer_r3i1p1f1_gn_200001-201...,http://esgf-data.ucar.edu/thredds/fileServer/e...,True
213,tas_Amon_CESM2_hist-aer_r1i1p1f1_gn_185001-189...,http://esgf-data.ucar.edu/thredds/fileServer/e...,True
214,tas_Amon_CESM2_hist-aer_r1i1p1f1_gn_190001-194...,http://esgf-data.ucar.edu/thredds/fileServer/e...,True
215,tas_Amon_CESM2_hist-aer_r1i1p1f1_gn_195001-199...,http://esgf-data.ucar.edu/thredds/fileServer/e...,True


In [23]:
# We only want to download the files which don't already exist on JASMIN
# Set up the download directory
download_dir = "/gws/nopw/j04/scenario/users/benhutch/"

# Loop through files_df and download the files
# First constrain the dataframe to only the files which don't already exist on JASMIN
files_df = files_df[files_df['file_exists'] == False]

# Reset the index
files_df.reset_index(drop=True, inplace=True)

# Loop through the files_df and download the files
for i in tqdm(range(len(files_df))):
    # Get the file_url
    file_url = files_df.loc[i, 'url']

    # Get the filename
    filename = files_df.loc[i, 'filename']

    # Split the filename and extract the variable name
    variable = filename.split('_')[0]

    # Split the filename to get the experiment name
    experiment = filename.split('_')[3]

    # Set up the model
    model = filename.split('_')[2]

    # Set up the download directory
    download_dir_loop = os.path.join(download_dir, experiment, variable, model)

    # If the download directory doesn't exist, make it
    if not os.path.exists(download_dir_loop):
        os.makedirs(download_dir_loop)

    # Set up the download path
    download_path = os.path.join(download_dir_loop, filename)

    # Set up the request
    r = requests.get(file_url, stream=True)

    # Set up the total size
    total_size = int(r.headers.get('content-length', 0))
    
    # Set up the block size
    block_size = 1024

    # Download the file
    with open(download_path, 'wb') as f:
        for data in tqdm(r.iter_content(block_size), 
                        total = total_size//block_size, 
                        unit = 'KiB', 
                        unit_scale = True):
            f.write(data)

        # If the total size is no 0
        if total_size != 0:
            print("File is not empty")
            print("Download complete - file saved to {}".format(download_path))


228kKiB [00:13, 17.2kKiB/s]                         
  2%|▏         | 1/59 [00:13<13:21, 13.82s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/MRI-ESM2-0/tas_Amon_MRI-ESM2-0_hist-aer_r4i1p1f1_gn_185001-202012.nc


31.0kKiB [00:01, 24.9kKiB/s]                          
  3%|▎         | 2/59 [00:15<06:26,  6.78s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/GISS-E2-1-G/tas_Amon_GISS-E2-1-G_hist-aer_r3i1p3f1_gn_185001-190012.nc


30.4kKiB [00:01, 24.3kKiB/s]                          
  5%|▌         | 3/59 [00:17<04:15,  4.55s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/GISS-E2-1-G/tas_Amon_GISS-E2-1-G_hist-aer_r3i1p3f1_gn_190101-195012.nc


30.4kKiB [00:03, 8.80kKiB/s]                          
  7%|▋         | 4/59 [00:21<03:59,  4.36s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/GISS-E2-1-G/tas_Amon_GISS-E2-1-G_hist-aer_r3i1p3f1_gn_195101-200012.nc


8.53kKiB [00:00, 9.57kKiB/s]                          
  8%|▊         | 5/59 [00:23<03:00,  3.35s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/GISS-E2-1-G/tas_Amon_GISS-E2-1-G_hist-aer_r3i1p3f1_gn_200101-201412.nc


31.0kKiB [00:01, 24.7kKiB/s]                          
 10%|█         | 6/59 [00:25<02:30,  2.83s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/GISS-E2-1-G/tas_Amon_GISS-E2-1-G_hist-aer_r4i1p3f1_gn_185001-190012.nc


30.4kKiB [00:01, 25.9kKiB/s]                          
 12%|█▏        | 7/59 [00:26<02:10,  2.51s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/GISS-E2-1-G/tas_Amon_GISS-E2-1-G_hist-aer_r4i1p3f1_gn_190101-195012.nc


30.4kKiB [00:01, 24.1kKiB/s]                          
 14%|█▎        | 8/59 [00:28<01:58,  2.32s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/GISS-E2-1-G/tas_Amon_GISS-E2-1-G_hist-aer_r4i1p3f1_gn_195101-200012.nc


8.53kKiB [00:00, 9.28kKiB/s]                          
 15%|█▌        | 9/59 [00:30<01:44,  2.08s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/GISS-E2-1-G/tas_Amon_GISS-E2-1-G_hist-aer_r4i1p3f1_gn_200101-201412.nc


31.0kKiB [00:01, 25.6kKiB/s]                          
 17%|█▋        | 10/59 [00:32<01:39,  2.03s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/GISS-E2-1-G/tas_Amon_GISS-E2-1-G_hist-aer_r5i1p3f1_gn_185001-190012.nc


30.4kKiB [00:07, 4.03kKiB/s]                          
 19%|█▊        | 11/59 [00:40<03:08,  3.93s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/GISS-E2-1-G/tas_Amon_GISS-E2-1-G_hist-aer_r5i1p3f1_gn_190101-195012.nc


30.4kKiB [00:04, 7.23kKiB/s]                          
 20%|██        | 12/59 [00:45<03:19,  4.24s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/GISS-E2-1-G/tas_Amon_GISS-E2-1-G_hist-aer_r5i1p3f1_gn_195101-200012.nc


8.53kKiB [00:00, 9.12kKiB/s]                          
 22%|██▏       | 13/59 [00:46<02:36,  3.40s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/GISS-E2-1-G/tas_Amon_GISS-E2-1-G_hist-aer_r5i1p3f1_gn_200101-201412.nc


30.4kKiB [00:01, 25.6kKiB/s]                          
 24%|██▎       | 14/59 [00:48<02:10,  2.91s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/GISS-E2-1-G/tas_Amon_GISS-E2-1-G_hist-aer_r1i1p1f2_gn_195101-200012.nc


31.0kKiB [00:02, 14.6kKiB/s]                          
 25%|██▌       | 15/59 [00:51<02:06,  2.88s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/GISS-E2-1-G/tas_Amon_GISS-E2-1-G_hist-aer_r2i1p1f2_gn_185001-190012.nc


30.4kKiB [00:01, 17.7kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/GISS-E2-1-G/tas_Amon_GISS-E2-1-G_hist-aer_r2i1p1f2_gn_190101-195012.nc


30.4kKiB [00:01, 23.4kKiB/s]                          
 29%|██▉       | 17/59 [00:55<01:46,  2.53s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/GISS-E2-1-G/tas_Amon_GISS-E2-1-G_hist-aer_r2i1p1f2_gn_195101-200012.nc


8.53kKiB [00:00, 8.84kKiB/s]                          
 31%|███       | 18/59 [00:57<01:31,  2.24s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/GISS-E2-1-G/tas_Amon_GISS-E2-1-G_hist-aer_r2i1p1f2_gn_200101-201412.nc


31.0kKiB [00:01, 23.8kKiB/s]                          
 32%|███▏      | 19/59 [00:59<01:25,  2.14s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/GISS-E2-1-G/tas_Amon_GISS-E2-1-G_hist-aer_r5i1p1f2_gn_185001-190012.nc


30.4kKiB [00:01, 24.0kKiB/s]                          
 34%|███▍      | 20/59 [01:01<01:20,  2.07s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/GISS-E2-1-G/tas_Amon_GISS-E2-1-G_hist-aer_r5i1p1f2_gn_190101-195012.nc


30.4kKiB [00:01, 23.9kKiB/s]                          
 36%|███▌      | 21/59 [01:03<01:16,  2.02s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/GISS-E2-1-G/tas_Amon_GISS-E2-1-G_hist-aer_r5i1p1f2_gn_195101-200012.nc


8.53kKiB [00:00, 9.36kKiB/s]                          
 37%|███▋      | 22/59 [01:04<01:08,  1.85s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/GISS-E2-1-G/tas_Amon_GISS-E2-1-G_hist-aer_r5i1p1f2_gn_200101-201412.nc


31.0kKiB [00:01, 24.6kKiB/s]                          
 39%|███▉      | 23/59 [01:06<01:11,  1.97s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/GISS-E2-1-G/tas_Amon_GISS-E2-1-G_hist-aer_r4i1p1f2_gn_185001-190012.nc


30.4kKiB [00:01, 23.7kKiB/s]                          
 41%|████      | 24/59 [01:08<01:08,  1.96s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/GISS-E2-1-G/tas_Amon_GISS-E2-1-G_hist-aer_r4i1p1f2_gn_190101-195012.nc


30.4kKiB [00:01, 23.9kKiB/s]                          
 42%|████▏     | 25/59 [01:10<01:05,  1.94s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/GISS-E2-1-G/tas_Amon_GISS-E2-1-G_hist-aer_r4i1p1f2_gn_195101-200012.nc


8.53kKiB [00:01, 4.76kKiB/s]                          
 44%|████▍     | 26/59 [01:13<01:07,  2.06s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/GISS-E2-1-G/tas_Amon_GISS-E2-1-G_hist-aer_r4i1p1f2_gn_200101-201412.nc


31.0kKiB [00:01, 18.1kKiB/s]                          
 46%|████▌     | 27/59 [01:15<01:08,  2.15s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/GISS-E2-1-G/tas_Amon_GISS-E2-1-G_hist-aer_r3i1p1f2_gn_185001-190012.nc


30.4kKiB [00:02, 10.8kKiB/s]                          
 47%|████▋     | 28/59 [01:18<01:18,  2.54s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/GISS-E2-1-G/tas_Amon_GISS-E2-1-G_hist-aer_r3i1p1f2_gn_190101-195012.nc


30.4kKiB [00:01, 23.7kKiB/s]                          
 49%|████▉     | 29/59 [01:20<01:10,  2.34s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/GISS-E2-1-G/tas_Amon_GISS-E2-1-G_hist-aer_r3i1p1f2_gn_195101-200012.nc


8.53kKiB [00:00, 9.20kKiB/s]                          
 51%|█████     | 30/59 [01:22<01:00,  2.08s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/GISS-E2-1-G/tas_Amon_GISS-E2-1-G_hist-aer_r3i1p1f2_gn_200101-201412.nc


150kKiB [00:30, 4.88kKiB/s]                         
 53%|█████▎    | 31/59 [01:53<05:03, 10.82s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/ACCESS-CM2/tas_Amon_ACCESS-CM2_hist-aer_r3i1p1f1_gn_185001-202012.nc


150kKiB [00:42, 3.54kKiB/s]                         


File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/ACCESS-CM2/tas_Amon_ACCESS-CM2_hist-aer_r2i1p1f1_gn_185001-202012.nc


84.9kKiB [00:07, 12.0kKiB/s]                          
 56%|█████▌    | 33/59 [02:45<07:16, 16.79s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/MIROC6/tas_Amon_MIROC6_hist-aer_r4i1p1f1_gn_185001-194912.nc


60.3kKiB [00:04, 14.6kKiB/s]                          
 58%|█████▊    | 34/59 [02:49<05:28, 13.16s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/MIROC6/tas_Amon_MIROC6_hist-aer_r4i1p1f1_gn_195001-202012.nc


84.9kKiB [00:05, 15.1kKiB/s]                          
 59%|█████▉    | 35/59 [02:55<04:26, 11.09s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/MIROC6/tas_Amon_MIROC6_hist-aer_r5i1p1f1_gn_185001-194912.nc


60.3kKiB [00:04, 14.7kKiB/s]                          
 61%|██████    | 36/59 [03:00<03:31,  9.18s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/MIROC6/tas_Amon_MIROC6_hist-aer_r5i1p1f1_gn_195001-202012.nc


84.9kKiB [00:05, 15.2kKiB/s]                          
 63%|██████▎   | 37/59 [03:06<03:02,  8.31s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/MIROC6/tas_Amon_MIROC6_hist-aer_r7i1p1f1_gn_185001-194912.nc


60.3kKiB [00:04, 12.1kKiB/s]                          
 64%|██████▍   | 38/59 [03:12<02:37,  7.50s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/MIROC6/tas_Amon_MIROC6_hist-aer_r7i1p1f1_gn_195001-202012.nc


84.9kKiB [00:05, 15.3kKiB/s]                          
 66%|██████▌   | 39/59 [03:18<02:22,  7.14s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/MIROC6/tas_Amon_MIROC6_hist-aer_r9i1p1f1_gn_185001-194912.nc


60.3kKiB [00:04, 14.8kKiB/s]                          
 68%|██████▊   | 40/59 [03:23<02:02,  6.42s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/MIROC6/tas_Amon_MIROC6_hist-aer_r9i1p1f1_gn_195001-202012.nc


53.1kKiB [00:05, 9.14kKiB/s]                          
 69%|██████▉   | 41/59 [03:31<02:01,  6.74s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/CanESM5/tas_Amon_CanESM5_hist-aer_r11i1p2f1_gn_185001-202012.nc


53.1kKiB [00:02, 25.7kKiB/s]                          
 71%|███████   | 42/59 [03:33<01:34,  5.57s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/CanESM5/tas_Amon_CanESM5_hist-aer_r12i1p1f1_gn_185001-202012.nc


53.1kKiB [00:02, 24.8kKiB/s]                          
 73%|███████▎  | 43/59 [03:36<01:15,  4.75s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/CanESM5/tas_Amon_CanESM5_hist-aer_r13i1p2f1_gn_185001-202012.nc


53.1kKiB [00:02, 25.9kKiB/s]                          
 75%|███████▍  | 44/59 [03:39<01:02,  4.19s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/CanESM5/tas_Amon_CanESM5_hist-aer_r14i1p2f1_gn_185001-202012.nc


53.1kKiB [00:04, 11.9kKiB/s]                          
 76%|███████▋  | 45/59 [03:44<01:03,  4.51s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/CanESM5/tas_Amon_CanESM5_hist-aer_r15i1p1f1_gn_185001-202012.nc


53.1kKiB [00:04, 10.9kKiB/s]                          
 78%|███████▊  | 46/59 [03:50<01:03,  4.87s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/CanESM5/tas_Amon_CanESM5_hist-aer_r15i1p2f1_gn_185001-202012.nc


53.1kKiB [00:02, 21.9kKiB/s]                          
 80%|███████▉  | 47/59 [03:53<00:52,  4.39s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/CanESM5/tas_Amon_CanESM5_hist-aer_r10i1p2f1_gn_185001-202012.nc


53.1kKiB [00:02, 26.5kKiB/s]                          
 81%|████████▏ | 48/59 [03:56<00:42,  3.91s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/CanESM5/tas_Amon_CanESM5_hist-aer_r7i1p2f1_gn_185001-202012.nc


53.1kKiB [00:02, 26.2kKiB/s]                          
 83%|████████▎ | 49/59 [03:59<00:35,  3.54s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/CanESM5/tas_Amon_CanESM5_hist-aer_r8i1p2f1_gn_185001-202012.nc


53.1kKiB [00:02, 25.7kKiB/s]                          
 85%|████████▍ | 50/59 [04:02<00:30,  3.34s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/CanESM5/tas_Amon_CanESM5_hist-aer_r9i1p2f1_gn_185001-202012.nc


108kKiB [00:11, 9.85kKiB/s]                         
 86%|████████▋ | 51/59 [04:13<00:46,  5.86s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/E3SM-2-0/tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_185001-189912.nc


108kKiB [00:20, 5.39kKiB/s]                         
 88%|████████▊ | 52/59 [04:34<01:12, 10.29s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/E3SM-2-0/tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_190001-194912.nc


108kKiB [00:14, 7.39kKiB/s]                         
 90%|████████▉ | 53/59 [04:50<01:11, 11.86s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/E3SM-2-0/tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_195001-199912.nc


32.6kKiB [00:03, 9.52kKiB/s]                          
 92%|█████████▏| 54/59 [04:54<00:47,  9.53s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/E3SM-2-0/tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_200001-201412.nc


108kKiB [00:20, 5.33kKiB/s]                         
 93%|█████████▎| 55/59 [05:15<00:51, 12.93s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/E3SM-2-0/tas_Amon_E3SM-2-0_hist-aer_r4i1p1f1_gr_185001-189912.nc


108kKiB [00:13, 8.18kKiB/s]                         
 95%|█████████▍| 56/59 [05:28<00:39, 13.20s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/E3SM-2-0/tas_Amon_E3SM-2-0_hist-aer_r4i1p1f1_gr_190001-194912.nc


108kKiB [00:07, 14.0kKiB/s]                         
 97%|█████████▋| 57/59 [05:37<00:23, 11.72s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/E3SM-2-0/tas_Amon_E3SM-2-0_hist-aer_r4i1p1f1_gr_195001-199912.nc


32.6kKiB [00:03, 10.8kKiB/s]                          
 98%|█████████▊| 58/59 [05:40<00:09,  9.30s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/E3SM-2-0/tas_Amon_E3SM-2-0_hist-aer_r4i1p1f1_gr_200001-201412.nc


7.45kKiB [00:04, 1.68kKiB/s]                          
100%|██████████| 59/59 [05:47<00:00,  5.89s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/scenario/users/benhutch/hist-aer/tas/FGOALS-g3/tas_Amon_FGOALS-g3_hist-aer_r2i1p1f1_gn_201001-202012.nc





In [14]:
# Extract all of the file_context into dictionaries
# and then append them to a dataframe
# Initialize an empty list to store the dictionaries
# Create an empty dataframe
file_context_df = pd.DataFrame()


# For each file_context
for file_context in file_context_list:
    # Append the file_context to the dataframe
    file_context_df = file_context_df.append(file_context, ignore_index=True)

  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_conte

In [None]:
# Python
# Form a list of the unique 'source_id' values from the results
source_id_list = list(set([result.json['source_id'] for result in results]))

# Print the list
print(source_id_list)

In [5]:
# test the function for querying the database
results = query_data_esgf(conn,
                        source_id='E3SM-2-0',
                        experiment_id='hist-aer',
                        variable_id='tas',
                        table_id='Amon',
                        data_node='esgf-data2.llnl.gov',)

# print the len of the results
print(len(results))

# print the type of the results
print(type(results))

# print the results
print(results)

5
<class 'pyesgf.search.results.ResultSet'>
<pyesgf.search.results.ResultSet object at 0x7f87bf91f130>


In [6]:
# Print the details of the first result
print(results[0].json['id'])

CMIP6.DAMIP.E3SM-Project.E3SM-2-0.hist-aer.r2i1p1f1.Amon.tas.gr.v20220906|esgf-data2.llnl.gov


In [7]:
# Extract the file context
# files_list = extract_file_context(results)

# # # Turn the list into a dataframe
# # files_df = pd.DataFrame.from_dict(files_list)

# # files_df

In [8]:
files_list_mt = extract_file_context_multithread(results)

files_list_mt

Extracting file context for 5 datasets...


[{'filename': 'tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_185001-189912.nc',
  'url': 'https://esgf-data2.llnl.gov/thredds/fileServer/user_pub_work/CMIP6/DAMIP/E3SM-Project/E3SM-2-0/hist-aer/r2i1p1f1/Amon/tas/gr/v20220906/tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_185001-189912.nc'},
 {'filename': 'tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_190001-194912.nc',
  'url': 'https://esgf-data2.llnl.gov/thredds/fileServer/user_pub_work/CMIP6/DAMIP/E3SM-Project/E3SM-2-0/hist-aer/r2i1p1f1/Amon/tas/gr/v20220906/tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_190001-194912.nc'},
 {'filename': 'tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_195001-199912.nc',
  'url': 'https://esgf-data2.llnl.gov/thredds/fileServer/user_pub_work/CMIP6/DAMIP/E3SM-Project/E3SM-2-0/hist-aer/r2i1p1f1/Amon/tas/gr/v20220906/tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_195001-199912.nc'},
 {'filename': 'tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_200001-201412.nc',
  'url': 'https://esgf-data2.llnl.gov/thredds/fileServer/user_pub_work/CMIP6/DAMIP/E3SM-Project/

In [9]:
files_list = files_list_mt

In [11]:
# Print the type of the files list
print(type(files_list))

# Extract this into a dataframe
files_df = pd.DataFrame.from_dict(files_list)
files_df

# Assert that all filenames contrain the string "185001" and "202012"
# assert all(files_df['filename'].str.contains('185001')), "Not all filenames contain the string 185001"
# assert all(files_df['filename'].str.contains('202012')), "Not all filenames contain the string 202012"

files_df

<class 'list'>


Unnamed: 0,filename,url
0,tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_185001-...,https://esgf-data2.llnl.gov/thredds/fileServer...
1,tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_190001-...,https://esgf-data2.llnl.gov/thredds/fileServer...
2,tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_195001-...,https://esgf-data2.llnl.gov/thredds/fileServer...
3,tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_200001-...,https://esgf-data2.llnl.gov/thredds/fileServer...
4,tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_185001-...,https://esgf-data2.llnl.gov/thredds/fileServer...
5,tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_190001-...,https://esgf-data2.llnl.gov/thredds/fileServer...
6,tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_195001-...,https://esgf-data2.llnl.gov/thredds/fileServer...
7,tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_200001-...,https://esgf-data2.llnl.gov/thredds/fileServer...
8,tas_Amon_E3SM-2-0_hist-aer_r3i1p1f1_gr_185001-...,https://esgf-data2.llnl.gov/thredds/fileServer...
9,tas_Amon_E3SM-2-0_hist-aer_r3i1p1f1_gr_190001-...,https://esgf-data2.llnl.gov/thredds/fileServer...


In [12]:
import glob

# We want to verify whether these files exist on JASMIN
damip_dir = "/badc/cmip6/data/CMIP6/DAMIP/"

# Test the function
files_df = check_file_exists_jasmin(files_df, damip_dir)

files_df

E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_185001-189912.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_190001-194912.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_195001-199912.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_200001-201412.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_185001-189912.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_190001-194912.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_195001-199912.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_200001-201412.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r3i1p1f1_gr_185001-189912.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r3i1p1f1_gr_190001-194912.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r3i1p1

Unnamed: 0,filename,url,file_exists
0,tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_185001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False
1,tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_190001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False
2,tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_195001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False
3,tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_200001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False
4,tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_185001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False
5,tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_190001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False
6,tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_195001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False
7,tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_200001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False
8,tas_Amon_E3SM-2-0_hist-aer_r3i1p1f1_gr_185001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False
9,tas_Amon_E3SM-2-0_hist-aer_r3i1p1f1_gr_190001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False


In [10]:
# Set up the directory to download to
download_dir = "/gws/nopw/j04/scenario/users/benhutch/DAMIP"

# Set up the variable
variable = 'tas'

# Set up the experiment id
experiment_id = 'hist-aer'

# Set up the model
model = 'CanESM5'

# Set up the directory
download_path = os.path.join(download_dir, experiment_id, 
                             variable, model)

# Print the download path
print(download_path)

# Use the download function to download a single file
download_file(files_df['url'][0], 
              files_df['filename'][0], download_path)

/gws/nopw/j04/scenario/users/benhutch/DAMIP/hist-aer/tas/CanESM5
Downloading tas_Amon_CanESM5_hist-aer_r1i1p1f1_gn_185001-202012.nc from http://crd-esgf-drc.ec.gc.ca/thredds/fileServer/esgE_dataroot/AR6/CMIP6/DAMIP/CCCma/CanESM5/hist-aer/r1i1p1f1/Amon/tas/gn/v20190429/tas_Amon_CanESM5_hist-aer_r1i1p1f1_gn_185001-202012.nc
Saving to /gws/nopw/j04/scenario/users/benhutch/DAMIP/hist-aer/tas/CanESM5tas_Amon_CanESM5_hist-aer_r1i1p1f1_gn_185001-202012.nc


53.1kKiB [00:46, 1.13kKiB/s]                          


Downloaded size does not match expected size!
 FYI, the status code was  200


In [11]:
# Download all the files
for i in tqdm(range(len(files_df))):
    download_file(files_df['url'][i], 
                  files_df['filename'][i], download_path)

  0%|          | 0/5 [00:00<?, ?it/s]

Downloading tas_Amon_CanESM5_hist-aer_r1i1p1f1_gn_185001-202012.nc from http://crd-esgf-drc.ec.gc.ca/thredds/fileServer/esgE_dataroot/AR6/CMIP6/DAMIP/CCCma/CanESM5/hist-aer/r1i1p1f1/Amon/tas/gn/v20190429/tas_Amon_CanESM5_hist-aer_r1i1p1f1_gn_185001-202012.nc
Saving to /gws/nopw/j04/scenario/users/benhutch/DAMIP/hist-aer/tas/CanESM5tas_Amon_CanESM5_hist-aer_r1i1p1f1_gn_185001-202012.nc


53.1kKiB [00:05, 10.5kKiB/s]                          
 20%|██        | 1/5 [00:05<00:23,  5.86s/it]

Downloaded size does not match expected size!
 FYI, the status code was  200
Downloading tas_Amon_CanESM5_hist-aer_r4i1p1f1_gn_185001-202012.nc from http://crd-esgf-drc.ec.gc.ca/thredds/fileServer/esgE_dataroot/AR6/CMIP6/DAMIP/CCCma/CanESM5/hist-aer/r4i1p1f1/Amon/tas/gn/v20190429/tas_Amon_CanESM5_hist-aer_r4i1p1f1_gn_185001-202012.nc
Saving to /gws/nopw/j04/scenario/users/benhutch/DAMIP/hist-aer/tas/CanESM5tas_Amon_CanESM5_hist-aer_r4i1p1f1_gn_185001-202012.nc


53.1kKiB [00:02, 22.6kKiB/s]                          
 40%|████      | 2/5 [00:08<00:12,  4.25s/it]

Downloaded size does not match expected size!
 FYI, the status code was  200
Downloading tas_Amon_CanESM5_hist-aer_r2i1p1f1_gn_185001-202012.nc from http://crd-esgf-drc.ec.gc.ca/thredds/fileServer/esgE_dataroot/AR6/CMIP6/DAMIP/CCCma/CanESM5/hist-aer/r2i1p1f1/Amon/tas/gn/v20190429/tas_Amon_CanESM5_hist-aer_r2i1p1f1_gn_185001-202012.nc
Saving to /gws/nopw/j04/scenario/users/benhutch/DAMIP/hist-aer/tas/CanESM5tas_Amon_CanESM5_hist-aer_r2i1p1f1_gn_185001-202012.nc


53.1kKiB [00:19, 2.73kKiB/s]                          


Downloaded size does not match expected size!
 FYI, the status code was  200


 60%|██████    | 3/5 [00:29<00:23, 11.58s/it]

Downloading tas_Amon_CanESM5_hist-aer_r5i1p1f1_gn_185001-202012.nc from http://crd-esgf-drc.ec.gc.ca/thredds/fileServer/esgE_dataroot/AR6/CMIP6/DAMIP/CCCma/CanESM5/hist-aer/r5i1p1f1/Amon/tas/gn/v20190429/tas_Amon_CanESM5_hist-aer_r5i1p1f1_gn_185001-202012.nc
Saving to /gws/nopw/j04/scenario/users/benhutch/DAMIP/hist-aer/tas/CanESM5tas_Amon_CanESM5_hist-aer_r5i1p1f1_gn_185001-202012.nc


53.1kKiB [00:02, 23.0kKiB/s]                          
 80%|████████  | 4/5 [00:32<00:08,  8.18s/it]

Downloaded size does not match expected size!
 FYI, the status code was  200
Downloading tas_Amon_CanESM5_hist-aer_r3i1p1f1_gn_185001-202012.nc from http://crd-esgf-drc.ec.gc.ca/thredds/fileServer/esgE_dataroot/AR6/CMIP6/DAMIP/CCCma/CanESM5/hist-aer/r3i1p1f1/Amon/tas/gn/v20190429/tas_Amon_CanESM5_hist-aer_r3i1p1f1_gn_185001-202012.nc
Saving to /gws/nopw/j04/scenario/users/benhutch/DAMIP/hist-aer/tas/CanESM5tas_Amon_CanESM5_hist-aer_r3i1p1f1_gn_185001-202012.nc


53.1kKiB [00:13, 3.82kKiB/s]                          
100%|██████████| 5/5 [00:46<00:00,  9.39s/it]

Downloaded size does not match expected size!
 FYI, the status code was  200



