In [19]:
# Testing the downloading data method from:
# https://claut.gitlab.io/man_ccia/lab2.html
# Import the required modules
from pyesgf.search import SearchConnection
import os
import sys
import importlib
import pandas as pd
import requests
from tqdm import tqdm

# Set the os environment to on
os.environ['ESGF_PYCLIENT_NO_FACETS_STAR_WARNING'] = "on"

In [20]:
# Import the functions
from testing_download_functions import query_data_esgf, extract_file_context, \
                                        download_file, extract_file_context_multithread, \
                                        check_file_exists_jasmin, query_models_esgf

In [21]:
# Import the functions again
# import the functions
_ = importlib.reload(sys.modules['testing_download_functions'])

# Import the functions
from testing_download_functions import query_data_esgf, extract_file_context, \
                                        download_file, extract_file_context_multithread, \
                                        check_file_exists_jasmin, query_models_esgf, find_valid_nodes

In [22]:
# set the search connection
# to the LLNL search node
conn = SearchConnection('https://esgf-node.llnl.gov/esg-search', distrib=True)

In [23]:
# Find the models which have data on the esgf node for the following constraints
# The constraints are:
experiment_id = 'dcppA-hindcast'
latest = True
variable_id = 'sfcWind'
project = 'CMIP6'
table_id = 'Amon'
activity_id = 'DCPP'
sub_experiment_id = 's1962,s1963'

In [24]:
# Test the function which does the same as below
models_list = query_models_esgf(experiment_id=experiment_id,
                                variable_id=variable_id,
                                table_id=table_id,
                                activity_id=activity_id,
                                connection=conn,
                                latest=latest,
                                project=project,
                                sub_experiment_id=sub_experiment_id)

# Print the models list
print(models_list)

KeyboardInterrupt: 

In [7]:
# # Set up the params for the query
# params = {
#     "latest": latest,
#     "project": project,
#     "experiment_id": experiment_id,
#     "variable_id": variable_id,
#     "activity_id": activity_id,
#     "table_id": table_id
# }

# # Query the database
# query = conn.new_context(**params)

# # Get the results
# results = query.search()

# print(len(results))

# # Python
# # Form a list of the unique 'source_id' values from the results
# # Python
# # Form a set of the unique 'source_id' values from the results
# source_id_set = set(id for result in results for id in result.json['source_id'])

# # Print the set
# print(source_id_set)

In [25]:
# print(type(models_list))

# # # Convert the set to a list
# source_id_list = list(models_list)

# # Constrain to the model HadGEM3-GC31-LL and the model following it
# # find the index of the model
# index = source_id_list.index('HadGEM3-GC31-MM')

# source_id_list = ["BCC-CSM2-MR", "HadGEM3-GC31-MM"]
source_id_list = ["CESM1-1-CAM5-CMIP5"]

source_id_set = source_id_list

In [26]:
# Set the params for the query
params = {
    "latest": latest,
    "project": project,
    "experiment_id": experiment_id,
    "variable_id": variable_id,
    "activity_id": activity_id,
    "table_id": table_id,
    "sub_experiment_id": sub_experiment_id
}

In [27]:
# # Constrain the source_id_set to the first 1 model
# source_id_set = list(source_id_set)[0:1]

# # Print the set
# print(source_id_set)

# Initialize an empty dictionary to store the results
max_results = {'source_id': None, 'data_node': None, 'num_results': 0}

# Create a list for the max_results dictionaries
max_results_list = []

# Set up the max results per source dictionary
max_results_per_source = {}

# Loop through the source_id_set and query which nodes have data for each model
for source_id in source_id_set:
    print("trying to find valid nodes for model: {}".format(source_id))
    # Set the source_id constraint
    params['source_id'] = source_id
    print(params)
    # Query the database
    model_query = conn.new_context(**params)
    # Get the results
    model_results = model_query.search()
    # Print the number of results
    print(len(model_results))

    # if the len of the model results is not 0
    if len(model_results) != 0:
        # Print the first result
        print(model_results[0].json['id'])

    # Identify the unique nodes (data_node) which have data for the model
    data_node_set = set(result.json['data_node'] for result in model_results)

    # Print the set
    print(data_node_set)

    # Loop through the data_node_set and query how many files are available for each 
    # node
    for data_node in data_node_set:
        print("trying to find valid files for node: {}".format(data_node))
        
        # Set up the params for the query
        params_node = params.copy()
        
        # Set the data_node constraint
        params_node['data_node'] = data_node
        # Query the database
        node_query = conn.new_context(**params_node)
        # Get the results
        node_results = node_query.search()
        # Print the number of results
        print(len(node_results))

        # If this source_id is not in max_results_per_source or this data_node has more results, update the dictionary
        if source_id not in max_results_per_source or len(node_results) > max_results_per_source[source_id]:
            max_results = {'source_id': source_id, 'data_node': data_node, 'num_results': len(node_results)}
            max_results_per_source[source_id] = len(node_results)

            # Append the max_results dictionary to the list
            max_results_list.append(max_results)
        else:
            print("this data_node has less results than the previous one")
            continue

# Print the dictionary
print(max_results_list)





trying to find valid nodes for model: CESM1-1-CAM5-CMIP5
{'latest': True, 'project': 'CMIP6', 'experiment_id': 'dcppA-hindcast', 'variable_id': 'sfcWind', 'activity_id': 'DCPP', 'table_id': 'Amon', 'sub_experiment_id': 's1962,s1963', 'source_id': 'CESM1-1-CAM5-CMIP5'}
160
CMIP6.DCPP.NCAR.CESM1-1-CAM5-CMIP5.dcppA-hindcast.s1963-r8i1p1f1.Amon.sfcWind.gn.v20191007|esgf-data.ucar.edu
{'esgf-data.ucar.edu', 'esgf-data1.llnl.gov'}
trying to find valid files for node: esgf-data.ucar.edu
80
trying to find valid files for node: esgf-data1.llnl.gov
80
this data_node has less results than the previous one
[{'source_id': 'CESM1-1-CAM5-CMIP5', 'data_node': 'esgf-data.ucar.edu', 'num_results': 80}]


In [11]:
# # Test the function for finding the valid nodes
# max_results_list = find_valid_nodes(params=params, 
#                                     models_list=models_list,
#                                     conn=conn)

In [28]:
# Clean the max_results_list to remove duplicate source_id entries
# Keep the entry with the highest number of results (num_results)
# Initialize an empty list to store the unique source_id entries
unique_source_id_list = []

# Loop through the max_results_list and append the unique source_id entries
for result in max_results_list:
    if result['source_id'] not in unique_source_id_list:
        unique_source_id_list.append(result['source_id'])

# Print the list
print(unique_source_id_list)

# Initialize an empty list to store the unique max_results_list entries
unique_max_results_list = []

# Loop through the unique_source_id_list and only
# Append the max_results_list entries which match the source_id and have the highest num_results
for source_id in unique_source_id_list:
    print("source_id: {}".format(source_id))
    # Initialize an empty list to store the num_results
    num_results_list = []
    # Loop through the max_results_list and append the num_results to the list
    for result in max_results_list:
        if result['source_id'] == source_id:
            num_results_list.append(result['num_results'])
    # Get the max num_results
    max_num_results = max(num_results_list)
    # Loop through the max_results_list and append the entries which match the source_id and max_num_results
    for result in max_results_list:
        if result['source_id'] == source_id and result['num_results'] == max_num_results:
            unique_max_results_list.append(result)

['CESM1-1-CAM5-CMIP5']
source_id: CESM1-1-CAM5-CMIP5


In [13]:
# # Convert the unique_max_results_list to a dataframe
# unique_max_results_df = pd.DataFrame.from_dict(unique_max_results_list)

# # Print the dataframe
# unique_max_results_df

# # save the dataframe
# # save in current directory + save_data + filename
# save_dir = os.path.join(os.getcwd(), 'save_data')
# save_filename = 'unique_max_results_df_{}_{}_{}_{}_{}.csv'.format(experiment_id, variable_id, project, table_id, activity_id)

# # Form the save path
# save_path = os.path.join(save_dir, save_filename)

# # Check if the save directory exists
# if not os.path.exists(save_dir):
#     # Make the directory
#     os.makedirs(save_dir)

# # Save the dataframe
# unique_max_results_df.to_csv(save_path)

In [14]:
# save_dir = os.path.join(os.getcwd(), 'save_data')
# save_filename = 'unique_max_results_df_{}_{}_{}_{}_{}.csv'.format(experiment_id, variable_id, project, table_id, activity_id)

# # Form the save path
# save_path = os.path.join(save_dir, save_filename)

In [29]:
print(type(unique_max_results_list))

print(unique_max_results_list)

# Convert the unique_max_results_list to a dataframe
unique_max_results_df = pd.DataFrame.from_dict(unique_max_results_list)

# Print the dataframe
unique_max_results_df

<class 'list'>
[{'source_id': 'CESM1-1-CAM5-CMIP5', 'data_node': 'esgf-data.ucar.edu', 'num_results': 80}]


Unnamed: 0,source_id,data_node,num_results
0,CESM1-1-CAM5-CMIP5,esgf-data.ucar.edu,80


In [30]:
# Open the save_path as a dataframe
# unique_max_results_df = pd.read_csv(save_path)

# # Convert unique_max_results to a dataframe
# unique_max_results_df = pd.DataFrame.from_dict(unique_max_results_list)

results_list = []

# Loop over the dataframe to create a list of result sets
for i in range(len(unique_max_results_df)):
    
    # Get the source_id and data_node
    source_id = unique_max_results_df.loc[i, 'source_id']
    data_node = unique_max_results_df.loc[i, 'data_node']

    # Print the source_id and data_node
    print("source_id: {}, data_node: {}".format(source_id, data_node))

    # Print the experiment_id, variable_id, activity_id, and data_node
    print("experiment_id: {}, variable_id: {}, activity_id: {}, data_node: {}".format(experiment_id, variable_id, activity_id, data_node))

    results = query_data_esgf(conn,
                            source_id = source_id,
                            experiment_id = experiment_id,
                            variable_id = variable_id,
                            table_id = table_id,
                            project=project,
                            activity_id = activity_id,
                            data_node = data_node,
                            sub_experiment_id=sub_experiment_id
    )

    print(len(results))

    # Append the results to the results_list
    results_list.append(results)

# Print the results_list
print(results_list)

source_id: CESM1-1-CAM5-CMIP5, data_node: esgf-data.ucar.edu
experiment_id: dcppA-hindcast, variable_id: sfcWind, activity_id: DCPP, data_node: esgf-data.ucar.edu
80
[<pyesgf.search.results.ResultSet object at 0x7f78264ff790>]


In [31]:
# Set up a list for the file context
file_context_list = []
failed_results_list = []

# # Constrain results_list to the first 3 results
# results_list = results_list[3:5]

# Loop through the results_list and print ther number of results
for results in results_list:
    print(len(results))

    # Extract the file context from the results
    file_context, failed_results = extract_file_context(results)

    # Append the file_context to the file_context_list
    file_context_list.append(file_context)

    # Append the failed_results to the failed_results_list
    failed_results_list.append(failed_results)


# Print the file_context_list
print(file_context_list)

# Print the failed_results_list
print(failed_results_list)

80
Extracting file context for 80 datasets...


  1%|▏         | 1/80 [00:02<03:43,  2.83s/it]

Processed 1 out of 80 results.


  2%|▎         | 2/80 [00:04<03:07,  2.40s/it]

Processed 2 out of 80 results.


  4%|▍         | 3/80 [00:06<02:39,  2.08s/it]

Processed 3 out of 80 results.


  5%|▌         | 4/80 [00:07<02:11,  1.73s/it]

Processed 4 out of 80 results.


  6%|▋         | 5/80 [00:08<01:54,  1.53s/it]

Processed 5 out of 80 results.


  8%|▊         | 6/80 [00:10<01:44,  1.41s/it]

Processed 6 out of 80 results.


  9%|▉         | 7/80 [00:11<01:38,  1.35s/it]

Processed 7 out of 80 results.


 10%|█         | 8/80 [00:13<01:53,  1.58s/it]

Processed 8 out of 80 results.


 11%|█▏        | 9/80 [00:15<01:58,  1.67s/it]

Processed 9 out of 80 results.


 12%|█▎        | 10/80 [00:17<02:02,  1.75s/it]

Processed 10 out of 80 results.


 14%|█▍        | 11/80 [00:19<02:04,  1.80s/it]

Processed 11 out of 80 results.


 15%|█▌        | 12/80 [00:20<01:49,  1.61s/it]

Processed 12 out of 80 results.


 16%|█▋        | 13/80 [00:21<01:39,  1.49s/it]

Processed 13 out of 80 results.


 18%|█▊        | 14/80 [00:22<01:32,  1.40s/it]

Processed 14 out of 80 results.


 19%|█▉        | 15/80 [00:23<01:26,  1.34s/it]

Processed 15 out of 80 results.


 20%|██        | 16/80 [00:25<01:37,  1.53s/it]

Processed 16 out of 80 results.


 21%|██▏       | 17/80 [00:27<01:42,  1.62s/it]

Processed 17 out of 80 results.


 22%|██▎       | 18/80 [00:29<01:46,  1.71s/it]

Processed 18 out of 80 results.


 24%|██▍       | 19/80 [00:31<01:48,  1.77s/it]

Processed 19 out of 80 results.


 25%|██▌       | 20/80 [00:32<01:35,  1.59s/it]

Processed 20 out of 80 results.


 26%|██▋       | 21/80 [00:33<01:26,  1.47s/it]

Processed 21 out of 80 results.


 28%|██▊       | 22/80 [00:35<01:20,  1.39s/it]

Processed 22 out of 80 results.


 29%|██▉       | 23/80 [00:36<01:15,  1.32s/it]

Processed 23 out of 80 results.


 30%|███       | 24/80 [00:37<01:14,  1.33s/it]

Processed 24 out of 80 results.


 31%|███▏      | 25/80 [00:39<01:22,  1.50s/it]

Processed 25 out of 80 results.


 32%|███▎      | 26/80 [00:41<01:29,  1.66s/it]

Processed 26 out of 80 results.


 34%|███▍      | 27/80 [00:43<01:35,  1.80s/it]

Processed 27 out of 80 results.


 35%|███▌      | 28/80 [00:45<01:29,  1.72s/it]

Processed 28 out of 80 results.


 36%|███▋      | 29/80 [00:46<01:19,  1.56s/it]

Processed 29 out of 80 results.


 38%|███▊      | 30/80 [00:47<01:15,  1.51s/it]

Processed 30 out of 80 results.


 39%|███▉      | 31/80 [00:49<01:09,  1.42s/it]

Processed 31 out of 80 results.


 40%|████      | 32/80 [00:50<01:07,  1.40s/it]

Processed 32 out of 80 results.


 41%|████▏     | 33/80 [00:52<01:15,  1.60s/it]

Processed 33 out of 80 results.


 42%|████▎     | 34/80 [00:54<01:15,  1.64s/it]

Processed 34 out of 80 results.


 44%|████▍     | 35/80 [00:56<01:19,  1.76s/it]

Processed 35 out of 80 results.


 45%|████▌     | 36/80 [00:57<01:15,  1.71s/it]

Processed 36 out of 80 results.


 46%|████▋     | 37/80 [00:59<01:06,  1.55s/it]

Processed 37 out of 80 results.


 48%|████▊     | 38/80 [01:00<01:00,  1.44s/it]

Processed 38 out of 80 results.


 49%|████▉     | 39/80 [01:01<00:56,  1.37s/it]

Processed 39 out of 80 results.


 50%|█████     | 40/80 [01:02<00:54,  1.36s/it]

Processed 40 out of 80 results.


 51%|█████▏    | 41/80 [01:04<01:01,  1.57s/it]

Processed 41 out of 80 results.


 52%|█████▎    | 42/80 [01:06<01:00,  1.60s/it]

Processed 42 out of 80 results.


 54%|█████▍    | 43/80 [01:08<01:04,  1.75s/it]

Processed 43 out of 80 results.


 55%|█████▌    | 44/80 [01:10<01:00,  1.67s/it]

Processed 44 out of 80 results.


 56%|█████▋    | 45/80 [01:11<00:53,  1.52s/it]

Processed 45 out of 80 results.


 57%|█████▊    | 46/80 [01:12<00:48,  1.42s/it]

Processed 46 out of 80 results.


 59%|█████▉    | 47/80 [01:13<00:44,  1.36s/it]

Processed 47 out of 80 results.


 60%|██████    | 48/80 [01:14<00:41,  1.31s/it]

Processed 48 out of 80 results.


 61%|██████▏   | 49/80 [01:16<00:47,  1.53s/it]

Processed 49 out of 80 results.


 62%|██████▎   | 50/80 [01:18<00:49,  1.65s/it]

Processed 50 out of 80 results.


 64%|██████▍   | 51/80 [01:21<00:58,  2.00s/it]

Processed 51 out of 80 results.


 65%|██████▌   | 52/80 [01:22<00:49,  1.76s/it]

Processed 52 out of 80 results.


 66%|██████▋   | 53/80 [01:24<00:42,  1.59s/it]

Processed 53 out of 80 results.


 68%|██████▊   | 54/80 [01:25<00:38,  1.47s/it]

Processed 54 out of 80 results.


 69%|██████▉   | 55/80 [01:26<00:34,  1.38s/it]

Processed 55 out of 80 results.


 70%|███████   | 56/80 [01:27<00:31,  1.32s/it]

Processed 56 out of 80 results.


 71%|███████▏  | 57/80 [01:28<00:29,  1.28s/it]

Processed 57 out of 80 results.


 72%|███████▎  | 58/80 [01:29<00:27,  1.25s/it]

Processed 58 out of 80 results.


 74%|███████▍  | 59/80 [01:31<00:25,  1.23s/it]

Processed 59 out of 80 results.


 75%|███████▌  | 60/80 [01:32<00:24,  1.22s/it]

Processed 60 out of 80 results.


 76%|███████▋  | 61/80 [01:33<00:23,  1.26s/it]

Processed 61 out of 80 results.


 78%|███████▊  | 62/80 [01:35<00:27,  1.51s/it]

Processed 62 out of 80 results.


 79%|███████▉  | 63/80 [01:37<00:27,  1.63s/it]

Processed 63 out of 80 results.


 80%|████████  | 64/80 [01:39<00:28,  1.77s/it]

Processed 64 out of 80 results.


 81%|████████▏ | 65/80 [01:41<00:26,  1.75s/it]

Processed 65 out of 80 results.


 82%|████████▎ | 66/80 [01:42<00:22,  1.58s/it]

Processed 66 out of 80 results.


 84%|████████▍ | 67/80 [01:43<00:19,  1.46s/it]

Processed 67 out of 80 results.


 85%|████████▌ | 68/80 [01:45<00:16,  1.38s/it]

Processed 68 out of 80 results.


 86%|████████▋ | 69/80 [01:47<00:17,  1.58s/it]

Processed 69 out of 80 results.


 88%|████████▊ | 70/80 [01:48<00:16,  1.64s/it]

Processed 70 out of 80 results.


 89%|████████▉ | 71/80 [01:51<00:16,  1.80s/it]

Processed 71 out of 80 results.


 90%|█████████ | 72/80 [01:52<00:14,  1.77s/it]

Processed 72 out of 80 results.


 91%|█████████▏| 73/80 [01:53<00:11,  1.60s/it]

Processed 73 out of 80 results.


 92%|█████████▎| 74/80 [01:55<00:08,  1.48s/it]

Processed 74 out of 80 results.


 94%|█████████▍| 75/80 [01:56<00:06,  1.39s/it]

Processed 75 out of 80 results.


 95%|█████████▌| 76/80 [01:57<00:05,  1.33s/it]

Processed 76 out of 80 results.


 96%|█████████▋| 77/80 [01:59<00:04,  1.54s/it]

Processed 77 out of 80 results.


 98%|█████████▊| 78/80 [02:01<00:03,  1.64s/it]

Processed 78 out of 80 results.


 99%|█████████▉| 79/80 [02:03<00:01,  1.74s/it]

Processed 79 out of 80 results.


100%|██████████| 80/80 [02:05<00:00,  1.57s/it]

Processed 80 out of 80 results.
[[{'filename': 'sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r8i1p1f1_gn_196311-197312.nc', 'url': 'http://esgf-data.ucar.edu/thredds/fileServer/esg_dataroot/CMIP6/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast/s1963-r8i1p1f1/Amon/sfcWind/gn/v20191007/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r8i1p1f1_gn_196311-197312.nc'}, {'filename': 'sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r9i1p1f1_gn_196311-197312.nc', 'url': 'http://esgf-data.ucar.edu/thredds/fileServer/esg_dataroot/CMIP6/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast/s1963-r9i1p1f1/Amon/sfcWind/gn/v20191007/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r9i1p1f1_gn_196311-197312.nc'}, {'filename': 'sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r40i1p1f1_gn_196311-197312.nc', 'url': 'http://esgf-data.ucar.edu/thredds/fileServer/esg_dataroot/CMIP6/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast/s1963-r40i1p1f1/Amon/sfcWind/gn/v20191007/sfcWind_Amon_CESM1-1-CAM5-CMI




In [32]:
print(len(file_context_list))
print(type(file_context_list))
print(file_context_list)

for file_context in file_context_list:
    print(type(file_context))
    print(file_context)

1
<class 'list'>
[[{'filename': 'sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r8i1p1f1_gn_196311-197312.nc', 'url': 'http://esgf-data.ucar.edu/thredds/fileServer/esg_dataroot/CMIP6/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast/s1963-r8i1p1f1/Amon/sfcWind/gn/v20191007/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r8i1p1f1_gn_196311-197312.nc'}, {'filename': 'sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r9i1p1f1_gn_196311-197312.nc', 'url': 'http://esgf-data.ucar.edu/thredds/fileServer/esg_dataroot/CMIP6/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast/s1963-r9i1p1f1/Amon/sfcWind/gn/v20191007/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r9i1p1f1_gn_196311-197312.nc'}, {'filename': 'sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r40i1p1f1_gn_196311-197312.nc', 'url': 'http://esgf-data.ucar.edu/thredds/fileServer/esg_dataroot/CMIP6/DCPP/NCAR/CESM1-1-CAM5-CMIP5/dcppA-hindcast/s1963-r40i1p1f1/Amon/sfcWind/gn/v20191007/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindca

In [33]:
# Create an empty DataFrame
df = pd.DataFrame()

# Loop through the file_context_list
for file_context in file_context_list:
    # Convert the dictionary to a DataFrame
    temp_df = pd.DataFrame(file_context)

    # Append the temp_df to the main df
    df = pd.concat([df, temp_df], ignore_index=True)

# Print the df
df

Unnamed: 0,filename,url
0,sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast...,http://esgf-data.ucar.edu/thredds/fileServer/e...
1,sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast...,http://esgf-data.ucar.edu/thredds/fileServer/e...
2,sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast...,http://esgf-data.ucar.edu/thredds/fileServer/e...
3,sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast...,http://esgf-data.ucar.edu/thredds/fileServer/e...
4,sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast...,http://esgf-data.ucar.edu/thredds/fileServer/e...
...,...,...
75,sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast...,http://esgf-data.ucar.edu/thredds/fileServer/e...
76,sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast...,http://esgf-data.ucar.edu/thredds/fileServer/e...
77,sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast...,http://esgf-data.ucar.edu/thredds/fileServer/e...
78,sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast...,http://esgf-data.ucar.edu/thredds/fileServer/e...


In [20]:
# # Python
# # Create an empty DataFrame
# df = pd.DataFrame()

# # Loop through the file_context_list
# for file_context in file_context_list:
#     # Convert the dictionary to a DataFrame
#     temp_df = pd.DataFrame.from_dict(file_context)

#     # Append the temp_df to the main df
#     df = pd.concat([df, temp_df], ignore_index=True)

In [34]:
# reload for modules
import importlib
import sys

# import the functions
_ = importlib.reload(sys.modules['testing_download_functions'])

from testing_download_functions import query_data_esgf, extract_file_context, \
                                        download_file, extract_file_context_multithread, \
                                        check_file_exists_jasmin

In [35]:
# Check whether the files exist on JASMIN
import glob

# Set up the file_context_df
file_context_df = df.copy()

# We want to verify whether these files exist on JASMIN
damip_dir = "/badc/cmip6/data/CMIP6/DCPP/"

# Test the function
# TODO
files_df = check_file_exists_jasmin(file_context_df, damip_dir)

files_df

NCAR
badc
File does not exist for sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r8i1p1f1_gn_196311-197312.nc
NCAR
badc
File does not exist for sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r9i1p1f1_gn_196311-197312.nc
NCAR
badc
File does not exist for sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r40i1p1f1_gn_196311-197312.nc
NCAR
badc
File does not exist for sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r4i1p1f1_gn_196311-197312.nc
NCAR
badc
File does not exist for sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r5i1p1f1_gn_196311-197312.nc
NCAR
badc
File does not exist for sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r6i1p1f1_gn_196311-197312.nc
NCAR
badc
File does not exist for sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r7i1p1f1_gn_196311-197312.nc
NCAR
badc
File does not exist for sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r33i1p1f1_gn_196311-197312.nc
NCAR
badc
File does not exist for sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcpp

Unnamed: 0,filename,url,file_exists,filepath
0,sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast...,http://esgf-data.ucar.edu/thredds/fileServer/e...,False,
1,sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast...,http://esgf-data.ucar.edu/thredds/fileServer/e...,False,
2,sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast...,http://esgf-data.ucar.edu/thredds/fileServer/e...,False,
3,sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast...,http://esgf-data.ucar.edu/thredds/fileServer/e...,False,
4,sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast...,http://esgf-data.ucar.edu/thredds/fileServer/e...,False,
...,...,...,...,...
75,sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast...,http://esgf-data.ucar.edu/thredds/fileServer/e...,False,
76,sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast...,http://esgf-data.ucar.edu/thredds/fileServer/e...,False,
77,sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast...,http://esgf-data.ucar.edu/thredds/fileServer/e...,False,
78,sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast...,http://esgf-data.ucar.edu/thredds/fileServer/e...,False,


In [36]:
# We only want to download the files which don't already exist on JASMIN
dcpp_dir_gws = "/gws/nopw/j04/canari/users/benhutch/"


# Now we want to check whether the files exist on JASMIN
files_df = check_file_exists_jasmin(files_df, dcpp_dir_gws)

NCAR
gws
File exists for sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r8i1p1f1_gn_196311-197312.nc
NCAR
gws
File exists for sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r9i1p1f1_gn_196311-197312.nc
NCAR
gws
File exists for sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r40i1p1f1_gn_196311-197312.nc
NCAR
gws
File exists for sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r4i1p1f1_gn_196311-197312.nc
NCAR
gws
File exists for sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r5i1p1f1_gn_196311-197312.nc
NCAR
gws
File exists for sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r6i1p1f1_gn_196311-197312.nc
NCAR
gws
File exists for sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r7i1p1f1_gn_196311-197312.nc
NCAR
gws
File does not exist for sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r33i1p1f1_gn_196311-197312.nc
NCAR
gws
File does not exist for sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r34i1p1f1_gn_196311-197312.nc
NCAR
gws
File exis

In [37]:
# We only want to download the files which don't already exist on JASMIN
# Set up the download directory
download_dir = "/gws/nopw/j04/canari/users/benhutch/"

# Keep a copy of the files_df
files_df_copy = files_df.copy()

# Keep only the files which exist on JASMIN in the copy
files_df_copy = files_df_copy[files_df_copy['file_exists'] == True]

# Loop through files_df and download the files
# First constrain the dataframe to only the files which don't already exist on JASMIN
files_df = files_df[files_df['file_exists'] == False]

# Reset the index
files_df.reset_index(drop=True, inplace=True)

# Loop through the files_df and download the files
for i in tqdm(range(len(files_df))):
    # Get the file_url
    file_url = files_df.loc[i, 'url']

    # Get the filename
    filename = files_df.loc[i, 'filename']

    # Split the filename and extract the variable name
    variable = filename.split('_')[0]

    # Split the filename to get the experiment name
    experiment = filename.split('_')[3]

    # Set up the model
    model = filename.split('_')[2]

    # Set up the download directory
    # download_dir_loop = os.path.join(download_dir, experiment, variable, model)

    # Modify download dir to be inline with that already saved on canari
    download_dir_loop = os.path.join(download_dir, experiment, "data",
                                        variable, model)

    # If the download directory doesn't exist, make it
    if not os.path.exists(download_dir_loop):
        os.makedirs(download_dir_loop)

    # Set up the download path
    download_path = os.path.join(download_dir_loop, filename)

    # Assert that the download path doesn't already exist
    assert not os.path.exists(download_path), "The file {} already exists".format(download_path)

    # In the filepath column of the dataframe
    # replace the current file path with the download path
    files_df.loc[i, 'filepath'] = download_path

    # Replace the file_exists column with True
    files_df.loc[i, 'file_exists'] = True

    # Set up the request
    r = requests.get(file_url, stream=True)

    # Set up the total size
    total_size = int(r.headers.get('content-length', 0))
    
    # Set up the block size
    block_size = 1024

    # Download the file
    with open(download_path, 'wb') as f:
        for data in tqdm(r.iter_content(block_size), 
                        total = total_size//block_size, 
                        unit = 'KiB', 
                        unit_scale = True):
            f.write(data)

        # If the total size is no 0
        if total_size != 0:
            print("File is not empty")
            print("Download complete - file saved to {}".format(download_path))

# Assert that all rows in file_exists are True
assert all(files_df['file_exists'] == True), "Not all files have been downloaded"

# Assert that the filepath column doesn't contain any NaNs
assert not any(files_df['filepath'].isna()), "The filepath column contains NaNs"


# append the files_df to the files_df_copy
files_df_copy = pd.concat([files_df_copy, files_df], ignore_index=True)

# Assrt tha


19.3kKiB [00:01, 10.3kKiB/s]                          
  1%|▏         | 1/68 [00:02<02:47,  2.50s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r33i1p1f1_gn_196311-197312.nc


19.3kKiB [00:01, 10.2kKiB/s]                          
  3%|▎         | 2/68 [00:05<02:46,  2.52s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r34i1p1f1_gn_196311-197312.nc


19.3kKiB [00:01, 11.0kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r19i1p1f1_gn_196311-197312.nc


19.3kKiB [00:01, 10.0kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r1i1p1f1_gn_196311-197312.nc


19.3kKiB [00:01, 10.6kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r27i1p1f1_gn_196311-197312.nc


19.3kKiB [00:01, 11.0kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r28i1p1f1_gn_196311-197312.nc


19.3kKiB [00:01, 10.1kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r22i1p1f1_gn_196311-197312.nc


19.3kKiB [00:01, 10.3kKiB/s]                          
 12%|█▏        | 8/68 [00:22<02:46,  2.77s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r23i1p1f1_gn_196311-197312.nc


19.3kKiB [00:01, 10.9kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r24i1p1f1_gn_196311-197312.nc


19.3kKiB [00:01, 9.68kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r20i1p1f1_gn_196311-197312.nc


19.3kKiB [00:01, 10.2kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r21i1p1f1_gn_196311-197312.nc


19.3kKiB [00:01, 10.2kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r17i1p1f1_gn_196311-197312.nc


19.3kKiB [00:02, 9.62kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r18i1p1f1_gn_196311-197312.nc


19.3kKiB [00:01, 11.0kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r25i1p1f1_gn_196311-197312.nc


19.3kKiB [00:01, 10.1kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r26i1p1f1_gn_196311-197312.nc


19.3kKiB [00:02, 9.03kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r29i1p1f1_gn_196311-197312.nc


19.3kKiB [00:02, 7.60kKiB/s]                          
 25%|██▌       | 17/68 [00:51<02:48,  3.31s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r2i1p1f1_gn_196311-197312.nc


19.3kKiB [00:02, 8.00kKiB/s]                          
 26%|██▋       | 18/68 [00:54<02:40,  3.22s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r5i1p1f1_gn_196211-197212.nc


19.3kKiB [00:02, 7.71kKiB/s]                          
 28%|██▊       | 19/68 [00:57<02:35,  3.18s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r6i1p1f1_gn_196211-197212.nc


19.3kKiB [00:02, 8.12kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r7i1p1f1_gn_196211-197212.nc


19.3kKiB [00:02, 7.02kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r8i1p1f1_gn_196211-197212.nc


19.3kKiB [00:02, 7.34kKiB/s]                          
 32%|███▏      | 22/68 [01:08<02:35,  3.39s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r9i1p1f1_gn_196211-197212.nc


19.3kKiB [00:02, 6.71kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r10i1p1f1_gn_196311-197312.nc


19.3kKiB [00:02, 6.91kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r11i1p1f1_gn_196311-197312.nc


19.3kKiB [00:02, 6.63kKiB/s]                          
 37%|███▋      | 25/68 [01:20<02:41,  3.76s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r12i1p1f1_gn_196311-197312.nc


19.3kKiB [00:02, 7.33kKiB/s]                          
 38%|███▊      | 26/68 [01:24<02:34,  3.67s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r13i1p1f1_gn_196311-197312.nc


19.3kKiB [00:02, 7.99kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r39i1p1f1_gn_196211-197212.nc


19.3kKiB [00:02, 6.72kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r3i1p1f1_gn_196211-197212.nc


19.3kKiB [00:02, 6.71kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r40i1p1f1_gn_196211-197212.nc


19.3kKiB [00:02, 6.62kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r4i1p1f1_gn_196211-197212.nc


19.3kKiB [00:03, 5.32kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r14i1p1f1_gn_196311-197312.nc


19.3kKiB [00:03, 5.49kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r15i1p1f1_gn_196311-197312.nc


19.3kKiB [00:03, 5.93kKiB/s]                          
 49%|████▊     | 33/68 [01:53<02:27,  4.23s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r16i1p1f1_gn_196311-197312.nc


19.3kKiB [00:03, 5.43kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r37i1p1f1_gn_196211-197212.nc


19.3kKiB [00:03, 5.51kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r38i1p1f1_gn_196211-197212.nc


19.3kKiB [00:03, 5.72kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r12i1p1f1_gn_196211-197212.nc


19.3kKiB [00:03, 5.48kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r13i1p1f1_gn_196211-197212.nc


19.3kKiB [00:03, 5.49kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r14i1p1f1_gn_196211-197212.nc


19.3kKiB [00:03, 5.55kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r10i1p1f1_gn_196211-197212.nc


19.3kKiB [00:03, 5.93kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r11i1p1f1_gn_196211-197212.nc


19.3kKiB [00:03, 5.87kKiB/s]                          
 60%|██████    | 41/68 [02:34<02:10,  4.84s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r15i1p1f1_gn_196211-197212.nc


19.3kKiB [00:03, 5.71kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r16i1p1f1_gn_196211-197212.nc


19.3kKiB [00:03, 5.48kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r17i1p1f1_gn_196211-197212.nc


19.3kKiB [00:03, 5.51kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r18i1p1f1_gn_196211-197212.nc


19.3kKiB [00:05, 3.59kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r30i1p1f1_gn_196211-197212.nc


19.3kKiB [00:03, 5.92kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r31i1p1f1_gn_196211-197212.nc


19.3kKiB [00:03, 5.46kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r27i1p1f1_gn_196211-197212.nc


19.3kKiB [00:03, 5.51kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r28i1p1f1_gn_196211-197212.nc


19.3kKiB [00:03, 5.49kKiB/s]                          
 72%|███████▏  | 49/68 [03:13<01:28,  4.64s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r32i1p1f1_gn_196211-197212.nc


19.3kKiB [00:03, 5.52kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r33i1p1f1_gn_196211-197212.nc


19.3kKiB [00:03, 5.87kKiB/s]                          
 75%|███████▌  | 51/68 [03:22<01:15,  4.42s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r25i1p1f1_gn_196211-197212.nc


19.3kKiB [00:03, 5.56kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r26i1p1f1_gn_196211-197212.nc


19.3kKiB [00:03, 5.67kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r22i1p1f1_gn_196211-197212.nc


19.3kKiB [00:04, 4.29kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r23i1p1f1_gn_196211-197212.nc


19.3kKiB [00:04, 4.67kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r24i1p1f1_gn_196211-197212.nc


19.3kKiB [00:03, 4.87kKiB/s]                          
 82%|████████▏ | 56/68 [03:46<00:58,  4.86s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r29i1p1f1_gn_196211-197212.nc


19.3kKiB [00:04, 4.82kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r2i1p1f1_gn_196211-197212.nc


19.3kKiB [00:04, 4.82kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r34i1p1f1_gn_196211-197212.nc


19.3kKiB [00:03, 5.14kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r35i1p1f1_gn_196211-197212.nc


19.3kKiB [00:04, 4.56kKiB/s]                          
 88%|████████▊ | 60/68 [04:06<00:39,  4.92s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r36i1p1f1_gn_196211-197212.nc


19.3kKiB [00:03, 4.98kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r20i1p1f1_gn_196211-197212.nc


19.3kKiB [00:03, 5.14kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r21i1p1f1_gn_196211-197212.nc


19.3kKiB [00:04, 4.65kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r19i1p1f1_gn_196211-197212.nc


19.3kKiB [00:04, 4.79kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1962-r1i1p1f1_gn_196211-197212.nc


19.3kKiB [00:03, 4.98kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r35i1p1f1_gn_196311-197312.nc


19.3kKiB [00:04, 4.66kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r30i1p1f1_gn_196311-197312.nc


19.3kKiB [00:04, 4.61kKiB/s]                          


File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r31i1p1f1_gn_196311-197312.nc


19.3kKiB [00:04, 4.78kKiB/s]                          
100%|██████████| 68/68 [04:50<00:00,  4.27s/it]

File is not empty
Download complete - file saved to /gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/sfcWind/CESM1-1-CAM5-CMIP5/sfcWind_Amon_CESM1-1-CAM5-CMIP5_dcppA-hindcast_s1963-r32i1p1f1_gn_196311-197312.nc





In [25]:
# Assert that all rows in file_exists are True
assert all(files_df_copy['file_exists'] == True), "Not all files have been downloaded"

# Assert that the filepath column doesn't contain any NaNs
assert not any(files_df_copy['filepath'].isna()), "The filepath column contains NaNs"

In [14]:
# Extract all of the file_context into dictionaries
# and then append them to a dataframe
# Initialize an empty list to store the dictionaries
# Create an empty dataframe
file_context_df = pd.DataFrame()


# For each file_context
for file_context in file_context_list:
    # Append the file_context to the dataframe
    file_context_df = file_context_df.append(file_context, ignore_index=True)

  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_context_df = file_context_df.append(file_context, ignore_index=True)
  file_conte

In [None]:
# Python
# Form a list of the unique 'source_id' values from the results
source_id_list = list(set([result.json['source_id'] for result in results]))

# Print the list
print(source_id_list)

In [5]:
# test the function for querying the database
results = query_data_esgf(conn,
                        source_id='E3SM-2-0',
                        experiment_id='hist-aer',
                        variable_id='tas',
                        table_id='Amon',
                        data_node='esgf-data2.llnl.gov',)

# print the len of the results
print(len(results))

# print the type of the results
print(type(results))

# print the results
print(results)

5
<class 'pyesgf.search.results.ResultSet'>
<pyesgf.search.results.ResultSet object at 0x7f87bf91f130>


In [6]:
# Print the details of the first result
print(results[0].json['id'])

CMIP6.DAMIP.E3SM-Project.E3SM-2-0.hist-aer.r2i1p1f1.Amon.tas.gr.v20220906|esgf-data2.llnl.gov


In [7]:
# Extract the file context
# files_list = extract_file_context(results)

# # # Turn the list into a dataframe
# # files_df = pd.DataFrame.from_dict(files_list)

# # files_df

In [8]:
files_list_mt = extract_file_context_multithread(results)

files_list_mt

Extracting file context for 5 datasets...


[{'filename': 'tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_185001-189912.nc',
  'url': 'https://esgf-data2.llnl.gov/thredds/fileServer/user_pub_work/CMIP6/DAMIP/E3SM-Project/E3SM-2-0/hist-aer/r2i1p1f1/Amon/tas/gr/v20220906/tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_185001-189912.nc'},
 {'filename': 'tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_190001-194912.nc',
  'url': 'https://esgf-data2.llnl.gov/thredds/fileServer/user_pub_work/CMIP6/DAMIP/E3SM-Project/E3SM-2-0/hist-aer/r2i1p1f1/Amon/tas/gr/v20220906/tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_190001-194912.nc'},
 {'filename': 'tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_195001-199912.nc',
  'url': 'https://esgf-data2.llnl.gov/thredds/fileServer/user_pub_work/CMIP6/DAMIP/E3SM-Project/E3SM-2-0/hist-aer/r2i1p1f1/Amon/tas/gr/v20220906/tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_195001-199912.nc'},
 {'filename': 'tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_200001-201412.nc',
  'url': 'https://esgf-data2.llnl.gov/thredds/fileServer/user_pub_work/CMIP6/DAMIP/E3SM-Project/

In [9]:
files_list = files_list_mt

In [11]:
# Print the type of the files list
print(type(files_list))

# Extract this into a dataframe
files_df = pd.DataFrame.from_dict(files_list)
files_df

# Assert that all filenames contrain the string "185001" and "202012"
# assert all(files_df['filename'].str.contains('185001')), "Not all filenames contain the string 185001"
# assert all(files_df['filename'].str.contains('202012')), "Not all filenames contain the string 202012"

files_df

<class 'list'>


Unnamed: 0,filename,url
0,tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_185001-...,https://esgf-data2.llnl.gov/thredds/fileServer...
1,tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_190001-...,https://esgf-data2.llnl.gov/thredds/fileServer...
2,tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_195001-...,https://esgf-data2.llnl.gov/thredds/fileServer...
3,tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_200001-...,https://esgf-data2.llnl.gov/thredds/fileServer...
4,tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_185001-...,https://esgf-data2.llnl.gov/thredds/fileServer...
5,tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_190001-...,https://esgf-data2.llnl.gov/thredds/fileServer...
6,tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_195001-...,https://esgf-data2.llnl.gov/thredds/fileServer...
7,tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_200001-...,https://esgf-data2.llnl.gov/thredds/fileServer...
8,tas_Amon_E3SM-2-0_hist-aer_r3i1p1f1_gr_185001-...,https://esgf-data2.llnl.gov/thredds/fileServer...
9,tas_Amon_E3SM-2-0_hist-aer_r3i1p1f1_gr_190001-...,https://esgf-data2.llnl.gov/thredds/fileServer...


In [12]:
import glob

# We want to verify whether these files exist on JASMIN
damip_dir = "/badc/cmip6/data/CMIP6/DAMIP/"

# Test the function
files_df = check_file_exists_jasmin(files_df, damip_dir)

files_df

E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_185001-189912.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_190001-194912.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_195001-199912.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_200001-201412.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_185001-189912.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_190001-194912.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_195001-199912.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_200001-201412.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r3i1p1f1_gr_185001-189912.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r3i1p1f1_gr_190001-194912.nc
E3SM-Project
File does not exist for tas_Amon_E3SM-2-0_hist-aer_r3i1p1

Unnamed: 0,filename,url,file_exists
0,tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_185001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False
1,tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_190001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False
2,tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_195001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False
3,tas_Amon_E3SM-2-0_hist-aer_r2i1p1f1_gr_200001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False
4,tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_185001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False
5,tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_190001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False
6,tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_195001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False
7,tas_Amon_E3SM-2-0_hist-aer_r1i1p1f1_gr_200001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False
8,tas_Amon_E3SM-2-0_hist-aer_r3i1p1f1_gr_185001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False
9,tas_Amon_E3SM-2-0_hist-aer_r3i1p1f1_gr_190001-...,https://esgf-data2.llnl.gov/thredds/fileServer...,False


In [10]:
# Set up the directory to download to
download_dir = "/gws/nopw/j04/scenario/users/benhutch/DAMIP"

# Set up the variable
variable = 'tas'

# Set up the experiment id
experiment_id = 'hist-aer'

# Set up the model
model = 'CanESM5'

# Set up the directory
download_path = os.path.join(download_dir, experiment_id, 
                             variable, model)

# Print the download path
print(download_path)

# Use the download function to download a single file
download_file(files_df['url'][0], 
              files_df['filename'][0], download_path)

/gws/nopw/j04/scenario/users/benhutch/DAMIP/hist-aer/tas/CanESM5
Downloading tas_Amon_CanESM5_hist-aer_r1i1p1f1_gn_185001-202012.nc from http://crd-esgf-drc.ec.gc.ca/thredds/fileServer/esgE_dataroot/AR6/CMIP6/DAMIP/CCCma/CanESM5/hist-aer/r1i1p1f1/Amon/tas/gn/v20190429/tas_Amon_CanESM5_hist-aer_r1i1p1f1_gn_185001-202012.nc
Saving to /gws/nopw/j04/scenario/users/benhutch/DAMIP/hist-aer/tas/CanESM5tas_Amon_CanESM5_hist-aer_r1i1p1f1_gn_185001-202012.nc


53.1kKiB [00:46, 1.13kKiB/s]                          


Downloaded size does not match expected size!
 FYI, the status code was  200


In [11]:
# Download all the files
for i in tqdm(range(len(files_df))):
    download_file(files_df['url'][i], 
                  files_df['filename'][i], download_path)

  0%|          | 0/5 [00:00<?, ?it/s]

Downloading tas_Amon_CanESM5_hist-aer_r1i1p1f1_gn_185001-202012.nc from http://crd-esgf-drc.ec.gc.ca/thredds/fileServer/esgE_dataroot/AR6/CMIP6/DAMIP/CCCma/CanESM5/hist-aer/r1i1p1f1/Amon/tas/gn/v20190429/tas_Amon_CanESM5_hist-aer_r1i1p1f1_gn_185001-202012.nc
Saving to /gws/nopw/j04/scenario/users/benhutch/DAMIP/hist-aer/tas/CanESM5tas_Amon_CanESM5_hist-aer_r1i1p1f1_gn_185001-202012.nc


53.1kKiB [00:05, 10.5kKiB/s]                          
 20%|██        | 1/5 [00:05<00:23,  5.86s/it]

Downloaded size does not match expected size!
 FYI, the status code was  200
Downloading tas_Amon_CanESM5_hist-aer_r4i1p1f1_gn_185001-202012.nc from http://crd-esgf-drc.ec.gc.ca/thredds/fileServer/esgE_dataroot/AR6/CMIP6/DAMIP/CCCma/CanESM5/hist-aer/r4i1p1f1/Amon/tas/gn/v20190429/tas_Amon_CanESM5_hist-aer_r4i1p1f1_gn_185001-202012.nc
Saving to /gws/nopw/j04/scenario/users/benhutch/DAMIP/hist-aer/tas/CanESM5tas_Amon_CanESM5_hist-aer_r4i1p1f1_gn_185001-202012.nc


53.1kKiB [00:02, 22.6kKiB/s]                          
 40%|████      | 2/5 [00:08<00:12,  4.25s/it]

Downloaded size does not match expected size!
 FYI, the status code was  200
Downloading tas_Amon_CanESM5_hist-aer_r2i1p1f1_gn_185001-202012.nc from http://crd-esgf-drc.ec.gc.ca/thredds/fileServer/esgE_dataroot/AR6/CMIP6/DAMIP/CCCma/CanESM5/hist-aer/r2i1p1f1/Amon/tas/gn/v20190429/tas_Amon_CanESM5_hist-aer_r2i1p1f1_gn_185001-202012.nc
Saving to /gws/nopw/j04/scenario/users/benhutch/DAMIP/hist-aer/tas/CanESM5tas_Amon_CanESM5_hist-aer_r2i1p1f1_gn_185001-202012.nc


53.1kKiB [00:19, 2.73kKiB/s]                          


Downloaded size does not match expected size!
 FYI, the status code was  200


 60%|██████    | 3/5 [00:29<00:23, 11.58s/it]

Downloading tas_Amon_CanESM5_hist-aer_r5i1p1f1_gn_185001-202012.nc from http://crd-esgf-drc.ec.gc.ca/thredds/fileServer/esgE_dataroot/AR6/CMIP6/DAMIP/CCCma/CanESM5/hist-aer/r5i1p1f1/Amon/tas/gn/v20190429/tas_Amon_CanESM5_hist-aer_r5i1p1f1_gn_185001-202012.nc
Saving to /gws/nopw/j04/scenario/users/benhutch/DAMIP/hist-aer/tas/CanESM5tas_Amon_CanESM5_hist-aer_r5i1p1f1_gn_185001-202012.nc


53.1kKiB [00:02, 23.0kKiB/s]                          
 80%|████████  | 4/5 [00:32<00:08,  8.18s/it]

Downloaded size does not match expected size!
 FYI, the status code was  200
Downloading tas_Amon_CanESM5_hist-aer_r3i1p1f1_gn_185001-202012.nc from http://crd-esgf-drc.ec.gc.ca/thredds/fileServer/esgE_dataroot/AR6/CMIP6/DAMIP/CCCma/CanESM5/hist-aer/r3i1p1f1/Amon/tas/gn/v20190429/tas_Amon_CanESM5_hist-aer_r3i1p1f1_gn_185001-202012.nc
Saving to /gws/nopw/j04/scenario/users/benhutch/DAMIP/hist-aer/tas/CanESM5tas_Amon_CanESM5_hist-aer_r3i1p1f1_gn_185001-202012.nc


53.1kKiB [00:13, 3.82kKiB/s]                          
100%|██████████| 5/5 [00:46<00:00,  9.39s/it]

Downloaded size does not match expected size!
 FYI, the status code was  200



