In [1]:
# Import the required modules
from pyesgf.search import SearchConnection
import os
import sys
import importlib
import pandas as pd
import requests
from tqdm import tqdm

# Set the os environment to on
os.environ['ESGF_PYCLIENT_NO_FACETS_STAR_WARNING'] = "on"

In [2]:
# Import the functions
sys.path.append('/home/users/benhutch/downloading-data/DAMIP/')

# Import the functions
from testing_download_functions import *

In [3]:
# Import the dictionaries
import dictionaries as dicts

In [4]:
# Import the functions again, so as to not have to restart the kernel
importlib.reload(sys.modules['testing_download_functions'])

# Import the functions again
from testing_download_functions import find_valid_nodes, create_results_list, \
                                        extract_file_context, check_file_exists_jasmin

# Import the dictionaries again, so as to not have to restart the kernel
importlib.reload(sys.modules['dictionaries'])

# Import the dictionaries again
import dictionaries as dicts

In [5]:
# Set up the search connection
connection = SearchConnection(dicts.search_connection, distrib=True)

In [6]:
# Set up the paramas
# To speed up the check, only search for year s1961
params = {
    'activity_id': 'DCPP',
    'experiment_id': 'dcppA-hindcast',
    'latest': True,
    'sub_experiment_id': 's1961',
    'project': 'CMIP6',
    'table_id': 'Amon',
}

In [7]:
# Loop over the variables
# variables = dicts.variables
models = dicts.models

# Set up the test variable - 'pr' precip
variables = ['pr']

# Create a dataframe for the results
# Containing three columns: model, experiment, does_model_exist
results_df = pd.DataFrame(columns=['model', 'variable', 'does_model_exist'])

# Loop over the variables
for variable in variables:
    print("Checking whether models exist for variable: " + variable)

    # Loop over the models
    for model in models:
        print("Checking whether model: " + model + " exists for variable: " + variable)

        # Set up the params
        params['variable_id'] = variable
        params['source_id'] = model

        # Query the database
        ctx = connection.new_context(**params)

        try:
            # Get the results from the query
            results = ctx.search()
        except:
            print("Model: " + model + " does not exist for variable: " + variable)
            results = []

        # If the length of results is greater than 0
        if len(results) > 0:
            print("Model: " + model + " exists for variable: " + variable)

            # Append True to the dataframe
            results_df = pd.concat([results_df, pd.DataFrame({'model': [model], 'variable': [variable], 'does_model_exist': [True]})], ignore_index=True)
        else:
            print("Model: " + model + " does not exist for variable: " + variable)

            # Append False to the dataframe
            results_df = pd.concat([results_df, pd.DataFrame({'model': [model], 'variable': [variable], 'does_model_exist': [False]})], ignore_index=True)

# Print the dataframe
results_df

Checking whether models exist for variable: pr
Checking whether model: BCC-CSM2-MR exists for variable: pr
Model: BCC-CSM2-MR exists for variable: pr
Checking whether model: MPI-ESM1-2-HR exists for variable: pr
Model: MPI-ESM1-2-HR exists for variable: pr
Checking whether model: CanESM5 exists for variable: pr
Model: CanESM5 exists for variable: pr
Checking whether model: CMCC-CM2-SR5 exists for variable: pr
Model: CMCC-CM2-SR5 exists for variable: pr
Checking whether model: HadGEM3-GC31-MM exists for variable: pr
Model: HadGEM3-GC31-MM exists for variable: pr
Checking whether model: EC-Earth3 exists for variable: pr
Model: EC-Earth3 exists for variable: pr
Checking whether model: MPI-ESM1-2-LR exists for variable: pr
Model: MPI-ESM1-2-LR exists for variable: pr
Checking whether model: FGOALS-f3-L exists for variable: pr
Model: FGOALS-f3-L exists for variable: pr
Checking whether model: MIROC6 exists for variable: pr
Model: MIROC6 exists for variable: pr
Checking whether model: IPSL-C

Unnamed: 0,model,variable,does_model_exist
0,BCC-CSM2-MR,pr,True
1,MPI-ESM1-2-HR,pr,True
2,CanESM5,pr,True
3,CMCC-CM2-SR5,pr,True
4,HadGEM3-GC31-MM,pr,True
5,EC-Earth3,pr,True
6,MPI-ESM1-2-LR,pr,True
7,FGOALS-f3-L,pr,True
8,MIROC6,pr,True
9,IPSL-CM6A-LR,pr,True


In [7]:
# Create an empty dictionary to store the valid nodes for each variable
variable_nodes = {}


# Find the valid nodes for each variable and models list combination
# Loop over the var models dictionary
for var, models_list in dicts.var_models_test_pr.items():
    print("Finding the valid nodes for variable: " + var)
    print("Models list: " + str(models_list))

    # Set up the variable_id for the params
    params['variable_id'] = var

    # Append the variable to the dictionary
    variable_nodes[var] = {}

    # Find the valid nodes
    valid_nodes = find_valid_nodes(params=params, 
                                   models_list=models_list,
                                   conn=connection)
    
    # Append the valid nodes to the dictionary
    variable_nodes[var] = valid_nodes


Finding the valid nodes for variable: pr
Models list: ['BCC-CSM2-MR', 'MPI-ESM1-2-HR', 'CanESM5', 'CMCC-CM2-SR5', 'HadGEM3-GC31-MM', 'EC-Earth3', 'MPI-ESM1-2-LR', 'FGOALS-f3-L', 'MIROC6', 'IPSL-CM6A-LR', 'NorCPM1']
trying to find valid nodes for model: BCC-CSM2-MR
{'activity_id': 'DCPP', 'experiment_id': 'dcppA-hindcast', 'latest': True, 'sub_experiment_id': 's1961', 'project': 'CMIP6', 'table_id': 'Amon', 'variable_id': 'pr', 'source_id': 'BCC-CSM2-MR'}
40
{'esgf.ceda.ac.uk', 'cmip.bcc.cma.cn', 'esgf-data1.llnl.gov', 'esgf3.dkrz.de', 'esgf.nci.org.au'}
trying to find valid files for node: esgf.ceda.ac.uk
8
trying to find valid files for node: cmip.bcc.cma.cn
8
trying to find valid files for node: esgf-data1.llnl.gov
8
trying to find valid files for node: esgf3.dkrz.de
8
trying to find valid files for node: esgf.nci.org.au
8
trying to find valid nodes for model: MPI-ESM1-2-HR
{'activity_id': 'DCPP', 'experiment_id': 'dcppA-hindcast', 'latest': True, 'sub_experiment_id': 's1961', 'proje

In [8]:
# For each variable, create the results list
# Initialize an empty dictionary to store the results
results = {}

# Loop over the variables
for var in dicts.variables_test_pr:
    print("Finding the results for variable: " + var)

    # Extract the valid nodes for the variable
    valid_nodes = variable_nodes[var]

    # Append the variable to params
    params['variable_id'] = var

    # print the type of valid nodes
    print("Valid nodes type: " + str(type(valid_nodes)))
    print("Valid nodes: " + str(valid_nodes))

    # Find the results for the variable
    var_results = create_results_list(params=params,
                                      max_results_list=valid_nodes,
                                      connection=connection)

    # Append the results to the dictionary
    results[var] = var_results

Finding the results for variable: pr
Valid nodes type: <class 'list'>
Valid nodes: [{'source_id': 'BCC-CSM2-MR', 'data_node': 'esgf.ceda.ac.uk', 'num_results': 8}, {'source_id': 'MPI-ESM1-2-HR', 'data_node': 'esgf3.dkrz.de', 'num_results': 10}, {'source_id': 'CanESM5', 'data_node': 'esgf.ceda.ac.uk', 'num_results': 40}, {'source_id': 'CMCC-CM2-SR5', 'data_node': 'esgf-data1.llnl.gov', 'num_results': 20}, {'source_id': 'HadGEM3-GC31-MM', 'data_node': 'esgf-data1.llnl.gov', 'num_results': 10}, {'source_id': 'EC-Earth3', 'data_node': 'esgf.bsc.es', 'num_results': 21}, {'source_id': 'MPI-ESM1-2-LR', 'data_node': 'esgf.dwd.de', 'num_results': 16}, {'source_id': 'FGOALS-f3-L', 'data_node': 'esgf-data1.llnl.gov', 'num_results': 9}, {'source_id': 'MIROC6', 'data_node': 'esgf-data02.diasjp.net', 'num_results': 10}, {'source_id': 'IPSL-CM6A-LR', 'data_node': 'esgf-data1.llnl.gov', 'num_results': 10}, {'source_id': 'NorCPM1', 'data_node': 'esgf-data1.llnl.gov', 'num_results': 20}]


  0%|          | 0/11 [00:00<?, ?it/s]

Querying for source_id: BCC-CSM2-MR and data_node: esgf.ceda.ac.uk


  9%|▉         | 1/11 [00:02<00:26,  2.66s/it]

Found 8 results.
Querying for source_id: MPI-ESM1-2-HR and data_node: esgf3.dkrz.de


 18%|█▊        | 2/11 [00:05<00:23,  2.61s/it]

Found 10 results.
Querying for source_id: CanESM5 and data_node: esgf.ceda.ac.uk


 27%|██▋       | 3/11 [00:07<00:20,  2.62s/it]

Found 40 results.
Querying for source_id: CMCC-CM2-SR5 and data_node: esgf-data1.llnl.gov


 36%|███▋      | 4/11 [00:09<00:15,  2.25s/it]

Found 20 results.
Querying for source_id: HadGEM3-GC31-MM and data_node: esgf-data1.llnl.gov


 45%|████▌     | 5/11 [00:11<00:11,  1.99s/it]

Found 10 results.
Querying for source_id: EC-Earth3 and data_node: esgf.bsc.es


 55%|█████▍    | 6/11 [00:13<00:11,  2.26s/it]

Found 21 results.
Querying for source_id: MPI-ESM1-2-LR and data_node: esgf.dwd.de


 64%|██████▎   | 7/11 [00:19<00:13,  3.32s/it]

Found 16 results.
Querying for source_id: FGOALS-f3-L and data_node: esgf-data1.llnl.gov


 73%|███████▎  | 8/11 [00:22<00:09,  3.17s/it]

Found 9 results.
Querying for source_id: MIROC6 and data_node: esgf-data02.diasjp.net


 82%|████████▏ | 9/11 [00:23<00:05,  2.64s/it]

Found 10 results.
Querying for source_id: IPSL-CM6A-LR and data_node: esgf-data1.llnl.gov


 91%|█████████ | 10/11 [00:25<00:02,  2.30s/it]

Found 10 results.
Querying for source_id: NorCPM1 and data_node: esgf-data1.llnl.gov


100%|██████████| 11/11 [00:26<00:00,  2.42s/it]

Found 20 results.





In [10]:
print("Results: " + str(results))
# print the length of the results
print("Length of results: " + str(len(results)))
print(type(results['pr']))

Results: {'pr': [<pyesgf.search.results.ResultSet object at 0x7f9684ebbd00>, <pyesgf.search.results.ResultSet object at 0x7f9685027a60>, <pyesgf.search.results.ResultSet object at 0x7f9685026dd0>, <pyesgf.search.results.ResultSet object at 0x7f9685027880>, <pyesgf.search.results.ResultSet object at 0x7f96850279a0>, <pyesgf.search.results.ResultSet object at 0x7f9684ebbdc0>, <pyesgf.search.results.ResultSet object at 0x7f9684ebbd90>, <pyesgf.search.results.ResultSet object at 0x7f96850277c0>, <pyesgf.search.results.ResultSet object at 0x7f9684f682b0>, <pyesgf.search.results.ResultSet object at 0x7f9684f69540>, <pyesgf.search.results.ResultSet object at 0x7f9684f6a830>]}
Length of results: 1
<class 'list'>


In [11]:
# Initialize an empty dictionary to store the results file context
results_file_context = {}

# For each of the variables
for var, results in results.items():
    print("Variable: " + var)
    print("Results: " + str(results))

    # Initialize an empty dictionary to store the file context
    file_context_list = []

    # Loop over the results
    for result in results:
        print("Result: " + str(result))

        # Extract the file context
        file_context = extract_file_context(result)

        # Append the file context to the list
        file_context_list.append(file_context)

    # Append the file context list to the dictionary
    results_file_context[var] = file_context_list

Variable: pr
Results: [<pyesgf.search.results.ResultSet object at 0x7f9684ebbd00>, <pyesgf.search.results.ResultSet object at 0x7f9685027a60>, <pyesgf.search.results.ResultSet object at 0x7f9685026dd0>, <pyesgf.search.results.ResultSet object at 0x7f9685027880>, <pyesgf.search.results.ResultSet object at 0x7f96850279a0>, <pyesgf.search.results.ResultSet object at 0x7f9684ebbdc0>, <pyesgf.search.results.ResultSet object at 0x7f9684ebbd90>, <pyesgf.search.results.ResultSet object at 0x7f96850277c0>, <pyesgf.search.results.ResultSet object at 0x7f9684f682b0>, <pyesgf.search.results.ResultSet object at 0x7f9684f69540>, <pyesgf.search.results.ResultSet object at 0x7f9684f6a830>]
Result: <pyesgf.search.results.ResultSet object at 0x7f9684ebbd00>
Extracting file context for 8 datasets...


 12%|█▎        | 1/8 [00:09<01:09,  9.93s/it]

Processed 1 out of 8 results.


 25%|██▌       | 2/8 [00:18<00:55,  9.24s/it]

Processed 2 out of 8 results.


 38%|███▊      | 3/8 [00:27<00:44,  8.98s/it]

Processed 3 out of 8 results.


 50%|█████     | 4/8 [00:36<00:35,  8.90s/it]

Processed 4 out of 8 results.


 62%|██████▎   | 5/8 [00:44<00:26,  8.82s/it]

Processed 5 out of 8 results.


 75%|███████▌  | 6/8 [00:53<00:17,  8.79s/it]

Processed 6 out of 8 results.


 88%|████████▊ | 7/8 [01:02<00:08,  8.81s/it]

Processed 7 out of 8 results.


100%|██████████| 8/8 [01:11<00:00,  8.88s/it]


Processed 8 out of 8 results.
Result: <pyesgf.search.results.ResultSet object at 0x7f9685027a60>
Extracting file context for 10 datasets...


 10%|█         | 1/10 [00:08<01:12,  8.06s/it]

Processed 1 out of 10 results.


 20%|██        | 2/10 [00:19<01:19,  9.92s/it]

Processed 2 out of 10 results.


 30%|███       | 3/10 [00:27<01:03,  9.08s/it]

Processed 3 out of 10 results.


 40%|████      | 4/10 [00:38<00:58,  9.72s/it]

Processed 4 out of 10 results.


 50%|█████     | 5/10 [00:46<00:47,  9.43s/it]

Processed 5 out of 10 results.


 60%|██████    | 6/10 [00:54<00:35,  8.84s/it]

Processed 6 out of 10 results.


 70%|███████   | 7/10 [01:02<00:25,  8.43s/it]

Processed 7 out of 10 results.


 80%|████████  | 8/10 [01:10<00:17,  8.51s/it]

Processed 8 out of 10 results.


 90%|█████████ | 9/10 [01:18<00:08,  8.20s/it]

Processed 9 out of 10 results.


100%|██████████| 10/10 [01:27<00:00,  8.72s/it]


Processed 10 out of 10 results.
Result: <pyesgf.search.results.ResultSet object at 0x7f9685026dd0>
Extracting file context for 40 datasets...


  2%|▎         | 1/40 [00:09<05:58,  9.18s/it]

Processed 1 out of 40 results.


  5%|▌         | 2/40 [00:17<05:40,  8.95s/it]

Processed 2 out of 40 results.


  8%|▊         | 3/40 [00:27<05:42,  9.26s/it]

Processed 3 out of 40 results.


 10%|█         | 4/40 [00:36<05:24,  9.01s/it]

Processed 4 out of 40 results.


 12%|█▎        | 5/40 [00:44<05:11,  8.89s/it]

Processed 5 out of 40 results.


 15%|█▌        | 6/40 [00:58<05:53, 10.41s/it]

Processed 6 out of 40 results.


 18%|█▊        | 7/40 [01:07<05:25,  9.87s/it]

Processed 7 out of 40 results.


 20%|██        | 8/40 [01:15<05:02,  9.44s/it]

Processed 8 out of 40 results.


 22%|██▎       | 9/40 [01:24<04:44,  9.17s/it]

Processed 9 out of 40 results.


 25%|██▌       | 10/40 [01:32<04:29,  8.98s/it]

Processed 10 out of 40 results.


 28%|██▊       | 11/40 [01:41<04:16,  8.85s/it]

Processed 11 out of 40 results.


 30%|███       | 12/40 [01:49<04:05,  8.77s/it]

Processed 12 out of 40 results.


 32%|███▎      | 13/40 [01:58<03:55,  8.72s/it]

Processed 13 out of 40 results.


 35%|███▌      | 14/40 [02:08<03:53,  8.99s/it]

Processed 14 out of 40 results.


 38%|███▊      | 15/40 [02:17<03:49,  9.17s/it]

Processed 15 out of 40 results.


 40%|████      | 16/40 [02:28<03:52,  9.69s/it]

Processed 16 out of 40 results.


 42%|████▎     | 17/40 [02:37<03:36,  9.40s/it]

Processed 17 out of 40 results.


 45%|████▌     | 18/40 [02:46<03:23,  9.23s/it]

Processed 18 out of 40 results.


 48%|████▊     | 19/40 [02:57<03:30, 10.02s/it]

Processed 19 out of 40 results.


 50%|█████     | 20/40 [03:06<03:12,  9.64s/it]

Processed 20 out of 40 results.


 52%|█████▎    | 21/40 [03:15<02:58,  9.39s/it]

Processed 21 out of 40 results.


 55%|█████▌    | 22/40 [03:24<02:47,  9.33s/it]

Processed 22 out of 40 results.


 57%|█████▊    | 23/40 [03:34<02:41,  9.48s/it]

Processed 23 out of 40 results.


 60%|██████    | 24/40 [03:43<02:28,  9.28s/it]

Processed 24 out of 40 results.


 62%|██████▎   | 25/40 [03:53<02:21,  9.46s/it]

Processed 25 out of 40 results.


 65%|██████▌   | 26/40 [04:03<02:13,  9.55s/it]

Processed 26 out of 40 results.


 68%|██████▊   | 27/40 [04:14<02:11, 10.11s/it]

Processed 27 out of 40 results.


 70%|███████   | 28/40 [04:23<01:59,  9.94s/it]

Processed 28 out of 40 results.


 72%|███████▎  | 29/40 [04:32<01:44,  9.53s/it]

Processed 29 out of 40 results.


 75%|███████▌  | 30/40 [04:41<01:32,  9.22s/it]

Processed 30 out of 40 results.


 78%|███████▊  | 31/40 [04:50<01:24,  9.34s/it]

Processed 31 out of 40 results.


 80%|████████  | 32/40 [05:00<01:15,  9.50s/it]

Processed 32 out of 40 results.


 82%|████████▎ | 33/40 [05:09<01:05,  9.30s/it]

Processed 33 out of 40 results.


 85%|████████▌ | 34/40 [05:18<00:54,  9.14s/it]

Processed 34 out of 40 results.


 88%|████████▊ | 35/40 [05:27<00:46,  9.32s/it]

Processed 35 out of 40 results.


 90%|█████████ | 36/40 [05:36<00:36,  9.13s/it]

Processed 36 out of 40 results.


 92%|█████████▎| 37/40 [05:46<00:27,  9.30s/it]

Processed 37 out of 40 results.


 95%|█████████▌| 38/40 [05:55<00:18,  9.14s/it]

Processed 38 out of 40 results.


 98%|█████████▊| 39/40 [06:04<00:09,  9.19s/it]

Processed 39 out of 40 results.


100%|██████████| 40/40 [06:13<00:00,  9.33s/it]


Processed 40 out of 40 results.
Result: <pyesgf.search.results.ResultSet object at 0x7f9685027880>
Extracting file context for 20 datasets...


  5%|▌         | 1/20 [00:02<00:50,  2.68s/it]

Processed 1 out of 20 results.


 10%|█         | 2/20 [00:05<00:46,  2.58s/it]

Processed 2 out of 20 results.


 15%|█▌        | 3/20 [00:06<00:33,  1.96s/it]

Processed 3 out of 20 results.


 20%|██        | 4/20 [00:07<00:26,  1.67s/it]

Processed 4 out of 20 results.


 25%|██▌       | 5/20 [00:08<00:22,  1.52s/it]

Processed 5 out of 20 results.


 30%|███       | 6/20 [00:10<00:20,  1.43s/it]

Processed 6 out of 20 results.


 35%|███▌      | 7/20 [00:12<00:22,  1.72s/it]

Processed 7 out of 20 results.


 40%|████      | 8/20 [00:13<00:18,  1.56s/it]

Processed 8 out of 20 results.


 45%|████▌     | 9/20 [00:16<00:19,  1.80s/it]

Processed 9 out of 20 results.


 50%|█████     | 10/20 [00:17<00:16,  1.63s/it]

Processed 10 out of 20 results.


 55%|█████▌    | 11/20 [00:18<00:13,  1.51s/it]

Processed 11 out of 20 results.


 60%|██████    | 12/20 [00:20<00:13,  1.73s/it]

Processed 12 out of 20 results.


 65%|██████▌   | 13/20 [00:21<00:11,  1.57s/it]

Processed 13 out of 20 results.


 70%|███████   | 14/20 [00:23<00:08,  1.49s/it]

Processed 14 out of 20 results.


 75%|███████▌  | 15/20 [00:24<00:07,  1.42s/it]

Processed 15 out of 20 results.


 80%|████████  | 16/20 [00:25<00:05,  1.37s/it]

Processed 16 out of 20 results.


 85%|████████▌ | 17/20 [00:26<00:03,  1.33s/it]

Processed 17 out of 20 results.


 90%|█████████ | 18/20 [00:28<00:02,  1.29s/it]

Processed 18 out of 20 results.


 95%|█████████▌| 19/20 [00:29<00:01,  1.29s/it]

Processed 19 out of 20 results.


100%|██████████| 20/20 [00:30<00:00,  1.53s/it]


Processed 20 out of 20 results.
Result: <pyesgf.search.results.ResultSet object at 0x7f96850279a0>
Extracting file context for 10 datasets...


 10%|█         | 1/10 [00:02<00:22,  2.53s/it]

Processed 1 out of 10 results.


 20%|██        | 2/10 [00:03<00:14,  1.85s/it]

Processed 2 out of 10 results.


 30%|███       | 3/10 [00:05<00:11,  1.63s/it]

Processed 3 out of 10 results.


 40%|████      | 4/10 [00:09<00:16,  2.76s/it]

Processed 4 out of 10 results.


 50%|█████     | 5/10 [00:11<00:12,  2.46s/it]

Processed 5 out of 10 results.


 60%|██████    | 6/10 [00:14<00:09,  2.44s/it]

Processed 6 out of 10 results.


 70%|███████   | 7/10 [00:15<00:06,  2.13s/it]

Processed 7 out of 10 results.


 80%|████████  | 8/10 [00:17<00:03,  1.99s/it]

Processed 8 out of 10 results.


 90%|█████████ | 9/10 [00:18<00:01,  1.81s/it]

Processed 9 out of 10 results.


100%|██████████| 10/10 [00:20<00:00,  2.01s/it]


Processed 10 out of 10 results.
Result: <pyesgf.search.results.ResultSet object at 0x7f9684ebbdc0>
Extracting file context for 21 datasets...


  5%|▍         | 1/21 [00:11<03:51, 11.57s/it]

Processed 1 out of 21 results.


 10%|▉         | 2/21 [00:23<03:39, 11.55s/it]

Processed 2 out of 21 results.


 14%|█▍        | 3/21 [00:34<03:29, 11.63s/it]

Processed 3 out of 21 results.


 19%|█▉        | 4/21 [00:51<03:48, 13.43s/it]

Processed 4 out of 21 results.


 24%|██▍       | 5/21 [01:02<03:25, 12.85s/it]

Processed 5 out of 21 results.


 29%|██▊       | 6/21 [01:14<03:07, 12.53s/it]

Processed 6 out of 21 results.


 33%|███▎      | 7/21 [01:26<02:51, 12.28s/it]

Processed 7 out of 21 results.


 38%|███▊      | 8/21 [01:38<02:37, 12.14s/it]

Processed 8 out of 21 results.


 43%|████▎     | 9/21 [01:50<02:26, 12.22s/it]

Processed 9 out of 21 results.


 48%|████▊     | 10/21 [02:02<02:13, 12.12s/it]

Processed 10 out of 21 results.


 52%|█████▏    | 11/21 [02:14<01:59, 11.96s/it]

Processed 11 out of 21 results.


 57%|█████▋    | 12/21 [02:26<01:49, 12.16s/it]

Processed 12 out of 21 results.


 62%|██████▏   | 13/21 [02:39<01:38, 12.36s/it]

Processed 13 out of 21 results.


 67%|██████▋   | 14/21 [02:55<01:34, 13.51s/it]

Processed 14 out of 21 results.


 71%|███████▏  | 15/21 [03:07<01:17, 12.92s/it]

Processed 15 out of 21 results.


 76%|███████▌  | 16/21 [03:19<01:02, 12.55s/it]

Processed 16 out of 21 results.


 81%|████████  | 17/21 [03:30<00:48, 12.23s/it]

Processed 17 out of 21 results.


 86%|████████▌ | 18/21 [03:42<00:36, 12.05s/it]

Processed 18 out of 21 results.


 90%|█████████ | 19/21 [03:56<00:25, 12.74s/it]

Processed 19 out of 21 results.


 95%|█████████▌| 20/21 [04:08<00:12, 12.48s/it]

Processed 20 out of 21 results.


100%|██████████| 21/21 [04:20<00:00, 12.39s/it]


Processed 21 out of 21 results.
Result: <pyesgf.search.results.ResultSet object at 0x7f9684ebbd90>
Extracting file context for 16 datasets...


  6%|▋         | 1/16 [00:09<02:20,  9.40s/it]

Processed 1 out of 16 results.


 12%|█▎        | 2/16 [00:16<01:56,  8.32s/it]

Processed 2 out of 16 results.


 19%|█▉        | 3/16 [00:28<02:04,  9.58s/it]

Processed 3 out of 16 results.


 25%|██▌       | 4/16 [00:35<01:45,  8.81s/it]

Processed 4 out of 16 results.


 31%|███▏      | 5/16 [00:43<01:31,  8.34s/it]

Processed 5 out of 16 results.


 38%|███▊      | 6/16 [00:50<01:20,  8.07s/it]

Processed 6 out of 16 results.


 44%|████▍     | 7/16 [00:58<01:12,  8.02s/it]

Processed 7 out of 16 results.


 50%|█████     | 8/16 [01:06<01:03,  7.92s/it]

Processed 8 out of 16 results.


 56%|█████▋    | 9/16 [01:15<00:57,  8.19s/it]

Processed 9 out of 16 results.


 62%|██████▎   | 10/16 [01:23<00:50,  8.33s/it]

Processed 10 out of 16 results.


 69%|██████▉   | 11/16 [01:31<00:40,  8.18s/it]

Processed 11 out of 16 results.


 75%|███████▌  | 12/16 [01:39<00:31,  7.99s/it]

Processed 12 out of 16 results.


 81%|████████▏ | 13/16 [01:46<00:23,  7.84s/it]

Processed 13 out of 16 results.


 88%|████████▊ | 14/16 [01:55<00:16,  8.11s/it]

Processed 14 out of 16 results.


 94%|█████████▍| 15/16 [02:02<00:07,  7.93s/it]

Processed 15 out of 16 results.


100%|██████████| 16/16 [02:10<00:00,  8.17s/it]


Processed 16 out of 16 results.
Result: <pyesgf.search.results.ResultSet object at 0x7f96850277c0>
Extracting file context for 9 datasets...


 11%|█         | 1/9 [00:01<00:13,  1.68s/it]

Processed 1 out of 9 results.


 22%|██▏       | 2/9 [00:03<00:13,  1.99s/it]

Processed 2 out of 9 results.


 33%|███▎      | 3/9 [00:05<00:10,  1.67s/it]

Processed 3 out of 9 results.


 44%|████▍     | 4/9 [00:06<00:07,  1.50s/it]

Processed 4 out of 9 results.


 56%|█████▌    | 5/9 [00:07<00:05,  1.44s/it]

Processed 5 out of 9 results.


 67%|██████▋   | 6/9 [00:08<00:04,  1.36s/it]

Processed 6 out of 9 results.


 78%|███████▊  | 7/9 [00:16<00:06,  3.31s/it]

Processed 7 out of 9 results.


 89%|████████▉ | 8/9 [00:17<00:02,  2.64s/it]

Processed 8 out of 9 results.


100%|██████████| 9/9 [00:19<00:00,  2.20s/it]


Processed 9 out of 9 results.
Result: <pyesgf.search.results.ResultSet object at 0x7f9684f682b0>
Extracting file context for 10 datasets...


 10%|█         | 1/10 [00:01<00:12,  1.40s/it]

Processed 1 out of 10 results.


 20%|██        | 2/10 [00:02<00:10,  1.30s/it]

Processed 2 out of 10 results.


 30%|███       | 3/10 [00:03<00:08,  1.28s/it]

Processed 3 out of 10 results.


 40%|████      | 4/10 [00:05<00:07,  1.31s/it]

Processed 4 out of 10 results.


 50%|█████     | 5/10 [00:06<00:06,  1.29s/it]

Processed 5 out of 10 results.


 60%|██████    | 6/10 [00:07<00:05,  1.29s/it]

Processed 6 out of 10 results.


 70%|███████   | 7/10 [00:09<00:03,  1.30s/it]

Processed 7 out of 10 results.


 80%|████████  | 8/10 [00:11<00:03,  1.60s/it]

Processed 8 out of 10 results.


 90%|█████████ | 9/10 [00:13<00:01,  1.79s/it]

Processed 9 out of 10 results.


100%|██████████| 10/10 [00:14<00:00,  1.48s/it]


Processed 10 out of 10 results.
Result: <pyesgf.search.results.ResultSet object at 0x7f9684f69540>
Extracting file context for 10 datasets...


 10%|█         | 1/10 [00:01<00:11,  1.26s/it]

Processed 1 out of 10 results.


 20%|██        | 2/10 [00:03<00:14,  1.85s/it]

Processed 2 out of 10 results.


 30%|███       | 3/10 [00:04<00:10,  1.56s/it]

Processed 3 out of 10 results.


 40%|████      | 4/10 [00:05<00:08,  1.44s/it]

Processed 4 out of 10 results.


 50%|█████     | 5/10 [00:08<00:08,  1.73s/it]

Processed 5 out of 10 results.


 60%|██████    | 6/10 [00:10<00:07,  1.90s/it]

Processed 6 out of 10 results.


 70%|███████   | 7/10 [00:12<00:06,  2.02s/it]

Processed 7 out of 10 results.


 80%|████████  | 8/10 [00:14<00:03,  1.83s/it]

Processed 8 out of 10 results.


 90%|█████████ | 9/10 [00:15<00:01,  1.65s/it]

Processed 9 out of 10 results.


100%|██████████| 10/10 [00:16<00:00,  1.66s/it]


Processed 10 out of 10 results.
Result: <pyesgf.search.results.ResultSet object at 0x7f9684f6a830>
Extracting file context for 20 datasets...


  5%|▌         | 1/20 [00:02<00:42,  2.24s/it]

Processed 1 out of 20 results.


 10%|█         | 2/20 [00:03<00:29,  1.64s/it]

Processed 2 out of 20 results.


 15%|█▌        | 3/20 [00:04<00:24,  1.47s/it]

Processed 3 out of 20 results.


 20%|██        | 4/20 [00:05<00:22,  1.38s/it]

Processed 4 out of 20 results.


 25%|██▌       | 5/20 [00:07<00:20,  1.37s/it]

Processed 5 out of 20 results.


 30%|███       | 6/20 [00:08<00:18,  1.34s/it]

Processed 6 out of 20 results.


 35%|███▌      | 7/20 [00:09<00:17,  1.34s/it]

Processed 7 out of 20 results.


 40%|████      | 8/20 [00:11<00:15,  1.30s/it]

Processed 8 out of 20 results.


 45%|████▌     | 9/20 [00:13<00:17,  1.59s/it]

Processed 9 out of 20 results.


 50%|█████     | 10/20 [00:15<00:17,  1.80s/it]

Processed 10 out of 20 results.


 55%|█████▌    | 11/20 [00:16<00:14,  1.62s/it]

Processed 11 out of 20 results.


 60%|██████    | 12/20 [00:18<00:12,  1.51s/it]

Processed 12 out of 20 results.


 65%|██████▌   | 13/20 [00:19<00:10,  1.46s/it]

Processed 13 out of 20 results.


 70%|███████   | 14/20 [00:20<00:08,  1.41s/it]

Processed 14 out of 20 results.


 75%|███████▌  | 15/20 [00:22<00:06,  1.36s/it]

Processed 15 out of 20 results.


 80%|████████  | 16/20 [00:23<00:05,  1.33s/it]

Processed 16 out of 20 results.


 85%|████████▌ | 17/20 [00:24<00:03,  1.32s/it]

Processed 17 out of 20 results.


 90%|█████████ | 18/20 [00:25<00:02,  1.29s/it]

Processed 18 out of 20 results.


 95%|█████████▌| 19/20 [00:27<00:01,  1.33s/it]

Processed 19 out of 20 results.


100%|██████████| 20/20 [00:28<00:00,  1.43s/it]

Processed 20 out of 20 results.





In [12]:
# Create an empty dataframe to store the results
df = pd.DataFrame()

for var, file_context_lists in results_file_context.items():
    print("Variable: " + var)

    # Loop over the file contexts
    for file_context in file_context_lists:
        # Convert the dictionary to a dataframe
        file_context_df = pd.DataFrame.from_dict(file_context)

        # Add a new column on the far left of the dataframe
        # containing the variable name
        file_context_df.insert(0, 'variable', var)

        # Concatenate the dataframe to the results dataframe
        df = pd.concat([df, file_context_df], ignore_index=True)

# Print the dataframe
df

Variable: pr


Unnamed: 0,variable,0,1,2,3,4,5,6,7,8,...,221,222,223,224,225,226,227,228,229,230
0,pr,{'filename': 'pr_Amon_BCC-CSM2-MR_dcppA-hindca...,{'filename': 'pr_Amon_BCC-CSM2-MR_dcppA-hindca...,{'filename': 'pr_Amon_BCC-CSM2-MR_dcppA-hindca...,{'filename': 'pr_Amon_BCC-CSM2-MR_dcppA-hindca...,{'filename': 'pr_Amon_BCC-CSM2-MR_dcppA-hindca...,{'filename': 'pr_Amon_BCC-CSM2-MR_dcppA-hindca...,{'filename': 'pr_Amon_BCC-CSM2-MR_dcppA-hindca...,{'filename': 'pr_Amon_BCC-CSM2-MR_dcppA-hindca...,,...,,,,,,,,,,
1,pr,,,,,,,,,,...,,,,,,,,,,
2,pr,{'filename': 'pr_Amon_MPI-ESM1-2-HR_dcppA-hind...,{'filename': 'pr_Amon_MPI-ESM1-2-HR_dcppA-hind...,{'filename': 'pr_Amon_MPI-ESM1-2-HR_dcppA-hind...,{'filename': 'pr_Amon_MPI-ESM1-2-HR_dcppA-hind...,{'filename': 'pr_Amon_MPI-ESM1-2-HR_dcppA-hind...,{'filename': 'pr_Amon_MPI-ESM1-2-HR_dcppA-hind...,{'filename': 'pr_Amon_MPI-ESM1-2-HR_dcppA-hind...,{'filename': 'pr_Amon_MPI-ESM1-2-HR_dcppA-hind...,{'filename': 'pr_Amon_MPI-ESM1-2-HR_dcppA-hind...,...,,,,,,,,,,
3,pr,,,,,,,,,,...,,,,,,,,,,
4,pr,{'filename': 'pr_Amon_CanESM5_dcppA-hindcast_s...,{'filename': 'pr_Amon_CanESM5_dcppA-hindcast_s...,{'filename': 'pr_Amon_CanESM5_dcppA-hindcast_s...,{'filename': 'pr_Amon_CanESM5_dcppA-hindcast_s...,{'filename': 'pr_Amon_CanESM5_dcppA-hindcast_s...,{'filename': 'pr_Amon_CanESM5_dcppA-hindcast_s...,{'filename': 'pr_Amon_CanESM5_dcppA-hindcast_s...,{'filename': 'pr_Amon_CanESM5_dcppA-hindcast_s...,{'filename': 'pr_Amon_CanESM5_dcppA-hindcast_s...,...,,,,,,,,,,
5,pr,,,,,,,,,,...,,,,,,,,,,
6,pr,{'filename': 'pr_Amon_CMCC-CM2-SR5_dcppA-hindc...,{'filename': 'pr_Amon_CMCC-CM2-SR5_dcppA-hindc...,{'filename': 'pr_Amon_CMCC-CM2-SR5_dcppA-hindc...,{'filename': 'pr_Amon_CMCC-CM2-SR5_dcppA-hindc...,{'filename': 'pr_Amon_CMCC-CM2-SR5_dcppA-hindc...,{'filename': 'pr_Amon_CMCC-CM2-SR5_dcppA-hindc...,{'filename': 'pr_Amon_CMCC-CM2-SR5_dcppA-hindc...,{'filename': 'pr_Amon_CMCC-CM2-SR5_dcppA-hindc...,{'filename': 'pr_Amon_CMCC-CM2-SR5_dcppA-hindc...,...,,,,,,,,,,
7,pr,,,,,,,,,,...,,,,,,,,,,
8,pr,{'filename': 'pr_Amon_HadGEM3-GC31-MM_dcppA-hi...,{'filename': 'pr_Amon_HadGEM3-GC31-MM_dcppA-hi...,{'filename': 'pr_Amon_HadGEM3-GC31-MM_dcppA-hi...,{'filename': 'pr_Amon_HadGEM3-GC31-MM_dcppA-hi...,{'filename': 'pr_Amon_HadGEM3-GC31-MM_dcppA-hi...,{'filename': 'pr_Amon_HadGEM3-GC31-MM_dcppA-hi...,{'filename': 'pr_Amon_HadGEM3-GC31-MM_dcppA-hi...,{'filename': 'pr_Amon_HadGEM3-GC31-MM_dcppA-hi...,{'filename': 'pr_Amon_HadGEM3-GC31-MM_dcppA-hi...,...,,,,,,,,,,
9,pr,,,,,,,,,,...,,,,,,,,,,


In [13]:
# Set up the directory for the data on JASMIN
dcpp_dir_badc = "/badc/cmip6/data/CMIP6/DCPP/"

# Checkk whether these files exist on JASMIN
jasmin_files_df = check_file_exists_jasmin(df=df,
                                            directory=dcpp_dir_badc)

KeyError: 'filename'

In [64]:
# Now set up the group work space directory
dcpp_dir_gws = "/gws/nopw/j04/canari/users/benhutch/"

# Check whether these files exist on JAASMIN
jasmin_files_df_gws = check_file_exists_jasmin(df=df,
                                            directory=dcpp_dir_gws) 

BCC
gws
File does not exist for tas_Amon_BCC-CSM2-MR_dcppA-hindcast_s1961-r4i1p1f1_gn_196101-197012.nc
BCC
gws
File does not exist for tas_Amon_BCC-CSM2-MR_dcppA-hindcast_s1961-r2i1p1f1_gn_196101-197012.nc
BCC
gws
File does not exist for tas_Amon_BCC-CSM2-MR_dcppA-hindcast_s1961-r3i1p1f1_gn_196101-197012.nc
BCC
gws
File does not exist for tas_Amon_BCC-CSM2-MR_dcppA-hindcast_s1961-r5i1p1f1_gn_196101-197012.nc
BCC
gws
File does not exist for tas_Amon_BCC-CSM2-MR_dcppA-hindcast_s1961-r8i1p1f1_gn_196101-197012.nc
BCC
gws
File does not exist for tas_Amon_BCC-CSM2-MR_dcppA-hindcast_s1961-r6i1p1f1_gn_196101-197012.nc
BCC
gws
File does not exist for tas_Amon_BCC-CSM2-MR_dcppA-hindcast_s1961-r7i1p1f1_gn_196101-197012.nc
MPI-M
gws
File does not exist for tas_Amon_MPI-ESM1-2-HR_dcppA-hindcast_s1961-r3i1p1f1_gn_196111-197112.nc
MPI-M
gws
File does not exist for tas_Amon_MPI-ESM1-2-HR_dcppA-hindcast_s1961-r1i1p1f1_gn_196111-197112.nc
MPI-M
gws
File does not exist for tas_Amon_MPI-ESM1-2-HR_dcppA-hi