In [None]:
from pyesgf.search import SearchConnection
import os
import pandas as pd
import requests
from tqdm import tqdm
import re
import shutil
import datetime as dt
import numpy as np
from matplotlib import pyplot as plt
from netCDF4 import Dataset, date2index, num2date, date2num

In [None]:
def create_folder(folder):

    """
    Create an empty folder, replacing any existing one.

    If ``folder`` already exists as a directory, it is deleted together
    with everything inside it and a fresh, empty directory is created in
    its place. Missing parent directories are created as well.

    Parameters
    ----------
    folder: string
        The name of the folder

    Raises
    ------
    OSError
        If the existing folder cannot be removed, or the new folder
        cannot be created (for example because ``folder`` is a file).
    """

    if os.path.isdir(folder):
        # Let removal errors propagate: silently ignoring them would only
        # defer the failure to the directory creation below, with a less
        # informative "file exists" error.
        shutil.rmtree(folder)
    # makedirs (rather than mkdir) so nested paths such as 'data/run1' work.
    os.makedirs(folder)

In [None]:
def download(url, filename, folder, timeout=60):

    """
    Download a file from the server into a folder, showing a progress bar.

    The response is streamed to ``folder/filename`` in 1 KiB blocks. If the
    server answers with an HTTP error status, a message is printed and
    nothing is written. If the server reports a ``content-length`` and the
    written file has a different size, a message is printed and the file
    is removed.

    Parameters
    ----------
    url: string
        The url of the file
    filename: string
        The name of the file
    folder: string
        The name of the destination folder
    timeout: float or tuple, optional
        Timeout in seconds forwarded to ``requests.get`` so that an
        unresponsive server cannot block the download forever.

    Raises
    ------
    requests.RequestException
        If the connection fails or times out.
    OSError
        If the destination file cannot be written.
    """

    print("Downloading ", filename)
    target = os.path.join(folder, filename)
    block_size = 1024

    # Using the response as a context manager releases the connection even
    # when the body is not read to the end.
    with requests.get(url, stream=True, timeout=timeout) as r:
        if not r.ok:
            # Do not store an HTTP error page under the data file's name.
            print("Download failed, the status code was ", r.status_code)
            return

        total_size = int(r.headers.get('content-length', 0))
        try:
            with open(target, 'wb') as f:
                for data in tqdm(r.iter_content(block_size),
                                 total=total_size//block_size,
                                 unit='KiB', unit_scale=True):
                    f.write(data)
        except BaseException:
            # Streaming was interrupted: drop the partial file so it cannot
            # be mistaken for a complete download, then re-raise.
            if os.path.exists(target):
                os.remove(target)
            raise

        # The size check is only possible when the server sent a length.
        if total_size != 0 and os.path.getsize(target) != total_size:
            print("Downloaded size does not match expected size!\n",
                  "FYI, the status code was ", r.status_code)
            os.remove(target)

In [None]:
# Open the connection to the ESGF search service used by the cells below.
conn = SearchConnection(
    url='https://esgf-node.llnl.gov/esg-search',
    distrib=True,
)

In [None]:
# Search constraints, passed to conn.new_context as keyword arguments.
search_facets = {
    'project': 'CMIP6',
    'experiment_id': 'ssp585',
    'source_id': 'ACCESS-CM2,CMCC-ESM2,MPI-ESM1-2-LR',
    'variable': 'ts',
    'realm': 'atmos',
    'frequency': 'mon',
}
query = conn.new_context(**search_facets)

# Run the search and report how many entries it returned.
results = query.search()
nentries = len(results)
print("Found {} entries".format(nentries))

In [None]:
# Gather a [filename, download_url] pair for every file of every entry.
files = []
num_files = 0
for i in range(nentries):
    hit = results[i].file_context().search()
    n_hit = len(hit)
    for j in range(n_hit):
        file_result = hit[j]
        files.append([file_result.filename, file_result.download_url])
    # Count the whole entry at once instead of one file at a time.
    num_files += n_hit
    print("In entry {}, there are {} files".format(i, n_hit))
print("In total {} files were found".format(num_files))

In [None]:
# Keep only the files whose time range contains the first or the last year
# of interest. The range is read from the 'gn_YYYYMM-YYYYMM.nc' suffix of
# the file name.
FIRST_YEAR = 2015
LAST_YEAR = 2100
date_range_pattern = re.compile(r'gn_(\d{4})\d{2}-(\d{4})\d{2}\.nc')

useful_files = []
for file_info in files:
    filename = file_info[0]
    match = date_range_pattern.search(filename)
    if match is None:
        # A name without the expected suffix is skipped instead of
        # aborting the whole selection with an exception.
        print("Skipping {}: no gn_YYYYMM-YYYYMM.nc date range in the name"
              .format(filename))
        continue
    start_year, end_year = int(match.group(1)), int(match.group(2))
    covers_first = start_year <= FIRST_YEAR <= end_year
    covers_last = start_year <= LAST_YEAR <= end_year
    # A file covering both years is still appended only once.
    if covers_first or covers_last:
        useful_files.append(file_info)

In [None]:
# Start from an empty destination folder, then fetch every selected file.
new_folder = 'files_2015_2100'
create_folder(new_folder)
n_useful = len(useful_files)
for i, (filename, url) in enumerate(useful_files, start=1):
    # Count from 1 so the last message reads "file N out of N".
    print('Downloading file {} out of {}'.format(i, n_useful))
    # Nothing removes repeated names from useful_files, and the folder was
    # emptied above, so an existing file was already fetched in this run.
    if os.path.exists(os.path.join(new_folder, filename)):
        print('Skipping {}: already downloaded'.format(filename))
        continue
    try:
        download(url, filename, new_folder)
    except Exception as err:
        # Best effort: report the failure and carry on with the next file.
        # Catching Exception (not a bare except) keeps KeyboardInterrupt
        # able to stop the loop.
        print('Could not download {}: {}'.format(filename, err))