In [None]:
import xarray as xr
import cf_xarray as cfxr
import pyesgf
from pyesgf.logon import LogonManager
from pyesgf.search import SearchConnection

# Show which pyesgf version is installed in this kernel.
print(pyesgf.__version__)

# Manager object used below to check for / obtain ESGF credentials.
lm = LogonManager()

# Only log on when no valid logon exists yet; the logon is requested in
# interactive mode against the DKRZ MyProxy host.
# NOTE(review): interactive=True presumably prompts for missing credentials,
# so this cell may block when run non-interactively — confirm.
if not lm.is_logged_on():
    myproxy_host = 'esgf-data.dkrz.de'
    lm.logon(hostname=myproxy_host, interactive=True, bootstrap=True)


# Report the resulting logon state.
print('logged on: {}'.format(lm.is_logged_on()))

In [None]:
# Open a single remote file (the fixed-field `orog` variable of a CORDEX EUR-11
# REMO2015 evaluation run) through the THREDDS `dodsC` endpoint in the URL.
ds = xr.open_dataset('http://esgf1.dkrz.de/thredds/dodsC/cordex/cordex/output/EUR-11/GERICS/ECMWF-ERAINT/evaluation/r0i0p0/GERICS-REMO2015/v1/fx/orog/v20180813/orog_EUR-11_ECMWF-ERAINT_evaluation_r0i0p0_GERICS-REMO2015_v1_fx.nc')

In [None]:
# Quick-look plot of the `orog` variable with xarray's default plot method.
ds.orog.plot()

In [None]:
import requests
import xml.etree.ElementTree as ET
import numpy

# Author: Unknown
# The original version of this search function comes from a Word document published by ESGF:
# https://docs.google.com/document/d/1pxz1Kd3JHfFp8vR2JCVBfApbsHmbUQQstifhGNdc6U0/edit?usp=sharing

# API AT: https://github.com/ESGF/esgf.github.io/wiki/ESGF_Search_REST_API#results-pagination

def esgf_search(server="https://esgf-node.llnl.gov/esg-search/search",
                files_type="OPENDAP", local_node=True, project="CMIP6",
                verbose=False, format="application%2Fsolr%2Bjson",
                use_csrf=False, **search):
    """Query the ESGF search REST API and collect file URLs of one access type.

    NOTE: a second ``esgf_search`` is defined further down in this same cell
    and replaces this one as soon as the cell has run.

    Parameters
    ----------
    server : str
        Base URL of the search endpoint; ``"/?<query>"`` is appended to it.
    files_type : str
        Access type to keep (compared upper-cased against the last
        ``|``-separated field of every entry in a record's ``url`` list).
    local_node : bool
        If true, ``distrib=false`` is added to the query.
    project : str
        Value of the ``project`` search facet.
    verbose : bool
        If true, print every key/value pair of every returned record.
    format : str
        Value of the ``format`` query parameter. It must already be
        URL-encoded because the query string is assembled by plain string
        formatting, without any escaping.
    use_csrf : bool
        If true, fetch ``server`` once first and send the CSRF cookie value
        as ``csrfmiddlewaretoken``.
    **search
        Additional facets, added verbatim to the query string.

    Returns
    -------
    list of str
        Sorted URLs of the requested access type, with everything from the
        first ``".html"`` onwards removed.

    Raises
    ------
    requests.HTTPError
        If a search request returns an error status.
    KeyError
        If ``use_csrf`` is set and neither CSRF cookie is present.
    """
    files_type = files_type.upper()
    all_files = []

    # The session is used as a context manager so its connections are
    # released when the search is done.
    with requests.session() as client:
        payload = search
        payload["project"] = project
        payload["type"] = "File"
        if local_node:
            payload["distrib"] = "false"
        if use_csrf:
            client.get(server)
            if 'csrftoken' in client.cookies:
                # Django 1.6 and up
                csrftoken = client.cookies['csrftoken']
            else:
                # older versions
                csrftoken = client.cookies['csrf']
            payload["csrfmiddlewaretoken"] = csrftoken

        payload["format"] = format

        offset = 0
        # Placeholder so the loop runs once; replaced by the real total from
        # the first response.
        numFound = 10000
        while offset < numFound:
            payload["offset"] = offset
            query = "&".join("{}={}".format(k, v) for k, v in payload.items())
            url = "{}/?{}".format(server, query)
            print(url)
            r = client.get(url)
            r.raise_for_status()
            resp = r.json()["response"]
            numFound = int(resp["numFound"])
            docs = resp["docs"]
            if not docs:
                # An empty page would never advance `offset`; stop instead of
                # requesting the same page forever.
                break
            offset += len(docs)
            for d in docs:
                if verbose:
                    for k in d:
                        print("{}: {}".format(k, d[k]))
                for f in d["url"]:
                    sp = f.split("|")
                    if sp[-1] == files_type:
                        all_files.append(sp[0].split(".html")[0])
    return sorted(all_files)


def parse_urls(response):
    """Map the access types found in an ESGF ``url`` list to their URLs.

    Parameters
    ----------
    response : iterable of str
        Entries of the form ``"<url>|<type>|..."``; the second
        ``|``-separated field is used as the type.

    Returns
    -------
    dict
        ``{type: url}``. Any type containing ``"opendap"`` is stored under the
        key ``"opendap"`` with a trailing ``".html"`` removed from its URL.
        If a type occurs more than once, the last entry wins.

    Raises
    ------
    IndexError
        If an entry contains no ``|`` separator.
    """
    suffix = ".html"
    types = {}
    for entry in response:
        parts = entry.split("|")
        url, url_type = parts[0], parts[1]
        if "opendap" in url_type:
            # Strip the suffix only when it is really there, so that URLs
            # without it are not truncated by five characters.
            if url.endswith(suffix):
                url = url[:-len(suffix)]
            types["opendap"] = url
        else:
            types[url_type] = url
    return types


def parse_result(response):
    """Group the file URLs of a search response by dataset id and access type.

    Parameters
    ----------
    response
        Object whose ``.json()`` returns a mapping with the file records
        under ``['response']['docs']``; every record needs the keys
        ``'dataset_id'`` and ``'url'``.

    Returns
    -------
    dict
        ``{dataset_id: {access_type: [url, ...]}}`` where the access types and
        URLs of each record come from ``parse_urls``.
    """
    docs = response.json()['response']['docs']

    # Register every dataset id first, so each one is present in the result
    # before any URL is sorted into it.
    grouped = {}
    for doc in docs:
        grouped[doc['dataset_id']] = {}

    for doc in docs:
        per_dataset = grouped[doc['dataset_id']]
        for kind, link in parse_urls(doc['url']).items():
            per_dataset.setdefault(kind, []).append(link)
    return grouped
        

def esgf_search(url="https://esgf-node.llnl.gov/esg-search/search",
                files_type="OPENDAP", local_node=True, project="CORDEX", **search):
    """Run one ESGF file search and return the URLs grouped per dataset.

    Parameters
    ----------
    url : str
        Search endpoint to query.
    files_type, local_node
        Accepted for signature compatibility; this implementation does not
        use them.
    project
        Value of the ``project`` facet.
    **search
        Further facets passed as query parameters. They are applied last, so
        they can override the defaults set here (``type``, ``format``,
        ``limit`` and ``project``).

    Returns
    -------
    dict
        The result of ``parse_result`` for the response.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.

    Notes
    -----
    Only a single request is sent, so no more than ``limit`` (500 unless
    overridden through ``search``) file records are requested.
    """
    params = dict(project=project,
                  type="File",
                  format="application/solr+json",
                  limit=500)
    params.update(search)
    response = requests.get(url, params=params)
    # Surface HTTP errors here instead of failing later while parsing a body
    # that is not the expected JSON document.
    response.raise_for_status()
    return parse_result(response)

In [None]:
# Search the DKRZ endpoint for monthly `tas` files of the EUR-11 domain,
# ensemble member r1i1p1, for the historical and three RCP experiments in the
# CORDEX and CORDEX-Reklies projects.
result = esgf_search(url="http://esgf-data.dkrz.de/esg-search/search", project=['CORDEX', 'CORDEX-Reklies'], time_frequency='mon', 
                     variable="tas", experiment=["historical", "rcp26", "rcp45", "rcp85"], domain="EUR-11", ensemble="r1i1p1")
# Number of dataset ids in the result (`result` is keyed by dataset id).
len(result)

In [None]:
# Create a dask.distributed client with default arguments; the trailing bare
# expression displays it in the notebook.
from dask.distributed import Client
client = Client()
client

In [None]:
def open_mfdataset(
    files,
    use_cftime=True,
    parallel=True,
    data_vars="minimal",
    chunks={},
    coords="minimal",
    compat="override",
    drop=None,
    **kwargs
):
    """optimized function for opening large cf datasets.

    based on https://github.com/pydata/xarray/issues/1385#issuecomment-561920115

    The files are opened and combined with all CF decoding switched off, and
    the combined dataset is decoded once at the end.

    Parameters
    ----------
    files
        Paths or URLs handed to ``xr.open_mfdataset``.
    use_cftime
        Forwarded to ``xr.decode_cf`` for the final decoding step.
    parallel, data_vars, chunks, coords, compat
        Forwarded unchanged to ``xr.open_mfdataset``. ``chunks`` is only
        passed on and never modified here.
    drop
        Optional variable name(s) removed from every file before combining;
        ``None`` (the default) removes nothing.
    **kwargs
        Any further keyword arguments for ``xr.open_mfdataset``.

    Returns
    -------
    The combined dataset after ``xr.decode_cf``.
    """

    def preprocess(ds):
        # Runs on every file before combining: optionally remove the
        # requested variables, then drop all non-index coordinates.
        if drop is not None:
            ds = ds.drop_vars(drop)
        return ds.reset_coords(drop=True)

    ds = xr.open_mfdataset(
        files,
        parallel=parallel,
        decode_times=False,
        combine="by_coords",
        preprocess=preprocess,
        decode_cf=False,
        chunks=chunks,
        data_vars=data_vars,
        # Honour the caller's choice instead of hard-coded values; the
        # defaults are the values that used to be hard-coded.
        coords=coords,
        compat=compat,
        **kwargs
    )
    return xr.decode_cf(ds, use_cftime=use_cftime)


In [None]:
# Take the OPeNDAP URLs of one dataset id from the search result.
urls = result["cordex.output.EUR-11.DMI.ICHEC-EC-EARTH.historical.r1i1p1.HIRHAM5.v1.mon.tas.v20190108|cordexesg.dmi.dk"]["opendap"]
# Time opening them with the notebook's open_mfdataset helper. This rebinds
# `ds`, which previously held the orography dataset.
%time ds = open_mfdataset(urls, parallel=True, chunks={})

In [None]:
# Yearly mean of `tas`, then averaged over the axes cf_xarray identifies as
# 'X' and 'Y'; `.compute()` evaluates the result, and `%time` reports the cost.
%time mean = ds.tas.groupby('time.year').mean('time').cf.mean(('X', 'Y')).compute()

In [None]:
# Plot the yearly, spatially averaged `tas` series computed above.
mean.plot()

In [None]:
%%time
#dsets = {key: xr.open_mfdataset(urls['opendap'], parallel=True) for key, urls in result.items()}
dsets = {}
for dset_id, urls in result.items():
    try:
        print(f'opening: {dset_id}')
        dsets[dset_id] = open_mfdataset(urls['opendap'], parallel=True)
    except:
        print(f'failed to open: {dset_id}')

In [None]:
# Reduce every opened dataset to a yearly `tas` series averaged over the CF
# 'X' and 'Y' axes, then stack the series along a new `dset` dimension that
# carries the dataset ids.
yearly_means = [
    dset.tas.groupby('time.year').mean('time').cf.mean(('X', 'Y'))
    for dset in dsets.values()
]
dset_ids = xr.DataArray(list(dsets), dims='dset')
means = xr.concat(yearly_means, dim=dset_ids)

In [None]:
# Display the concatenated yearly means.
means

In [None]:
# Load the yearly means from a local file; this rebinds `means`.
# NOTE(review): no visible cell writes 'means.nc' — presumably it was saved
# from the concatenated `means` in a since-removed cell; confirm.
means = xr.open_dataset('means.nc')

# filter dataset id
# Keep only the first ten dot-separated components of every dataset id.
means["dset"] = [".".join(str(ds.data).split(".")[0:10]) for ds in means.dset]

In [None]:
def concat_experiment(means, experiments=("historical", "rcp45")):
    """Split `means` by experiment and stack the parts along `experiment`.

    For every name in `experiments`, the entries whose `dset` id contains that
    name are selected, tagged with an `experiment` coordinate, and the literal
    text ``".<name>"`` is removed from their `dset` ids. The parts are then
    concatenated along a new `experiment` dimension.

    Parameters
    ----------
    means
        xarray object with a string-valued `dset` coordinate.
        NOTE(review): assumes the experiment name appears in each id as
        ``".<name>"`` — confirm against the ids in use.
    experiments
        Experiment names to extract, in the order they should appear along
        the `experiment` dimension.

    Returns
    -------
    The concatenation of the per-experiment subsets.
    """
    parts = []
    for experiment in experiments:
        is_member = means.dset.str.contains(experiment, regex=False)
        subset = means.where(is_member, drop=True).assign_coords(experiment=experiment)
        # Literal replacement: "." must not be treated as a regex wildcard.
        subset['dset'] = subset.dset.str.replace('.' + experiment, '', regex=False)
        parts.append(subset)
    return xr.concat(parts, dim='experiment')

In [None]:
# Select the entries of each experiment by dataset id and tag them with an
# `experiment` coordinate.
rcp45 = means.where(means.dset.str.contains('rcp45'), drop=True).assign_coords(experiment = 'rcp45')
historical = means.where(means.dset.str.contains('historical'), drop=True).assign_coords(experiment = 'historical')

# Remove the experiment component from the ids so both subsets share the same
# `dset` labels. regex=False makes this a literal replacement; otherwise the
# leading "." would act as a regex wildcard matching any character.
rcp45['dset'] = rcp45.dset.str.replace('.rcp45', '', regex=False)
historical['dset'] = historical.dset.str.replace('.historical', '', regex=False)

In [None]:
# Stack the two experiment subsets along a new `experiment` dimension.
concat = xr.concat([historical, rcp45], dim='experiment')

In [None]:
# One panel per `dset` (eight panels per row), one line per experiment.
concat.tas.plot(col='dset', col_wrap=8, hue='experiment')

In [None]:
# One plot call per experiment, each drawing one line per `dset`.
concat.tas.sel(experiment='historical').plot(hue="dset", figsize=(20,10))
concat.tas.sel(experiment='rcp45').plot(hue="dset", figsize=(20,10))