# Subset CMIP5 Datasets with xarray (ocean replica)

xarray: http://xarray.pydata.org/en/stable/index.html

Ocean data with OPeNDAP:

https://aims3.llnl.gov/thredds/dodsC/cmip5_css01_data/cmip5/output1/NOAA-GFDL/GFDL-HIRAM-C360/sst2030/mon/ocean/Omon/r3i1p1/v20110601/tos/tos_Omon_GFDL-HIRAM-C360_sst2030_r3i1p1_202601-203012.nc.html


## Search CMIP5 Dataset

Using esgf-pyclient: https://esgf-pyclient.readthedocs.io/en/latest/index.html

In [None]:
from pyesgf.search import SearchConnection
# Open a connection to the ESGF search service at esgf-data.dkrz.de.
# distrib=True is esgf-pyclient's flag for a distributed search (see its docs).
conn = SearchConnection('https://esgf-data.dkrz.de/esg-search', distrib=True)


In [None]:
# Build a search context constrained by the facets below. The commented-out
# facets (data_node, replica) are extra constraints that are left disabled.
ctx = conn.new_context(
    project='CMIP5', 
    experiment='sst2030',
    model='GFDL-HIRAM-C360',
    ensemble='r3i1p1',
    time_frequency='mon',
    realm='ocean',
    #data_node='aims3.llnl.gov',
    #replica=False,
    )
# Hit count for the constraints above (shown as the cell output).
ctx.hit_count

In [None]:
# Print the dataset id of every search hit in the current context.
for result in ctx.search():
    print(result.dataset_id)


In [None]:
# Use the second search hit (index 1) for the following cells.
# NOTE(review): the index is hard-coded -- this assumes the search returns at
# least two hits in a stable order; confirm it selects the intended dataset.
result = ctx.search()[1]
result.dataset_id

In [None]:
# List the files of the selected dataset and remember the OPeNDAP URL of the
# file(s) whose URL mentions the 'tos' variable (the last match wins).
files = result.file_context().search()
for file in files:
    # esgf-pyclient returns None for opendap_url when a file has no OPeNDAP
    # endpoint; fall back to '' so such files are skipped instead of raising
    # TypeError on `'tos' in None`.
    if 'tos' in (file.opendap_url or ''):
        tos_url = file.opendap_url
        print(tos_url)

## Subset single dataset with xarray

Using OPeNDAP: http://xarray.pydata.org/en/stable/io.html?highlight=opendap#opendap

In [None]:
import xarray as xr
# Open the remote dataset from the URL found in the file search. The `chunks`
# argument asks xarray for chunked (dask-backed) arrays, here 120 time steps
# per chunk (see the xarray open_dataset docs).
ds = xr.open_dataset(tos_url, chunks={'time': 120})
# Show the dataset summary (dimensions, coordinates, variables, attributes).
print(ds)

In [None]:
# Take the 'tos' variable, keep the first two time steps by position, then
# crop by coordinate value to lat -50..50 and lon 0..50 -- all in one chain.
da = (
    ds['tos']
    .isel(time=slice(0, 2))
    .sel(lat=slice(-50, 50), lon=slice(0, 50))
)
# da = da.squeeze()
da

In [None]:
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# da.squeeze().plot()

## Download to NetCDF

In [None]:
# da.to_netcdf('tos.nc')

## CDO
- Python bindings: https://pypi.org/project/cdo/
- Tutorial: https://code.mpimet.mpg.de/projects/cdo/wiki/Tutorial

In [None]:
# THREDDS "dodsC" address of the tos file; passed to CDO as its input below.
ds_url = "https://aims3.llnl.gov/thredds/dodsC/cmip5_css01_data/cmip5/output1/NOAA-GFDL/GFDL-HIRAM-C360/sst2030/mon/ocean/Omon/r3i1p1/v20110601/tos/tos_Omon_GFDL-HIRAM-C360_sst2030_r3i1p1_202601-203012.nc"
# Alternative input (atmosphere, tasmax) kept for reference:
# ds_url = "http://esgf-data1.ceda.ac.uk/thredds/dodsC/esg_dataroot/cmip5/output1/MOHC/HadCM3/rcp45/mon/atmos/Amon/r1i1p1/v20110905/tasmax/tasmax_Amon_HadCM3_rcp45_r1i1p1_200601-203012.nc"


In [None]:
from cdo import Cdo

In [None]:
# Create the Cdo wrapper object whose operator methods are called below.
cdo = Cdo()

In [None]:
# Run CDO's `sinfo` operator with the remote dataset URL as input.
cdo.sinfo(input=ds_url)

In [None]:
# Operator parameters are positional in the python-cdo bindings; `input` must be only the file/URL.
# cdo.seltimestep(1, input=ds_url, output="out.nc")

In [None]:
# cdo -sellonlatbox,-180,180,0,90 -seltimestep,1 https://aims3.llnl.gov/thredds/dodsC/cmip5_css01_data/cmip5/output1/NOAA-GFDL/GFDL-HIRAM-C360/sst2030/mon/ocean/Omon/r3i1p1/v20110601/tos/tos_Omon_GFDL-HIRAM-C360_sst2030_r3i1p1_202601-203012.nc out.nc 