This script outputs daily 2m temperature data from ERA5 on the CESM grid. It reads hourly data on the 0.25 degree grid from the RDA on glade, calculates the daily average, and then regrids onto the CESM grid before renaming the variables and writing the result to netCDF.

The T2m data on the RDA is located at 
/gpfs/fs1/collections/rda/data/ds633.0/e5.oper.an.sfc/

In [1]:
import xarray as xr
import sys
import pandas as pd
import numpy as np
import xesmf as xe
import warnings
import dask

# Suppress every warning for the rest of the session to keep the cell output short.
# NOTE(review): a blanket 'ignore' also hides deprecation warnings raised by the
# imported libraries.
warnings.filterwarnings('ignore')

In [3]:
# Start a dask cluster through SLURM and connect a client to it.
from dask.distributed import Client
from dask_jobqueue import SLURMCluster

ncores = 36
# Memory request scales linearly with the core count (365GB for 36 cores).
nmem = '{}GB'.format(int(365 * ncores / 36))

cluster = SLURMCluster(
    cores=ncores,
    processes=ncores,
    memory=nmem,
    project='P04010022',
    walltime='2:00:00',
)
cluster.scale(ncores)
client = Client(cluster)

In [7]:
# Display the client summary. Re-run this cell until the summary shows that
# some workers have started before moving on to the processing cells.
client

0,1
Client  Scheduler: tcp://10.12.205.30:33126  Dashboard: http://10.12.205.30/proxy/39609/status,Cluster  Workers: 36  Cores: 36  Memory: 365.04 GB


In [5]:
# Location of the ERA5 data on the RDA (input).
# Both paths end in "/" because file names are appended to them by plain
# string concatenation.
filepath="/gpfs/fs1/collections/rda/data/ds633.0/e5.oper.an.sfc/"
# Output location: the weight file and the yearly t2m_<year>.nc files go here.
outpath="/glade/scratch/islas/processed/era5/T2m_day/"

In [13]:
# First and last year to process (both inclusive) and the resulting year count.
ystart = 1981
yend = 2019
nyears = yend - ystart + 1

In [9]:
# Read one CESM file only for its lat/lon axes; they define the grid that the
# ERA5 data is regridded onto.
cesmdat = xr.open_dataset(
    "/glade/campaign/cesm/collections/cesmLE/CESM-CAM5-BGC-LE/atm/proc/tseries/monthly/PHIS/f.e11.F1850C5CNTVSST.f09_f09.002.cam.h0.PHIS.040101-050012.nc"
)
grid_out = xr.Dataset(
    data_vars={'lat': (['lat'], cesmdat.lat)},
    coords={'lon': (['lon'], cesmdat.lon)},
)

In [None]:
# For every year: read the hourly ERA5 2m temperature month by month, average
# it to daily means, regrid it onto the CESM grid and write one netCDF file
# (t2m_<year>.nc) containing the variable "t2m" on a daily "time" axis.
reusewgt = False
wgtfile = outpath + "wgtfile.nc"

# The source and target grids are the same for every file, so the regridder is
# built once, from the first month that is opened, and then reused. This avoids
# recomputing the weights for every month and does not rely on the weight file
# being present on disk for later iterations.
regridder = None

for iyear in range(ystart, yend + 1):
    print(iyear)
    # Calendar dates for the whole year; they replace the day-of-year axis below.
    timeout = pd.date_range(start=str(iyear) + "-01-01", end=str(iyear) + "-12-31")
    outfile = outpath + "t2m_" + str(iyear) + ".nc"

    # Regridded daily means, one entry per month, concatenated once at the end.
    monthly = []
    for imon in range(1, 12 + 1):
        monstr = str(imon).zfill(2)
        file = filepath + "/" + str(iyear) + monstr + "/*_2t*.nc"
        print(file)
        data = xr.open_mfdataset(file, coords="minimal", join="override",
                                 decode_times=True, use_cftime=True, chunks={'time': 24})

        # Average all time steps that fall on the same day of the year.
        dataday = data.groupby('time.dayofyear').mean('time')
        # Use the lon/lat names that the output grid uses.
        dataday = dataday.rename(longitude="lon", latitude="lat")

        if regridder is None:
            regridder = xe.Regridder(dataday, grid_out, 'bilinear', periodic=True,
                                     reuse_weights=reusewgt, filename=wgtfile)
            # Kept for anything later in the session that inspects this flag.
            reusewgt = True

        dataday_rg = regridder(dataday.VAR_2T)
        monthly.append(dataday_rg)

    # Stitch the months together along the day-of-year axis in calendar order.
    t2m = xr.concat(monthly, dim="dayofyear", join="override")

    # Swap the day-of-year axis for real dates, then apply the output names.
    t2m = t2m.assign_coords(dayofyear=timeout)
    t2m = t2m.rename(dayofyear="time")
    t2m = t2m.rename("t2m")
    t2m.to_netcdf(path=outfile)