In [3]:
import numpy as np
import xarray as xr
from distributed import Client
import dask_jobqueue
from matplotlib.colors import LinearSegmentedColormap
#import daskdataframe as dd
import matplotlib.pyplot as plt
import glob
import netCDF4 as nc
import zarr

In [2]:
# Root directory of the CESM2 large-ensemble data; the combined zarr store is written beneath it.
cesm2_path = '/global/scratch/users/harsha/LENS/cesm2/'

In [8]:
def grayscale_cmap(cmap):
    """Return a grayscale version of the given colormap.

    Parameters
    ----------
    cmap : str or matplotlib.colors.Colormap
        Colormap name or instance to convert.

    Returns
    -------
    matplotlib.colors.LinearSegmentedColormap
        Colormap named ``<original name>_gray`` with ``cmap.N`` entries whose
        R, G and B channels are all set to the perceived luminance of the
        corresponding original colour; the alpha channel is left unchanged.
    """
    # plt.get_cmap accepts both names and Colormap instances.
    # (matplotlib.cm.get_cmap was deprecated in 3.7 and removed in 3.9.)
    cmap = plt.get_cmap(cmap)
    colors = cmap(np.arange(cmap.N))

    # convert RGBA to perceived grayscale luminance
    # cf. http://alienryderflex.com/hsp.html
    RGB_weight = [0.299, 0.587, 0.114]
    luminance = np.sqrt(np.dot(colors[:, :3] ** 2, RGB_weight))
    # Broadcast the single luminance value into the three colour channels.
    colors[:, :3] = luminance[:, np.newaxis]

    return LinearSegmentedColormap.from_list(cmap.name + "_gray", colors, cmap.N)
    

def view_colormap(cmap):
    """Plot a colormap with its grayscale equivalent.

    Parameters
    ----------
    cmap : str or matplotlib.colors.Colormap
        Colormap name or instance to display.

    Draws a two-row figure: the original colormap on top and the result of
    ``grayscale_cmap`` underneath. Returns ``None``.
    """
    # plt.get_cmap accepts both names and Colormap instances.
    # (matplotlib.cm.get_cmap was deprecated in 3.7 and removed in 3.9.)
    cmap = plt.get_cmap(cmap)
    colors = cmap(np.arange(cmap.N))

    gray_cmap = grayscale_cmap(cmap)
    grayscale = gray_cmap(np.arange(gray_cmap.N))

    fig, ax = plt.subplots(2, figsize=(6, 2),
                           subplot_kw=dict(xticks=[], yticks=[]))
    # Each colour table is shown as a one-row image stretched over the same extent.
    ax[0].imshow([colors], extent=[0, 10, 0, 1])
    ax[1].imshow([grayscale], extent=[0, 10, 0, 1])

In [4]:
# Start a SLURM-backed dask cluster and attach a client to it.
# Alternative QOS/account settings kept for reference:
#job_extra = ["--qos=cf_lowprio",'--account=ac_cumulus'] 
job_extra =['--qos=lr6_lowprio','--constraint=lr6_m192'] 
# Each SLURM job asks for 12 cores, 192GB and a 10 hour walltime on the "lr6" queue;
# worker scratch space and logs both go to the same dask_space directory.
cluster = dask_jobqueue.SLURMCluster(queue="lr6", cores=12, walltime='10:00:00', 
                local_directory='/global/scratch/users/harsha/dask_space/', 
                log_directory='/global/scratch/users/harsha/dask_space/', 
                job_extra_directives=job_extra, interface='eth0', memory="192GB") 
client = Client(cluster) 
# NOTE(review): scale_up(15) requests 15 units; confirm whether this counts SLURM jobs
# or worker processes in the installed dask_jobqueue/distributed version.
cluster.scale_up(15)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 39085 instead


In [5]:
# Display the cluster object as the cell output.
cluster

Tab(children=(HTML(value='<div class="jp-RenderedHTMLCommon jp-RenderedHTML jp-mod-trusted jp-OutputArea-outpu…

In [6]:
# Discover the SSP3-7.0 TREFHTMX NetCDF files and derive, from their names,
# the sorted unique ensemble-member tags and the sorted unique date ranges.
directory   = "/global/scratch/users/harsha/LENS/cesm2/tasmax/TREFHTMX/"
directory0  = "/global/scratch/users/harsha/LENS/cesm2/tasmax"
files       = glob.glob(directory+"b.e21.BSSP370*.nc")
datelen     = 20  # length of the trailing "YYYYMMDD-YYYYMMDD.nc" part of each name


def _strip_name_fragments(path):
    """Remove the directory and the fixed filename fragments from ``path``.

    What remains is the member tag followed by the date range and ".nc".
    The fragments are removed in a fixed order, one after the other.
    """
    for fragment in (directory, "b.e21.BSSP370", "cam.h1.TREFHTMX.",
                     "f09_g17.LE2-", "cmip6.", "smbb."):
        path = path.replace(fragment, "")
    return path


# Parse the single listing in `files` (no second glob), so that `files`,
# `emems`, `emembers` and `dates` always describe the same set of files.
emems       = [_strip_name_fragments(f) for f in files]
# Slice the date suffix off the end instead of str.replace, which would also
# remove any earlier occurrence of the same text inside the tag.
emembers    = sorted({f[:-datelen] for f in emems})  # unique member tags, ascending
dates       = sorted({f[-datelen:].replace('.nc',"") for f in emems})  # unique date ranges, ascending
dates

['20150101-20241231',
 '20250101-20341231',
 '20350101-20441231',
 '20450101-20541231',
 '20550101-20641231',
 '20650101-20741231',
 '20750101-20841231',
 '20850101-20941231',
 '20950101-21001231']

In [7]:
# Split every date-range string into its first 8 characters (start stamp)
# and everything after the separator at index 8 (end stamp).
dates0 = [span[:8] for span in dates]
dates1 = [span[9:] for span in dates]


In [8]:
#Create zarr paths
# `paths`  : one zarr store per ensemble-member index ("000" .. "099").
# `paths1` : one zarr store per date range found in `dates`.
scratchf  = '/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_mem/'
scratchf1 = '/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_time/'
filename  = 'b.e21.SSP370.'
path      = scratchf+filename
# Zero-padded three-digit member indices.
emembs    = [str(ememb).zfill(3) for ememb in range(100)]
paths     = [path+ememb+'.zarr' for ememb in emembs]
# Built from `dates` itself rather than a hard-coded count of 9, so the list
# stays in step with the date ranges actually present.
paths1    = [scratchf1+filename+'tasmax.'+date+'.zarr' for date in dates]

In [9]:
# Show the per-date-range zarr paths.
print(paths1)

['/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_time/b.e21.SSP370.tasmax.20150101-20241231.zarr', '/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_time/b.e21.SSP370.tasmax.20250101-20341231.zarr', '/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_time/b.e21.SSP370.tasmax.20350101-20441231.zarr', '/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_time/b.e21.SSP370.tasmax.20450101-20541231.zarr', '/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_time/b.e21.SSP370.tasmax.20550101-20641231.zarr', '/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_time/b.e21.SSP370.tasmax.20650101-20741231.zarr', '/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_time/b.e21.SSP370.tasmax.20750101-20841231.zarr', '/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_time/b.e21.SSP370.tasmax.20850101-20941231.zarr', '/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_time/b.e21.SSP370.tasmax.20950101-21001231.zarr']


In [10]:
%%time
#Build file1: for each of the first 100 entries of `emembers`, the sorted
#"cmip6" file paths followed by the sorted "smbb" file paths for that member.
cmip0 = 'b.e21.BSSP370cmip6.f09_g17.LE2-'
smbb0 = 'b.e21.BSSP370smbb.f09_g17.LE2-'
file1 = [
    sorted(glob.glob(directory+cmip0+emembers[idx]+'*'+'.nc'))
    + sorted(glob.glob(directory+smbb0+emembers[idx]+'*'+'.nc'))
    for idx in range(100)
]
#file1 is now a list of lists (one inner list per member).

CPU times: user 1.04 s, sys: 165 ms, total: 1.21 s
Wall time: 1.23 s


In [20]:
%%time
#Create a file2, with one list of file paths per date range: for each entry
#of `dates`, the sorted "cmip6" paths followed by the sorted "smbb" paths
#whose names end in that date range.
cmip0 = 'b.e21.BSSP370cmip6.f09_g17.LE2-'
smbb0 = 'b.e21.BSSP370smbb.f09_g17.LE2-'
# Iterate over `dates` directly rather than a hard-coded range(9), so file2
# always has exactly one entry per date range.
file2 = [
    sorted(glob.glob(directory+cmip0+'*'+date+'.nc'))
    + sorted(glob.glob(directory+smbb0+'*'+date+'.nc'))
    for date in dates
]

CPU times: user 856 ms, sys: 123 ms, total: 979 ms
Wall time: 920 ms


In [42]:
#file1[2]

In [11]:
%%time
#Write zarr files where each zarr file has all time series data for one ensemble.
for jj in range(100):
    # Open the member's files in a context manager so that the dataset returned
    # by open_mfdataset is the object that gets closed; the dataset written
    # below is a new object derived from it.
    with xr.open_mfdataset(file1[jj],decode_times=True,decode_timedelta=True) as src:
        ds = src.TREFHTMX
        # Append a length-1 "member" axis at position 3, labelled with the member index.
        ds = ds.expand_dims(dim='member',axis=3).assign_coords(member=('member',[jj]))
        ds = ds.to_dataset()
        print(paths[jj])
        # mode='w' overwrites any existing store at this path.
        ds.to_zarr(paths[jj],mode='w')
    print('Closed directory')

/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_mem/b.e21.SSP370.000.zarr
Closed directory
/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_mem/b.e21.SSP370.001.zarr
Closed directory
/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_mem/b.e21.SSP370.002.zarr
Closed directory
/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_mem/b.e21.SSP370.003.zarr
Closed directory
/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_mem/b.e21.SSP370.004.zarr
Closed directory
/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_mem/b.e21.SSP370.005.zarr
Closed directory
/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_mem/b.e21.SSP370.006.zarr
Closed directory
/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_mem/b.e21.SSP370.007.zarr
Closed directory
/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_mem/b.e21.SSP370.008.zarr
Closed directory
/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_mem/b.e21.SSP370.009.zarr
Closed directory
/global/scratch/user

Closed directory
/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_mem/b.e21.SSP370.084.zarr
Closed directory
/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_mem/b.e21.SSP370.085.zarr
Closed directory
/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_mem/b.e21.SSP370.086.zarr
Closed directory
/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_mem/b.e21.SSP370.087.zarr
Closed directory
/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_mem/b.e21.SSP370.088.zarr
Closed directory
/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_mem/b.e21.SSP370.089.zarr
Closed directory
/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_mem/b.e21.SSP370.090.zarr
Closed directory
/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_mem/b.e21.SSP370.091.zarr
Closed directory
/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_mem/b.e21.SSP370.092.zarr
Closed directory
/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_mem/b.e21.SSP370.093.zarr
Closed directory
/gl

In [33]:
%%time
# Open the files of one ensemble member (timed by the cell magic).
# NOTE(review): `jj` is not assigned in this cell; it is whatever value an
# earlier cell left behind, so confirm which member is intended.
# decode_timedelta is passed as the boolean True rather than the string 'True'.
ds = xr.open_mfdataset(file1[jj],decode_times=True,decode_timedelta=True)

CPU times: user 15.3 s, sys: 1.01 s, total: 16.3 s
Wall time: 20.3 s


In [39]:
# Spot-check one of the per-member zarr paths.
paths[60]

'/global/scratch/users/harsha/LENS/cesm2/tasmax/ssptmax_mem/b.e21.SSP370.060.zarr'

In [12]:
%%time
# Open all per-member zarr stores listed in `paths` as a single dataset.
ds_temp = xr.open_mfdataset(paths,engine='zarr')

CPU times: user 22.6 s, sys: 629 ms, total: 23.2 s
Wall time: 23.2 s


In [13]:
# Display the combined dataset.
ds_temp

Unnamed: 0,Array,Chunk
Bytes,646.63 GiB,769.92 MiB
Shape,"(31391, 192, 288, 100)","(3650, 192, 288, 1)"
Count,1900 Tasks,900 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 646.63 GiB 769.92 MiB Shape (31391, 192, 288, 100) (3650, 192, 288, 1) Count 1900 Tasks 900 Chunks Type float32 numpy.ndarray",31391  1  100  288  192,

Unnamed: 0,Array,Chunk
Bytes,646.63 GiB,769.92 MiB
Shape,"(31391, 192, 288, 100)","(3650, 192, 288, 1)"
Count,1900 Tasks,900 Chunks
Type,float32,numpy.ndarray


In [14]:
%%time
# Write the combined dataset to one zarr store under cesm2_path.
# NOTE(review): no `mode` is passed here, unlike the per-member writes that use
# mode='w'; confirm how a re-run should behave when the store already exists.
ds_temp.to_zarr(cesm2_path+'tasmax/ssp370_cesm2tmax.zarr')

CPU times: user 18.5 s, sys: 683 ms, total: 19.2 s
Wall time: 1min 33s


<xarray.backends.zarr.ZarrStore at 0x2b13812bf0b0>

In [15]:
%%time
# Open the combined zarr store from its path under cesm2_path.
ds_t = xr.open_zarr(cesm2_path+'tasmax/ssp370_cesm2tmax.zarr')

CPU times: user 153 ms, sys: 25.4 ms, total: 179 ms
Wall time: 201 ms


In [16]:
# Display the dataset read back from the combined store.
ds_t

Unnamed: 0,Array,Chunk
Bytes,646.63 GiB,769.92 MiB
Shape,"(31391, 192, 288, 100)","(3650, 192, 288, 1)"
Count,901 Tasks,900 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 646.63 GiB 769.92 MiB Shape (31391, 192, 288, 100) (3650, 192, 288, 1) Count 901 Tasks 900 Chunks Type float32 numpy.ndarray",31391  1  100  288  192,

Unnamed: 0,Array,Chunk
Bytes,646.63 GiB,769.92 MiB
Shape,"(31391, 192, 288, 100)","(3650, 192, 288, 1)"
Count,901 Tasks,900 Chunks
Type,float32,numpy.ndarray
