In [1]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt

import glob
import sys
import dask

from CASutils import lensread_utils as lens
from CASutils import readdata_utils as read
from math import nan
dask.config.set(**{'array.slicing.split_large_chunks': True})

<dask.config.set at 0x2af77afe0b38>

In [2]:
topdir="/glade/campaign/cgd/cesm/CESM2-LE/lnd/proc/tseries/day_1/"

In [3]:
landfrac = xr.open_dataset('LANDFRAC_LENS2.nc')
landfrac = landfrac.landfrac

In [4]:
landfrac = xr.where(landfrac > 0.5, 1, nan)

In [5]:
memstr = lens.lens2memnamegen_second50(50)

In [6]:
memstr.remove('1301.017')

In [7]:
import dask
from dask_jobqueue import PBSCluster
from dask.distributed import Client

cluster = PBSCluster(
    cores = 1,
    memory = '50GB',
    processes = 1,
    queue = 'casper',
    local_directory = '$TMPDIR',
    resource_spec = 'select=1:ncpus=1:mem=20GB',
    project='P04010022',
    walltime='02:00:00',
    interface='ib0')

# scale up
cluster.scale(20)

# change your urls to the dask dashboard so that you can see it
dask.config.set({'distributed.dashboard.link':'https://jupyterhub.hpc.ucar.edu/stable/user/{USER}/proxy/{port}/status'})

# Setup your client
client = Client(cluster)

In [8]:
cluster

In [19]:
#client.close()

In [11]:
client

0,1
Client  Scheduler: tcp://10.12.206.63:39660  Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/islas/proxy/8787/status,Cluster  Workers: 20  Cores: 20  Memory: 429.50 GB


In [16]:
#client.close()

In [13]:
for imem in memstr:
    print(imem)
    filelist = sorted(glob.glob(topdir+"FSNO"+"/*-"+imem+"*."+"FSNO"+".*.nc"))
    dat = xr.open_mfdataset(filelist, concat_dim=['time'], coords='minimal')[['FSNO','time_bounds']]
    dat = read.fixcesmtime(dat, timebndsvar='time_bounds')
    dat['lon'] = landfrac.lon ; dat['lat'] = landfrac.lat
    
    lons=[286,174,95,213]
    lats=[59,68,73,61]
    locations=[]
    for i in np.arange(0,len(lons),1):
        loc = dat.sel(lon=lons[i],lat=lats[i], method='nearest')
        locations.append(loc)
    locations = xr.concat(locations, dim='point')
    
    locations = locations.where( ((locations.time.dt.month == 6) | (locations.time.dt.month == 7) | (locations.time.dt.month == 8)), drop=True)
    locations.to_netcdf("/glade/scratch/islas/python/singleforcing/DATA_SORT/FSNO/FSNO_JJA_locations_lens2_"+imem+".nc")

1011.001
1031.002
1051.003
1071.004
1091.005
1111.006
1131.007
1151.008
1171.009
1191.010
1231.011
1231.012
1231.013
1231.014
1231.015
1231.016
1231.017
1231.018
1231.019
1231.020
1251.011
1251.012
1251.013
1251.014
1251.015
1251.016
1251.017
1251.018
1251.019
1251.020
1281.011
1281.012
1281.013
1281.014
1281.015
1281.016
1281.017
1281.018
1281.019
1281.020
1301.011
1301.012
1301.013
1301.014
1301.015
1301.016
1301.018
1301.019
1301.020


In [30]:
for imem in memstr:
    filelist = sorted(glob.glob(topdir+"FSNO"+"/*-"+imem+"*."+"FSNO"+".*.nc"))
    filelist = [sorted(glob.glob(topdir+"FSNO"+"/*-"+imem+"*."+"FSNO"+".*.nc")) for imem in memstr ]


members = [xr.open_mfdataset(i, combine='nested', concat_dim=['time'], coords='minimal')[['FSNO', 'time_bounds']] for i in filelist]


members = [ read.fixcesmtime(members[i], timebndsvar='time_bounds').sel(time=slice("1920-01-01","2050-12-31")) 
           for i in np.arange(0,len(members),1) ]
dat = xr.concat(members, dim='M', join='override', coords='minimal')
dat['lon'] = landfrac.lon ; dat['lat'] = landfrac.lat
dat = dat.where( ( (dat.time.dt.month == 5) | (dat.time.dt.month == 6) | (dat.time.dt.month == 7) ), drop=True )
datmasked = dat.FSNO*landfrac
datmasked = dat.where(dat.lat > 50, drop=True)
datmasked_stacked = datmasked.stack(z=("lon","lat"))
datmasked_stacked = datmasked_stacked.dropna("z")
datmasked_stacked = datmasked_stacked.reset_index("z")
datmasked_stacked.to_netcdf("/glade/scratch/islas/python/singleforcing/DATA_SORT/FSNO/FSNO_JJA_land_50N90N_lens2.nc")

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


KeyboardInterrupt: 

In [29]:
print(dat)

<xarray.Dataset>
Dimensions:      (M: 49, hist_interval: 2, lat: 192, lon: 288, time: 12052)
Coordinates:
  * lon          (lon) float32 0.0 1.25 2.5 3.75 5.0 ... 355.0 356.2 357.5 358.8
  * lat          (lat) float32 -90.0 -89.06 -88.12 -87.17 ... 88.12 89.06 90.0
  * time         (time) datetime64[ns] 1920-05-01T12:00:00 ... 2050-07-31T12:...
Dimensions without coordinates: M, hist_interval
Data variables:
    FSNO         (M, time, lat, lon) float32 dask.array<chunksize=(1, 920, 192, 288), meta=np.ndarray>
    time_bounds  (M, time, hist_interval) object dask.array<chunksize=(1, 920, 2), meta=np.ndarray>
Attributes: (12/100)
    title:                                     CLM History file information
    comment:                                   NOTE: None of the variables ar...
    Conventions:                               CF-1.0
    history:                                   created on 03/04/21 21:44:31
    source:                                    Community Land Model CLM4.0
  

In [28]:
print(landfrac)

<xarray.DataArray 'landfrac' (lat: 192, lon: 288)>
array([[ 1.,  1.,  1., ...,  1.,  1.,  1.],
       [ 1.,  1.,  1., ...,  1.,  1.,  1.],
       [ 1.,  1.,  1., ...,  1.,  1.,  1.],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]])
Coordinates:
  * lat      (lat) float32 -90.0 -89.06 -88.12 -87.17 ... 87.17 88.12 89.06 90.0
  * lon      (lon) float32 0.0 1.25 2.5 3.75 5.0 ... 355.0 356.2 357.5 358.8


In [40]:
print(datmasked)

<xarray.Dataset>
Dimensions:  (M: 2, lat: 192, lon: 288, time: 11041)
Coordinates:
  * lon      (lon) float32 0.0 1.25 2.5 3.75 5.0 ... 355.0 356.2 357.5 358.8
  * lat      (lat) float32 -90.0 -89.06 -88.12 -87.17 ... 87.17 88.12 89.06 90.0
  * time     (time) datetime64[ns] 1920-03-01T12:00:00 ... 2100-04-30T12:00:00
Dimensions without coordinates: M
Data variables:
    mask     (M, time, lat, lon) float64 dask.array<chunksize=(1, 610, 192, 288), meta=np.ndarray>


In [29]:
print(datmasked_stacked)

<xarray.Dataset>
Dimensions:  (time: 11041, z: 55296)
Coordinates:
  * time     (time) datetime64[ns] 1920-03-01T12:00:00 ... 2100-04-30T12:00:00
  * z        (z) MultiIndex
  - lon      (z) float64 0.0 0.0 0.0 0.0 0.0 ... 358.8 358.8 358.8 358.8 358.8
  - lat      (z) float64 -90.0 -89.06 -88.12 -87.17 ... 87.17 88.12 89.06 90.0
Data variables:
    *empty*


In [19]:
test = members[0]

In [22]:
test2 = test.where((test.time.dt.month == 3) | (test.time.dt.month == 4) | (test.time.dt.month == 5), drop=True)

In [24]:
test3 = test2.where( test2.lat > 40, drop=True)

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


In [25]:
print(test3)

<xarray.Dataset>
Dimensions:      (hist_interval: 2, lat: 54, lon: 288, time: 16652)
Coordinates:
  * lon          (lon) float32 0.0 1.25 2.5 3.75 5.0 ... 355.0 356.2 357.5 358.8
  * lat          (lat) float32 40.05 40.99 41.94 42.88 ... 88.12 89.06 90.0
  * time         (time) datetime64[ns] 1920-03-01T12:00:00 ... 2100-05-31T12:...
Dimensions without coordinates: hist_interval
Data variables:
    FSNO         (time, lat, lon) float32 dask.array<chunksize=(920, 54, 288), meta=np.ndarray>
    time_bounds  (time, hist_interval, lat) object dask.array<chunksize=(920, 2, 54), meta=np.ndarray>
Attributes: (12/100)
    title:                                     CLM History file information
    comment:                                   NOTE: None of the variables ar...
    Conventions:                               CF-1.0
    history:                                   created on 03/04/21 21:44:31
    source:                                    Community Land Model CLM4.0
    hostname:       