In [1]:
import xarray as xr
from pathlib import Path
import os
from dask.distributed import Client
import numpy as np

In [2]:
# REZ mask
mask_file = "/home/548/cd3022/aus-historical-solar-droughts/data/boundary_files/REZ_mask.npz"
# np.load on an .npz archive returns a lazy NpzFile that keeps the file open;
# use it as a context manager so the handle is released once the array is read.
with np.load(mask_file) as loaded_mask:
    # Indexing the archive reads the "mask" array fully into memory, so it
    # remains usable after the archive is closed.
    mask = loaded_mask["mask"]

# Define processing function

In [3]:
def solar_generation_processing(ds):
    """Compute the PV actual/ideal ratio for every valid cell of the masked region.

    The module-level ``mask`` is applied to ``ds``; the three irradiance fields
    are flattened, samples whose GHI is NaN are dropped, and the remaining
    samples (with matching time/latitude/longitude values) are passed to
    ``utils_V2.tilting_panel_pr``. The result is scattered back onto the masked
    grid, with NaN wherever the masked GHI was NaN.

    Parameters
    ----------
    ds : xarray.Dataset
        Must provide the variables ``surface_global_irradiance``,
        ``direct_normal_irradiance`` and ``surface_diffuse_irradiance`` and the
        coordinates ``time``, ``latitude`` and ``longitude``.

    Returns
    -------
    xarray.DataArray
        Array with the shape, dims and coords of the masked
        ``surface_global_irradiance`` field.

    Notes
    -----
    * The flattening below assumes the irradiance variables are ordered
      ``(time, latitude, longitude)`` -- TODO confirm against the input files.
    * NOTE(review): ``utils_V2`` is not imported in the visible cells; it must be
      imported in the kernel before this function is called.
    """
    # apply region mask to lat/lon coordinates
    mask_da = xr.DataArray(
        mask,
        coords={"latitude": ds.latitude, "longitude": ds.longitude},
        dims=["latitude", "longitude"],
    )
    masked_ds = ds.where(mask_da, drop=True)

    # template to refit data to
    mask_template = masked_ds.surface_global_irradiance

    # get irradiance data, flattened, and drop every sample where GHI is NaN
    ghi = mask_template.values.ravel()
    dni = masked_ds.direct_normal_irradiance.values.ravel()
    dhi = masked_ds.surface_diffuse_irradiance.values.ravel()
    nan_mask = np.isnan(ghi)  # same shape as dni, dhi
    valid = ~nan_mask
    ghi_clean = ghi[valid]
    dni_clean = dni[valid]
    dhi_clean = dhi[valid]

    # build time and coordinate vectors that line up with the flattened
    # irradiance values: the lat/lon grid is tiled once per time step and each
    # time stamp is repeated once per grid cell
    lat_1d = masked_ds.latitude.values
    lon_1d = masked_ds.longitude.values
    lon_grid, lat_grid = np.meshgrid(lon_1d, lat_1d, indexing="xy")
    lat_grid_1d = lat_grid.ravel()
    lon_grid_1d = lon_grid.ravel()
    # Use the masked dataset's own time length (the original referenced an
    # undefined global named `dataset` here).
    n_times = masked_ds.sizes["time"]
    lat_1d_expanded = np.tile(lat_grid_1d, n_times)  # Tile lat for all times
    lon_1d_expanded = np.tile(lon_grid_1d, n_times)  # Tile lon for all times
    time_1d = np.repeat(masked_ds.time.values, len(lat_grid_1d))  # Repeat time for all lat/lon
    lat_1d_expanded_clean = lat_1d_expanded[valid]
    lon_1d_expanded_clean = lon_1d_expanded[valid]
    time_1d_clean = time_1d[valid]

    # calculate capacity factors using pvlib
    # the function defined in utils_V2 is essentially the same as the workflow in pv-output-tilting.ipynb
    actual_ideal_ratio = utils_V2.tilting_panel_pr(
        pv_model='Canadian_Solar_CS5P_220M___2009_',
        inverter_model='ABB__MICRO_0_25_I_OUTD_US_208__208V_',
        ghi=ghi_clean,
        dni=dni_clean,
        dhi=dhi_clean,
        time=time_1d_clean,
        lat=lat_1d_expanded_clean,
        lon=lon_1d_expanded_clean,
    )

    # Put the results back in line with the coordinates: start from an all-NaN
    # flat array and write the computed ratios into the positions the input
    # irradiance samples were taken from.
    filled = np.full_like(ghi, np.nan)
    filled[valid] = actual_ideal_ratio

    # convert data back into an xarray object with the masked grid's layout
    reshaped = filled.reshape(mask_template.shape)
    ratio_da = xr.DataArray(reshaped, coords=mask_template.coords, dims=mask_template.dims)
    return ratio_da

In [4]:
# Number of workers: use the PBS allocation when running inside a PBS job,
# otherwise fall back to the local CPU count so the cell also runs outside PBS
# (os.environ.get returns None there, and int(None) raises TypeError).
# `ncpus` is kept as a string, matching what the environment variable provides.
ncpus = os.environ.get('PBS_NCPUS') or str(os.cpu_count() or 1)
c = Client(n_workers=int(ncpus))
c

Perhaps you already have a cluster running?
Hosting the HTTP server on port 34433 instead


0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: /proxy/34433/status,

0,1
Dashboard: /proxy/34433/status,Workers: 12
Total threads: 12,Total memory: 46.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:39231,Workers: 12
Dashboard: /proxy/34433/status,Total threads: 12
Started: Just now,Total memory: 46.00 GiB

0,1
Comm: tcp://127.0.0.1:37985,Total threads: 1
Dashboard: /proxy/33983/status,Memory: 3.83 GiB
Nanny: tcp://127.0.0.1:45269,
Local directory: /jobfs/137640620.gadi-pbs/dask-scratch-space/worker-2yblgz2c,Local directory: /jobfs/137640620.gadi-pbs/dask-scratch-space/worker-2yblgz2c

0,1
Comm: tcp://127.0.0.1:38601,Total threads: 1
Dashboard: /proxy/43483/status,Memory: 3.83 GiB
Nanny: tcp://127.0.0.1:40149,
Local directory: /jobfs/137640620.gadi-pbs/dask-scratch-space/worker-l14d3kbw,Local directory: /jobfs/137640620.gadi-pbs/dask-scratch-space/worker-l14d3kbw

0,1
Comm: tcp://127.0.0.1:37317,Total threads: 1
Dashboard: /proxy/35071/status,Memory: 3.83 GiB
Nanny: tcp://127.0.0.1:38877,
Local directory: /jobfs/137640620.gadi-pbs/dask-scratch-space/worker-r96qfdie,Local directory: /jobfs/137640620.gadi-pbs/dask-scratch-space/worker-r96qfdie

0,1
Comm: tcp://127.0.0.1:39373,Total threads: 1
Dashboard: /proxy/43507/status,Memory: 3.83 GiB
Nanny: tcp://127.0.0.1:35931,
Local directory: /jobfs/137640620.gadi-pbs/dask-scratch-space/worker-mf9tvd_n,Local directory: /jobfs/137640620.gadi-pbs/dask-scratch-space/worker-mf9tvd_n

0,1
Comm: tcp://127.0.0.1:45007,Total threads: 1
Dashboard: /proxy/36735/status,Memory: 3.83 GiB
Nanny: tcp://127.0.0.1:38219,
Local directory: /jobfs/137640620.gadi-pbs/dask-scratch-space/worker-0kffvpxv,Local directory: /jobfs/137640620.gadi-pbs/dask-scratch-space/worker-0kffvpxv

0,1
Comm: tcp://127.0.0.1:39909,Total threads: 1
Dashboard: /proxy/40665/status,Memory: 3.83 GiB
Nanny: tcp://127.0.0.1:39589,
Local directory: /jobfs/137640620.gadi-pbs/dask-scratch-space/worker-hmsneujr,Local directory: /jobfs/137640620.gadi-pbs/dask-scratch-space/worker-hmsneujr

0,1
Comm: tcp://127.0.0.1:40055,Total threads: 1
Dashboard: /proxy/39277/status,Memory: 3.83 GiB
Nanny: tcp://127.0.0.1:44223,
Local directory: /jobfs/137640620.gadi-pbs/dask-scratch-space/worker-m20084_2,Local directory: /jobfs/137640620.gadi-pbs/dask-scratch-space/worker-m20084_2

0,1
Comm: tcp://127.0.0.1:35319,Total threads: 1
Dashboard: /proxy/46217/status,Memory: 3.83 GiB
Nanny: tcp://127.0.0.1:41059,
Local directory: /jobfs/137640620.gadi-pbs/dask-scratch-space/worker-_a4_jtzs,Local directory: /jobfs/137640620.gadi-pbs/dask-scratch-space/worker-_a4_jtzs

0,1
Comm: tcp://127.0.0.1:38875,Total threads: 1
Dashboard: /proxy/34455/status,Memory: 3.83 GiB
Nanny: tcp://127.0.0.1:34431,
Local directory: /jobfs/137640620.gadi-pbs/dask-scratch-space/worker-b_f2oo5k,Local directory: /jobfs/137640620.gadi-pbs/dask-scratch-space/worker-b_f2oo5k

0,1
Comm: tcp://127.0.0.1:38409,Total threads: 1
Dashboard: /proxy/36721/status,Memory: 3.83 GiB
Nanny: tcp://127.0.0.1:41483,
Local directory: /jobfs/137640620.gadi-pbs/dask-scratch-space/worker-c74bc0lt,Local directory: /jobfs/137640620.gadi-pbs/dask-scratch-space/worker-c74bc0lt

0,1
Comm: tcp://127.0.0.1:40313,Total threads: 1
Dashboard: /proxy/34997/status,Memory: 3.83 GiB
Nanny: tcp://127.0.0.1:43663,
Local directory: /jobfs/137640620.gadi-pbs/dask-scratch-space/worker-zig8u58v,Local directory: /jobfs/137640620.gadi-pbs/dask-scratch-space/worker-zig8u58v

0,1
Comm: tcp://127.0.0.1:40361,Total threads: 1
Dashboard: /proxy/32809/status,Memory: 3.83 GiB
Nanny: tcp://127.0.0.1:36377,
Local directory: /jobfs/137640620.gadi-pbs/dask-scratch-space/worker-t9z5yy5y,Local directory: /jobfs/137640620.gadi-pbs/dask-scratch-space/worker-t9z5yy5y


In [5]:
# Directory holding the NetCDF files to process
directory = Path('/g/data/rv74/satellite-products/arc/der/himawari-ahi/solar/p1s/latest/2020/05/05')
# Path.glob yields entries in arbitrary order; sort so the file list is
# reproducible from run to run.
files = sorted(directory.glob("*.nc"))

In [6]:
%%time
def _preprocess(ds):
    return ds[['surface_global_irradiance', 'direct_normal_irradiance', 'surface_diffuse_irradiance']]
# Open all files as one dataset, applying _preprocess to each file as it is opened.
ds = xr.open_mfdataset(
    files,
    preprocess=_preprocess,
    parallel=True,
)

CPU times: user 2.82 s, sys: 725 ms, total: 3.54 s
Wall time: 14.7 s
