In [1]:
import numpy as np
import pandas as pd
import xarray as xr

import dask
# Turn off dask's "split large chunks" behaviour for array slicing.
dask.config.set({"array.slicing.split_large_chunks": False})

<dask.config.set at 0x2b6843b165c0>

In [2]:
############
# Dask
############
from dask.distributed import Client
from dask_jobqueue import PBSCluster

# Each PBS job requests 1 core and 40 GB for its worker, with a 30 minute walltime.
cluster = PBSCluster(
    cores=1,
    memory="40GB",
    resource_spec="pmem=40GB",
    # account='open',
    # Scheduler directives go in job_extra_directives (dask-jobqueue writes the
    # "#PBS " prefix itself). Supplied through worker_extra_args the text was
    # appended to the worker command line instead of the job header, so the
    # rhel7 feature was never actually requested.
    job_extra_directives=["-l feature=rhel7"],
    walltime="00:30:00",
)

cluster.scale(jobs=20)  # ask for jobs

# Attach this notebook session to the cluster's scheduler.
client = Client(cluster)

# Last expression: render the client summary widget.
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.102.201.219:39722,Workers: 0
Dashboard: /proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [3]:
# ------------------------------------------------------------------
# Filesystem locations used by the cells below
# ------------------------------------------------------------------

# Root directory the per-year crop input files are read from.
crop_data_path = (
    "/gpfs/group/kaf26/default/private/WBM_data"
    "/squam.sr.unh.edu/US_CDL_v3_data/crops/MIRCA_CDL-US-M"
)

# Directory the averaged NetCDF outputs are written to.
out_path = (
    "/gpfs/group/kaf26/default/dcl5300"
    "/wbm_soilM_crop_uc_lafferty-etal-2024-tbd_DATA/nonclim_proj_inputs"
)

## Non-climate inputs for projections

In [None]:
%%time
# Fallow fraction: open one dask-chunked file per year, stack the years along
# "time", average by day of year and write the result to out_path.
# NOTE(review): this cell covers 2009-2015 while the other variables in this
# notebook use 2009-2016 — confirm the shorter range is intentional.
fallow_by_year = [
    xr.open_dataset(f"{crop_data_path}/crop_fraction/{year}/MC_fallow_fr.nc", chunks="auto")
    for year in range(2009, 2016)
]
ds_fallow_fr = xr.concat(fallow_by_year, dim="time")

# Rechunk so every block holds the whole time axis over a 100 x 100 lat/lon tile.
fallow_rechunked = ds_fallow_fr.chunk({"time": -1, "lat": 100, "lon": 100})
fallow_doy_mean = fallow_rechunked.groupby("time.dayofyear").mean(dim="time")
fallow_doy_mean.to_netcdf(f"{out_path}/MC_fallow_fr.nc")

In [None]:
def _write_dayofyear_mean(subdir, stem, years=range(2009, 2017)):
    """Average a per-year crop input by day of year and write it to ``out_path``.

    Opens ``{crop_data_path}/{subdir}/{year}/MC_{stem}.nc`` for every year in
    ``years``, concatenates the datasets along ``time``, groups them by
    ``time.dayofyear``, takes the mean over ``time`` and writes the result to
    ``{out_path}/{stem}.nc``.

    Parameters
    ----------
    subdir : str
        Directory below ``crop_data_path`` that holds the per-year folders.
    stem : str
        File name without the ``MC_`` prefix and the ``.nc`` suffix; also used
        as the output file name.
    years : iterable of int, optional
        Years to include. Defaults to 2009 through 2016 inclusive.

    Returns
    -------
    xarray.Dataset
        The concatenated multi-year dataset (before averaging), so callers can
        keep a handle on it as the original cell did.
    """
    yearly = [
        xr.open_dataset(f"{crop_data_path}/{subdir}/{year}/MC_{stem}.nc")
        for year in years
    ]
    combined = xr.concat(yearly, dim="time")
    combined.groupby("time.dayofyear").mean(dim="time").to_netcdf(f"{out_path}/{stem}.nc")
    return combined


# Non-rice irrigated: AwCap, CDF, fraction, Kc
ds_nonrice_irr_awCap_av = _write_dayofyear_mean("awCap_av", "nonrice_irr_awCap_av")
ds_nonrice_irr_CDF_av = _write_dayofyear_mean("CDF_av", "nonrice_irr_CDF_av")
ds_nonrice_irr_fr_av = _write_dayofyear_mean("crop_fraction_av", "nonrice_irr_fr_av")
ds_nonrice_irr_Kc_av = _write_dayofyear_mean("Kc_av", "nonrice_irr_Kc_av")

# Rainfed: AwCap, fraction, Kc
ds_rfd_awCap_av = _write_dayofyear_mean("awCap_av", "rfd_awCap_av")
ds_rfd_fr_av = _write_dayofyear_mean("crop_fraction_av", "rfd_fr_av")
ds_rfd_Kc_av = _write_dayofyear_mean("Kc_av", "rfd_Kc_av")

# Rice irrigated: AddedWater, AwCap, CDF, fraction, Kc
ds_rice_irr_AddedWater_av = _write_dayofyear_mean("added_water", "rice_irr_AddedWater_av")
ds_rice_irr_awCap_av = _write_dayofyear_mean("awCap_av", "rice_irr_awCap_av")
ds_rice_irr_CDF_av = _write_dayofyear_mean("CDF_av", "rice_irr_CDF_av")
ds_rice_irr_fr_av = _write_dayofyear_mean("crop_fraction_av", "rice_irr_fr_av")
ds_rice_irr_Kc_av = _write_dayofyear_mean("Kc_av", "rice_irr_Kc_av")

In [None]:
# Merge the yearly MERRA2 CLDTOT files for 2009-2016 into one dataset.
# The original f-string contained a stray ")}" after {data_path}, which is a
# SyntaxError; it has been removed.
# NOTE(review): `data_path` is not defined in the cells visible here, and the
# "_MONTH_" / "_DAY_" parts of the file name look like unfilled template
# placeholders — confirm both before running this cell.
ds_cldtot = xr.merge(
    [
        xr.open_dataset(f"{data_path}/MERRA2.CLDTOT.{year}-_MONTH_-_DAY_.nc")
        for year in range(2009, 2017)
    ]
)