In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Load the autoreload extension; mode 2 re-imports edited modules
# automatically, so changes to local packages are picked up without a
# kernel restart.
%load_ext autoreload
%autoreload 2

In [14]:
import intake

import pandas as pd

import dask
from dask.distributed import Client

from joblib import load

from metsim.methods import mtclim
from met_ml.train.fluxnet_etl import get_fluxnet, get_meta

In [2]:
# Create the output directory that write_metsim saves its CSV files into
# (-p: no error if it already exists, parents are created as needed).
!mkdir -p ../data/metsim

In [3]:
# Start a local dask.distributed cluster and connect a client to it.
# 18 workers x 2 threads matches the 36 cores reported in the output below;
# adjust these numbers when running on a different machine.
client = Client(n_workers=18, threads_per_worker=2)
client

0,1
Client  Scheduler: tcp://127.0.0.1:43980  Dashboard: proxy/8787/status,Cluster  Workers: 18  Cores: 36  Memory: 85.90 GB


In [15]:
# Catalog of the FLUXNET data files (passed to get_fluxnet further down).
cat = intake.Catalog("../data/fluxnet/catalog.yml")

# BIF metadata workbook reduced to a single Series of DATAVALUE entries,
# keyed by (SITE_ID, VARIABLE).
all_site_meta = (
    pd.read_excel("../data/fluxnet/FLX_AA-Flx_BIF_LATEST.xlsx")
    .set_index(["SITE_ID", "VARIABLE"])
    .loc[:, "DATAVALUE"]
)

# One row per site; the preview below shows lat / lon / elev columns.
meta = pd.DataFrame.from_dict(get_meta(all_site_meta), orient="index")

display(meta.head())


Unnamed: 0,lat,lon,elev
AR-SLu,-33.4648,-66.4598,508.0
AR-Vir,-28.2395,-56.1886,105.0
AT-Neu,47.11667,11.3175,970.0
AU-Ade,-13.0769,131.1178,76.0
AU-ASM,-22.283,133.249,606.0


In [4]:
# Build the combined forcing table for all sites from the catalog and the
# site metadata.
# NOTE(review): from_cache=False presumably forces a rebuild instead of
# reading a cached copy -- confirm in met_ml.train.fluxnet_etl.get_fluxnet.
fluxnet_df = get_fluxnet(cat, all_site_meta, from_cache=False)

SITE_ID  VARIABLE            
AR-SLu   MAP                                      400
         COUNTRY                            Argentina
         DOI                     10.18140/FLX/1440191
         DOI_DATAPRODUCT                  FLUXNET2015
         DOI_CONTRIBUTOR_NAME       Patricio Magliano
Name: DATAVALUE, dtype: object

In [7]:
# Preview the first rows; the output below shows a two-level index
# (site id, TIMESTAMP_START) with the forcing variables as columns.
fluxnet_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,P,t_min,t_max,SW_IN_F,LW_IN_F,PA_F,RH,t,lat,elev
Unnamed: 0_level_1,TIMESTAMP_START,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
AR-SLu,2009-01-01,0.0,18.632,23.105,79.239667,380.892,95.294625,,1,-33.4648,508.0
AR-SLu,2009-01-02,0.0,17.421,26.319,254.912292,369.2315,95.597354,,2,-33.4648,508.0
AR-SLu,2009-01-03,0.0,16.709,30.4,342.819187,346.828625,95.650583,,3,-33.4648,508.0
AR-SLu,2009-01-04,0.0,20.551,33.735,410.918083,338.0235,95.272437,,4,-33.4648,508.0
AR-SLu,2009-01-05,0.0,21.835,32.551,306.377375,367.396125,94.955813,,5,-33.4648,508.0


In [16]:
from metsim.metsim import MetSim, wrap_run_cell
import metsim.constants as cnst

# MetSim settings that are copied unchanged from MetSim.params.
param_keys = [
    'sw_prec_thresh',
    'lw_cloud',
    'lw_type',
    'tday_coef',
    'tdew_tol',
    'tmax_daylength_fraction',
    'rain_scalar',
    'lapse_rate',
    'utc_offset',
    'prec_type',
    'calendar',
]

# Copy the selected entries, then add the one setting chosen here.
# 'utc_offset' is deliberately left at the value found in MetSim.params.
params = dict((key, MetSim.params[key]) for key in param_keys)
# NOTE(review): 60 is presumably minutes, matching the hourly out_times
# built in run_metsim -- confirm against the MetSim documentation.
params.update(time_step=60)
params

{'sw_prec_thresh': 0.0,
 'lw_cloud': 'cloud_deardorff',
 'lw_type': 'prata',
 'tday_coef': 0.45,
 'tdew_tol': 1e-06,
 'tmax_daylength_fraction': 0.67,
 'rain_scalar': 0.75,
 'lapse_rate': 0.0065,
 'utc_offset': False,
 'prec_type': 'uniform',
 'calendar': 'standard',
 'time_step': 60}

In [94]:
@dask.delayed
def run_metsim(df, site_meta, disagg=True):
    """Run MetSim's mtclim method for a single site.

    Parameters
    ----------
    df : pandas.DataFrame
        Daily forcings for one site, indexed by timestamp. Must contain the
        columns ``t_min``, ``t_max`` and ``P``. The frame is not modified.
    site_meta : mapping
        Provides the ``lat``, ``lon`` and ``elev`` values for the site.
    disagg : bool, default True
        Forwarded to ``wrap_run_cell``; also selects which output variables
        are returned.

    Returns
    -------
    The first element of the ``wrap_run_cell`` result, restricted to the
    variables selected by ``disagg``.

    Notes
    -----
    Reads the notebook-level ``params`` dict and ``cnst`` module.
    """
    # Work on a copy: dask.delayed functions should not mutate their inputs,
    # and the caller hands the same site frame to both the disagg=True and
    # disagg=False runs.
    forcing = df.copy()
    forcing['dtr'] = forcing['t_max'] - forcing['t_min']
    forcing['smoothed_dtr'] = forcing['dtr'].rolling(30).mean()
    forcing['seasonal_prec'] = cnst.DAYS_PER_YEAR * forcing['P'].rolling(90).mean()
    forcing['lat'] = site_meta['lat']
    forcing['lon'] = site_meta['lon']
    forcing['elev'] = site_meta['elev']
    forcing = forcing.rename_axis('time')

    # Build each dataset from its own renamed frame so ds and state stay
    # independent objects.
    # NOTE(review): ds starts at position 89 (the first row where the 90-day
    # rolling mean can be defined) while state covers positions 0-89, so row
    # 89 appears in both -- confirm this one-row overlap is intended.
    ds = forcing.rename(columns={'P': 'prec'}).to_xarray().isel(time=slice(89, None))
    state = forcing.rename(columns={'P': 'prec'}).to_xarray().isel(time=slice(None, 90))

    # Hourly output stamps from the first forcing day through 23:00 of the last.
    times = ds.indexes['time']
    out_times = pd.date_range(times[0], times[-1] + pd.Timedelta(hours=23), freq='1H')

    if disagg:
        variables = ['shortwave', 'temp', 'vapor_pressure', 'rel_humid', 'air_pressure',
                     'spec_humid', 'tskc', 'longwave', 'prec']
    else:
        variables = ['daylength', 'potrad', 'tt_max', 't_day', 'tfmax', 'tskc', 'tdew',
                     'vapor_pressure', 'shortwave', 'pet']

    return wrap_run_cell(mtclim.run, params, ds, state, disagg, out_times)[0][variables]


def to_daily(df):
    """Aggregate a sub-daily frame to daily values.

    Every column is averaged over each calendar day, except ``prec``, which
    is replaced by its daily sum.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a datetime-like index and a ``prec`` column.

    Returns
    -------
    pandas.DataFrame
        One row per day with the same columns as ``df``.
    """
    daily = df.resample('1D')
    out = daily.mean()
    # Select the 'prec' column before summing: assigning the whole summed
    # frame to a single column fails as soon as df has more than one column.
    out['prec'] = daily['prec'].sum()
    return out

@dask.delayed
def write_metsim(df, name, suffix):
    """Save ``df`` as a CSV file under ../data/metsim and return its path.

    ``name`` and ``suffix`` are interpolated into the file name.
    """
    target = f'../data/metsim/metsim_{name}_{suffix}.csv'
    df.to_csv(target)
    return target

In [99]:
# Queue two delayed write tasks per site, in this order: the disaggregated
# run (suffix 'HH'), then the non-disaggregated run (suffix 'DD').
# Only the task graph is built here.
tasks = []
for name, site_meta in meta.iterrows():
    df = fluxnet_df.loc[name]
    for disagg, suffix in ((True, 'HH'), (False, 'DD')):
        out = run_metsim(df, site_meta, disagg=disagg)
        tasks.append(write_metsim(out, name, suffix))

In [102]:
# Submit all queued tasks. dask.persist returns a tuple with one entry per
# argument, so [0] recovers the list of Delayed objects shown in the output
# below.
test = dask.persist(tasks)[0]
test

[Delayed('write_metsim-b636880b-0c8b-48dd-ad81-e62e57adc84a'),
 Delayed('write_metsim-002a3a31-1f69-4ce7-9bc8-d51023acb935'),
 Delayed('write_metsim-0de36960-9454-4e0b-b0d5-f4ca18719833'),
 Delayed('write_metsim-d213699c-504a-4351-91a0-2c274e39a7e8'),
 Delayed('write_metsim-d9f6e0d8-c795-4af7-958b-77ce55acff38'),
 Delayed('write_metsim-ac6e8c70-d7f2-4c57-b774-041b59420011'),
 Delayed('write_metsim-a2022b30-5f5e-4d5a-a502-2fbe1fb17e5d'),
 Delayed('write_metsim-8605f68a-3914-4e25-9bca-2733f6bd1543'),
 Delayed('write_metsim-e3e3a2c4-3f40-4eb0-b7fa-254cdc79103e'),
 Delayed('write_metsim-60b7f876-92de-429a-9d11-2a0c5549f959'),
 Delayed('write_metsim-0f3257e2-602d-4f98-b564-95418d21b307'),
 Delayed('write_metsim-f5eb980e-ca00-4f93-ad4d-3ec99cd4ce60'),
 Delayed('write_metsim-513efe26-ffc8-4b69-96e0-14ac01d0a93a'),
 Delayed('write_metsim-e9b1778d-893f-4a7e-8cf0-3eab23767f36'),
 Delayed('write_metsim-ff75f181-f88a-4de2-8460-c5728ed9b6c3'),
 Delayed('write_metsim-ed74ee05-ccd3-45be-a61c-69051284