In [15]:
import enspp.bma as bma
import pandas as pd
from wrfpywind import data_preprocess as pp
import xarray as xr

In [3]:
# Path to the directory holding the ensemble data files (ensds_*.nc).
# Keep the trailing slash: file names are appended directly to this string when the files are opened.
datadir = '../data/'

This data directory should contain one data file for each forecast initialization time. For example:
- ensds_20191201-05.nc
- ensds_20191202-06.nc
- ensds_20191203-07.nc
- etc. 


I think I will use 12/9 as the test case for the AGU, as that will give me 26 total days of testing data. However, for now, I'm going to use 12/8, as that's the last day that I've finished processing so far.

In [4]:
# Forecast initialization time, parsed directly into a pandas Timestamp
t_init = pd.to_datetime('2019-12-08')

In [5]:
# Decide how many days of data you would like to use.
# This sets both how far before t_init the training window starts
# and how many daily initialization dates are generated.
n_days = 7

In [8]:
# Find the first training day by stepping back n_days calendar days
# from the forecast initialization time
d1_training = t_init - pd.DateOffset(days=n_days)

In [9]:
# One start date per training day, spaced daily and beginning at d1_training
start_dates = pd.date_range(start=d1_training, periods=n_days, freq='D')

# Each simulation's end date is its start date shifted forward by 4 days
end_dates = start_dates + pd.DateOffset(days=4)

In [6]:
# Load the buoy observations covering 12-01-2019 through 12-31-2019.
# NOTE(review): data_path is an absolute path on a specific machine — confirm it
# exists (or point it at your own copy of the buoy data) before running elsewhere.
obs = pp.fmt_buoy_wspd(
    data_path='/share/mzhang/jas983/wrf_data/oshwind/wrfpywind/wrfpywind/data/nyserda_buoy/',
    south_dates_str='20190904_20210207',
    north_dates_str='20190812_20210207',
    heights=list(range(20, 201, 20)),  # 20, 40, ..., 200
    start_date='12-01-2019',
    end_date='12-31-2019',
)

I will refer to the current day as day `D`. I retrain the BMA predictive parameters at every new initialization time (i.e., every day), and use that same fit to build the BMA predictive distributions for each lead time in a given forecast.

In [30]:
# Build the training DataFrame from one ensemble file per initialization date.
# Each file is an xarray Dataset containing wind speed data for the entire domain;
# note that you must use a `Dataset` object for the `extract_buoy_da` function to work.
train_frames = []
for start_date, end_date in zip(start_dates, end_dates):
    # File names follow ensds_<start YYYYMMDD>-<end DD>.nc
    ens_path = f"{datadir}ensds_{start_date.strftime('%Y%m%d')}-{end_date.strftime('%d')}.nc"

    # The context manager closes the file handle as soon as this file's
    # training rows have been built, instead of leaving every file open.
    with xr.open_dataset(ens_path) as ensds:
        # Get data only at the buoy locations
        ensda = pp.extract_buoy_da(ensds, varname='wspd_wrf', locations=['south', 'north'])

        # Combine ensemble data and training data into a pd.DataFrame in the correct format
        train_frames.append(bma.fmt_training_data(ensda, obs))

# Concatenate once at the end rather than re-copying a growing DataFrame on every iteration.
# Raises ValueError if no start dates were generated (e.g. n_days == 0).
train_data = pd.concat(train_frames, axis=0)

In [None]:
# Keep only rows timestamped strictly before the WRF initialization time;
# anything at or after t_init is dropped from the training set
train_data = train_data.loc[train_data['Time'] < t_init]


model,Time,Obs,Lee 2017,Draxl 2014a,Draxl 2014b,Veron 2018,Optis 2021
0,2019-12-01 00:00:00,7.6415,2.851422,2.851422,2.851422,2.851422,2.851422
1,2019-12-01 00:10:00,7.7717,2.849505,2.772827,2.841808,2.836270,2.849560
2,2019-12-01 00:20:00,8.2606,3.025170,2.926286,3.034888,3.039857,3.026566
3,2019-12-01 00:30:00,8.1559,3.404312,3.311843,3.427143,3.420393,3.398015
4,2019-12-01 00:40:00,7.1789,3.571643,3.531106,3.631666,3.636799,3.570106
...,...,...,...,...,...,...,...
1136,2019-12-04 23:20:00,9.6691,6.322960,8.222566,7.480564,8.172853,5.446856
1137,2019-12-04 23:30:00,10.0402,6.552176,7.883459,7.452651,8.210824,5.296691
1138,2019-12-04 23:40:00,11.0066,6.551813,8.035707,7.284772,7.723154,5.178216
1139,2019-12-04 23:50:00,10.2970,6.299360,8.077747,7.004590,7.098296,5.105103
