In [1]:
import xarray as xr
import numpy as np
import pandas as pd
import os
from datetime import date, timedelta
from dateutil.relativedelta import relativedelta

In [58]:
# Root directory of the daily MERRA2 files; one sub-folder per year.
file_location = "/glade/collections/rda/data/ds313.3/0.9x1.25"
start_year = 2020
end_year = 2023  # exclusive: the last day read is 31 December of end_year - 1
name_list = []
# File-name template; "YYYYMMDD" is replaced with each day's date stamp.
MERRA2_name = "MERRA2_0.9x1.25_YYYYMMDD.nc"
start_date = date(start_year, 1, 1)
end_date = date(end_year, 1, 1)
delta_days = (end_date - start_date).days
datasets = []
for d in range(delta_days):
    date_now = start_date + timedelta(days=d)
    folder = str(date_now.year)
    date_now = date_now.strftime("%Y%m%d")
    filename = os.path.join(file_location, folder, MERRA2_name.replace("YYYYMMDD", date_now))

    # Raise instead of calling exit(): exit() would shut down the notebook
    # session rather than report a catchable error.
    if not os.path.exists(filename):
        raise FileNotFoundError("File not found : " + filename)

    # The context manager closes the file even if the selection fails, and
    # .load() pulls the selected values into memory before that happens so
    # the later concat never has to touch a closed file.
    with xr.open_dataset(filename) as f_in:
        point = f_in[['PS', 'SHFLX', 'FSDS', 'T']].sel(lat=43.888, lon=273.888, method='nearest')
        datasets.append(point.load())

# Stitch the per-day point extractions into one continuous time series.
combined = xr.concat(datasets, dim='time')
del datasets

In [2]:
def read_BOOI4(file_name, isname='TRUE'):
    """Read a comma-separated station file into a time-indexed DataFrame.

    Parameters
    ----------
    file_name : str or file-like
        When ``isname`` is true, a name that is joined onto the current
        working directory. Otherwise it is handed to ``pandas.read_csv``
        unchanged.
    isname : bool or str, default 'TRUE'
        Whether ``file_name`` should be joined onto the current working
        directory. Booleans are used as given. For backward compatibility
        string flags are also accepted: 'false', 'f', '0', 'no', 'n' and the
        empty string (case-insensitive) mean false, any other string means
        true.

    Returns
    -------
    pandas.DataFrame
        The file contents indexed by the parsed 'valid' column, with an
        added 'tmpk' column holding 'tmpf' converted from degrees Fahrenheit
        to kelvin.
    """
    # A non-empty string such as 'FALSE' is truthy in Python, so string flags
    # must be interpreted explicitly instead of being tested directly.
    if isinstance(isname, str):
        isname = isname.strip().lower() not in ('false', 'f', '0', 'no', 'n', '')

    if isname:
        file_location = os.path.join(os.getcwd(), file_name)
    else:
        file_location = file_name

    dsBOOI4 = pd.read_csv(file_location, sep=',')
    dsBOOI4['valid'] = pd.to_datetime(dsBOOI4['valid'])
    dsBOOI4 = dsBOOI4.set_index('valid')

    # Convert the temperature from degrees Fahrenheit to kelvin.
    dsBOOI4['tmpk'] = (dsBOOI4['tmpf'] - 32) * 5/9 + 273.15

    return dsBOOI4

In [5]:
# The path is already absolute, so it must not be joined onto the working
# directory. Pass a real boolean: the string 'FALSE' is truthy in Python.
a = read_BOOI4(file_name = "/glade/scratch/jinmuluo/MESOCOM_fluxtower_BOOI4/isusm_BOOI4_modified.txt", isname=False)

In [8]:
# Shape of the index subset whose 'speed' column is exactly zero.
calm_mask = a['speed'].values == 0
test = a.index[calm_mask].shape

(2404,)

In [62]:
# Build an hourly time axis that reaches 23:00 on the final MERRA2 day.
# NOTE(review): the 2-hour pad assumes the last MERRA2 sample falls at 21:00 — confirm.
# 'h' replaces the deprecated 'H' frequency alias.
t_hourly = pd.date_range(combined['time'].min().values, combined['time'].max().values + np.timedelta64(2,'h'), freq='h')
# Linearly interpolate every variable onto the hourly axis.
combined_hourly = combined.interp(time=t_hourly, method="linear")
# Fill any remaining gaps (presumably the padded hours past the last sample)
# by linear extrapolation along time.
combined_hourly = combined_hourly.interpolate_na(dim="time", method="linear", fill_value="extrapolate")
combined_hourly

In [67]:
# Report the first and last timestamps of the hourly series.
print(combined_hourly['time'].min().values, combined_hourly['time'].max().values)

# Select the hourly values at the station timestamps whose 'relh' entry equals
# -99 (presumably a missing-data sentinel — confirm against the station file).
flagged_times = a.index.values[a['relh'].values == -99]
combined_hourly.sel(time=flagged_times)

2020-01-01T00:00:00.000000000 2022-12-31T23:00:00.000000000


<bound method Mapping.values of <xarray.Dataset>
Dimensions:  (time: 4645)
Coordinates:
    lat      float32 43.82
    lon      float32 273.7
  * time     (time) datetime64[ns] 2020-12-08T21:00:00 ... 2021-06-29T19:00:00
Data variables:
    PS       (time) float64 9.768e+04 9.754e+04 ... 9.854e+04 9.853e+04
    SHFLX    (time) float64 -21.55 -16.45 -18.52 -20.58 ... 62.82 72.46 52.76
Attributes:
    Conventions:  CF-1.6>