-
Notifications
You must be signed in to change notification settings - Fork 50
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
added option to use forcing start/stop dates and misc improvements to… #93
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ dependencies: | |
- pandas | ||
- netcdf4 | ||
- numba | ||
- bottleneck | ||
- numpy | ||
- scipy | ||
- pytest | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,7 @@ dependencies: | |
- xarray | ||
- pandas | ||
- netcdf4 | ||
- bottleneck | ||
- numba | ||
- numpy | ||
- scipy | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -148,12 +148,83 @@ def __init__(self, params: dict): | |
logger.setLevel(self.params['verbose']) | ||
ch.setLevel(self.params['verbose']) | ||
logger.addHandler(ch) | ||
logger.info("read_domain") | ||
self.domain = io.read_domain(self.params) | ||
self._normalize_times() | ||
logger.info("read_met_data") | ||
self.met_data = io.read_met_data(self.params, self.domain) | ||
self._validate_force_times(force_times=self.met_data['time']) | ||
logger.info("read_state") | ||
self.state = io.read_state(self.params, self.domain) | ||
self.met_data['elev'] = self.domain['elev'] | ||
self.met_data['lat'] = self.domain['lat'] | ||
logger.info("_aggregate_state") | ||
self._aggregate_state() | ||
logger.info("load_inputs") | ||
self.load_inputs() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm wary of having … [comment truncated in capture]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm skeptical that this is actually a problem. This is all happening before we start up MetSim's multiprocessing, which is where I'd imagine any odd behavior came from. The reason I like this method is that we fully define exactly what data we need in the previous lines before we ever load any data. I think this is preferable to blindly loading data we don't know if we need. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That's a good point, and I think it's reasonable to say that no issues will occur after looking at it a bit more. I'm still a bit sensitive to that bug because it took so long to track down.
||
|
||
def _normalize_times(self): | ||
# handle when start/stop times are not specified | ||
for p in ['start', 'stop']: | ||
# TODO: make sure these options get documented | ||
if self.params[p] in [None, 'forcing', '']: | ||
self.params[p] = None | ||
elif isinstance(self.params[p], str): | ||
if ':' in self.params[p]: | ||
# start from config file | ||
date, hour = self.params[p].split(':') | ||
year, month, day = date.split('/') | ||
self.params[p] = pd.datetime(int(year), int(month), | ||
int(day), int(hour)) | ||
elif '/' in self.params[p]: | ||
# end from config file | ||
year, month, day = self.params[p].split('/') | ||
self.params[p] = pd.datetime(int(year), int(month), | ||
int(day)) | ||
else: | ||
self.params[p] = pd.to_datetime(self.params[p]) | ||
else: | ||
self.params[p] = pd.to_datetime(self.params[p]) | ||
|
||
logger.info('start {}'.format(self.params['start'])) | ||
logger.info('stop {}'.format(self.params['stop'])) | ||
|
||
def _validate_force_times(self, force_times): | ||
|
||
for p, i in [('start', 0), ('stop', -1)]: | ||
# infer times from force_times | ||
if self.params[p] is None: | ||
self.params[p] = pd.Timestamp( | ||
force_times.values[i]).to_pydatetime() | ||
|
||
assert self.params['start'] >= pd.Timestamp( | ||
force_times.values[0]).to_pydatetime() | ||
assert self.params['stop'] <= pd.Timestamp( | ||
force_times.values[-1]).to_pydatetime() | ||
|
||
self.params['state_start'] = (self.params['start'] - | ||
pd.Timedelta("90 days")) | ||
self.params['state_stop'] = (self.params['start'] - | ||
pd.Timedelta("1 days")) | ||
logger.info('start {}'.format(self.params['start'])) | ||
logger.info('stop {}'.format(self.params['stop'])) | ||
|
||
logger.info('force start {}', pd.Timestamp( | ||
force_times.values[0]).to_pydatetime()) | ||
logger.info('force stop {}', pd.Timestamp( | ||
force_times.values[-1]).to_pydatetime()) | ||
|
||
logger.info('state start {}'.format(self.params['state_start'])) | ||
logger.info('state stop {}'.format(self.params['state_stop'])) | ||
|
||
def load_inputs(self, close=True): | ||
self.domain = self.domain.load() | ||
self.met_data = self.met_data.load() | ||
self.state = self.state.load() | ||
if close: | ||
self.domain.close() | ||
self.met_data.close() | ||
self.state.close() | ||
|
||
def launch(self): | ||
"""Farm out the jobs to separate processes""" | ||
|
@@ -307,6 +378,9 @@ def setup_output(self, prototype: xr.Dataset=None): | |
# Need to convert some parameters to strings | ||
if k in ['start', 'stop', 'annual', 'mtclim_swe_corr']: | ||
v = str(v) | ||
elif k in ['state_start', 'state_stop']: | ||
# skip | ||
continue | ||
# Don't include complex types | ||
if isinstance(v, dict): | ||
v = json.dumps(v) | ||
|
@@ -326,31 +400,37 @@ def setup_output(self, prototype: xr.Dataset=None): | |
def _aggregate_state(self): | ||
"""Aggregate data out of the state file and load it into `met_data`""" | ||
# Precipitation record | ||
begin_record = self.params['start'] - pd.Timedelta("90 days") | ||
end_record = self.params['start'] - pd.Timedelta("1 days") | ||
record_dates = date_range(begin_record, end_record, | ||
|
||
assert self.state.dims['time'] == 90 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I really don't like this check, because it seems to limit some cases that are pretty useful in my mind. For example, if you have a long timeseries broken up into 1-year chunks, you could previously just use the previous year's data as your state file. Now it looks like you'd have to preprocess all of those files and create an additional state file for each year's input. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not really, our … [comment truncated in capture]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm open to other ideas for how to pin down the behavior in this method, but I was finding it useful to explicitly check the state shape before going into the computations in this method. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah, I didn't catch that's what you were doing with the … [comment truncated in capture]
||
|
||
record_dates = date_range(self.params['state_start'], | ||
self.params['state_stop'], | ||
calendar=self.params['calendar']) | ||
trailing = self.state['prec'].sel(time=record_dates) | ||
trailing = self.state['prec'] | ||
trailing['time'] = record_dates | ||
total_precip = xr.concat([trailing, self.met_data['prec']], dim='time') | ||
total_precip = (cnst.DAYS_PER_YEAR * total_precip.rolling( | ||
time=90).mean().drop(record_dates, dim='time')) | ||
time=90).mean().sel(time=slice(self.params['start'], | ||
self.params['stop']))) | ||
|
||
self.met_data['seasonal_prec'] = total_precip | ||
|
||
# Smoothed daily temperature range | ||
trailing = self.state['t_max'] - self.state['t_min'] | ||
begin_record = self.params['start'] - pd.Timedelta("90 days") | ||
end_record = self.params['start'] - pd.Timedelta("1 days") | ||
record_dates = date_range(begin_record, end_record, | ||
calendar=self.params['calendar']) | ||
|
||
trailing['time'] = record_dates | ||
dtr = self.met_data['t_max'] - self.met_data['t_min'] | ||
sm_dtr = xr.concat([trailing, dtr], dim='time') | ||
sm_dtr = sm_dtr.rolling(time=30).mean().drop(record_dates, dim='time') | ||
self.met_data['smoothed_dtr'] = sm_dtr | ||
|
||
# Put in SWE data | ||
self.state['swe'] = self.state.sel(time=end_record).swe.drop('time') | ||
self.met_data['swe'] = self.state.swe.copy() | ||
if 'time' in self.state['swe'].dims: | ||
self.state['swe'] = self.state['swe'].sel( | ||
time=self.params['state_stop']).drop('time') | ||
else: | ||
self.state['swe'] = self.state['swe'] | ||
self.met_data['swe'] = self.state['swe'].copy() | ||
|
||
def _validate_setup(self): | ||
"""Updates the global parameters dictionary""" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Doesn't this have the chance to break? For example, if
`start=None`
and `stop!=None`,
we will end up with (in the line below) `ds = ds.sel(time=slice(None, stop))`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ds = ds.sel(time=slice(None, stop))
will work. Just like this works: … [example snippet lost in capture]. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Interesting, I had no idea that you could do that.