
beginning of daily time gridding

1 parent 0a2f26a commit 70db31781f3ac7cfb0eb29623bb209d95c8abe73 Eva Schiffer committed Feb 22, 2014
Showing with 201 additions and 31 deletions.
  1. +18 −1 stg/constants.py
  2. +41 −0 stg/io_manager.py
  3. +30 −0 stg/modis_io.py
  4. +3 −2 stg/plot_tools.py
  5. +103 −13 stg/space_time_gridding.py
  6. +6 −15 stg/time_gridding.py
stg/constants.py
@@ -43,6 +43,8 @@
DAY_MASK_KEY = "day_mask"
NIGHT_MASK_KEY = "night_mask"
+PLOT_SUFFIX = "plot_binary"
+
# these are suffixes used for temporary files
DAY_TEMP_SUFFIX = "_daytemp"
NIGHT_TEMP_SUFFIX = "_nighttemp"
@@ -51,12 +53,23 @@
DAY_NOBS_TEMP_SUFFIX = "_daynobstemp"
NIGHT_NOBS_TEMP_SUFFIX = "_nightnobstemp"
-# these are suffixes used for the final, packed files
+# these are suffixes used for the final, packed space gridded files
DAY_SUFFIX = "_dayfinal"
NIGHT_SUFFIX = "_nightfinal"
DAY_NOBS_SUFFIX = "_daynobsfinal"
NIGHT_NOBS_SUFFIX = "_nightnobsfinal"
+# these are suffixes for time gridded files
+HIGH_MODIFIER = "_high"
+MID_MODIFIER = "_mid"
+LOW_MODIFIER = "_low"
+DAILY_MEAN_SUFFIX = "_daily_mean"
+DAILY_W_AVG_SUFFIX = "_daily_weighted_average"
+DAILY_MIN_SUFFIX = "_daily_min"
+DAILY_MAX_SUFFIX = "_daily_max"
+DAILY_STD_SUFFIX = "_daily_std"
+DAILY_FRACTION_SUFFIX = "_daily_fraction"
+
# keys for keeping track of data sets
DAY_SET_KEY = "day"
NIGHT_SET_KEY = "night"
@@ -68,3 +81,7 @@
SET_FINAL_DATA_SUFF_KEY = "output data suffix"
SET_FINAL_NOBS_SUFF_KEY = "output nobs suffix"
+# for keeping track of data sets for time gridding
+NOBS_KEY = "nobs"
+SPACE_GRID_KEY = "space gridded data"
+BLANK_STEM_KEY = "stem"
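
The new time-gridding suffixes are appended to a file's base stem when the daily statistics are written out (see stg/space_time_gridding.py below). A minimal sketch of the resulting naming convention, using a hypothetical base stem invented for illustration (real stems come from build_name_stem in stg/io_manager.py):

    from stg.constants import DAILY_MIN_SUFFIX, DAILY_MEAN_SUFFIX

    # hypothetical base stem, invented for illustration
    base_stem = "modis_aqua_ctp_20140222"

    print(base_stem + DAILY_MIN_SUFFIX)   # modis_aqua_ctp_20140222_daily_min
    print(base_stem + DAILY_MEAN_SUFFIX)  # modis_aqua_ctp_20140222_daily_mean
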
stg/io_manager.py
@@ -18,6 +18,7 @@
import sys
import logging
import os
+import re
import numpy
@@ -49,6 +50,9 @@
DAY_SUFFIX, NIGHT_SUFFIX,
DAY_NOBS_SUFFIX, NIGHT_NOBS_SUFFIX]
+# time gridding suffixes
+TIME_FINAL_SUFFIXES = [ ]
+
# the strftime format for date stamping our files
DATE_STAMP_FORMAT = "%Y%m%d"
@@ -185,6 +189,43 @@ def build_name_stem (variable_name, date_time=None, satellite=None, algorithm=No
# date_stamp + "_" + var_name + suffix
+def get_date_stamp_from_file_name (file_name) :
+ """given a file name starting with a name stem created by build_name_stem, determine the date stamp for the file
+ """
+
+ date_stamp_to_return = None
+
+ # make sure we only have the stem
+ stem = file_name.split('.')[0]
+
+ # break the stem up by underscores
+ split_stem = stem.split('_')
+ for section in split_stem :
+
+ if re.match(r'\d\d\d\d\d\d\d\d', section) :
+
+ date_stamp_to_return = section
+
+ return date_stamp_to_return
+
+def organize_space_gridded_files (file_name_list) :
+ """organize the files into sets based on their names
+
+ Note: the type of the files is determined by the first file, so the
+ function will not handle mixed sets of files
+ """
+
+ to_return = { }
+
+ first_file = file_name_list[0]
+ if ( first_file.startswith(INST_MODIS) or
+ first_file.startswith(SAT_AQUA) or
+ first_file.startswith(SAT_TERRA) ) :
+ to_return = modis_io.organize_space_gridded_files(file_name_list)
+ # FUTURE, needs a statement for ctp
+
+ return to_return
+
def main():
import optparse
from pprint import pprint
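
A minimal usage sketch of the two new helpers together, assuming space-gridded file names shaped like the ones build_name_stem produces and starting with one of the recognized instrument/satellite prefixes; the names below are invented for illustration:

    import stg.io_manager as io_manager

    # hypothetical space-gridded file names, invented for illustration
    files = ["aqua_modis_ctp_20140222_dayfinal.float32",
             "aqua_modis_ctp_20140222_daynobsfinal.float32"]

    # finds the 8-digit underscore-delimited section, here "20140222"
    stamp = io_manager.get_date_stamp_from_file_name(files[0])

    # dispatches to modis_io.organize_space_gridded_files based on the first name's prefix
    file_sets = io_manager.organize_space_gridded_files(files)
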
stg/modis_io.py
@@ -18,6 +18,8 @@
import sys
import logging
+from collections import defaultdict
+
import numpy
from pyhdf.SD import SD,SDC, SDS, HDF4Error
@@ -251,6 +253,34 @@ def satellite_zenith_angle_to_scan_angle (sat_zenith_data) :
return scan_angle_data
+def organize_space_gridded_files (file_name_list) :
+ """given some files, organize them by date and group
+ """
+
+ to_return = defaultdict(dict)
+
+ for file_name in file_name_list :
+
+ # figure out the raw stem without a suffix
+ last_ = file_name.rfind('_')
+ stem = file_name[0:last_]
+
+ if file_name.find(DAY_SUFFIX) >= 0 :
+ to_return[DAY_SET_KEY][SPACE_GRID_KEY] = file_name
+ to_return[DAY_SET_KEY][BLANK_STEM_KEY] = stem
+ if file_name.find(DAY_NOBS_SUFFIX) >= 0 :
+ to_return[DAY_SET_KEY][NOBS_KEY] = file_name
+ to_return[DAY_SET_KEY][BLANK_STEM_KEY] = stem
+
+ if file_name.find(NIGHT_SUFFIX) >= 0 :
+ to_return[NIGHT_SET_KEY][SPACE_GRID_KEY] = file_name
+ to_return[NIGHT_SET_KEY][BLANK_STEM_KEY] = stem
+ if file_name.find(NIGHT_NOBS_SUFFIX) >=0 :
+ to_return[NIGHT_SET_KEY][NOBS_KEY] = file_name
+ to_return[NIGHT_SET_KEY][BLANK_STEM_KEY] = stem
+
+ return to_return
+
def main():
import optparse
from pprint import pprint
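
For the hypothetical file names sketched above, the structure this returns would look roughly as follows; the literal keys are the values that DAY_SET_KEY, SPACE_GRID_KEY, NOBS_KEY, and BLANK_STEM_KEY take in stg/constants.py, and only the file names are invented:

    {
        "day": {
            "space gridded data": "aqua_modis_ctp_20140222_dayfinal.float32",
            "nobs":               "aqua_modis_ctp_20140222_daynobsfinal.float32",
            "stem":               "aqua_modis_ctp_20140222",
        },
    }

Note that the stem is everything before the file name's last underscore, so the day and nobs files of a set must share it for the last-one-wins assignment of BLANK_STEM_KEY to be safe.
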
stg/plot_tools.py
@@ -25,7 +25,8 @@
from matplotlib import pyplot as plt
import matplotlib.cm as cm
-from stg.stg_util import make_lat_lon_grids
+from stg.stg_util import make_lat_lon_grids
+from stg.constants import *
import keoni.fbf.workspace as Workspace
from mpl_toolkits.basemap import Basemap
@@ -175,7 +176,7 @@ def save_last_plot (fbf_attr_name, dpi_to_use=DEFAULT_DPI) :
save the last figure plotted with plt to an appropriately named file
"""
- plt.savefig("plot_binary.%s.png" % fbf_attr_name, dpi=dpi_to_use)
+ plt.savefig(PLOT_SUFFIX + "." + fbf_attr_name + ".png", dpi=dpi_to_use)
plt.close()
def sci_float(x):
stg/space_time_gridding.py
@@ -35,6 +35,7 @@
import stg.general_guidebook as general_guidebook
import stg.io_manager as io_manager
import stg.space_gridding as space_gridding
+import stg.time_gridding as time_gridding
# TODO, in the long run handle the dtype more flexibly
TEMP_DATA_TYPE = numpy.dtype(numpy.float32)
@@ -110,8 +111,13 @@ def main():
help="set the size of the output grid's cells in degrees")
parser.add_option('-a', '--min_scan_angle', dest="minScanAngle", type='float', default=60.0,
help="the minimum scan angle that will be considered useful")
- parser.add_option('-d', '--do_process_with_little_data', dest="overrideMinCheck",
+ parser.add_option('-p', '--do_process_with_little_data', dest="overrideMinCheck",
action="store_true", default=False, help="run the full daily compilation even if many files are missing or unreadable")
+ parser.add_option('-f', '--fixed_nobs_cutoff', dest="fixedNobsCutoff", type='float', default=None,
+ help="the minimum number of nobs that must be present for data to be considered when time gridding")
+ parser.add_option('-d', '--dynamic_nobs_cutoff', dest="dynamicNobsCutoff", type='float', default=None,
+ help="the minimum nobs that must be present for data to be considered when time gridding," +
+ " expressed a fraction of the std from the mean")
# parse the user's options from the command line
options, args = parser.parse_args()
@@ -321,7 +327,7 @@ def space_day(*args) :
LOG.warn ("Daily file(s) will be produced, but data may be unusable for this day.")
else :
LOG.critical("Daily file(s) will not be produced for this day due to lack of data.")
- LOG.critical("If you wish to produce the daily file(s), rerun the program using the \'-d\' option.")
+ LOG.critical("If you wish to produce the daily file(s), rerun the program using the \'-p\' option.")
# only collect the daily data if we have enough files or have turned off the minimum check
if ( (sucessful_files >= (expected_num_files * EXPECTED_FRACTION_OF_FILES_PER_DAY)) or
@@ -393,22 +399,106 @@ def stats_day(*args) :
files.
"""
- # set up some of our input from the caller for easy access
- desired_variables = list(args) if len(args) > 0 else [ ]
- input_path = options.inputPath
- output_path = options.outputPath
-
+ # set up some of our caller-determined settings for easy access
+ input_path = options.inputPath
+ output_path = options.outputPath
+ fix_nobs_cutoff = options.fixedNobsCutoff if options.fixedNobsCutoff >= 0 else None
+ dyn_nobs_cutoff = options.dynamicNobsCutoff if options.dynamicNobsCutoff >= 0 else None
# check the directory for sets of daily files
+ expected_files_by_date = defaultdict(list)
+ possible_files = os.listdir(input_path)
+ for file_name in sorted(possible_files) :
+ if file_name.startswith(PLOT_SUFFIX) :
+ LOG.debug("Disregarding plot file: " + str(file_name))
+ else :
+ date_stamp = io_manager.get_date_stamp_from_file_name(file_name)
+ if date_stamp is None :
+ LOG.debug("Disregarding file with no date stamp: " + str(file_name))
+ else :
+ expected_files_by_date[date_stamp].append(file_name)
- # for each set of daily files
-
- # load the main data
- # load the nobs
+ # organize the daily files
+ organized_files = { }
+ for date_stamp in expected_files_by_date.keys() :
+
+ organized_files[date_stamp] = io_manager.organize_space_gridded_files(expected_files_by_date[date_stamp])
- # calculate the std, min, max, and (weighted or non-weighted) average
+ # set up the variable workspace so we can load our input files
+ var_workspace = Workspace.Workspace(dir=input_path)
- # save the various stats to files
+ # for each set of daily files
+ for date_stamp in organized_files.keys() :
+
+ LOG.debug("Processing files for date stamp " + str(date_stamp))
+
+ this_day = organized_files[date_stamp]
+
+ for set_key in this_day.keys() :
+
+ LOG.debug("Processing file set for " + str(set_key))
+
+ # pull the base stem for ease of use
+ base_stem = this_day[set_key][BLANK_STEM_KEY]
+
+ # load the main space gridded data
+ main_stem = this_day[set_key][SPACE_GRID_KEY].split('.')[0]
+ gridded_data = var_workspace[main_stem][:]
+
+ # load the nobs
+ nobs_stem = this_day[set_key][NOBS_KEY].split('.')[0]
+ nobs_data = var_workspace[nobs_stem][:]
+
+ # build the cutoff mask
+ bad_data = time_gridding.create_sample_size_cutoff_mask(gridded_data, nobs_data,
+ nobs_data, # this should be the overall nobs, but I don't have those right now!
+ fixed_cutoff=fix_nobs_cutoff,
+ dynamic_std_cutoff=dyn_nobs_cutoff)
+ # apply the cutoff mask
+ clean_gridded_data = gridded_data.copy()
+ clean_gridded_data[bad_data] = numpy.nan
+
+ # calculate the std, min, max, and (weighted or non-weighted) average
+ min_values = numpy.nanmin(clean_gridded_data, axis=0)
+ max_values = numpy.nanmax(clean_gridded_data, axis=0)
+ std_values = numpy.nanstd(clean_gridded_data, axis=0)
+ mean_values = numpy.nansum(clean_gridded_data, axis=0) / numpy.sum(numpy.isfinite(clean_gridded_data), axis=0)
+
+ # calculate the weighted average contribution for this day
+ w_avg_values = time_gridding.calculate_partial_weighted_time_average(clean_gridded_data, nobs_data[0])
+
+ # calculate the data fraction
+ #fraction = numpy.zeros(mean_values.shape, dtype=TEMP_DATA_TYPE)
+ #valid_mask = nobs_data[0] > 0
+ #fraction[valid_mask] = numpy.sum(numpy.isfinite(clean_gridded_data), axis=0)[valid_mask] / nobs_data[0][valid_mask]
+ fraction = numpy.sum(numpy.isfinite(clean_gridded_data), axis=0) / nobs_data[0]
+
+ # save the various stats to files
+
+ # save the min and max
+ io_manager.save_data_to_file(base_stem + DAILY_MIN_SUFFIX,
+ min_values.shape, output_path, min_values,
+ TEMP_DATA_TYPE, file_permissions="w")
+ io_manager.save_data_to_file(base_stem + DAILY_MAX_SUFFIX,
+ max_values.shape, output_path, max_values,
+ TEMP_DATA_TYPE, file_permissions="w")
+
+ # save the std and the averages
+ io_manager.save_data_to_file(base_stem + DAILY_STD_SUFFIX,
+ std_values.shape, output_path, std_values,
+ TEMP_DATA_TYPE, file_permissions="w")
+ io_manager.save_data_to_file(base_stem + DAILY_MEAN_SUFFIX,
+ mean_values.shape, output_path, mean_values,
+ TEMP_DATA_TYPE, file_permissions="w")
+ io_manager.save_data_to_file(base_stem + DAILY_W_AVG_SUFFIX,
+ w_avg_values.shape, output_path, w_avg_values,
+ TEMP_DATA_TYPE, file_permissions="w")
+
+ # save the fraction
+ io_manager.save_data_to_file(base_stem + DAILY_FRACTION_SUFFIX,
+ fraction.shape, output_path, fraction,
+ TEMP_DATA_TYPE, file_permissions="w")
+
def stats_month(*args) :
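
The per-day mean above is computed by hand as a nansum divided by the count of finite values, which ignores the cells the cutoff mask set to NaN; a self-contained sketch of the same idiom on invented data. Note that cells with no finite samples divide by zero and come out NaN (with a runtime warning), which is also why a masked variant of the fraction calculation is left commented out above.

    import numpy

    # three hypothetical layers of space-gridded samples; NaN marks removed data
    stack = numpy.array([[1.0, numpy.nan],
                         [3.0, numpy.nan],
                         [5.0, 2.0]])

    counts = numpy.sum(numpy.isfinite(stack), axis=0)   # [3, 1]
    means  = numpy.nansum(stack, axis=0) / counts       # [3.0, 2.0]
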
stg/time_gridding.py
@@ -48,26 +48,17 @@ def create_sample_size_cutoff_mask (data_array, nobs_array,
return bad_data_mask
-def calculate_partial_weighted_time_average (daily_data_array, daily_nobs_array,
- overall_sum_num_measurments_array, overall_sum_nobs_array) :
+def calculate_partial_weighted_time_average (daily_data_array, daily_nobs_array) :
"""
- given a set of daily data and their matching number of observations array as well as the
- sum of the overall number of measurments and number of observations per cell (ie. the collapsed
- 2D arrays summed over the whole time period being analyzed), calculate the contribution from this
- day to the weighted time average
+ given a set of daily data and a matching number-of-observations array,
+ calculate the contribution from this day to the weighted time average
Note: to calculate the weighted time average over the full period, add up all of the daily
- partial weighted time averages in that period
+ partial weighted time averages in that period and divide
+ by (overall_sum_num_measurments_array / overall_sum_nobs_array)
"""
good_measurments_this_day = numpy.sum(numpy.isfinite(daily_data_array), axis=0)
- this_day_weighted_average = ( (daily_data_array * (good_measurments_this_day / daily_nobs_array))
- / (overall_sum_num_measurments_array / overall_sum_nobs_array) )
+ this_day_weighted_average = numpy.nansum(daily_data_array, axis=0) * (good_measurments_this_day / daily_nobs_array)
return this_day_weighted_average
-
-
-
-
-
-
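
Per the revised docstring, recovering the full-period weighted time average is now a separate combination step performed by the caller; a minimal sketch of that step with invented single-cell grids, spelling the overall arrays the way the docstring does:

    import numpy

    # partial weighted averages from two hypothetical days (invented values)
    daily_partials = numpy.array([[4.0], [6.0]])

    # measurement and observation counts summed over the whole period (also invented)
    overall_sum_num_measurments_array = numpy.array([5.0])
    overall_sum_nobs_array            = numpy.array([10.0])

    # full-period weighted time average: sum of the daily partials over the overall ratio
    weighted_avg = (numpy.sum(daily_partials, axis=0)
                    / (overall_sum_num_measurments_array / overall_sum_nobs_array))
    # -> [20.0]
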
