Skip to content

Commit

Permalink
adding a file cutoff for daily processing that's based on the number …
Browse files Browse the repository at this point in the history
…of expected files for that instrument
  • Loading branch information
Eva Schiffer committed Feb 21, 2014
1 parent fc60772 commit cd6c2f0
Show file tree
Hide file tree
Showing 4 changed files with 146 additions and 97 deletions.
2 changes: 1 addition & 1 deletion stg/constants.py
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -41,4 +41,4 @@
LON_KEY = "longitude" LON_KEY = "longitude"
LAT_KEY = "latitude" LAT_KEY = "latitude"
DAY_MASK_KEY = "day_mask" DAY_MASK_KEY = "day_mask"
NIGHT_MASK_KEY = "night_mask" NIGHT_MASK_KEY = "night_mask"
12 changes: 12 additions & 0 deletions stg/general_guidebook.py
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -72,6 +72,18 @@ def get_variable_names (file_name_string, user_requested_names=[ ]) :


return var_names return var_names


def get_expected_files_per_day (file_name_string) :
    """look up how many files a complete day should contain for the instrument that made this file

    Returns the expected daily file count for the instrument, or None
    when the file name is not recognized as coming from a known instrument.
    """

    # MODIS is the only instrument handled here so far
    # FUTURE, needs a statement for ctp
    if modis_guidebook.is_MODIS_file(file_name_string) :
        return modis_guidebook.EXPECTED_FILES_PER_DAY

    # not a recognized instrument, so there is no expected count to report
    return None

def main(): def main():
import optparse import optparse
from pprint import pprint from pprint import pprint
Expand Down
3 changes: 3 additions & 0 deletions stg/modis_guidebook.py
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@


LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)


# the expected number of files per day if nothing goes wrong
EXPECTED_FILES_PER_DAY = 288

# variable names expected in the files # variable names expected in the files
CLOUD_PHASE_NAME = 'Cloud_Phase_Infrared' CLOUD_PHASE_NAME = 'Cloud_Phase_Infrared'
CLOUD_TOP_TEMP_NAME = 'Cloud_Top_Temperature' CLOUD_TOP_TEMP_NAME = 'Cloud_Top_Temperature'
Expand Down
226 changes: 130 additions & 96 deletions stg/space_time_gridding.py
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@
# TODO, in the long run handle the dtype more flexibly # TODO, in the long run handle the dtype more flexibly
TEMP_DATA_TYPE = numpy.dtype(numpy.float32) TEMP_DATA_TYPE = numpy.dtype(numpy.float32)


# TODO, need to confirm with Nadia that this is the cutoff she wants
EXPECTED_FRACTION_OF_FILES_PER_DAY = 2.0 / 3.0

LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)


def get_version_string() : def get_version_string() :
Expand Down Expand Up @@ -107,6 +110,8 @@ def main():
help="set the size of the output grid's cells in degrees") help="set the size of the output grid's cells in degrees")
parser.add_option('-a', '--min_scan_angle', dest="minScanAngle", type='float', default=60.0, parser.add_option('-a', '--min_scan_angle', dest="minScanAngle", type='float', default=60.0,
help="the minimum scan angle that will be considered useful") help="the minimum scan angle that will be considered useful")
parser.add_option('-d', '--do_process_with_little_data', dest="overrideMinCheck",
action="store_true", default=False, help="run the full daily compilation even if many files are missing or unreadable")


# parse the user's options from the command line # parse the user's options from the command line
options, args = parser.parse_args() options, args = parser.parse_args()
Expand Down Expand Up @@ -141,22 +146,23 @@ def space_day(*args) :
""" """


# set up some of our input from the caller for easy access # set up some of our input from the caller for easy access
desired_variables = list(args) if len(args) > 0 else [ ] desired_variables = list(args) if len(args) > 0 else [ ]
input_path = options.inputPath input_path = options.inputPath
output_path = options.outputPath output_path = options.outputPath
min_scan_angle = options.minScanAngle min_scan_angle = options.minScanAngle
grid_degrees = float(options.gridDegrees) grid_degrees = float(options.gridDegrees)


# determine the grid size in number of elements # determine the grid size in number of elements
grid_lon_size = int(math.ceil(360.0 / grid_degrees)) grid_lon_size = int(math.ceil(360.0 / grid_degrees))
grid_lat_size = int(math.ceil(180.0 / grid_degrees)) grid_lat_size = int(math.ceil(180.0 / grid_degrees))
space_grid_shape = (grid_lon_size, grid_lat_size) # TODO, is this the correct order? space_grid_shape = (grid_lon_size, grid_lat_size) # TODO, is this the correct order?


# look through our files and figure out what variables we expect from them # look through our files and figure out what variables we expect from them
possible_files = os.listdir(input_path) possible_files = os.listdir(input_path)
expected_vars = { } expected_vars = { }
all_vars = set() all_vars = set()
date_time_temp = None date_time_temp = None
expected_num_files = None
for file_name in sorted(possible_files) : for file_name in sorted(possible_files) :
expected_vars[file_name] = general_guidebook.get_variable_names (file_name, user_requested_names=desired_variables) expected_vars[file_name] = general_guidebook.get_variable_names (file_name, user_requested_names=desired_variables)
# if this file has no variables, remove it from our files for consideration # if this file has no variables, remove it from our files for consideration
Expand All @@ -166,7 +172,8 @@ def space_day(*args) :
# otherwise, add the variables we found to our list of all variables and try to get a time from the file # otherwise, add the variables we found to our list of all variables and try to get a time from the file
else : else :
all_vars.update(expected_vars[file_name]) all_vars.update(expected_vars[file_name])
date_time_temp = general_guidebook.parse_datetime_from_filename(file_name) if date_time_temp is None else date_time_temp date_time_temp = general_guidebook.parse_datetime_from_filename(file_name) if date_time_temp is None else date_time_temp
expected_num_files = general_guidebook.get_expected_files_per_day(file_name) if expected_num_files is None else expected_num_files


# check to make sure our intermediate file names don't exist already # check to make sure our intermediate file names don't exist already
for var_name in all_vars : for var_name in all_vars :
Expand Down Expand Up @@ -285,91 +292,106 @@ def space_day(*args) :
# if we got to here we processed the file correctly # if we got to here we processed the file correctly
sucessful_files += 1 sucessful_files += 1


# collapse the per variable space grids to remove excess NaNs # warn the user if we have fewer files than we need for this instrument
for variable_name in all_vars : if sucessful_files < (expected_num_files * EXPECTED_FRACTION_OF_FILES_PER_DAY) :

LOG.warn("Processed " + str(sucessful_files) + " files successfully for this day.")
LOG.debug("Packing space data for variable: " + variable_name) LOG.warn("Expected " + str(expected_num_files) + " files for this instrument type.")


# load the variable's density maps if options.overrideMinCheck :
var_workspace = Workspace.Workspace(dir=output_path) LOG.warn ("Daily file will be produced, but data may be unusable for this day.")
day_var_density = var_workspace[io_manager.build_name_stem(variable_name, date_time=date_time_temp,
satellite=None, algorithm=None,
suffix=io_manager.DAY_DENSITY_TEMP_SUFFIX)][:]
night_var_density = var_workspace[io_manager.build_name_stem(variable_name, date_time=date_time_temp,
satellite=None, algorithm=None,
suffix=io_manager.NIGHT_DENSITY_TEMP_SUFFIX)][:]

# only do the day data if we have some
if numpy.sum(day_var_density) > 0 :

# load the sparse space grid
day_var_data = var_workspace[io_manager.build_name_stem(variable_name, date_time=date_time_temp,
satellite=None, algorithm=None,
suffix=io_manager.DAY_TEMP_SUFFIX)][:]

# collapse the space grid
final_day_data = space_gridding.pack_space_grid(day_var_data, day_var_density)

# save the final array to an appropriately named file
io_manager.save_data_to_file(io_manager.build_name_stem(variable_name, date_time=date_time_temp,
satellite=None, algorithm=None,
suffix=io_manager.DAY_SUFFIX),
space_grid_shape, output_path, final_day_data,
TEMP_DATA_TYPE, file_permissions="w")

# load the nobs file
nobs_counts = var_workspace[io_manager.build_name_stem(variable_name, date_time=date_time_temp,
satellite=None, algorithm=None,
suffix=io_manager.DAY_NOBS_TEMP_SUFFIX)][:]

# collapse the nobs
nobs_final = numpy.sum(nobs_counts, axis=0)

# save the final nobs array to an appropriately named file
io_manager.save_data_to_file(io_manager.build_name_stem(variable_name, date_time=date_time_temp,
satellite=None, algorithm=None,
suffix=io_manager.DAY_NOBS_SUFFIX),
space_grid_shape, output_path,
nobs_final, TEMP_DATA_TYPE, file_permissions="w")

else : else :
LOG.warn("No day data was found for variable " + variable_name + ". Day files will not be written.") LOG.critical("Daily file will not be produced for this day due to lack of data.")
LOG.critical("If you wish to produce the daily file, rerun the program using the \'-d\' option.")

# only collect the daily data if we have enough files or have turned off the minimum check
if ( (sucessful_files >= (expected_num_files * EXPECTED_FRACTION_OF_FILES_PER_DAY)) or
(options.overrideMinCheck) ):


# only do night data if we have some # collapse the per variable space grids to remove excess NaNs
if numpy.sum(night_var_density) > 0 : for variable_name in all_vars :

# load the sparse space grid
night_var_data = var_workspace[io_manager.build_name_stem(variable_name, date_time=date_time_temp,
satellite=None, algorithm=None,
suffix=io_manager.NIGHT_TEMP_SUFFIX)][:]

# collapse the space grid
final_night_data = space_gridding.pack_space_grid(night_var_data, night_var_density)


# save the final array to an appropriately named file LOG.debug("Packing space data for variable: " + variable_name)
io_manager.save_data_to_file(io_manager.build_name_stem(variable_name, date_time=date_time_temp,
satellite=None, algorithm=None,
suffix=io_manager.NIGHT_SUFFIX),
space_grid_shape, output_path, final_night_data,
TEMP_DATA_TYPE, file_permissions="w")


# load the nobs file # load the variable's density maps
nobs_counts = var_workspace[io_manager.build_name_stem(variable_name, date_time=date_time_temp, var_workspace = Workspace.Workspace(dir=output_path)
day_var_density = var_workspace[io_manager.build_name_stem(variable_name, date_time=date_time_temp,
satellite=None, algorithm=None, satellite=None, algorithm=None,
suffix=io_manager.NIGHT_NOBS_TEMP_SUFFIX)][:] suffix=io_manager.DAY_DENSITY_TEMP_SUFFIX)][:]

night_var_density = var_workspace[io_manager.build_name_stem(variable_name, date_time=date_time_temp,
# collapse the nobs satellite=None, algorithm=None,
nobs_final = numpy.sum(nobs_counts, axis=0) suffix=io_manager.NIGHT_DENSITY_TEMP_SUFFIX)][:]


# save the final nobs array to an appropriately named file # only do the day data if we have some
io_manager.save_data_to_file(io_manager.build_name_stem(variable_name, date_time=date_time_temp, if numpy.sum(day_var_density) > 0 :
satellite=None, algorithm=None,
suffix=io_manager.NIGHT_NOBS_SUFFIX), # load the sparse space grid
space_grid_shape, output_path, day_var_data = var_workspace[io_manager.build_name_stem(variable_name, date_time=date_time_temp,
nobs_final, TEMP_DATA_TYPE, file_permissions="w") satellite=None, algorithm=None,
suffix=io_manager.DAY_TEMP_SUFFIX)][:]

# collapse the space grid
final_day_data = space_gridding.pack_space_grid(day_var_data, day_var_density)

# save the final array to an appropriately named file
io_manager.save_data_to_file(io_manager.build_name_stem(variable_name, date_time=date_time_temp,
satellite=None, algorithm=None,
suffix=io_manager.DAY_SUFFIX),
space_grid_shape, output_path, final_day_data,
TEMP_DATA_TYPE, file_permissions="w")

# load the nobs file
nobs_counts = var_workspace[io_manager.build_name_stem(variable_name, date_time=date_time_temp,
satellite=None, algorithm=None,
suffix=io_manager.DAY_NOBS_TEMP_SUFFIX)][:]

# collapse the nobs
nobs_final = numpy.sum(nobs_counts, axis=0)

# save the final nobs array to an appropriately named file
io_manager.save_data_to_file(io_manager.build_name_stem(variable_name, date_time=date_time_temp,
satellite=None, algorithm=None,
suffix=io_manager.DAY_NOBS_SUFFIX),
space_grid_shape, output_path,
nobs_final, TEMP_DATA_TYPE, file_permissions="w")

else :
LOG.warn("No day data was found for variable " + variable_name + ". Day files will not be written.")


else : # only do night data if we have some
LOG.warn("No night data was found for variable " + variable_name + ". Night files will not be written.") if numpy.sum(night_var_density) > 0 :

# load the sparse space grid
night_var_data = var_workspace[io_manager.build_name_stem(variable_name, date_time=date_time_temp,
satellite=None, algorithm=None,
suffix=io_manager.NIGHT_TEMP_SUFFIX)][:]

# collapse the space grid
final_night_data = space_gridding.pack_space_grid(night_var_data, night_var_density)

# save the final array to an appropriately named file
io_manager.save_data_to_file(io_manager.build_name_stem(variable_name, date_time=date_time_temp,
satellite=None, algorithm=None,
suffix=io_manager.NIGHT_SUFFIX),
space_grid_shape, output_path, final_night_data,
TEMP_DATA_TYPE, file_permissions="w")

# load the nobs file
nobs_counts = var_workspace[io_manager.build_name_stem(variable_name, date_time=date_time_temp,
satellite=None, algorithm=None,
suffix=io_manager.NIGHT_NOBS_TEMP_SUFFIX)][:]

# collapse the nobs
nobs_final = numpy.sum(nobs_counts, axis=0)

# save the final nobs array to an appropriately named file
io_manager.save_data_to_file(io_manager.build_name_stem(variable_name, date_time=date_time_temp,
satellite=None, algorithm=None,
suffix=io_manager.NIGHT_NOBS_SUFFIX),
space_grid_shape, output_path,
nobs_final, TEMP_DATA_TYPE, file_permissions="w")

else :
LOG.warn("No night data was found for variable " + variable_name + ". Night files will not be written.")


LOG.debug("Successfully processed " + str(sucessful_files) + " files and failed to process " + str(failed_files) + " files for this day.") LOG.debug("Successfully processed " + str(sucessful_files) + " files and failed to process " + str(failed_files) + " files for this day.")


Expand All @@ -378,9 +400,10 @@ def space_day(*args) :
remove_file_patterns(output_path, remove_suffixes) remove_file_patterns(output_path, remove_suffixes)


def stats_day(*args) : def stats_day(*args) :
"""given files of daily space gridded data, calculate daily stats """given a day worth of files of daily space gridded data, calculate daily stats
given an input directory that contains appropriate files,
calculate daily stats and put the resulting gridded files given an input directory that contains space gridded files for a day,
calculate daily stats and put the resulting time gridded files
for that day in the output directory. for that day in the output directory.
Note: the output directory will also be used for intermediary working Note: the output directory will also be used for intermediary working
Expand All @@ -391,12 +414,23 @@ def stats_day(*args) :
desired_variables = list(args) if len(args) > 0 else [ ] desired_variables = list(args) if len(args) > 0 else [ ]
input_path = options.inputPath input_path = options.inputPath
output_path = options.outputPath output_path = options.outputPath
min_scan_angle = options.minScanAngle
grid_degrees = float(options.gridDegrees)
# check the directory for sets of daily files

# for each set of daily files

# load the main data
# load the nobs

# calculate the std, min, max, and (weighted or non-weighted) average

# save the various stats to files



def stats_month(*args) : def stats_month(*args) :
"""given a month of daily space gridded data, calculate montly stats """given a month of daily space gridded data, calculate montly stats
given an input directory that contains appropriate files, given an input directory that contains appropriate daily stats,
calculate monthly stats and put the resulting gridded files calculate monthly stats and put the resulting gridded files
for that month in the output directory. for that month in the output directory.
Expand Down

0 comments on commit cd6c2f0

Please sign in to comment.