
breaking cloud top pressure into low/mid/high

1 parent 70db317 commit 35b69228de8d318614b1c7eaf65bbd97e0ca5b33 Eva Schiffer committed Feb 22, 2014
Showing with 120 additions and 41 deletions.
  1. +31 −0 stg/general_guidebook.py
  2. +41 −0 stg/modis_guidebook.py
  3. +47 −40 stg/space_time_gridding.py
  4. +1 −1 stg/time_gridding.py
@@ -84,6 +84,37 @@ def get_expected_files_per_day (satellite_constant) :
return number_to_return
+def mask_variable_for_time_gridding (flat_file_name, variable_name, variable_data) :
+ """given a variable name, return one or more masked areas
+
+ the masked areas generated by this function should be treated as
+ different variables for the purpose of time gridding
+ """
+
+ masks_to_return = { }
+
+ if ( flat_file_name.startswith(INST_MODIS) or
+ flat_file_name.startswith(SAT_AQUA) or
+ flat_file_name.startswith(SAT_TERRA) ) :
+ masks_to_return = modis_guidebook.mask_variable_for_time_gridding(variable_name, variable_data)
+ # FUTURE, needs a statement for ctp
+
+ return masks_to_return
+
+def get_variable_name_from_flat_file (flat_file_name) :
+ """given an fbf file name, figure out which variable is in it
+ """
+
+ name_to_return = None
+
+ if ( flat_file_name.startswith(INST_MODIS) or
+ flat_file_name.startswith(SAT_AQUA) or
+ flat_file_name.startswith(SAT_TERRA) ) :
+ name_to_return = modis_guidebook.get_variable_name_from_flat_file(flat_file_name)
+ # FUTURE, needs a statement for ctp
+
+ return name_to_return
+
def main():
import optparse
from pprint import pprint
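
The two new helpers in general_guidebook.py simply dispatch on the flat file stem prefix. A minimal usage sketch, assuming the stg package is importable and using a hypothetical MODIS flat file stem (the real stems come from the io_manager naming scheme):

import numpy

from stg import general_guidebook

flat_stem = "modis_aqua_cloud_top_pressure"     # hypothetical stem, for illustration only
ctp_data  = numpy.array([300.0, 550.0, 800.0])  # illustrative cloud top pressures in hPa

# figure out which variable the flat file holds, then ask for its time-gridding masks
var_name = general_guidebook.get_variable_name_from_flat_file(flat_stem)
masks    = general_guidebook.mask_variable_for_time_gridding(flat_stem, var_name, ctp_data)

# masks is a dict of boolean arrays keyed by modifier string ("" when no split applies)
for modifier in sorted(masks.keys()) :
    print("%r selects %d cells" % (modifier, masks[modifier].sum()))
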
@@ -25,6 +25,15 @@
LOG = logging.getLogger(__name__)
+# these are constants for separating the cloud top pressure into low/mid/high
+"""
+high cloud is cloud top pressure < 440 hPa
+low cloud is cloud top pressure > 680 hPa
+mid cloud is everything in between
+"""
+HIGH_CLOUD_TOP_PRESSURE_CONST = 440
+LOW_CLOUD_TOP_PRESSURE_CONST = 680
+
# the expected number of files per day if nothing goes wrong
EXPECTED_FILES_PER_DAY = 288
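
A quick sketch of the bucket boundaries these constants define; note that the threshold values themselves land in the mid bucket, because the high/low comparisons below are strict:

import numpy

HIGH_CLOUD_TOP_PRESSURE_CONST = 440
LOW_CLOUD_TOP_PRESSURE_CONST  = 680

ctp = numpy.array([439.9, 440.0, 680.0, 680.1])  # hPa, illustrative values only

print(ctp <  HIGH_CLOUD_TOP_PRESSURE_CONST)  # [ True False False False] -> high cloud
print(ctp >  LOW_CLOUD_TOP_PRESSURE_CONST)   # [False False False  True] -> low cloud
# everything not caught by either comparison is treated as mid cloud
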
@@ -234,6 +243,38 @@ def get_variable_names (user_requested_names) :
return var_names
+def mask_variable_for_time_gridding (variable_name, variable_data) :
+ """given a variable name, return one or more masked areas
+
+ the masked areas generated by this function should be treated as
+ different variables for the purpose of time gridding
+ """
+
+ to_return = { }
+
+ if variable_name == CLOUD_TOP_PRESS_NAME :
+
+ to_return[HIGH_MODIFIER] = variable_data < HIGH_CLOUD_TOP_PRESSURE_CONST
+ to_return[MID_MODIFIER] = (variable_data >= HIGH_CLOUD_TOP_PRESSURE_CONST) & (variable_data <= LOW_CLOUD_TOP_PRESSURE_CONST)
+ to_return[LOW_MODIFIER] = variable_data > LOW_CLOUD_TOP_PRESSURE_CONST
+
+ else :
+ to_return[""] = numpy.ones(variable_data.shape, dtype=numpy.bool)
+
+ return to_return
+
+def get_variable_name_from_flat_file (flat_file_name) :
+ """given an fbf file name, figure out which variable is in it
+ """
+
+ name_to_return = None
+
+ for possible_var_name in DATA_TYPE_TO_USE.keys() :
+ if flat_file_name.find(possible_var_name) >= 0 :
+ name_to_return = possible_var_name
+
+ return name_to_return
+
def main():
import optparse
from pprint import pprint
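
A minimal sketch of what mask_variable_for_time_gridding returns for cloud top pressure (HIGH_MODIFIER, MID_MODIFIER, LOW_MODIFIER, and CLOUD_TOP_PRESS_NAME are constants defined elsewhere in the guidebook; the pressure values are illustrative):

import numpy

from stg import modis_guidebook

ctp = numpy.array([300.0, 550.0, 800.0])  # hPa: one high, one mid, one low cloud

masks = modis_guidebook.mask_variable_for_time_gridding(modis_guidebook.CLOUD_TOP_PRESS_NAME, ctp)

# the three boolean masks partition the data at the 440 / 680 hPa thresholds:
#   masks[HIGH_MODIFIER] -> [ True, False, False]
#   masks[MID_MODIFIER]  -> [False,  True, False]
#   masks[LOW_MODIFIER]  -> [False, False,  True]
# any other variable gets a single all-True mask under the "" key
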
@@ -450,54 +450,61 @@ def stats_day(*args) :
nobs_data = var_workspace[nobs_stem][:]
# build the cutoff mask
- bad_data = time_gridding.create_sample_size_cutoff_mask(gridded_data, nobs_data,
+ bad_data = time_gridding.create_sample_size_cutoff_mask(nobs_data,
nobs_data, # this should be the overall nobs, but I don't have those right now!
fixed_cutoff=fix_nobs_cutoff,
dynamic_std_cutoff=dyn_nobs_cutoff)
# apply the cutoff mask
clean_gridded_data = gridded_data.copy()
clean_gridded_data[bad_data] = numpy.nan
- # calculate the std, min, max, and (weighted or non-weighted) average
- min_values = numpy.nanmin(clean_gridded_data, axis=0)
- max_values = numpy.nanmax(clean_gridded_data, axis=0)
- std_values = numpy.nanstd(clean_gridded_data, axis=0)
- mean_values = numpy.nansum(clean_gridded_data, axis=0) / numpy.sum(numpy.isfinite(clean_gridded_data), axis=0)
+ # figure out if we need to split our data
+ variable_name = general_guidebook.get_variable_name_from_flat_file(base_stem)
+ masks_to_split = general_guidebook.mask_variable_for_time_gridding(base_stem, variable_name, clean_gridded_data)
- # calculate the weighted average contribution for this day
- w_avg_values = time_gridding.calculate_partial_weighted_time_average(clean_gridded_data, nobs_data[0])
-
- # calculate the data fraction
- #fraction = numpy.zeros(mean_values.shape, dtype=TEMP_DATA_TYPE)
- #valid_mask = nobs_data[0] > 0
- #fraction[valid_mask] = numpy.sum(numpy.isfinite(clean_gridded_data), axis=0)[valid_mask] / nobs_data[0][valid_mask]
- fraction = numpy.sum(numpy.isfinite(clean_gridded_data), axis=0) / nobs_data[0]
-
- # save the various stats to files
-
- # save the min and max
- io_manager.save_data_to_file(base_stem + DAILY_MIN_SUFFIX,
- min_values.shape, output_path, min_values,
- TEMP_DATA_TYPE, file_permissions="w")
- io_manager.save_data_to_file(base_stem + DAILY_MAX_SUFFIX,
- max_values.shape, output_path, max_values,
- TEMP_DATA_TYPE, file_permissions="w")
-
- # save the std and the averages
- io_manager.save_data_to_file(base_stem + DAILY_STD_SUFFIX,
- std_values.shape, output_path, std_values,
- TEMP_DATA_TYPE, file_permissions="w")
- io_manager.save_data_to_file(base_stem + DAILY_MEAN_SUFFIX,
- mean_values.shape, output_path, mean_values,
- TEMP_DATA_TYPE, file_permissions="w")
- io_manager.save_data_to_file(base_stem + DAILY_W_AVG_SUFFIX,
- w_avg_values.shape, output_path, w_avg_values,
- TEMP_DATA_TYPE, file_permissions="w")
-
- # save the fraction
- io_manager.save_data_to_file(base_stem + DAILY_FRACTION_SUFFIX,
- fraction.shape, output_path, fraction,
- TEMP_DATA_TYPE, file_permissions="w")
+ for mask_key in masks_to_split.keys() :
+
+ this_mask = masks_to_split[mask_key]
+ this_mask_data = numpy.ones(clean_gridded_data.shape, dtype=TEMP_DATA_TYPE) * numpy.nan
+ this_mask_data[this_mask] = clean_gridded_data[this_mask]
+
+ # calculate the std, min, max, and (weighted or non-weighted) average
+ min_values = numpy.nanmin(this_mask_data, axis=0)
+ max_values = numpy.nanmax(this_mask_data, axis=0)
+ std_values = numpy.nanstd(this_mask_data, axis=0)
+ mean_values = numpy.nansum(this_mask_data, axis=0) / numpy.sum(numpy.isfinite(this_mask_data), axis=0)
+
+ # calculate the weighted average contribution for this day
+ w_avg_values = time_gridding.calculate_partial_weighted_time_average(this_mask_data, nobs_data[0])
+
+ # calculate the data fraction
+ fraction = numpy.sum(numpy.isfinite(this_mask_data), axis=0) / nobs_data[0]
+
+ # save the various stats to files
+
+ # save the min and max
+ io_manager.save_data_to_file(base_stem + mask_key + DAILY_MIN_SUFFIX,
+ min_values.shape, output_path, min_values,
+ TEMP_DATA_TYPE, file_permissions="w")
+ io_manager.save_data_to_file(base_stem + mask_key + DAILY_MAX_SUFFIX,
+ max_values.shape, output_path, max_values,
+ TEMP_DATA_TYPE, file_permissions="w")
+
+ # save the std and the averages
+ io_manager.save_data_to_file(base_stem + mask_key + DAILY_STD_SUFFIX,
+ std_values.shape, output_path, std_values,
+ TEMP_DATA_TYPE, file_permissions="w")
+ io_manager.save_data_to_file(base_stem + mask_key + DAILY_MEAN_SUFFIX,
+ mean_values.shape, output_path, mean_values,
+ TEMP_DATA_TYPE, file_permissions="w")
+ io_manager.save_data_to_file(base_stem + mask_key + DAILY_W_AVG_SUFFIX,
+ w_avg_values.shape, output_path, w_avg_values,
+ TEMP_DATA_TYPE, file_permissions="w")
+
+ # save the fraction
+ io_manager.save_data_to_file(base_stem + mask_key + DAILY_FRACTION_SUFFIX,
+ fraction.shape, output_path, fraction,
+ TEMP_DATA_TYPE, file_permissions="w")
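
Because the mask key is spliced in between the variable stem and the stat suffix, a cloud top pressure day now produces one full set of daily stat files per pressure category, while unsplit variables keep their old names via the single "" key. A hedged sketch of the resulting stems (the modifier strings and suffix below are hypothetical stand-ins for the real guidebook/io constants):

base_stem         = "cloud_top_pressure"  # hypothetical flat file stem
DAILY_MEAN_SUFFIX = "_daily_mean"         # assumed suffix string, for illustration only

for mask_key in ["_high", "_mid", "_low"] :  # assumed modifier strings
    print(base_stem + mask_key + DAILY_MEAN_SUFFIX)
# -> cloud_top_pressure_high_daily_mean, ..._mid_daily_mean, ..._low_daily_mean
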
@@ -17,7 +17,7 @@
import numpy
-def create_sample_size_cutoff_mask (data_array, nobs_array,
+def create_sample_size_cutoff_mask (nobs_array,
overall_nobs_array,
fixed_cutoff=None, dynamic_std_cutoff=None) :
"""
