
Add MAD to the aggregation.

commit 974cef67b10506c25aa1edf2c8664ec7b163b663 (parent: 24ad4b3)
authored by @nursix
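
For reference: the MAD (median absolute deviation) that this commit adds to the aggregates is the median of the absolute deviations from the median, a dispersion measure that is robust against outliers. A standalone sketch of the exact numpy expression used below in modules/eden/stats.py and modules/eden/vulnerability.py (the mad() helper is illustrative, not part of the codebase):

import numpy

def mad(values):
    # Median absolute deviation: median distance from the median
    med = numpy.median(values)
    return numpy.median([abs(v - med) for v in values])

print(mad([1, 2, 3, 100]))   # median is 2.5, MAD is 1.0: the outlier barely moves it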
VERSION (2 changed lines)
@@ -1 +1 @@
-vita-0.5.5-devel-1881-g2ba5ba0 (2012-10-10 00:10:21)
+vita-0.5.5-devel-1882-g24ad4b3 (2012-10-10 10:10:02)
controllers/vulnerability.py (4 changed lines)
@@ -617,6 +617,7 @@ def rdata():
stable.date,
stable.mean,
stable.median,
+ stable.mad,
orderby=~stable.date)
keys = []
@@ -632,10 +633,9 @@ def rdata():
if p == pos[resilience_id]:
val = int(round(row.mean, 0))
- dev = 0 # @todo: implement
else:
val = row.median
- dev = 0 # @todo: implement
+ dev = row.mad
if l not in data:
ldata = data[l] = dict()
modules/eden/stats.py (138 changed lines)
@@ -73,7 +73,7 @@ def model(self):
stats_demographic = T("Demographic"),
project_beneficiary_type = T("Project Beneficiary Type"),
#survey_question_type = T("Survey Question Type"),
-
+
#climate_parameter = T("Climate Parameter"),
)
@@ -186,6 +186,10 @@ def model(self):
Field("median", "double",
label = T("Median"),
),
+ Field("mad", "double",
+ label = T("Median Absolute Deviation"),
+ default = 0.0,
+ ),
#Field("mean_ad", "double",
# label = T("Mean Absolute Deviation"),
# ),
@@ -295,7 +299,7 @@ def stats_update_time_aggregate(cls, data_id=None):
param_location_dict = {} # a list of locations for each parameter
location_dict = {} # a list of locations
loc_level_list = {} # a list of levels for each location
-
+
if current.deployment_settings.has_module("vulnerability"):
vulnerability = True
vulnerability_id_list = s3db.vulnerability_ids()
@@ -309,14 +313,14 @@ def stats_update_time_aggregate(cls, data_id=None):
if not location_id or not parameter_id:
return
(start_date, end_date) = stats_aggregated_period(record.date)
-
+
# Get all the stats_data records for this location and parameter
query = (dtable.location_id == location_id) & \
(dtable.parameter_id == parameter_id) & \
(dtable.deleted != True) & \
(dtable.approved_by != None)
data_rows = db(query).select()
-
+
# Get each record and store them in a dict keyed on the start date of
# the aggregated period. The value stored is a list containing the date
# the data_id and the value. If a record already exists for the
@@ -349,7 +353,7 @@ def stats_update_time_aggregate(cls, data_id=None):
data[start_date] = Storage(date = row_date,
id = row.data_id,
value = row.value)
-
+
# Get all the aggregate records for this parameter and location
query = (atable.location_id == location_id) & \
(atable.parameter_id == parameter_id) & \
@@ -359,7 +363,7 @@ def stats_update_time_aggregate(cls, data_id=None):
atable.date,
atable.end_date,
atable.mean)
-
+
aggr = dict()
for row in aggr_rows:
(start_date, end_date) = stats_aggregated_period(row.date)
@@ -367,7 +371,7 @@ def stats_update_time_aggregate(cls, data_id=None):
id = row.id,
type = row.agg_type,
end_date = row.end_date)
-
+
# Step through each period and check that aggr is correct
last_data_period = earliest_period
last_type_agg = False # The type of previous non-copy record was aggr
@@ -505,7 +509,7 @@ def stats_update_time_aggregate(cls, data_id=None):
# Only need to check the start date of the first period
if changed_periods[0][0] < location_dict[location_id][0][0]:
location_dict[location_id] = changed_periods
-
+
# End of loop through each stats_data record
# OPTIMISATION step 1
@@ -667,6 +671,7 @@ def stats_update_aggregate_location(location_level,
values_max = max(values)
values_avg = float(values_sum) / values_len
values_med = numpy.median(values)
+ values_mad = numpy.median([abs(v - values_med) for v in values])
# Add or update the aggregated values in the database
@@ -686,126 +691,29 @@ def stats_update_aggregate_location(location_level,
min = values_min,
max = values_max,
mean = values_avg,
- median = values_med)
+ median = values_med,
+ mad = values_mad
+ )
else:
# Insert new
- atable.insert(parameter_id = parameter_id,
+ atable.insert(agg_type = 2, # Location
+ parameter_id = parameter_id,
location_id = location_id,
date = start_date,
end_date = end_date,
- agg_type = 2, # Location
reported_count = values_len,
ward_count = len(child_ids),
min = values_min,
max = values_max,
mean = values_avg,
- median = values_med)
+ median = values_med,
+ mad = values_mad
+ )
return
# ---------------------------------------------------------------------
@staticmethod
- def stats_update_aggregate_location_old(location_id,
- parameter_id,
- start_date,
- end_date
- ):
- """
- Calculates the stats_aggregate for a specific parameter at a
- specific location.
-
- Where appropriate add test cases to modules/unit_tests/eden/stats.py
-
- * unused in this version, but retained for debug/reference
- """
-
- db = current.db
- s3db = current.s3db
- table = s3db.stats_data
- agg_table = s3db.stats_aggregate
-
- # Get all the child locations
- child_locations = current.gis.get_children(location_id)
- child_ids = [row.id for row in child_locations]
-
- # The dates have been converted to a string so the following is needed
- if end_date == "None":
- # Get the most recent stats_data record for each location
- query = (table.location_id.belongs(child_ids)) & \
- (table.parameter_id == parameter_id) & \
- (table.deleted != True) & \
- (table.approved_by != None)
- end_date = None
- else:
- query = (table.location_id.belongs(child_ids)) & \
- (table.parameter_id == parameter_id) & \
- (table.date <= end_date) & \
- (table.deleted != True) & \
- (table.approved_by != None)
- rows = db(query).select(table.value,
- table.date,
- table.location_id,
- orderby=(table.location_id, ~table.date),
- )
- # The query may return duplicate records for the same location
- # Use the most recent, which because of the ordering will be the first
- rec_cnt = 0
- sum = 0
- last_location = 0
- num_list = []
- append = num_list.append
- for row in rows:
- loc_id = row.location_id
- if loc_id != last_location:
- last_location = loc_id
- value = row.value
- append(value)
- sum += value
- rec_cnt += 1
- if rec_cnt == 0:
- return
-
- num_list.sort()
- mean = float(sum) / rec_cnt
- min = num_list[0]
- max = num_list[rec_cnt - 1]
- if rec_cnt % 2 == 0:
- median = float(num_list[rec_cnt / 2] + num_list[rec_cnt / 2 - 1]) / 2.0
- else:
- median = num_list[rec_cnt / 2]
- # Add the value to the database
- query = (agg_table.location_id == location_id) & \
- (agg_table.parameter_id == parameter_id) & \
- (agg_table.date == start_date) & \
- (agg_table.end_date == end_date) & \
- (agg_table.deleted != True)
- exists = db(query).select(agg_table.id,
- limitby=(0, 1)).first()
- if exists:
- db(query).update(agg_type = 2, # Location
- reported_count = rec_cnt,
- ward_count = len(child_ids),
- min = min,
- max = max,
- mean = mean,
- median = median,
- )
- else:
- agg_table.insert(parameter_id = parameter_id,
- location_id = location_id,
- date = start_date,
- end_date = end_date,
- agg_type = 2, # Location
- reported_count = rec_cnt,
- ward_count = len(child_ids),
- min = min,
- max = max,
- mean = mean,
- median = median,
- )
-
- # ---------------------------------------------------------------------
- @staticmethod
def stats_aggregated_period(data_date = None):
"""
This will return the start and end dates of the aggregated time period.
@@ -951,7 +859,7 @@ def stats_demographic_duplicate(item):
if duplicate:
item.id = duplicate.id
item.method = item.METHOD.UPDATE
-
+
# =============================================================================
class S3StatsGroupModel(S3Model):
"""
@@ -1196,7 +1104,7 @@ def stats_group_clean():
dtable.location_id,
dtable.value)
S3StatsModel.stats_update_time_aggregate(data_list)
-
+
query = (gtable.deleted != True) & \
(gtable.dirty == True) & \
(gtable.approved_by != None)
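
The new stats_update_aggregate_location() above and vulnerability_resilience() below store their results with the same update-or-insert pattern, now carrying the mad column in either branch. A condensed sketch of that pattern with the web2py DAL; the query conditions are reconstructed from the removed stats_update_aggregate_location_old(), and the upsert_aggregate() helper is hypothetical:

import numpy

def upsert_aggregate(db, atable, location_id, parameter_id,
                     start_date, end_date, values):
    # Look up the matching stats_aggregate row for this
    # parameter/location/period ...
    query = (atable.location_id == location_id) & \
            (atable.parameter_id == parameter_id) & \
            (atable.date == start_date) & \
            (atable.end_date == end_date) & \
            (atable.deleted != True)
    med = numpy.median(values)
    fields = dict(reported_count = len(values),
                  min = min(values),
                  max = max(values),
                  mean = float(sum(values)) / len(values),
                  median = med,
                  mad = numpy.median([abs(v - med) for v in values]))
    exists = db(query).select(atable.id, limitby=(0, 1)).first()
    if exists:
        # ... update it in place ...
        db(query).update(**fields)
    else:
        # ... or insert a new aggregate record
        atable.insert(agg_type = 2,  # Location
                      parameter_id = parameter_id,
                      location_id = location_id,
                      date = start_date,
                      end_date = end_date,
                      **fields)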
modules/eden/vulnerability.py (100 changed lines)
@@ -268,25 +268,25 @@ def vulnerability_resilience(loc_level,
stable = s3db.stats_aggregate
# Get the data from the vulnerability_data table
- query = (vtable.parameter_id.belongs(indicator_pids)) & \
+ query = (vtable.deleted != True) & \
(vtable.approved_by != None) & \
- (vtable.deleted != True)
- ward_cnt = 1
+ (vtable.parameter_id.belongs(indicator_pids))
+ ward_count = 1
if use_location:
- query = query & (vtable.location_id == location_id)
+ query &= (vtable.location_id == location_id)
else:
# Get all the child locations
child_locations = current.gis.get_children(location_id, loc_level)
child_ids = [row.id for row in child_locations]
- ward_cnt = len(child_ids)
- query = query & (vtable.location_id.belongs(child_ids))
+ ward_count = len(child_ids)
+ query &= (vtable.location_id.belongs(child_ids))
if date_period_end is None:
pass
elif date_period_end == "None":
date_period_end = None
else:
- query = query & (vtable.date <= date_period_end)
+ query &= (vtable.date <= date_period_end)
rows = db(query).select(vtable.parameter_id,
vtable.location_id,
vtable.value,
@@ -296,38 +296,42 @@ def vulnerability_resilience(loc_level,
~vtable.date
)
)
- # The query may return duplicate records for the same location & parameter
- # Use the most recent, which because of the ordering will be the first
- rec_cnt = 0
- sum = 0
- num_list = []
- append = num_list.append
- location_cnt = []
- last_record = (0,0)
+
+ # The query may return duplicate records for the same
+ # location+parameter: use the most recent, which because
+ # of the ordering will be the first
+ values = []
+ append = values.append
+ locations = []
+ new_location = locations.append
+ last_record = (0, 0)
for row in rows:
value = row.value
if not value:
continue
- loc_id = row.location_id
- param_id = row.parameter_id
- if last_record != (loc_id, param_id):
- last_record = (loc_id, param_id)
+ l = row.location_id
+ key = (l, row.parameter_id)
+ if last_record != key:
+ last_record = key
append(value)
- sum += value
- rec_cnt += 1
- if row.location_id not in location_cnt:
- location_cnt.append(row.location_id)
- if rec_cnt == 0:
+ if l not in locations:
+ new_location(l)
+
+ # Aggregate the values
+ values_len = len(values)
+ if not values_len:
return
- num_list.sort()
- mean = float(sum) / rec_cnt
- min = num_list[0]
- max = num_list[rec_cnt - 1]
- if rec_cnt % 2 == 0:
- median = float(num_list[rec_cnt / 2] + num_list[rec_cnt / 2 - 1]) / 2.0
- else:
- median = num_list[rec_cnt / 2]
- loc_cnt = len(location_cnt)
+
+ import numpy
+
+ values_sum = sum(values)
+ values_min = min(values)
+ values_max = max(values)
+ values_avg = float(values_sum) / values_len
+ values_med = numpy.median(values)
+ values_mad = numpy.median([abs(v - values_med) for v in values])
+
+ reported_count = len(locations)
# Store Resilience value in the stats_aggregate table
query = (stable.location_id == location_id) & \
@@ -335,29 +339,35 @@ def vulnerability_resilience(loc_level,
(stable.parameter_id == resilience_pid)
record = db(query).select(stable.id,
limitby=(0, 1)).first()
+
if record:
+ # Update
db(query).update(date = date_period_start,
end_date = date_period_end,
- reported_count = loc_cnt,
- ward_count = ward_cnt,
- min = min,
- max = max,
- mean = mean,
- median = median,
+ reported_count = reported_count,
+ ward_count = ward_count,
+ min = values_min,
+ max = values_max,
+ mean = values_avg,
+ median = values_med,
+ mad = values_mad,
)
else:
+ # Insert new
id = stable.insert(agg_type = 4, # indicator
parameter_id = resilience_pid,
location_id = location_id,
date = date_period_start,
end_date = date_period_end,
- reported_count = loc_cnt,
- ward_count = ward_cnt,
- min = min,
- max = max,
- mean = mean,
- median = median,
+ reported_count = reported_count,
+ ward_count = ward_count,
+ min = values_min,
+ max = values_max,
+ mean = values_avg,
+ median = values_med,
+ mad = values_mad,
)
+ return
# -------------------------------------------------------------------------
@staticmethod
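
The rewritten loop in vulnerability_resilience() relies on the select ordering to deduplicate: rows arrive grouped by (location_id, parameter_id) with the newest date first, so keeping only the first row per key keeps the most recent value. The same pattern in isolation, using hypothetical plain tuples in place of DAL rows:

# Rows as (location_id, parameter_id, date, value), already sorted
# by key with the newest date first, mirroring the orderby above
rows = [(1, 7, "2012-10-09", 4.0),   # kept: newest for (1, 7)
        (1, 7, "2012-09-01", 3.0),   # skipped: older duplicate
        (2, 7, "2012-10-05", 5.0)]   # kept: newest for (2, 7)

values = []
last_record = (0, 0)
for location_id, parameter_id, date, value in rows:
    key = (location_id, parameter_id)
    if key != last_record:   # first row per key == most recent
        last_record = key
        values.append(value)

print(values)   # [4.0, 5.0]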