-
Notifications
You must be signed in to change notification settings - Fork 1
/
stat.py
136 lines (119 loc) · 4.19 KB
/
stat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import urllib, logging
from models import Histogram
def get(prop):
    """Return the summary class whose ``match_type`` list contains ``prop``.

    Every module-level name that contains no double underscore (and is not
    this function itself) is inspected.  Only a ``match_type`` declared
    directly on the inspected object is honoured; an inherited one is
    ignored.  Without that restriction a subclass that merely inherits its
    parent's ``match_type`` (``Summary`` inherits ``['off', 'none']`` from
    ``NoSummary``) could be returned instead of the parent, depending on the
    iteration order of the module dictionary.

    Args:
        prop: the type name to look up, e.g. ``'number'`` or ``'date'``.

    Returns:
        The matching class, or ``NoSummary`` when nothing matches.
    """
    for name, candidate in list(globals().items()):
        # Compare names by value; identity comparison against a string
        # literal only works by interning accident.
        if '__' in name or name == 'get':
            continue
        # Look at the object's own namespace so inherited lists do not count.
        declared = getattr(candidate, '__dict__', {}).get('match_type')
        if declared and prop in declared:
            return candidate
    return NoSummary
class NoSummary(object):
    """Fallback summary: tracks only the first datum, the last datum and a count."""

    # Type names that map to this summary.
    match_type = ['off', 'none']

    @classmethod
    def prepare(cls, datum):
        """Return ``datum`` untouched; no conversion is applied."""
        return datum

    @classmethod
    def calculate(cls, stats, datum):
        """Record ``datum`` on ``stats``.

        ``stats.first_`` is assigned only while it is still falsy,
        ``stats.last_`` is always overwritten, and ``stats.count`` is
        incremented by one (a falsy count is treated as zero).
        """
        if not stats.first_:
            stats.first_ = datum
        stats.last_ = datum
        previous = stats.count
        stats.count = (previous if previous else 0) + 1
class Summary(NoSummary):
    """Summary that additionally stores a ``'hits'`` histogram entry per datum."""

    @classmethod
    def calculate(cls, stats, datum):
        """Update ``stats`` with ``datum`` and save a ``'hits'`` histogram entry.

        Runs the parent class's ``calculate`` first, adds ``'hits'`` to
        ``stats.histograms`` if it is not listed yet, then builds a
        ``Histogram`` indexed by ``str(datum.value)`` and saves it with
        ``put()``.

        Args:
            stats: statistic object; besides what the parent class uses it
                needs a ``histograms`` collection supporting ``in`` and
                ``append``.
            datum: object with a ``value`` attribute.

        Returns:
            None.  A failed string conversion or a falsy ``put()`` result is
            logged at CRITICAL level rather than raised.
        """
        super(Summary, cls).calculate(stats, datum)
        if 'hits' not in stats.histograms:
            stats.histograms.append('hits')
        hist = Histogram(statistic=stats, name='hits')
        try:
            hist.index = str(datum.value)  # careful: str() may fail on some values
        except Exception:
            # Catch Exception rather than everything so KeyboardInterrupt and
            # SystemExit still propagate.  Lazy logging arguments keep the
            # message intact even when the value is a tuple.
            logging.critical('Could not str(%s)', datum.value)
            return None
        hist.datum = datum
        if not hist.put():
            logging.critical('Could not save hist: %s', hist)
        return None
class NumberSummary(Summary):
    """Summary for numeric data: keeps min, max, sum and mean on ``stats``."""

    # Type names that map to this summary.
    match_type = ['number', 'float', 'int', 'integer', 'long']

    @classmethod
    def prepare(cls, value):
        """Convert a raw value to ``float``.

        A falsy non-numeric value (``None``, ``''``, ...) is treated as
        missing and counts as ``1``.  A genuine numeric zero is preserved as
        ``0.0`` instead of being silently replaced by ``1``.

        Args:
            value: the raw value to convert.

        Returns:
            The value as a ``float``, or ``None`` when it cannot be converted
            (the failure is logged at CRITICAL level).
        """
        # ``value != 0`` separates "missing" from an actual zero.
        if not value and value != 0:
            value = 1
        try:
            return float(value)
        except (TypeError, ValueError, OverflowError):
            # Lazy logging arguments: formatting cannot fail on tuple values.
            logging.critical('Could not convert %s into a float', value)
        return None

    @classmethod
    def calculate(cls, stats, datum):
        """Update ``stats`` with ``datum`` and refresh min/max/sum/mean.

        Runs the parent class's ``calculate`` first (which maintains
        ``stats.count``), then updates ``stats.min``, ``stats.max``,
        ``stats.sum`` and ``stats.mean`` from ``datum.value``.  An attribute
        that is missing or ``None`` is treated as "not set yet".

        Args:
            stats: statistic object being accumulated.
            datum: object with a numeric ``value`` attribute.
        """
        super(NumberSummary, cls).calculate(stats, datum)
        value = datum.value

        # getattr(..., None) covers both a missing attribute and one that
        # exists but still holds None.
        lowest = getattr(stats, 'min', None)
        if lowest is None or value < lowest:
            stats.min = value

        highest = getattr(stats, 'max', None)
        if highest is None or value > highest:
            stats.max = value

        stats.sum = (getattr(stats, 'sum', None) or 0) + value
        # float() guards against integer (floor) division of an integral sum.
        stats.mean = float(stats.sum) / stats.count
class StringSummary(Summary):
    """Summary for textual data; behaves exactly like ``Summary``."""

    # Type names that map to this summary.
    match_type = [
        'str',
        'string',
        'text',
    ]
class DatetimeSummary(Summary):
    """Summary for date-like data, bucketed into one histogram per time component."""

    # Type names that map to this summary.
    match_type = ['date', 'datetime', 'timestamp']
    # Format expected for 'date'/'datetime' values.
    DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'

    @classmethod
    def calculate(cls, stats, datum):
        """Update ``stats`` with ``datum`` and store its date-component histograms.

        ``NoSummary.calculate`` is called directly, so the ``'hits'``
        histogram of ``Summary`` is not written.  The datum is then
        normalised so that both ``datum.datetime`` and ``datum.timestamp``
        are set:

        * ``datum.type == 'timestamp'``: ``datum.value`` is converted with
          ``float`` and then ``datetime.fromtimestamp``.
        * ``'date' in datum.type``: ``datum.value`` is parsed with
          ``DATETIME_FORMAT`` and converted with ``time.mktime``.

        Finally one ``Histogram`` per time-tuple component (years, months,
        days, hours, minutes, seconds, weekdays, days_of_the_year) is saved.

        Args:
            stats: statistic object with a ``histograms`` collection.
            datum: object with ``type`` and ``value`` attributes.

        Returns:
            None.  Every failure is logged at CRITICAL level and ends the
            call early instead of raising.
        """
        NoSummary.calculate(stats, datum)
        import datetime
        import time

        if datum.type == 'timestamp':
            try:
                datum.timestamp = float(datum.value)
            except (TypeError, ValueError):
                # ValueError covers non-numeric strings, TypeError e.g. None.
                logging.critical('Could not convert string timestamp (%s) into integer', datum.value)
                return None
            try:
                datum.datetime = datetime.datetime.fromtimestamp(datum.timestamp)
            except (ValueError, OverflowError, OSError):
                logging.critical('Could not datetime.fromtimestamp(%s)', datum.timestamp)
                return None
        elif 'date' in datum.type:
            try:
                datum.datetime = datetime.datetime.strptime(datum.value, cls.DATETIME_FORMAT)  # careful
            except (TypeError, ValueError):
                logging.critical('Could not datetime.strptime parse: %s', datum.value)
                return None
            try:
                datum.timestamp = time.mktime(datum.datetime.timetuple())
            except (ValueError, OverflowError):
                # The exception types must be a tuple: ``except A, B`` would
                # catch only A and bind it to the name B.  The time tuple is
                # passed as a logging argument because ``'%s' % struct_time``
                # fails (struct_time is a tuple subclass).
                logging.critical('Could not time.mktime(%s)', datum.datetime.timetuple())
                return None
        else:
            logging.critical('Unexpected type: %s for calc_date_statistics', datum.type)
            return None

        timetuple = datum.datetime.timetuple()
        # The position in this tuple is the index of the component in the time tuple.
        buckets = (
            'years',
            'months',
            'days',
            'hours',
            'minutes',
            'seconds',
            'weekdays',
            'days_of_the_year',
        )
        for position, attr in enumerate(buckets):
            if attr not in stats.histograms:
                stats.histograms.append(attr)
            hist = Histogram(statistic=stats, name=attr)
            hist.index = str(timetuple[position])
            hist.datum = datum
            if not hist.put():
                logging.critical('Could not save hist: %s', hist)
                return None
        return None
'''
Ideas for further summary types; none of these are implemented in this module.

### Location
- longitude
- latitude
- statistics
- area
- centroid
- boundary
- box (histogram)
### Interval
- start (date)
- stop (date)
- duration (number)
- statistics
- (to be decided)
'''