/
compute_deltas.py
executable file
·58 lines (39 loc) · 1.73 KB
/
compute_deltas.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/usr/bin/env python
from pymongo import Connection, objectid
import config
# Compute, for every geography document, the absolute change ("delta") and the
# relative change ("pct_change") between its 2000 and 2010 values, and write
# the results back under data.delta / data.pct_change.
connection = Connection()
db = connection[config.CENSUS_DB]
collection = db[config.GEOGRAPHIES_COLLECTION]

row_count = 0      # documents read from the collection
computations = 0   # documents written back with updated delta/pct_change

# Only the 'data' sub-document is requested; the loop still relies on '_id'
# being present on each returned document for the write-back below.
for geography in collection.find({}, fields=['data']):
    row_count += 1

    data = geography['data']
    delta = data.setdefault('delta', {})
    pct_change = data.setdefault('pct_change', {})

    # Skip geographies which did not have data in 2000 (e.g. newly established
    # places). A document lacking 2010 data is skipped the same way instead of
    # raising KeyError and aborting the run part-way through the collection.
    if '2000' not in data or '2010' not in data:
        continue

    data_2000 = data['2000']
    data_2010 = data['2010']

    for table in data_2010:
        # Skip tables not in both years
        if table not in data_2000:
            continue

        table_2000 = data_2000[table]
        table_delta = delta.setdefault(table, {})
        table_pct_change = pct_change.setdefault(table, {})

        for k, v in data_2010[table].items():
            # Skip data not in both tables (added since 2000)
            if k not in table_2000:
                continue

            value_2010 = float(v)
            value_2000 = float(table_2000[k])

            # A zero baseline makes the percent change undefined (division by
            # zero), so the key is skipped entirely.
            # NOTE(review): this also skips the delta, which is well defined
            # for a zero baseline -- confirm that omitting it is intended.
            if value_2000 == 0:
                continue

            change = value_2010 - value_2000
            # Results are stored as strings, not numbers.
            table_delta[k] = str(change)
            table_pct_change[k] = str(change / value_2000)

    # Persist the whole 'data' sub-document (including the new delta and
    # pct_change entries) for this geography.
    collection.update(
        {'_id': objectid.ObjectId(geography['_id'])},
        {'$set': {'data': geography['data']}},
        safe=True
    )
    computations += 1

# Single-argument print(...) behaves identically under Python 2 and 3.
print('Row count: %i' % row_count)
print('Computations: %i' % computations)