This repository has been archived by the owner on May 11, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 12
/
bea.py
139 lines (128 loc) · 5.23 KB
/
bea.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
from urllib import urlencode
import json
import os
from geomancer.app_config import MANCER_KEYS
from geomancer.helpers import encoded_dict
from geomancer.mancers.geotype import State, StateFIPS
from geomancer.mancers.base import BaseMancer, MancerError
from string import punctuation
import re
from urlparse import urlparse
import us
class BureauEconomicAnalysis(BaseMancer):
    """
    Mancer that serves 2013 state-level GDP and personal income figures
    from the Bureau of Economic Analysis (BEA) "RegionalData" API.

    Subclasses the main BaseMancer class. An API key is required; it is
    sent to the API as the ``UserID`` query parameter.
    """
    name = 'Bureau of Economic Analysis'
    machine_name = 'bureau_economic_analysis'
    base_url = 'http://www.bea.gov/api/data'
    info_url = 'http://www.bea.gov'
    description = """
        GDP & Personal Income Data (2013) from the Bureau of Economic Analysis
    """
    api_key_required = True

    # Compiled once for the class instead of on every geo_lookup() call.
    _PUNCTUATION_RE = re.compile('[%s]' % re.escape(punctuation))

    # Human-readable header for each BEA KeyCode that search() can fetch.
    _COLUMN_NAMES = {
        'GDP_SP': '2013 GDP (millions)',
        'RGDP_SP': '2013 Real GDP (millions of chained 2009 dollars)',
        'PCRGDP_SP': '2013 Per Capita Real GDP (chained 2009 dollars)',
        'TPI_SI': '2013 Total Personal Income (thousands of dollars)',
        'PCPI_SI': '2013 Per Capita Personal Income (dollars)'
    }

    def __init__(self, api_key=None):
        """
        Store the BEA API key, then run the BaseMancer initializer.
        """
        self.api_key = api_key
        BaseMancer.__init__(self)

    def get_metadata(self):
        """
        Return a list of dicts, one per BEA table this mancer offers.

        Each dict carries the table id (the BEA KeyCode), display names,
        the source, the supported geography types and the column labels.
        """
        datasets = [
            {
                'table_id': 'GDP_SP',
                'human_name': 'Nominal GDP',
                'description': '2013 Gross Domestic Product (GDP) (state annual product)',
                'source_name': self.name,
                'source_url': 'http://bea.gov/regional/index.htm',
                'geo_types': [State()],
                'columns': ['2013 GDP'],
                'count': 1
            },
            {
                'table_id': 'RGDP_SP',
                'human_name': 'Real GDP',
                'description': '2013 Real GDP (state annual product)',
                'source_name': self.name,
                'source_url': 'http://bea.gov/regional/index.htm',
                'geo_types': [State()],
                'columns': ['2013 Real GDP'],
                'count': 1
            },
            {
                'table_id': 'PCRGDP_SP',
                'human_name': 'Real GDP - Per Capita',
                'description': '2013 Per capita Real GDP (state annual product)',
                'source_name': self.name,
                'source_url': 'http://bea.gov/regional/index.htm',
                'geo_types': [State()],
                'columns': ['2013 Per Capita Real GDP'],
                'count': 1
            },
            {
                'table_id': 'TPI_SI',
                'human_name': 'Personal Income - Total',
                'description': '2013 Total Personal Income (state annual income)',
                'source_name': self.name,
                'source_url': 'http://bea.gov/regional/index.htm',
                'geo_types': [State()],
                'columns': ['2013 Total Personal Income'],
                'count': 1
            },
            {
                'table_id': 'PCPI_SI',
                'human_name': 'Personal Income - Per Capita',
                'description': '2013 Per Capita personal income (state annual income)',
                'source_name': self.name,
                'source_url': 'http://bea.gov/regional/index.htm',
                'geo_types': [State()],
                'columns': ['2013 Per Capita Personal Income'],
                'count': 1
            }
        ]
        return datasets

    def lookup_state_name(self, term):
        """
        Resolve ``term`` to a full state name.

        ``us.states.lookup`` is tried first. If it finds nothing, the term
        is compared against each state's AP abbreviation, ignoring
        punctuation and case, because geo_lookup() strips punctuation
        before calling this method (so "Calif." arrives as "Calif").

        Returns the state's ``name`` on a match, otherwise ``term``
        unchanged.
        """
        if not term:
            return term
        state = us.states.lookup(term)
        if not state:
            wanted = self._PUNCTUATION_RE.sub('', term).lower()
            for candidate in us.STATES:
                ap_abbr = getattr(candidate, 'ap_abbr', None) or ''
                if self._PUNCTUATION_RE.sub('', ap_abbr).lower() == wanted:
                    # Keep a single state object; the previous code kept a
                    # list here and then failed on ``.name``.
                    state = candidate
                    break
        if state:
            return state.name
        return term

    def geo_lookup(self, search_term, geo_type=None):
        """
        Normalize a user-supplied search term into a lookup result.

        Punctuation is removed from ``search_term``. For ``geo_type ==
        'state'`` the geoid is the resolved state name; for any other
        geo_type the cleaned term itself is used as the geoid.

        Returns a dict with the keys ``'term'`` and ``'geoid'``.
        """
        search_term = self._PUNCTUATION_RE.sub('', search_term)
        if geo_type == 'state':
            geoid = self.lookup_state_name(search_term)
        else:
            geoid = search_term
        return {'term': search_term, 'geoid': geoid}

    def search(self, geo_ids=None, columns=None):
        """
        Fetch the requested BEA tables and collect values per geography.

        ``geo_ids`` is an iterable of ``(geo_type, geo_id)`` pairs and
        ``columns`` an iterable of BEA KeyCodes (see get_metadata()).
        ``None`` for either is treated as empty.

        Returns a dict whose ``'header'`` entry lists one label per
        requested column and whose remaining keys are the geo ids, each
        mapped to a list of values in header order. When a geography has
        no value for a column (no matching row, or a geo_type other than
        ``'state'``) an empty string is stored so rows stay aligned with
        the header.

        Raises MancerError when the HTTP request fails or the response
        does not contain ``BEAAPI -> Results -> Data``.
        """
        # scrapelib is referenced by the except clause below but is not
        # imported at module level in this file, so import it here.
        # NOTE(review): presumably BaseMancer is built on scrapelib (it
        # provides self.urlopen) -- confirm.
        import scrapelib

        results = {'header': []}
        for col in columns or []:
            url = self.base_url+'/?UserID=%s&method=GetData&datasetname=RegionalData&KeyCode=%s&Year=2013&ResultFormat=json' %(self.api_key, col)
            try:
                response = self.urlopen(url)
            except scrapelib.HTTPError as e:
                try:
                    body = json.loads(e.body.json()['error'])
                except ValueError:
                    body = None
                except AttributeError:
                    # e.body has no .json() (e.g. it is plain text).
                    body = e.body
                raise MancerError('BEA API returned an error', body=body)
            raw_results = json.loads(response)
            try:
                raw_data = raw_results['BEAAPI']['Results']['Data']
            except (KeyError, TypeError):
                # The payload lacks the expected data section; report it
                # the same way as an HTTP failure instead of a bare
                # KeyError.
                raise MancerError('BEA API returned an error', body=raw_results)
            results['header'].append(self._COLUMN_NAMES[col])

            # Index the rows by GeoName once per column. The first row
            # seen for a name wins, matching the earlier scan-and-break.
            values_by_name = {}
            for geo_data in raw_data:
                geo_name = geo_data.get('GeoName')
                if geo_name is not None and geo_name not in values_by_name:
                    values_by_name[geo_name] = geo_data.get('DataValue', '')

            for geo_type, geo_id in geo_ids or []:
                row = results.setdefault(geo_id, [])
                if geo_type == 'state':  #### handle state fips?
                    row.append(values_by_name.get(geo_id, ''))
                else:
                    # Unsupported geography type: pad to keep alignment.
                    row.append('')
        return results