-
Notifications
You must be signed in to change notification settings - Fork 2k
/
action.py
213 lines (172 loc) · 6.82 KB
/
action.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
import logging
import json
import urlparse
import datetime
import pylons
import requests
import ckan.lib.navl.dictization_functions
import ckan.logic as logic
import ckan.plugins as p
import ckanext.datapusher.logic.schema as dpschema
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Short aliases for the CKAN helpers used by the actions in this module.
_get_or_bust = logic.get_or_bust
_validate = ckan.lib.navl.dictization_functions.validate
def _record_datapusher_task(context, task, state, **fields):
    ''' Store ``state`` and any extra ``fields`` on ``task``, stamp the task
    with the current time and persist it through ``task_status_update``.
    '''
    task.update(fields)
    task['state'] = state
    # Must be a plain string; a trailing comma after this expression would
    # silently turn the timestamp into a one-element tuple.
    task['last_updated'] = str(datetime.datetime.now())
    p.toolkit.get_action('task_status_update')(context, task)


def datapusher_submit(context, data_dict):
    ''' Submit a job to the datapusher. The datapusher is a service that
    imports tabular data into the datastore.

    :param resource_id: The resource id of the resource that the data
        should be imported in. The resource's URL will be used to get the data.
    :type resource_id: string
    :param set_url_type: If set to True, the ``url_type`` of the resource will
        be set to ``datastore`` and the resource URL will automatically point
        to the :ref:`datastore dump <dump>` URL. (optional, default: False)
    :type set_url_type: bool

    Returns ``True`` if the job has been submitted and ``False`` if the job
    has not been submitted, i.e. when the datapusher is not configured.

    :rtype: bool
    :raises ValidationError: if ``data_dict`` does not validate against the
        schema, if the datapusher cannot be reached, or if it answers with
        an HTTP error status. In the last two cases the task status is
        stored with state ``error`` before the exception is raised.
    '''
    schema = context.get('schema', dpschema.datapusher_submit_schema())
    data_dict, errors = _validate(data_dict, schema, context)
    if errors:
        raise p.toolkit.ValidationError(errors)

    res_id = data_dict['resource_id']

    p.toolkit.check_access('datapusher_submit', context, data_dict)

    datapusher_url = pylons.config.get('ckan.datapusher.url')
    if not datapusher_url:
        # Documented contract: without a configured datapusher nothing is
        # submitted and no task status is written.
        return False

    site_url = pylons.config['ckan.site_url']
    callback_url = site_url + p.toolkit.url_for(
        controller='api', action='action',
        logic_function='datapusher_hook', ver=3
    )

    user = p.toolkit.get_action('user_show')(context, {'id': context['user']})

    task = {
        'entity_id': res_id,
        'entity_type': 'resource',
        'task_type': 'datapusher',
        'last_updated': str(datetime.datetime.now()),
        'state': 'submitting',
        'key': 'datapusher',
        'value': '{}',
        'error': '{}',
    }
    # Reuse the id of an existing datapusher task for this resource, if any.
    try:
        existing = p.toolkit.get_action('task_status_show')(context, {
            'entity_id': res_id,
            'task_type': 'datapusher',
            'key': 'datapusher'
        })
        task['id'] = existing['id']
    except logic.NotFound:
        pass

    # Every task_status_update call from here on runs with authorization
    # checks disabled on this context.
    context['ignore_auth'] = True
    p.toolkit.get_action('task_status_update')(context, task)

    try:
        r = requests.post(
            urlparse.urljoin(datapusher_url, 'job'),
            headers={
                'Content-Type': 'application/json'
            },
            data=json.dumps({
                'api_key': user['apikey'],
                'job_type': 'push_to_datastore',
                'result_url': callback_url,
                'metadata': {
                    'ckan_url': site_url,
                    'resource_id': res_id,
                    'set_url_type': data_dict.get('set_url_type', False)
                }
            }))
        r.raise_for_status()
    except requests.exceptions.ConnectionError as e:
        error = {'message': 'Could not connect to DataPusher.',
                 'details': str(e)}
        _record_datapusher_task(context, task, 'error',
                                error=json.dumps(error))
        raise p.toolkit.ValidationError(error)
    except requests.exceptions.HTTPError as e:
        m = 'An Error occurred while sending the job: {0}'.format(e.message)
        # Prefer the structured error body; fall back to the raw text when
        # the response is not JSON.
        try:
            body = e.response.json()
        except ValueError:
            body = e.response.text
        error = {'message': m,
                 'details': body,
                 'status_code': e.response.status_code}
        _record_datapusher_task(context, task, 'error',
                                error=json.dumps(error))
        raise p.toolkit.ValidationError(error)

    # Parse the response once and remember how to find the job later.
    job = r.json()
    value = json.dumps({'job_id': job['job_id'],
                        'job_key': job['job_key']})
    _record_datapusher_task(context, task, 'pending', value=value)

    return True
def datapusher_hook(context, data_dict):
    ''' Update datapusher task. This action is typically called by the
    datapusher whenever the status of a job changes.

    :param metadata: metadata produced by the datapusher service; must have
        a ``resource_id`` property.
    :type metadata: dict
    :param status: status of the job from the datapusher service
    :type status: string
    :raises ValidationError: if ``metadata``, ``status`` or the
        ``resource_id`` entry of ``metadata`` is missing.
    '''
    metadata, status = _get_or_bust(data_dict, ['metadata', 'status'])

    p.toolkit.check_access('datapusher_submit', context, data_dict)

    # Fail explicitly on a missing resource id instead of looking up a task
    # for ``None``.
    res_id = _get_or_bust(metadata, 'resource_id')

    task = p.toolkit.get_action('task_status_show')(context, {
        'entity_id': res_id,
        'task_type': 'datapusher',
        'key': 'datapusher'
    })

    task['state'] = status
    task['last_updated'] = str(datetime.datetime.now())

    # The task status update itself runs with authorization checks disabled.
    context['ignore_auth'] = True
    p.toolkit.get_action('task_status_update')(context, task)
def datapusher_status(context, data_dict):
    ''' Report the state of the datapusher job that belongs to a resource.

    :param resource_id: The resource id of the resource that you want the
        datapusher status for. ``id`` is accepted as an alias and wins when
        both are given.
    :type resource_id: string

    Returns a dict with the stored task ``status``, ``last_updated`` and
    decoded ``error``, the ``job_id``, ``job_key`` and ``job_url`` of the
    job, and ``task_info`` holding the job details fetched from the
    datapusher (``None`` when no job id is stored).

    :rtype: dict
    '''
    p.toolkit.check_access('datapusher_status', context, data_dict)

    # Normalise the ``id`` alias onto ``resource_id`` before reading it.
    if 'id' in data_dict:
        data_dict['resource_id'] = data_dict['id']
    resource_id = _get_or_bust(data_dict, 'resource_id')

    task_query = {
        'entity_id': resource_id,
        'task_type': 'datapusher',
        'key': 'datapusher'
    }
    task = p.toolkit.get_action('task_status_show')(context, task_query)

    base_url = pylons.config.get('ckan.datapusher.url')
    if not base_url:
        raise p.toolkit.ValidationError(
            {'configuration': ['ckan.datapusher.url not in config file']})

    stored = json.loads(task['value'])
    job_key = stored.get('job_key')
    job_id = stored.get('job_id')

    job_url = None
    task_info = None
    if job_id:
        job_url = urlparse.urljoin(base_url, 'job/' + job_id)
        request_headers = {'Content-Type': 'application/json',
                           'Authorization': job_key}
        try:
            response = requests.get(job_url, headers=request_headers)
            response.raise_for_status()
            task_info = response.json()
        except (requests.exceptions.ConnectionError,
                requests.exceptions.HTTPError):
            # Connection failures and HTTP error statuses are both reported
            # through the same placeholder instead of being raised.
            task_info = {'error': 'cannot connect to datapusher'}

    return {
        'status': task['state'],
        'job_id': job_id,
        'job_url': job_url,
        'last_updated': task['last_updated'],
        'job_key': job_key,
        'task_info': task_info,
        'error': json.loads(task['error'])
    }