-
Notifications
You must be signed in to change notification settings - Fork 146
/
interfaces.py
219 lines (171 loc) · 8.04 KB
/
interfaces.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
from ckan.plugins.interfaces import Interface
class IDCATRDFHarvester(Interface):
    '''
    Extension points that plugins can implement to hook into the
    harvesting of a remote RDF file.

    Every method has a default implementation that hands its input back
    unchanged (or returns ``None``), so an implementing plugin only needs
    to override the hooks it is interested in.
    '''

    def before_download(self, url, harvest_job):
        '''
        Called just before the remote RDF file is downloaded

        It returns a tuple with the url (which can be modified) and an
        optional list of error messages.

        If the url value evaluates to False the gather stage will stop.

        This extension point can be useful to validate the URL using an
        external service.

        :param url: The harvest source URL, ie the remote RDF file location
        :type url: string
        :param harvest_job: A ``HarvestJob`` domain object which contains a
                            reference to the harvest source
                            (``harvest_job.source``).
        :type harvest_job: object

        :returns: A tuple with two items:
                    * The url. If this is False the gather stage will stop.
                    * A list of error messages. These will get stored as
                      gather errors by the harvester
        :rtype: tuple
        '''
        # Default: leave the URL untouched and report no errors.
        return url, []

    def update_session(self, session):
        '''
        Called before making the HTTP request to the remote site to download
        the RDF file.

        It returns a valid `requests` session object.

        This extension point can be useful to add special parameters to the
        request (e.g. add client certificates).

        :param session: The requests session object
        :type session: object

        :returns: The updated requests session object
        :rtype: object
        '''
        # Default: hand the session back unmodified.
        return session

    def after_download(self, content, harvest_job):
        '''
        Called just after the remote RDF file has been downloaded

        It returns a tuple with the content (which can be modified) and an
        optional list of error messages.

        If the content value evaluates to False the gather stage will stop.

        This extension point can be useful to validate the file contents
        using an external service.

        :param content: The remote RDF file contents
        :type content: string
        :param harvest_job: A ``HarvestJob`` domain object which contains a
                            reference to the harvest source
                            (``harvest_job.source``).
        :type harvest_job: object

        :returns: A tuple with two items:
                    * The file content. If this is False the gather stage
                      will stop.
                    * A list of error messages. These will get stored as
                      gather errors by the harvester
        :rtype: tuple
        '''
        # Default: leave the content untouched and report no errors.
        return content, []

    def after_parsing(self, rdf_parser, harvest_job):
        '''
        Called just after the content from the remote RDF file has been
        parsed

        It returns a tuple with the parser (which can be modified) and an
        optional list of error messages.

        This extension point can be useful to work with the graph and put it
        to other stores, e.g. a triple store.

        :param rdf_parser: The RDF parser with the remote content as a graph
                           object
        :type rdf_parser: ckanext.dcat.processors.RDFParser
        :param harvest_job: A ``HarvestJob`` domain object which contains a
                            reference to the harvest source
                            (``harvest_job.source``).
        :type harvest_job: object

        :returns: A tuple with two items:
                    * The RDF parser. If this is False the gather stage will
                      stop.
                    * A list of error messages. These will get stored as
                      gather errors by the harvester
        :rtype: tuple
        '''
        # Default: leave the parser untouched and report no errors.
        return rdf_parser, []

    def before_update(self, harvest_object, dataset_dict, temp_dict):
        '''
        Called just before the ``package_update`` action.

        It may be used to preprocess the dataset dict.

        If the content of the dataset dict is emptied (i.e. set to ``None``),
        the dataset will not be updated in CKAN, but simply ignored.

        Implementations may store some temp values in temp_dict, that will be
        then passed back in the ``after_update`` call.

        The default implementation does nothing and returns ``None``.

        :param harvest_object: A ``HarvestObject`` domain object.
        :type harvest_object: object
        :param dataset_dict: The dataset dict already parsed by the RDF
                             parser (and related plugins).
        :type dataset_dict: dict
        :param temp_dict: A dictionary, shared among all plugins, for storing
                          temp data. Such dict will be passed back in the
                          ``after_update`` call.
        :type temp_dict: dict
        '''
        pass

    def after_update(self, harvest_object, dataset_dict, temp_dict):
        '''
        Called just after a successful ``package_update`` action has been
        performed.

        :param harvest_object: A ``HarvestObject`` domain object.
        :type harvest_object: object
        :param dataset_dict: The dataset dict that has just been stored into
                             the DB.
        :type dataset_dict: dict
        :param temp_dict: A dictionary, shared among all plugins, for storing
                          temp data.
        :type temp_dict: dict

        :returns: A string containing an error message, or None. If the error
                  string is not None, it will be saved as an import error,
                  and dataset importing will be rolled back.
        :rtype: string
        '''
        # Default: no error to report.
        return None

    def before_create(self, harvest_object, dataset_dict, temp_dict):
        '''
        Called just before the ``package_create`` action.

        It may be used to preprocess the dataset dict.

        If the content of the dataset dict is emptied (i.e. set to ``None``),
        the dataset will not be created in CKAN, but simply ignored.

        Implementations may store some temp values in temp_dict, that will be
        then passed back in the ``after_create`` call.

        The default implementation does nothing and returns ``None``.

        :param harvest_object: A ``HarvestObject`` domain object.
        :type harvest_object: object
        :param dataset_dict: The dataset dict already parsed by the RDF
                             parser (and related plugins).
        :type dataset_dict: dict
        :param temp_dict: A dictionary, shared among all plugins, for storing
                          temp data. Such dict will be passed back in the
                          ``after_create`` call.
        :type temp_dict: dict
        '''
        pass

    def after_create(self, harvest_object, dataset_dict, temp_dict):
        '''
        Called just after a successful ``package_create`` action has been
        performed.

        :param harvest_object: A ``HarvestObject`` domain object.
        :type harvest_object: object
        :param dataset_dict: The dataset dict that has just been stored into
                             the DB.
        :type dataset_dict: dict
        :param temp_dict: A dictionary, shared among all plugins, for storing
                          temp data.
        :type temp_dict: dict

        :returns: A string containing an error message, or None. If the error
                  string is not None, it will be saved as an import error,
                  and dataset importing will be rolled back.
        :rtype: string
        '''
        # Default: no error to report.
        return None

    def update_package_schema_for_create(self, package_schema):
        '''
        Called just before the ``package_create`` action.

        :param package_schema: The default create package schema dict.
        :type package_schema: dict

        :returns: The updated package_schema object
        :rtype: object
        '''
        # Default: hand the schema back unmodified.
        return package_schema

    def update_package_schema_for_update(self, package_schema):
        '''
        Called just before the ``package_update`` action.

        :param package_schema: The default update package schema dict.
        :type package_schema: dict

        :returns: The updated package_schema object
        :rtype: object
        '''
        # Default: hand the schema back unmodified.
        return package_schema