From 8749c3643dfe55afbb02cd4bd6b4073a2e3f8782 Mon Sep 17 00:00:00 2001 From: Rufus Pollock Date: Sun, 4 Mar 2012 23:54:10 +0000 Subject: [PATCH 1/7] [#1797,master,doc][s]: webstore -> datastore rename plus extensive reworking and improvement of datastore docs. --- doc/conf.py | 2 +- doc/datastore.rst | 127 ++++++++++++++++++++++++++++++++++++++++++++++ doc/index.rst | 1 + doc/webstore.rst | 102 ------------------------------------- 4 files changed, 129 insertions(+), 103 deletions(-) create mode 100644 doc/datastore.rst delete mode 100644 doc/webstore.rst diff --git a/doc/conf.py b/doc/conf.py index fa9828cef04..61880937649 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -43,7 +43,7 @@ # General information about the project. project = u'CKAN Data Management System Documentation' project_short_name = u'CKAN' -copyright = u'&Copyright copy; 2009-2012, Open Knowledge Foundation' +copyright = u'© Copyright 2009-2012, Open Knowledge Foundation' html_show_sphinx = False # The version info for the project you're documenting, acts as replacement for diff --git a/doc/datastore.rst b/doc/datastore.rst new file mode 100644 index 00000000000..58742bc34b3 --- /dev/null +++ b/doc/datastore.rst @@ -0,0 +1,127 @@ +========= +DataStore +========= + +The CKAN DataStore provides a database for structured storage of data together +with a powerful Web API, all seamlessly integrated into the CKAN interface and +authorization system. + +Overview +======== + +The following short set of slides provides a brief overview and introduction to +the DataStore and the Data API. + +.. raw:: html + + + +Relationship to FileStore +========================= + +The DataStore is distinct but complementary to the FileStore (see +:doc:`file-upload`). In contrast to the FileStore which provides 'blob' +storage of whole files with no way to access or query parts of that file, the +DataStore is like a database in which individual data elements are accessible +and queryable. 
To illustrate this distinction consider storing a spreadsheet +file like a CSV or Excel document. In the FileStore this file would be stored +directly. To access it you would download the file as a whole. By contrast, if +the spreadsheet data is stored in the DataStore one would be able to access +individual spreadsheet rows via a simple web-api as well as being able to make +queries over the spreadsheet contents. + +Using the DataStore Data API +============================ + +The DataStore's Data API, which derives from the underlying ElasticSearch +data-table, is RESTful and JSON-based with extensive query capabilities. + +Each resource in a CKAN instance has an associated DataStore 'database'. This +database will be accessible via a web interface at:: + + /api/data/{resource-id} + +The interface to this data is *exactly* the same as that provided by +ElasticSearch to documents of a specific type in one of its indices. + +So, for example, to see the fields in this database do:: + + /api/data/{resource-id}/_mapping + +To do a simple search do:: + + /api/data/{resource-id}/_search?q=abc + +For more on searching see: http://www.elasticsearch.org/guide/reference/api/search/uri-request.html + + +Installation and Configuration +============================== + +The DataStore uses ElasticSearch_ as the persistence and query layer with CKAN +wrapping this with a thin authorization and authentication layer. + +It also requires the use of Nginx as your webserver as its XSendfile_ feature +is used to transparently hand off data requests to ElasticSearch internally. + +.. _ElasticSearch: http://www.elasticsearch.org/ +.. _XSendfile: http://wiki.nginx.org/XSendfile + +1. Install ElasticSearch_ +------------------------- + +Please see the ElasticSearch_ documentation. + +2. 
Configure Nginx +------------------ + +You must add to your Nginx CKAN site entry the following:: + + location /elastic/ { + internal; + # location of elastic search + proxy_pass http://0.0.0.0:9200/; + proxy_set_header Host $host; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + } + +.. note:: update the proxy_pass field value to point to your ElasticSearch + instance (if it is not localhost and default port). + +3. Enable datastore features in CKAN +------------------------------------ + +In your config file set:: + + ckan.webstore.enabled = 1 + + +DataStorer: Automatically Add Data to the DataStore +=================================================== + +Often, when you upload data you will want it to be automatically added to the +DataStore. This requires some processing, to extract the data from your files +and to add it to the DataStore in the format it understands. For more +information on the architecture see http://wiki.ckan.org/Storage. + +This task of automatically parsing and then adding data to the DataStore is +performed by a DataStorer, a queue process that runs asynchronously and can be +triggered by uploads or other activities. The DataStorer is an extension and can +be found, along with installation instructions, at: + +https://github.com/okfn/ckanext-webstorer + + +How It Works (Technically) +========================== + +1. Request arrives at e.g. /dataset/{id}/resource/{resource-id}/data +2. CKAN checks authentication and authorization. +3. (Assuming OK) CKAN hands (internally) to ElasticSearch which handles the + request + + * To do this we use Nginx's Sendfile / Accel-Redirect feature. This allows + us to hand off a user request *directly* to ElasticSearch after the + authentication and authorization. This avoids the need to proxy the + request and results through CKAN code. 
+ diff --git a/doc/index.rst b/doc/index.rst index 7dfe69b822c..a36f4102fa4 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -30,6 +30,7 @@ Contents: database_dumps i18n file-upload + datastore configuration api apiv3 diff --git a/doc/webstore.rst b/doc/webstore.rst deleted file mode 100644 index 17442ffac0d..00000000000 --- a/doc/webstore.rst +++ /dev/null @@ -1,102 +0,0 @@ -======== -Webstore -======== - -Webstore is a structured data store integrated into CKAN. It uses ElasticSearch_ -as the persistence and query layer with CKAN wrapping this with a thin -authorization and authentication layer. - -To use you will need to be using Nginx as your webserver as we utilize its -XSendfile_ feature to transparently hand off data requests to ElasticSeach -internally. - -.. _ElasticSearch: http://www.elasticsearch.org/ -.. _XSendfile: http://wiki.nginx.org/XSendfile - -Using the Webstore -================== - -Each resource in a CKAN instance will now have a Webstore 'database' associated -with it. This database will be accessible via a web interface at:: - - /api/data/{resource-id} - -This interface to this data is *exactly* the same as that provided by -ElasticSearch to documents of a specific type in one of its indices. - -So, for example, to see the fields in this database do:: - - /api/data/{resource-id}/_mapping - -To do simple search do:: - - /api/data/{resource-id}/_search?q=abc - -For more on searching see: http://www.elasticsearch.org/guide/reference/api/search/uri-request.html - - -Installation and Configuration -============================= - -1. Install ElasticSearch_ -------------------------- - -Please see the ElasticSearch_ documentation. - -2. Configure Nginx ------------------- - -You must add to your Nginx CKAN site entry the following:: - - location /elastic/ { - internal; - # location of elastic search - proxy_pass http://0.0.0.0:9200/; - proxy_set_header Host $host; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - } - -.. 
note:: update the proxy_pass field value to point to your ElasticSearch - instance (if it is not localhost and default port). - -3. Enable webstore features in CKAN ------------------------------------ - -In your config file set:: - - ckan.webstore.enabled = 1 - -4. Test it ----------- - - - -Webstorer: Automatically Add Data to the Webstore -================================================= - -Often, when you upload data you will want it to be automatically added to the -Webstore. This requires some processing, to extract the data from your files -and to add it to the Webstore in the format it understands. For more -information on the architecture see http://wiki.ckan.org/Storage. - -This task of automatically parsing and then adding data to the webstore is -performed by a Webstorer, a queue process that runs asynchronously and can be -triggered by uploads or other activities. The Webstorer is an extension and can -be found, along with installation instructions, at: - -https://github.com/okfn/ckanext-webstorer - - -How It Works (Technically) -========================== - -1. Request arrives at e.g. /dataset/{id}/resource/{resource-id}/data -2. CKAN checks authentication and authorization. -3. (Assuming OK) CKAN hands (internally) to ElasticSearch which handles the - request - - * To do this we use Nginx's Sendfile / Accel-Redirect feature. This allows - us to hand off a user request *directly* to ElasticSearch after the - authentication and authorization. This avoids the need to proxy the - request and results through CKAN code. - From 432f135a35efb7f7d7c056498090c55896cb7901 Mon Sep 17 00:00:00 2001 From: Rufus Pollock Date: Mon, 5 Mar 2012 00:36:59 +0000 Subject: [PATCH 2/7] [#1797,refactor][s]: rename webstore -> datastore throughout. 
--- ckan/config/deployment.ini_tmpl | 4 ++-- ckan/config/routing.py | 10 +++++----- ckan/controllers/{webstore.py => datastore.py} | 2 +- .../functional/{test_webstore.py => test_datastore.py} | 10 +++++----- doc/datastore.rst | 2 +- test-core.ini | 2 +- 6 files changed, 15 insertions(+), 15 deletions(-) rename ckan/controllers/{webstore.py => datastore.py} (97%) rename ckan/tests/functional/{test_webstore.py => test_datastore.py} (85%) diff --git a/ckan/config/deployment.ini_tmpl b/ckan/config/deployment.ini_tmpl index 7a711fc8ee1..f9d25c4eed9 100644 --- a/ckan/config/deployment.ini_tmpl +++ b/ckan/config/deployment.ini_tmpl @@ -161,8 +161,8 @@ ckan.locale_order = en de fr it es pl ru nl sv no cs_CZ hu pt_BR fi bg ca sq sr ckan.locales_filtered_out = el ro lt sl ## Webstore -## Uncommment to enable webstore -# ckan.webstore.enabled = 1 +## Uncommment to enable datastore +# ckan.datastore.enabled = 1 ## =================================== ## Extensions diff --git a/ckan/config/routing.py b/ckan/config/routing.py index b99178a2e67..2ed0d3f4939 100644 --- a/ckan/config/routing.py +++ b/ckan/config/routing.py @@ -129,13 +129,13 @@ def make_map(): m.connect('/util/status', action='status') ## Webstore - if config.get('ckan.webstore.enabled', False): - map.connect('webstore_read', '/api/data/{id}{url:(/.*)?}', - controller='webstore', action='read', url='', + if config.get('ckan.datastore.enabled', False): + map.connect('datastore_read', '/api/data/{id}{url:(/.*)?}', + controller='datastore', action='read', url='', conditions={'method': ['GET']} ) - map.connect('webstore_write', '/api/data/{id}{url:(/.*)?}', - controller='webstore', action='write', url='', + map.connect('datastore_write', '/api/data/{id}{url:(/.*)?}', + controller='datastore', action='write', url='', conditions={'method': ['PUT','POST', 'DELETE']} ) diff --git a/ckan/controllers/webstore.py b/ckan/controllers/datastore.py similarity index 97% rename from ckan/controllers/webstore.py rename to 
ckan/controllers/datastore.py index 227ba707354..f115e86d4a0 100644 --- a/ckan/controllers/webstore.py +++ b/ckan/controllers/datastore.py @@ -3,7 +3,7 @@ from ckan.logic import get_action, check_access from ckan.logic import NotFound, NotAuthorized, ValidationError -class WebstoreController(BaseController): +class DatastoreController(BaseController): def _make_redirect(self, id, url=''): index_name = 'ckan-%s' % g.site_id query_string = request.environ['QUERY_STRING'] diff --git a/ckan/tests/functional/test_webstore.py b/ckan/tests/functional/test_datastore.py similarity index 85% rename from ckan/tests/functional/test_webstore.py rename to ckan/tests/functional/test_datastore.py index ad8d12ef326..595f2d7a02e 100644 --- a/ckan/tests/functional/test_webstore.py +++ b/ckan/tests/functional/test_datastore.py @@ -22,7 +22,7 @@ def teardown_class(self): def test_read(self): dataset = model.Package.by_name(CreateTestData.pkg_names[0]) resource_id = dataset.resources[0].id - offset = url_for('webstore_read', id=resource_id) + offset = url_for('datastore_read', id=resource_id) res = self.app.get(offset) assert_equal(res.status, 200) assert_equal(res.body, '') @@ -30,7 +30,7 @@ def test_read(self): assert_equal(headers['X-Accel-Redirect'], '/elastic/ckan-test.ckan.net/%s?' 
% resource_id) - offset = url_for('webstore_read', id=resource_id, url='/_search') + offset = url_for('datastore_read', id=resource_id, url='/_search') res = self.app.get(offset) assert_equal(res.status, 200) headers = dict(res.headers) @@ -41,11 +41,11 @@ def test_update(self): dataset = model.Package.by_name(CreateTestData.pkg_names[0]) resource_id = dataset.resources[0].id - offset = url_for('webstore_write', id='does-not-exist') + offset = url_for('datastore_write', id='does-not-exist') res = self.app.post(offset, status=404) assert res.status == 404 - offset = url_for('webstore_write', id=resource_id) + offset = url_for('datastore_write', id=resource_id) res = self.app.post(offset) # in fact visitor can edit! # assert res.status in [401,302], res.status @@ -55,7 +55,7 @@ def test_update(self): % resource_id) - offset = url_for('webstore_write', id=resource_id, url='/_mapping') + offset = url_for('datastore_write', id=resource_id, url='/_mapping') res = self.app.post(offset) assert res.status == 200 headers = dict(res.headers) diff --git a/doc/datastore.rst b/doc/datastore.rst index 58742bc34b3..5dfcdb8059f 100644 --- a/doc/datastore.rst +++ b/doc/datastore.rst @@ -93,7 +93,7 @@ You must add to your Nginx CKAN site entry the following:: In your config file set:: - ckan.webstore.enabled = 1 + ckan.datastore.enabled = 1 DataStorer: Automatically Add Data to the DataStore diff --git a/test-core.ini b/test-core.ini index 575b3e39a2c..f2fa1f4a2cd 100644 --- a/test-core.ini +++ b/test-core.ini @@ -59,7 +59,7 @@ ckan.mail_from = info@test.ckan.net ckan.locale_default = en -ckan.webstore.enabled = 1 +ckan.datastore.enabled = 1 ckanext.stats.cache_enabled = 0 From 5723d492a34245969e90ad8520d821fde48bc23e Mon Sep 17 00:00:00 2001 From: Toby Date: Mon, 5 Mar 2012 12:28:45 +0000 Subject: [PATCH 3/7] [xs][bugfix] no need to decode CKAN_CURRENT_URL --- ckan/templates/layout_base.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/ckan/templates/layout_base.html b/ckan/templates/layout_base.html index 5039871d970..22ea24cd58b 100644 --- a/ckan/templates/layout_base.html +++ b/ckan/templates/layout_base.html @@ -184,7 +184,7 @@

Languages

- + From 8c19d9588183d011910953b18c72e401c1e8e079 Mon Sep 17 00:00:00 2001 From: Toby Date: Tue, 6 Mar 2012 12:55:23 +0000 Subject: [PATCH 6/7] [xs] make lang cookie a session one --- ckan/lib/i18n.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckan/lib/i18n.py b/ckan/lib/i18n.py index f53863a8412..3cc1fea1da7 100644 --- a/ckan/lib/i18n.py +++ b/ckan/lib/i18n.py @@ -100,7 +100,7 @@ def handle_request(request, tmpl_context): # remember this because repoze.who does it's own redirect. try: if request.cookies.get('ckan_lang') != lang: - response.set_cookie('ckan_lang', lang, max_age=3600) + response.set_cookie('ckan_lang', lang) except AttributeError: # when testing FakeRequest does not have cookies pass From a6aca286ce0e65b46a6beed19a7c4cc977c3c623 Mon Sep 17 00:00:00 2001 From: John Glover Date: Wed, 7 Mar 2012 12:12:28 +0000 Subject: [PATCH 7/7] [xs] tidy up: remove unnecessary code --- ckan/logic/validators.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/ckan/logic/validators.py b/ckan/logic/validators.py index b142a69902e..ec635c296cd 100644 --- a/ckan/logic/validators.py +++ b/ckan/logic/validators.py @@ -273,12 +273,6 @@ def tag_string_convert(key, data, errors, context): and parses tag names. These are added to the data dict, enumerated. They are also validated.''' - tag_string = data[key] - - tags = [tag.strip() \ - for tag in tag_string.split(',') \ - if tag.strip()] - if isinstance(data[key], basestring): tags = [tag.strip() \ for tag in data[key].split(',') \