Skip to content
This repository has been archived by the owner on Mar 24, 2021. It is now read-only.

Commit

Permalink
Merge pull request #267 from alphagov/rename_bucket_to_data_set
Browse files Browse the repository at this point in the history
Rename bucket to data set
  • Loading branch information
roc committed Apr 23, 2014
2 parents 8d652a1 + 3cdbdd8 commit f8aac8d
Show file tree
Hide file tree
Showing 70 changed files with 767 additions and 761 deletions.
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@

Backdrop is a datastore built with Python and MongoDB. It is made up of two separately deployable APIs for reading and writing data over HTTP. The plan is to be able to gather data from a variety of sources and then aggregate and compare this data in useful ways.

- Data is grouped into buckets.
- Data is grouped into data_sets.
- Data is stored by posting json to the write api.
- Certain types of data are identified by reserved keys. i.e. events are objects containing a timestamp.
- Reserved keys start with an underscore. e.g. `{ "_timestamp": "2013-01-01T00:00:00Z" }`
- Data is retrieved using http query strings on the read api.
- Data can be retrieved in a few useful ways. eg `/<name_of_my_bucket>?period=month` for monthly grouped data.
- Data can be retrieved in a few useful ways. eg `/<name_of_my_data_set>?period=month` for monthly grouped data.
- Backdrop is in constant development, the best place to find examples and features are [the feature tests](https://github.com/alphagov/backdrop/tree/master/features)

## Getting set up
Expand Down Expand Up @@ -64,14 +64,14 @@ This is the OAuth flow we are using to authenticate users with Signonotron2

Requests return a JSON object containing a `data` array.

`GET /bucket_name` will return an array of data. Each element is an object.
`GET /data_set_name` will return an array of data. Each element is an object.

`GET /bucket_name?collect=score&group_by=name` will return an array. In this
`GET /data_set_name?collect=score&group_by=name` will return an array. In this
case, each element of the array is an object containing a `name` value, a
`score` array with the scores for that name and a `_count` value with the
number of scores.

`GET /bucket_name?filter_by=name:Foo` returns all elements with `name` equal to "Foo".
`GET /data_set_name?filter_by=name:Foo` returns all elements with `name` equal to "Foo".

Other parameters:

Expand Down
6 changes: 3 additions & 3 deletions backdrop/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@


class StatsClient(object):
"""Wrap statsd.StatsClient to allow bucket to be added to stat"""
"""Wrap statsd.StatsClient to allow data_set to be added to stat"""
def __init__(self, statsd):
self._statsd = statsd

def __getattr__(self, item):
if item in ['timer', 'timing', 'incr', 'decr', 'gauge']:
def func(stat, *args, **kwargs):
bucket = kwargs.pop('bucket', 'unknown')
stat = '%s.%s' % (bucket, stat)
data_set = kwargs.pop('data_set', 'unknown')
stat = '%s.%s' % (data_set, stat)

return getattr(self._statsd, item)(stat, *args, **kwargs)
return func
Expand Down
44 changes: 23 additions & 21 deletions backdrop/admin/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@

from .. import statsd
from ..core import cache_control, log_handler, database
from ..core.bucket import Bucket
from ..core.data_set import DataSet
from ..core.errors import ParseError, ValidationError
from ..core.repository \
import BucketConfigRepository, UserConfigRepository
from ..core.flaskutils import BucketConverter
import DataSetConfigRepository, UserConfigRepository
from ..core.flaskutils import DataSetConverter
from ..core.upload import create_parser
from .signonotron2 import Signonotron2
from .uploaded_file import UploadedFile, FileUploadError
Expand All @@ -25,15 +25,15 @@

log_handler.set_up_logging(app, GOVUK_ENV)

app.url_map.converters["bucket"] = BucketConverter
app.url_map.converters["data_set"] = DataSetConverter

db = database.Database(
app.config['MONGO_HOSTS'],
app.config['MONGO_PORT'],
app.config['DATABASE_NAME']
)

bucket_repository = BucketConfigRepository(
data_set_repository = DataSetConfigRepository(
app.config['STAGECRAFT_URL'],
app.config['STAGECRAFT_DATA_SET_QUERY_TOKEN'])

Expand Down Expand Up @@ -63,13 +63,15 @@ def old_index():
def exception_handler(e):
app.logger.exception(e)

bucket_name = getattr(e, 'bucket_name', request.path)
statsd.incr("write.error", bucket=bucket_name)
data_set_name = getattr(e, 'data_set_name', request.path)
statsd.incr("write.error", data_set=data_set_name)

code = getattr(e, 'code', 500)
name = getattr(e, 'name', 'Internal Error')

return render_template("error.html", name=name, bucket_name=bucket_name), \
return render_template("error.html",
name=name,
data_set_name=data_set_name), \
code


Expand All @@ -95,7 +97,7 @@ def prevent_clickjacking(response):
def index():
"""
This representation is private to the logged-in user
(with their own buckets)
(with their own data_sets)
"""
user_email = session.get('user', {}).get('email')
if user_email:
Expand Down Expand Up @@ -181,39 +183,39 @@ def oauth_sign_out():
oauth_base_url=app.config['OAUTH_BASE_URL'])


@app.route('/<bucket:bucket_name>/upload', methods=['GET', 'POST'])
@app.route('/<data_set:data_set_name>/upload', methods=['GET', 'POST'])
@protected
@cache_control.set("private, must-revalidate")
def upload(bucket_name):
bucket_config = bucket_repository.retrieve(bucket_name)
def upload(data_set_name):
data_set_config = data_set_repository.retrieve(data_set_name)
user_config = user_repository.retrieve(
session.get("user").get("email"))

if bucket_name not in user_config.buckets:
if data_set_name not in user_config.data_sets:
return abort(404)

if request.method == 'GET':
return render_template(
"upload_{}.html".format(bucket_config.upload_format),
bucket_name=bucket_name)
"upload_{}.html".format(data_set_config.upload_format),
data_set_name=data_set_name)

return _store_data(bucket_config)
return _store_data(data_set_config)


def _store_data(bucket_config):
parse_file = create_parser(bucket_config)
bucket = Bucket(db, bucket_config)
def _store_data(data_set_config):
parse_file = create_parser(data_set_config)
data_set = DataSet(db, data_set_config)
expected_errors = (FileUploadError, ParseError, ValidationError)

try:
with UploadedFile(request.files['file']) as uploaded_file:
raw_data = parse_file(uploaded_file.file_stream())
bucket.parse_and_store(raw_data)
data_set.parse_and_store(raw_data)
except expected_errors as e:
app.logger.error('Upload error: {}'.format(e.message))
return render_template('upload_error.html',
message=e.message,
bucket_name=bucket.name), 400
data_set_name=data_set.name), 400

return render_template('upload_ok.html')

Expand Down
2 changes: 1 addition & 1 deletion backdrop/admin/static/backdrop.css
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ form div {
list-style-type: disc;
}

#bucket-list {
#data-set-list {
list-style-type: none;
margin-left: 0;
}
2 changes: 1 addition & 1 deletion backdrop/admin/templates/error.html
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<div class="span12 background-image">
<h1 class="masthead">Error</h1>
<p>{{name}}</p>
<p>{{bucket_name}}</p>
<p>{{data_set_name}}</p>
</div>
</div>
{% endblock %}
18 changes: 9 additions & 9 deletions backdrop/admin/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,24 @@
<h1 class="masthead">Performance Platform</h1>
<p>Welcome to Backdrop, the data collection API for the Performance Platform.</p>
{% if user_config %}
<h2>Available buckets</h2>
{% if user_config.buckets %}
<h2>Available data_sets</h2>
{% if user_config.data_sets %}
<p>
If you can't see the bucket you want to upload data to in this list,
If you can't see the data_set you want to upload data to in this list,
<a href="mailto:performance-platform@digital.cabinet-office.gov.uk">contact the Performance Platform team</a>.
</p>
<ul id="bucket-list">
{% for bucket_name in user_config.buckets %}
<ul id="data-set-list">
{% for data_set_name in user_config.data_sets %}
<li>
<h3>{{ bucket_name }}</h3>
<p><a href="{{ url_for('upload', bucket_name=bucket_name) }}">
Upload a CSV to the {{ bucket_name }} bucket
<h3>{{ data_set_name }}</h3>
<p><a href="{{ url_for('upload', data_set_name=data_set_name) }}">
Upload a CSV to the {{ data_set_name }} data_set
</a></p>
</li>
{% endfor %}
</ul>
{% else %}
<p>You don't have permission to upload to any buckets yet.</p>
<p>You don't have permission to upload to any data_sets yet.</p>
{% endif %}
{% else %}
<a class="btn btn-primary btn-large btn-block" href={{ url_for("oauth_sign_in") }}>Sign in</a>
Expand Down
2 changes: 1 addition & 1 deletion backdrop/admin/templates/upload_error.html
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@
{% block body %}
<h1>There was an error with your upload</h1>
<p>{{ message }}</p>
<p><a href="{{ url_for('upload', bucket_name=bucket_name) }}">Back to upload page</a></p>
<p><a href="{{ url_for('upload', data_set_name=data_set_name) }}">Back to upload page</a></p>
{% endblock %}
4 changes: 2 additions & 2 deletions backdrop/admin/templates/upload_file.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

{% block body %}
<h1>Upload {{ upload_format }} data to the Performance Platform</h1>
<h2>Bucket: {{ bucket_name }}</h2>
<h2>DataSet: {{ data_set_name }}</h2>
<p>You can upload data to Performance Platform from your {{ upload_format }} files. For
each row in a {{ upload_format }} file a document will be added to your bucket. You need
each row in a {{ upload_format }} file a document will be added to your data_set. You need
to make sure they match our expected format (see example below). </p>
<p>Download example {{ upload_format }} file: <a href="{{ url_for('static', filename=example_filename) }}">example.{{ upload_extension }}</a></p>
<form action="upload" method="post" enctype="multipart/form-data">
Expand Down
30 changes: 15 additions & 15 deletions backdrop/core/bucket.py → backdrop/core/data_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
from flask import logging
from backdrop.core import records
from backdrop.core.errors import ValidationError
from backdrop.core.validation import bucket_is_valid
from backdrop.core.validation import data_set_is_valid

import timeutils
import datetime

log = logging.getLogger(__name__)


class Bucket(object):
class DataSet(object):

def __init__(self, db, config):
self.name = config.name
Expand Down Expand Up @@ -75,34 +75,34 @@ def _generate_id(self, datum):
return b64encode(".".join([datum[key] for key in self.auto_id_keys]))


_BucketConfig = namedtuple(
"_BucketConfig",
_DataSetConfig = namedtuple(
"_DataSetConfig",
"name data_group data_type raw_queries_allowed bearer_token upload_format "
"upload_filters auto_ids queryable realtime capped_size max_age_expected")


class BucketConfig(_BucketConfig):
class DataSetConfig(_DataSetConfig):

def __new__(cls, name, data_group, data_type, raw_queries_allowed=False,
bearer_token=None, upload_format="csv", upload_filters=None,
auto_ids=None, queryable=True, realtime=False,
capped_size=5040, max_age_expected=2678400):
if not bucket_is_valid(name):
raise ValueError("Bucket name is not valid: '{}'".format(name))
if not data_set_is_valid(name):
raise ValueError("DataSet name is not valid: '{}'".format(name))

if upload_filters is None:
upload_filters = [
"backdrop.core.upload.filters.first_sheet_filter"]

return super(BucketConfig, cls).__new__(cls, name, data_group,
data_type,
raw_queries_allowed,
bearer_token, upload_format,
upload_filters, auto_ids,
queryable, realtime,
capped_size, max_age_expected)
return super(DataSetConfig, cls).__new__(cls, name, data_group,
data_type,
raw_queries_allowed,
bearer_token, upload_format,
upload_filters, auto_ids,
queryable, realtime,
capped_size, max_age_expected)

@property
def max_age(self):
""" Set cache-control header length based on type of bucket. """
""" Set cache-control header length based on type of data_set. """
return 120 if self.realtime else 1800
6 changes: 3 additions & 3 deletions backdrop/core/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ def _client_list(self, hosts, port):
def alive(self):
return self._mongo.alive()

def get_repository(self, bucket_name):
return Repository(self.get_collection(bucket_name))
def get_repository(self, data_set_name):
return Repository(self.get_collection(data_set_name))

def get_collection(self, collection_name):
return MongoDriver(self._mongo[self.name][collection_name])
Expand Down Expand Up @@ -146,7 +146,7 @@ def save(self, obj, tries=3):
self._collection.save(obj)
except AutoReconnect:
logging.warning("AutoReconnect on save")
statsd.incr("db.AutoReconnect", bucket=self._collection.name)
statsd.incr("db.AutoReconnect", data_set=self._collection.name)
if tries > 1:
self.save(obj, tries - 1)
else:
Expand Down
6 changes: 3 additions & 3 deletions backdrop/core/flaskutils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from werkzeug.routing import BaseConverter, ValidationError
from backdrop.core.validation import bucket_is_valid
from backdrop.core.validation import data_set_is_valid


class BucketConverter(BaseConverter):
class DataSetConverter(BaseConverter):
def to_python(self, value):
if not bucket_is_valid(value):
if not data_set_is_valid(value):
raise ValidationError()
return value

0 comments on commit f8aac8d

Please sign in to comment.