This repository has been archived by the owner on Mar 24, 2021. It is now read-only.

@wip commit of renaming
have some idea what can go wrong from this
jcbashdown committed Apr 17, 2014
1 parent 8d652a1 commit 69bcd08
Showing 73 changed files with 698 additions and 698 deletions.
10 changes: 5 additions & 5 deletions README.md
@@ -8,12 +8,12 @@

Backdrop is a datastore built with Python and MongoDB. It is made up of two separately deployable APIs for reading and writing data over HTTP. The plan is to be able to gather data from a variety of sources and then aggregate and compare this data in useful ways.

-- Data is grouped into buckets.
+- Data is grouped into data_sets.
- Data is stored by posting json to the write api.
- Certain types of data are identified by reserved keys. ie events are objects containing a timestamp.
- Reserved keys start with an underscore. eg `{ "_timestamp": "2013-01-01T00:00:00Z" }`
- Data is retrieved using http query strings on the read api.
-- Data can be retrieved in a few useful ways. eg `/<name_of_my_bucket>?period=month` for monthly grouped data.
+- Data can be retrieved in a few useful ways. eg `/<name_of_my_data_set>?period=month` for monthly grouped data.
- Backdrop is in constant development, the best place to find examples and features are [the feature tests](https://github.com/alphagov/backdrop/tree/master/features)

## Getting set up
@@ -64,14 +64,14 @@ This is the OAuth flow we are using to authenticate users with Signonotron2

Requests return a JSON object containing a `data` array.

-`GET /bucket_name` will return an array of data. Each element is an object.
+`GET /data_set_name` will return an array of data. Each element is an object.

-`GET /bucket_name?collect=score&group_by=name` will return an array. In this
+`GET /data_set_name?collect=score&group_by=name` will return an array. In this
case, each element of the array is an object containing a `name` value, a
`score` array with the scores for that name and a `_count` value with the
number of scores.

-`GET /bucket_name?filter_by=name:Foo` returns all elements with `name` equal to "Foo".
+`GET /data_set_name?filter_by=name:Foo` returns all elements with `name` equal to "Foo".

Other parameters:

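To make the read-API examples above concrete, here is a hedged sketch using Python's requests library; the host and data set name are illustrative, not taken from the repository:

```python
# Illustrative queries against the Backdrop read API described above.
import requests

BASE_URL = "https://backdrop.example.gov.uk"  # hypothetical host
DATA_SET = "my_data_set"                      # hypothetical data set name

# Monthly grouped data: GET /<data_set>?period=month
monthly = requests.get("%s/%s" % (BASE_URL, DATA_SET),
                       params={"period": "month"}).json()

# Group by name and collect scores; each element carries `name`,
# a `score` array and a `_count` of the scores.
grouped = requests.get("%s/%s" % (BASE_URL, DATA_SET),
                       params={"collect": "score", "group_by": "name"}).json()

# Only elements whose `name` equals "Foo".
foos = requests.get("%s/%s" % (BASE_URL, DATA_SET),
                    params={"filter_by": "name:Foo"}).json()

print(monthly["data"], grouped["data"], foos["data"])
```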
6 changes: 3 additions & 3 deletions backdrop/__init__.py
@@ -6,15 +6,15 @@


class StatsClient(object):
"""Wrap statsd.StatsClient to allow bucket to be added to stat"""
"""Wrap statsd.StatsClient to allow data_set to be added to stat"""
def __init__(self, statsd):
self._statsd = statsd

def __getattr__(self, item):
if item in ['timer', 'timing', 'incr', 'decr', 'gauge']:
def func(stat, *args, **kwargs):
bucket = kwargs.pop('bucket', 'unknown')
stat = '%s.%s' % (bucket, stat)
data_set = kwargs.pop('data_set', 'unknown')
stat = '%s.%s' % (data_set, stat)

return getattr(self._statsd, item)(stat, *args, **kwargs)
return func
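A sketch of what the wrapper above buys you: the `data_set` keyword is popped off and prefixed onto the stat name before delegating to the wrapped statsd client. The connection details are illustrative:

```python
# Illustrative use of the StatsClient wrapper above.
import statsd  # the python statsd package wrapped by this class

from backdrop import StatsClient  # assuming the class is importable from the package

client = StatsClient(statsd.StatsClient('localhost', 8125))  # illustrative host/port

# Recorded as "my_data_set.write.error" on the underlying client.
client.incr("write.error", data_set="my_data_set")

# Without a data_set keyword the stat falls back to "unknown.write.error".
client.incr("write.error")
```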
38 changes: 19 additions & 19 deletions backdrop/admin/app.py
@@ -6,7 +6,7 @@

from .. import statsd
from ..core import cache_control, log_handler, database
-from ..core.bucket import Bucket
+from ..core.data_set import Bucket
from ..core.errors import ParseError, ValidationError
from ..core.repository \
    import BucketConfigRepository, UserConfigRepository
@@ -25,15 +25,15 @@

log_handler.set_up_logging(app, GOVUK_ENV)

app.url_map.converters["bucket"] = BucketConverter
app.url_map.converters["data_set"] = BucketConverter

db = database.Database(
    app.config['MONGO_HOSTS'],
    app.config['MONGO_PORT'],
    app.config['DATABASE_NAME']
)

-bucket_repository = BucketConfigRepository(
+data_set_repository = BucketConfigRepository(
    app.config['STAGECRAFT_URL'],
    app.config['STAGECRAFT_DATA_SET_QUERY_TOKEN'])

@@ -63,13 +63,13 @@ def old_index():
def exception_handler(e):
    app.logger.exception(e)

-    bucket_name = getattr(e, 'bucket_name', request.path)
-    statsd.incr("write.error", bucket=bucket_name)
+    data_set_name = getattr(e, 'data_set_name', request.path)
+    statsd.incr("write.error", data_set=data_set_name)

    code = getattr(e, 'code', 500)
    name = getattr(e, 'name', 'Internal Error')

-    return render_template("error.html", name=name, bucket_name=bucket_name), \
+    return render_template("error.html", name=name, data_set_name=data_set_name), \
        code


@@ -95,7 +95,7 @@ def prevent_clickjacking(response):
def index():
    """
    This representation is private to the logged-in user
-    (with their own buckets)
+    (with their own data_sets)
    """
    user_email = session.get('user', {}).get('email')
    if user_email:
@@ -181,39 +181,39 @@ def oauth_sign_out():
        oauth_base_url=app.config['OAUTH_BASE_URL'])


-@app.route('/<bucket:bucket_name>/upload', methods=['GET', 'POST'])
+@app.route('/<data_set:data_set_name>/upload', methods=['GET', 'POST'])
@protected
@cache_control.set("private, must-revalidate")
-def upload(bucket_name):
-    bucket_config = bucket_repository.retrieve(bucket_name)
+def upload(data_set_name):
+    data_set_config = data_set_repository.retrieve(data_set_name)
    user_config = user_repository.retrieve(
        session.get("user").get("email"))

-    if bucket_name not in user_config.buckets:
+    if data_set_name not in user_config.data_sets:
        return abort(404)

    if request.method == 'GET':
        return render_template(
-            "upload_{}.html".format(bucket_config.upload_format),
-            bucket_name=bucket_name)
+            "upload_{}.html".format(data_set_config.upload_format),
+            data_set_name=data_set_name)

-    return _store_data(bucket_config)
+    return _store_data(data_set_config)


-def _store_data(bucket_config):
-    parse_file = create_parser(bucket_config)
-    bucket = Bucket(db, bucket_config)
+def _store_data(data_set_config):
+    parse_file = create_parser(data_set_config)
+    data_set = Bucket(db, data_set_config)
    expected_errors = (FileUploadError, ParseError, ValidationError)

    try:
        with UploadedFile(request.files['file']) as uploaded_file:
            raw_data = parse_file(uploaded_file.file_stream())
-            bucket.parse_and_store(raw_data)
+            data_set.parse_and_store(raw_data)
    except expected_errors as e:
        app.logger.error('Upload error: {}'.format(e.message))
        return render_template('upload_error.html',
                               message=e.message,
-                               bucket_name=bucket.name), 400
+                               data_set_name=data_set.name), 400

    return render_template('upload_ok.html')

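A hedged sketch of driving the upload route above with Flask's test client; the signed-in user and data set name are made up, and a real run would also need the Stagecraft-backed repositories to answer:

```python
# Illustrative only: exercise /<data_set_name>/upload with Flask's test client.
import io

from backdrop.admin.app import app  # assuming this module path

with app.test_client() as client:
    # @protected expects a signed-in user in the session.
    with client.session_transaction() as session:
        session['user'] = {'email': 'user@example.com'}  # hypothetical user

    response = client.post(
        '/my_data_set/upload',  # hypothetical data set name
        data={'file': (io.BytesIO(b'_timestamp,value\n'), 'data.csv')},
        content_type='multipart/form-data',
    )
    # 400 renders upload_error.html; success renders upload_ok.html.
    print(response.status_code)
```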
2 changes: 1 addition & 1 deletion backdrop/admin/static/backdrop.css
@@ -48,7 +48,7 @@ form div {
list-style-type: disc;
}

-#bucket-list {
+#data_set-list {
list-style-type: none;
margin-left: 0;
}
2 changes: 1 addition & 1 deletion backdrop/admin/templates/error.html
@@ -5,7 +5,7 @@
<div class="span12 background-image">
<h1 class="masthead">Error</h1>
<p>{{name}}</p>
-<p>{{bucket_name}}</p>
+<p>{{data_set_name}}</p>
</div>
</div>
{% endblock %}
18 changes: 9 additions & 9 deletions backdrop/admin/templates/index.html
@@ -6,24 +6,24 @@
<h1 class="masthead">Performance Platform</h1>
<p>Welcome to Backdrop, the data collection API for the Performance Platform.</p>
{% if user_config %}
-<h2>Available buckets</h2>
-{% if user_config.buckets %}
+<h2>Available data_sets</h2>
+{% if user_config.data_sets %}
<p>
-If you can't see the bucket you want to upload data to in this list,
+If you can't see the data_set you want to upload data to in this list,
<a href="mailto:performance-platform@digital.cabinet-office.gov.uk">contact the Performance Platform team</a>.
</p>
<ul id="bucket-list">
{% for bucket_name in user_config.buckets %}
<ul id="data_set-list">
{% for data_set_name in user_config.data_sets %}
<li>
-<h3>{{ bucket_name }}</h3>
-<p><a href="{{ url_for('upload', bucket_name=bucket_name) }}">
-Upload a CSV to the {{ bucket_name }} bucket
+<h3>{{ data_set_name }}</h3>
+<p><a href="{{ url_for('upload', data_set_name=data_set_name) }}">
+Upload a CSV to the {{ data_set_name }} data_set
</a></p>
</li>
{% endfor %}
</ul>
{% else %}
-<p>You don't have permission to upload to any buckets yet.</p>
+<p>You don't have permission to upload to any data_sets yet.</p>
{% endif %}
{% else %}
<a class="btn btn-primary btn-large btn-block" href={{ url_for("oauth_sign_in") }}>Sign in</a>
2 changes: 1 addition & 1 deletion backdrop/admin/templates/upload_error.html
@@ -5,5 +5,5 @@
{% block body %}
<h1>There was an error with your upload</h1>
<p>{{ message }}</p>
<p><a href="{{ url_for('upload', bucket_name=bucket_name) }}">Back to upload page</a></p>
<p><a href="{{ url_for('upload', data_set_name=data_set_name) }}">Back to upload page</a></p>
{% endblock %}
4 changes: 2 additions & 2 deletions backdrop/admin/templates/upload_file.html
@@ -4,9 +4,9 @@

{% block body %}
<h1>Upload {{ upload_format }} data to the Performance Platform</h1>
-<h2>Bucket: {{ bucket_name }}</h2>
+<h2>Bucket: {{ data_set_name }}</h2>
<p>You can upload data to Performance Platform from your {{ upload_format }} files. For
-each row in a {{ upload_format }} file a document will be added to your bucket. You need
+each row in a {{ upload_format }} file a document will be added to your data_set. You need
to make sure they match our expected format (see example below). </p>
<p>Download example {{ upload_format }} file: <a href="{{ url_for('static', filename=example_filename) }}">example.{{ upload_extension }}</a></p>
<form action="upload" method="post" enctype="multipart/form-data">
6 changes: 3 additions & 3 deletions backdrop/core/bucket.py
@@ -3,7 +3,7 @@
from flask import logging
from backdrop.core import records
from backdrop.core.errors import ValidationError
-from backdrop.core.validation import bucket_is_valid
+from backdrop.core.validation import data_set_is_valid

import timeutils
import datetime
@@ -87,7 +87,7 @@ def __new__(cls, name, data_group, data_type, raw_queries_allowed=False,
                bearer_token=None, upload_format="csv", upload_filters=None,
                auto_ids=None, queryable=True, realtime=False,
                capped_size=5040, max_age_expected=2678400):
-        if not bucket_is_valid(name):
+        if not data_set_is_valid(name):
            raise ValueError("Bucket name is not valid: '{}'".format(name))

        if upload_filters is None:
@@ -104,5 +104,5 @@ def __new__(cls, name, data_group, data_type, raw_queries_allowed=False,

    @property
    def max_age(self):
-        """ Set cache-control header length based on type of bucket. """
+        """ Set cache-control header length based on type of data_set. """
        return 120 if self.realtime else 1800
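Grounded in the `__new__` signature above, a small sketch of constructing a config and reading `max_age`; the field values are illustrative, and the import path follows the renamed module used elsewhere in this commit:

```python
# Illustrative construction of a BucketConfig (mid-rename naming and all).
from backdrop.core.data_set import BucketConfig

config = BucketConfig(
    "my_data_set",               # name: must pass data_set_is_valid()
    data_group="transactions",   # illustrative
    data_type="volumetrics",     # illustrative
    realtime=False,
)
print(config.max_age)  # 1800; a realtime config would report 120

# A name starting with an underscore is internal, so validation rejects it:
BucketConfig("_secret", data_group="g", data_type="t")
# ValueError: Bucket name is not valid: '_secret'
```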
6 changes: 3 additions & 3 deletions backdrop/core/database.py
@@ -45,8 +45,8 @@ def _client_list(self, hosts, port):
    def alive(self):
        return self._mongo.alive()

-    def get_repository(self, bucket_name):
-        return Repository(self.get_collection(bucket_name))
+    def get_repository(self, data_set_name):
+        return Repository(self.get_collection(data_set_name))

    def get_collection(self, collection_name):
        return MongoDriver(self._mongo[self.name][collection_name])
@@ -146,7 +146,7 @@ def save(self, obj, tries=3):
            self._collection.save(obj)
        except AutoReconnect:
            logging.warning("AutoReconnect on save")
-            statsd.incr("db.AutoReconnect", bucket=self._collection.name)
+            statsd.incr("db.AutoReconnect", data_set=self._collection.name)
            if tries > 1:
                self.save(obj, tries - 1)
            else:
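For context, a minimal wiring sketch of the `get_repository` call above; the connection details are illustrative:

```python
# Illustrative: a data set name maps straight onto a Mongo collection.
from backdrop.core import database

db = database.Database(['localhost'], 27017, 'backdrop')  # hosts, port, db name
repository = db.get_repository('my_data_set')  # Repository over that collection
```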
4 changes: 2 additions & 2 deletions backdrop/core/flaskutils.py
@@ -1,9 +1,9 @@
from werkzeug.routing import BaseConverter, ValidationError
-from backdrop.core.validation import bucket_is_valid
+from backdrop.core.validation import data_set_is_valid


class BucketConverter(BaseConverter):
    def to_python(self, value):
-        if not bucket_is_valid(value):
+        if not data_set_is_valid(value):
            raise ValidationError()
        return value
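Putting the converter together with the registration shown in `backdrop/admin/app.py`, a minimal self-contained sketch (the app and route are illustrative):

```python
# Illustrative: invalid names never reach the view; werkzeug answers 404.
from flask import Flask

from backdrop.core.flaskutils import BucketConverter

app = Flask(__name__)
app.url_map.converters["data_set"] = BucketConverter


@app.route('/<data_set:data_set_name>')
def show(data_set_name):
    # Only names passing data_set_is_valid() match this rule;
    # e.g. "_internal" fails and falls through to a 404.
    return data_set_name
```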
14 changes: 7 additions & 7 deletions backdrop/core/repository.py
@@ -4,7 +4,7 @@

import requests

-from backdrop.core.bucket import BucketConfig
+from backdrop.core.data_set import BucketConfig
from backdrop.core.user import UserConfig

logger = logging.getLogger(__name__)
@@ -40,7 +40,7 @@ def find_first_instance_of(self, params):
        return self._create_model(doc)

    def get_all(self):
-        # Return a list of all bucket config instances
+        # Return a list of all data_set config instances
        return [self._create_model(doc) for doc in self.collection.find()]

    def _create_model(self, doc):
@@ -61,7 +61,7 @@ def get_all(self):
        json_response = _get_json_url(data_set_url, self._stagecraft_token)
        data_sets = _decode_json(json_response)

-        return [_make_bucket_config(data_set) for data_set in data_sets]
+        return [_make_data_set_config(data_set) for data_set in data_sets]

    def retrieve(self, name):
        if len(name) == 0:
@@ -78,9 +78,9 @@ def retrieve(self, name):
        else:
            raise

-        return _make_bucket_config(_decode_json(json_response))
+        return _make_data_set_config(_decode_json(json_response))

-    def get_bucket_for_query(self, data_group, data_type):
+    def get_data_set_for_query(self, data_group, data_type):
        empty_vars = []
        if len(data_group) == 0:
            empty_vars += ['Data Group']
@@ -99,12 +99,12 @@ def get_bucket_for_query(self, data_group, data_type):

        data_sets = _decode_json(json_response)
        if len(data_sets) > 0:
-            return _make_bucket_config(data_sets[0])
+            return _make_data_set_config(data_sets[0])

        return None


-def _make_bucket_config(stagecraft_dict):
+def _make_data_set_config(stagecraft_dict):
    if stagecraft_dict is None:
        return None
    return BucketConfig(**stagecraft_dict)
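A sketch of how the repository is consulted, mirroring the constructor arguments from `backdrop/admin/app.py`; the URL and token are illustrative:

```python
# Illustrative lookups against the Stagecraft-backed config repository.
from backdrop.core.repository import BucketConfigRepository

repository = BucketConfigRepository(
    'https://stagecraft.example.gov.uk',  # illustrative STAGECRAFT_URL
    'an-illustrative-query-token')        # illustrative query token

config = repository.retrieve('my_data_set')  # one config by name
by_query = repository.get_data_set_for_query('transactions', 'volumetrics')
all_configs = repository.get_all()           # every data set config
```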
6 changes: 3 additions & 3 deletions backdrop/core/upload/__init__.py
@@ -3,9 +3,9 @@
from .parse_excel import parse_excel


-def create_parser(bucket_config):
-    format_parser = load_format_parser(bucket_config.upload_format)
-    upload_filters = map(load_filter, bucket_config.upload_filters)
+def create_parser(data_set_config):
+    format_parser = load_format_parser(data_set_config.upload_format)
+    upload_filters = map(load_filter, data_set_config.upload_filters)

    def parser(file_stream):
        data = format_parser(file_stream)
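The hunk cuts off before `parser` finishes. Presumably the upload filters are applied in turn to the parsed data; a hedged sketch of that shape, not the verbatim continuation:

```python
# Assumed continuation of create_parser: fold each upload filter over
# the rows produced by the format parser.
def create_parser(data_set_config):
    format_parser = load_format_parser(data_set_config.upload_format)
    upload_filters = map(load_filter, data_set_config.upload_filters)

    def parser(file_stream):
        data = format_parser(file_stream)
        for upload_filter in upload_filters:
            data = upload_filter(data)  # each filter transforms the data
        return data

    return parser
```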
20 changes: 10 additions & 10 deletions backdrop/core/user.py
@@ -1,25 +1,25 @@
from collections import namedtuple


-def _bucket_list_is_valid(buckets):
-    if not isinstance(buckets, list):
+def _data_set_list_is_valid(data_sets):
+    if not isinstance(data_sets, list):
        return False

    is_string = lambda value: isinstance(value, basestring)

-    return all(map(is_string, buckets))
+    return all(map(is_string, data_sets))


_UserConfig = namedtuple(
    "_UserConfig",
-    "email buckets")
+    "email data_sets")


class UserConfig(_UserConfig):
-    def __new__(cls, email, buckets=None):
-        if buckets is None:
-            buckets = []
-        elif not _bucket_list_is_valid(buckets):
-            raise ValueError("buckets must be a list of bucket names")
+    def __new__(cls, email, data_sets=None):
+        if data_sets is None:
+            data_sets = []
+        elif not _data_set_list_is_valid(data_sets):
+            raise ValueError("data_sets must be a list of data_set names")

-        return super(UserConfig, cls).__new__(cls, email, buckets)
+        return super(UserConfig, cls).__new__(cls, email, data_sets)
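Grounded in the constructor above, a quick sketch of the validation behaviour; the email address is illustrative:

```python
# Illustrative: UserConfig validates its data_sets argument.
from backdrop.core.user import UserConfig

ok = UserConfig("user@example.com", data_sets=["my_data_set"])
print(ok.data_sets)   # ['my_data_set']

empty = UserConfig("user@example.com")  # data_sets defaults to []

UserConfig("user@example.com", data_sets="my_data_set")
# ValueError: data_sets must be a list of data_set names (a string is not a list)
```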
4 changes: 2 additions & 2 deletions backdrop/core/validation.py
@@ -63,8 +63,8 @@ def key_is_internal(key):
    return key.startswith('_')


-def bucket_is_valid(bucket_name):
-    if key_is_valid(bucket_name) and not key_is_internal(bucket_name):
+def data_set_is_valid(data_set_name):
+    if key_is_valid(data_set_name) and not key_is_internal(data_set_name):
        return True
    return False


0 comments on commit 69bcd08
