Skip to content
This repository has been archived by the owner on Mar 24, 2021. It is now read-only.

Commit

Permalink
Merge pull request #267 from alphagov/rename_bucket_to_data_set
Browse files Browse the repository at this point in the history
Rename bucket to data set
  • Loading branch information
roc committed Apr 23, 2014
2 parents 8d652a1 + 3cdbdd8 commit f8aac8d
Show file tree
Hide file tree
Showing 70 changed files with 767 additions and 761 deletions.
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@

Backdrop is a datastore built with Python and MongoDB. It is made up of two separately deployable APIs for reading and writing data over HTTP. The plan is to be able to gather data from a variety of sources and then aggregate and compare this data in useful ways.

- Data is grouped into buckets.
- Data is grouped into data_sets.
- Data is stored by posting json to the write api.
- Certain types of data are identified by reserved keys. i.e. events are objects containing a timestamp.
- Reserved keys start with an underscore. e.g. `{ "_timestamp": "2013-01-01T00:00:00Z" }`
- Data is retrieved using http query strings on the read api.
- Data can be retrieved in a few useful ways. eg `/<name_of_my_bucket>?period=month` for monthly grouped data.
- Data can be retrieved in a few useful ways. eg `/<name_of_my_data_set>?period=month` for monthly grouped data.
- Backdrop is in constant development, the best place to find examples and features are [the feature tests](https://github.com/alphagov/backdrop/tree/master/features)

## Getting set up
Expand Down Expand Up @@ -64,14 +64,14 @@ This is the OAuth flow we are using to authenticate users with Signonotron2

Requests return a JSON object containing a `data` array.

`GET /bucket_name` will return an array of data. Each element is an object.
`GET /data_set_name` will return an array of data. Each element is an object.

`GET /bucket_name?collect=score&group_by=name` will return an array. In this
`GET /data_set_name?collect=score&group_by=name` will return an array. In this
case, each element of the array is an object containing a `name` value, a
`score` array with the scores for that name and a `_count` value with the
number of scores.

`GET /bucket_name?filter_by=name:Foo` returns all elements with `name` equal to "Foo".
`GET /data_set_name?filter_by=name:Foo` returns all elements with `name` equal to "Foo".

Other parameters:

Expand Down
6 changes: 3 additions & 3 deletions backdrop/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@


class StatsClient(object):
"""Wrap statsd.StatsClient to allow bucket to be added to stat"""
"""Wrap statsd.StatsClient to allow data_set to be added to stat"""
def __init__(self, statsd):
self._statsd = statsd

def __getattr__(self, item):
if item in ['timer', 'timing', 'incr', 'decr', 'gauge']:
def func(stat, *args, **kwargs):
bucket = kwargs.pop('bucket', 'unknown')
stat = '%s.%s' % (bucket, stat)
data_set = kwargs.pop('data_set', 'unknown')
stat = '%s.%s' % (data_set, stat)

return getattr(self._statsd, item)(stat, *args, **kwargs)
return func
Expand Down
44 changes: 23 additions & 21 deletions backdrop/admin/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@

from .. import statsd
from ..core import cache_control, log_handler, database
from ..core.bucket import Bucket
from ..core.data_set import DataSet
from ..core.errors import ParseError, ValidationError
from ..core.repository \
import BucketConfigRepository, UserConfigRepository
from ..core.flaskutils import BucketConverter
import DataSetConfigRepository, UserConfigRepository
from ..core.flaskutils import DataSetConverter
from ..core.upload import create_parser
from .signonotron2 import Signonotron2
from .uploaded_file import UploadedFile, FileUploadError
Expand All @@ -25,15 +25,15 @@

log_handler.set_up_logging(app, GOVUK_ENV)

app.url_map.converters["bucket"] = BucketConverter
app.url_map.converters["data_set"] = DataSetConverter

db = database.Database(
app.config['MONGO_HOSTS'],
app.config['MONGO_PORT'],
app.config['DATABASE_NAME']
)

bucket_repository = BucketConfigRepository(
data_set_repository = DataSetConfigRepository(
app.config['STAGECRAFT_URL'],
app.config['STAGECRAFT_DATA_SET_QUERY_TOKEN'])

Expand Down Expand Up @@ -63,13 +63,15 @@ def old_index():
def exception_handler(e):
app.logger.exception(e)

bucket_name = getattr(e, 'bucket_name', request.path)
statsd.incr("write.error", bucket=bucket_name)
data_set_name = getattr(e, 'data_set_name', request.path)
statsd.incr("write.error", data_set=data_set_name)

code = getattr(e, 'code', 500)
name = getattr(e, 'name', 'Internal Error')

return render_template("error.html", name=name, bucket_name=bucket_name), \
return render_template("error.html",
name=name,
data_set_name=data_set_name), \
code


Expand All @@ -95,7 +97,7 @@ def prevent_clickjacking(response):
def index():
"""
This representation is private to the logged-in user
(with their own buckets)
(with their own data_sets)
"""
user_email = session.get('user', {}).get('email')
if user_email:
Expand Down Expand Up @@ -181,39 +183,39 @@ def oauth_sign_out():
oauth_base_url=app.config['OAUTH_BASE_URL'])


@app.route('/<bucket:bucket_name>/upload', methods=['GET', 'POST'])
@app.route('/<data_set:data_set_name>/upload', methods=['GET', 'POST'])
@protected
@cache_control.set("private, must-revalidate")
def upload(bucket_name):
bucket_config = bucket_repository.retrieve(bucket_name)
def upload(data_set_name):
data_set_config = data_set_repository.retrieve(data_set_name)
user_config = user_repository.retrieve(
session.get("user").get("email"))

if bucket_name not in user_config.buckets:
if data_set_name not in user_config.data_sets:
return abort(404)

if request.method == 'GET':
return render_template(
"upload_{}.html".format(bucket_config.upload_format),
bucket_name=bucket_name)
"upload_{}.html".format(data_set_config.upload_format),
data_set_name=data_set_name)

return _store_data(bucket_config)
return _store_data(data_set_config)


def _store_data(bucket_config):
parse_file = create_parser(bucket_config)
bucket = Bucket(db, bucket_config)
def _store_data(data_set_config):
parse_file = create_parser(data_set_config)
data_set = DataSet(db, data_set_config)
expected_errors = (FileUploadError, ParseError, ValidationError)

try:
with UploadedFile(request.files['file']) as uploaded_file:
raw_data = parse_file(uploaded_file.file_stream())
bucket.parse_and_store(raw_data)
data_set.parse_and_store(raw_data)
except expected_errors as e:
app.logger.error('Upload error: {}'.format(e.message))
return render_template('upload_error.html',
message=e.message,
bucket_name=bucket.name), 400
data_set_name=data_set.name), 400

return render_template('upload_ok.html')

Expand Down
2 changes: 1 addition & 1 deletion backdrop/admin/static/backdrop.css
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ form div {
list-style-type: disc;
}

#bucket-list {
#data-set-list {
list-style-type: none;
margin-left: 0;
}
2 changes: 1 addition & 1 deletion backdrop/admin/templates/error.html
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<div class="span12 background-image">
<h1 class="masthead">Error</h1>
<p>{{name}}</p>
<p>{{bucket_name}}</p>
<p>{{data_set_name}}</p>
</div>
</div>
{% endblock %}
18 changes: 9 additions & 9 deletions backdrop/admin/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,24 @@
<h1 class="masthead">Performance Platform</h1>
<p>Welcome to Backdrop, the data collection API for the Performance Platform.</p>
{% if user_config %}
<h2>Available buckets</h2>
{% if user_config.buckets %}
<h2>Available data_sets</h2>
{% if user_config.data_sets %}
<p>
If you can't see the bucket you want to upload data to in this list,
If you can't see the data_set you want to upload data to in this list,
<a href="mailto:performance-platform@digital.cabinet-office.gov.uk">contact the Performance Platform team</a>.
</p>
<ul id="bucket-list">
{% for bucket_name in user_config.buckets %}
<ul id="data-set-list">
{% for data_set_name in user_config.data_sets %}
<li>
<h3>{{ bucket_name }}</h3>
<p><a href="{{ url_for('upload', bucket_name=bucket_name) }}">
Upload a CSV to the {{ bucket_name }} bucket
<h3>{{ data_set_name }}</h3>
<p><a href="{{ url_for('upload', data_set_name=data_set_name) }}">
Upload a CSV to the {{ data_set_name }} data_set
</a></p>
</li>
{% endfor %}
</ul>
{% else %}
<p>You don't have permission to upload to any buckets yet.</p>
<p>You don't have permission to upload to any data_sets yet.</p>
{% endif %}
{% else %}
<a class="btn btn-primary btn-large btn-block" href={{ url_for("oauth_sign_in") }}>Sign in</a>
Expand Down
2 changes: 1 addition & 1 deletion backdrop/admin/templates/upload_error.html
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@
{% block body %}
<h1>There was an error with your upload</h1>
<p>{{ message }}</p>
<p><a href="{{ url_for('upload', bucket_name=bucket_name) }}">Back to upload page</a></p>
<p><a href="{{ url_for('upload', data_set_name=data_set_name) }}">Back to upload page</a></p>
{% endblock %}
4 changes: 2 additions & 2 deletions backdrop/admin/templates/upload_file.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

{% block body %}
<h1>Upload {{ upload_format }} data to the Performance Platform</h1>
<h2>Bucket: {{ bucket_name }}</h2>
<h2>DataSet: {{ data_set_name }}</h2>
<p>You can upload data to Performance Platform from your {{ upload_format }} files. For
each row in a {{ upload_format }} file a document will be added to your bucket. You need
each row in a {{ upload_format }} file a document will be added to your data_set. You need
to make sure they match our expected format (see example below). </p>
<p>Download example {{ upload_format }} file: <a href="{{ url_for('static', filename=example_filename) }}">example.{{ upload_extension }}</a></p>
<form action="upload" method="post" enctype="multipart/form-data">
Expand Down
30 changes: 15 additions & 15 deletions backdrop/core/bucket.py → backdrop/core/data_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
from flask import logging
from backdrop.core import records
from backdrop.core.errors import ValidationError
from backdrop.core.validation import bucket_is_valid
from backdrop.core.validation import data_set_is_valid

import timeutils
import datetime

log = logging.getLogger(__name__)


class Bucket(object):
class DataSet(object):

def __init__(self, db, config):
self.name = config.name
Expand Down Expand Up @@ -75,34 +75,34 @@ def _generate_id(self, datum):
return b64encode(".".join([datum[key] for key in self.auto_id_keys]))


_BucketConfig = namedtuple(
"_BucketConfig",
_DataSetConfig = namedtuple(
"_DataSetConfig",
"name data_group data_type raw_queries_allowed bearer_token upload_format "
"upload_filters auto_ids queryable realtime capped_size max_age_expected")


class BucketConfig(_BucketConfig):
class DataSetConfig(_DataSetConfig):

def __new__(cls, name, data_group, data_type, raw_queries_allowed=False,
bearer_token=None, upload_format="csv", upload_filters=None,
auto_ids=None, queryable=True, realtime=False,
capped_size=5040, max_age_expected=2678400):
if not bucket_is_valid(name):
raise ValueError("Bucket name is not valid: '{}'".format(name))
if not data_set_is_valid(name):
raise ValueError("DataSet name is not valid: '{}'".format(name))

if upload_filters is None:
upload_filters = [
"backdrop.core.upload.filters.first_sheet_filter"]

return super(BucketConfig, cls).__new__(cls, name, data_group,
data_type,
raw_queries_allowed,
bearer_token, upload_format,
upload_filters, auto_ids,
queryable, realtime,
capped_size, max_age_expected)
return super(DataSetConfig, cls).__new__(cls, name, data_group,
data_type,
raw_queries_allowed,
bearer_token, upload_format,
upload_filters, auto_ids,
queryable, realtime,
capped_size, max_age_expected)

@property
def max_age(self):
""" Set cache-control header length based on type of bucket. """
""" Set cache-control header length based on type of data_set. """
return 120 if self.realtime else 1800
6 changes: 3 additions & 3 deletions backdrop/core/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ def _client_list(self, hosts, port):
def alive(self):
return self._mongo.alive()

def get_repository(self, bucket_name):
return Repository(self.get_collection(bucket_name))
def get_repository(self, data_set_name):
return Repository(self.get_collection(data_set_name))

def get_collection(self, collection_name):
return MongoDriver(self._mongo[self.name][collection_name])
Expand Down Expand Up @@ -146,7 +146,7 @@ def save(self, obj, tries=3):
self._collection.save(obj)
except AutoReconnect:
logging.warning("AutoReconnect on save")
statsd.incr("db.AutoReconnect", bucket=self._collection.name)
statsd.incr("db.AutoReconnect", data_set=self._collection.name)
if tries > 1:
self.save(obj, tries - 1)
else:
Expand Down
6 changes: 3 additions & 3 deletions backdrop/core/flaskutils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from werkzeug.routing import BaseConverter, ValidationError
from backdrop.core.validation import bucket_is_valid
from backdrop.core.validation import data_set_is_valid


class BucketConverter(BaseConverter):
class DataSetConverter(BaseConverter):
def to_python(self, value):
if not bucket_is_valid(value):
if not data_set_is_valid(value):
raise ValidationError()
return value

0 comments on commit f8aac8d

Please sign in to comment.