This repository has been archived by the owner on Mar 24, 2021. It is now read-only.

@wip commit of renaming
have some idea what can go wrong from this
jcbashdown committed Apr 17, 2014
1 parent 8d652a1 commit 69bcd08
Showing 73 changed files with 698 additions and 698 deletions.
10 changes: 5 additions & 5 deletions README.md
@@ -8,12 +8,12 @@

Backdrop is a datastore built with Python and MongoDB. It is made up of two separately deployable APIs for reading and writing data over HTTP. The plan is to be able to gather data from a variety of sources and then aggregate and compare this data in useful ways.

-- Data is grouped into buckets.
+- Data is grouped into data_sets.
- Data is stored by posting json to the write api.
- Certain types of data are identified by reserved keys. ie events are objects containing a timestamp.
- Reserved keys start with an underscore. eg `{ "_timestamp": "2013-01-01T00:00:00Z" }`
- Data is retrieved using http query strings on the read api.
-- Data can be retrieved in a few useful ways. eg `/<name_of_my_bucket>?period=month` for monthly grouped data.
+- Data can be retrieved in a few useful ways. eg `/<name_of_my_data_set>?period=month` for monthly grouped data.
- Backdrop is in constant development, the best place to find examples and features are [the feature tests](https://github.com/alphagov/backdrop/tree/master/features)

## Getting set up
@@ -64,14 +64,14 @@ This is the OAuth flow we are using to authenticate users with Signonotron2

Requests return a JSON object containing a `data` array.

-`GET /bucket_name` will return an array of data. Each element is an object.
+`GET /data_set_name` will return an array of data. Each element is an object.

-`GET /bucket_name?collect=score&group_by=name` will return an array. In this
+`GET /data_set_name?collect=score&group_by=name` will return an array. In this
case, each element of the array is an object containing a `name` value, a
`score` array with the scores for that name and a `_count` value with the
number of scores.

-`GET /bucket_name?filter_by=name:Foo` returns all elements with `name` equal to "Foo".
+`GET /data_set_name?filter_by=name:Foo` returns all elements with `name` equal to "Foo".

Other parameters:

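To make the read-API examples above concrete, here is a hedged sketch using Python's requests library; the host and data set name are illustrative, not taken from the repository:

```python
# Illustrative queries against the Backdrop read API described above.
import requests

BASE_URL = "https://backdrop.example.gov.uk"  # hypothetical host
DATA_SET = "my_data_set"                      # hypothetical data set name

# Monthly grouped data: GET /<data_set>?period=month
monthly = requests.get("%s/%s" % (BASE_URL, DATA_SET),
                       params={"period": "month"}).json()

# Group by name and collect scores; each element carries `name`,
# a `score` array and a `_count` of the scores.
grouped = requests.get("%s/%s" % (BASE_URL, DATA_SET),
                       params={"collect": "score", "group_by": "name"}).json()

# Only elements whose `name` equals "Foo".
foos = requests.get("%s/%s" % (BASE_URL, DATA_SET),
                    params={"filter_by": "name:Foo"}).json()

print(monthly["data"], grouped["data"], foos["data"])
```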
6 changes: 3 additions & 3 deletions backdrop/__init__.py
@@ -6,15 +6,15 @@


class StatsClient(object):
"""Wrap statsd.StatsClient to allow bucket to be added to stat"""
"""Wrap statsd.StatsClient to allow data_set to be added to stat"""
def __init__(self, statsd):
self._statsd = statsd

def __getattr__(self, item):
if item in ['timer', 'timing', 'incr', 'decr', 'gauge']:
def func(stat, *args, **kwargs):
bucket = kwargs.pop('bucket', 'unknown')
stat = '%s.%s' % (bucket, stat)
data_set = kwargs.pop('data_set', 'unknown')
stat = '%s.%s' % (data_set, stat)

return getattr(self._statsd, item)(stat, *args, **kwargs)
return func
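A sketch of what the wrapper above buys you: the `data_set` keyword is popped off and prefixed onto the stat name before delegating to the wrapped statsd client. The connection details are illustrative:

```python
# Illustrative use of the StatsClient wrapper above.
import statsd  # the python statsd package wrapped by this class

from backdrop import StatsClient  # assuming the class is importable from the package

client = StatsClient(statsd.StatsClient('localhost', 8125))  # illustrative host/port

# Recorded as "my_data_set.write.error" on the underlying client.
client.incr("write.error", data_set="my_data_set")

# Without a data_set keyword the stat falls back to "unknown.write.error".
client.incr("write.error")
```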
38 changes: 19 additions & 19 deletions backdrop/admin/app.py
@@ -6,7 +6,7 @@

from .. import statsd
from ..core import cache_control, log_handler, database
-from ..core.bucket import Bucket
+from ..core.data_set import Bucket
from ..core.errors import ParseError, ValidationError
from ..core.repository \
    import BucketConfigRepository, UserConfigRepository
@@ -25,15 +25,15 @@

log_handler.set_up_logging(app, GOVUK_ENV)

app.url_map.converters["bucket"] = BucketConverter
app.url_map.converters["data_set"] = BucketConverter

db = database.Database(
    app.config['MONGO_HOSTS'],
    app.config['MONGO_PORT'],
    app.config['DATABASE_NAME']
)

-bucket_repository = BucketConfigRepository(
+data_set_repository = BucketConfigRepository(
    app.config['STAGECRAFT_URL'],
    app.config['STAGECRAFT_DATA_SET_QUERY_TOKEN'])

@@ -63,13 +63,13 @@ def old_index():
def exception_handler(e):
    app.logger.exception(e)

-    bucket_name = getattr(e, 'bucket_name', request.path)
-    statsd.incr("write.error", bucket=bucket_name)
+    data_set_name = getattr(e, 'data_set_name', request.path)
+    statsd.incr("write.error", data_set=data_set_name)

    code = getattr(e, 'code', 500)
    name = getattr(e, 'name', 'Internal Error')

-    return render_template("error.html", name=name, bucket_name=bucket_name), \
+    return render_template("error.html", name=name, data_set_name=data_set_name), \
        code


@@ -95,7 +95,7 @@ def prevent_clickjacking(response):
def index():
    """
    This representation is private to the logged-in user
-    (with their own buckets)
+    (with their own data_sets)
    """
    user_email = session.get('user', {}).get('email')
    if user_email:
@@ -181,39 +181,39 @@ def oauth_sign_out():
        oauth_base_url=app.config['OAUTH_BASE_URL'])


-@app.route('/<bucket:bucket_name>/upload', methods=['GET', 'POST'])
+@app.route('/<data_set:data_set_name>/upload', methods=['GET', 'POST'])
@protected
@cache_control.set("private, must-revalidate")
-def upload(bucket_name):
-    bucket_config = bucket_repository.retrieve(bucket_name)
+def upload(data_set_name):
+    data_set_config = data_set_repository.retrieve(data_set_name)
    user_config = user_repository.retrieve(
        session.get("user").get("email"))

-    if bucket_name not in user_config.buckets:
+    if data_set_name not in user_config.data_sets:
        return abort(404)

    if request.method == 'GET':
        return render_template(
-            "upload_{}.html".format(bucket_config.upload_format),
-            bucket_name=bucket_name)
+            "upload_{}.html".format(data_set_config.upload_format),
+            data_set_name=data_set_name)

-    return _store_data(bucket_config)
+    return _store_data(data_set_config)


-def _store_data(bucket_config):
-    parse_file = create_parser(bucket_config)
-    bucket = Bucket(db, bucket_config)
+def _store_data(data_set_config):
+    parse_file = create_parser(data_set_config)
+    data_set = Bucket(db, data_set_config)
    expected_errors = (FileUploadError, ParseError, ValidationError)

    try:
        with UploadedFile(request.files['file']) as uploaded_file:
            raw_data = parse_file(uploaded_file.file_stream())
-            bucket.parse_and_store(raw_data)
+            data_set.parse_and_store(raw_data)
    except expected_errors as e:
        app.logger.error('Upload error: {}'.format(e.message))
        return render_template('upload_error.html',
                               message=e.message,
-                               bucket_name=bucket.name), 400
+                               data_set_name=data_set.name), 400

    return render_template('upload_ok.html')

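A hedged sketch of driving the upload route above with Flask's test client; the signed-in user and data set name are made up, and a real run would also need the Stagecraft-backed repositories to answer:

```python
# Illustrative only: exercise /<data_set_name>/upload with Flask's test client.
import io

from backdrop.admin.app import app  # assuming this module path

with app.test_client() as client:
    # @protected expects a signed-in user in the session.
    with client.session_transaction() as session:
        session['user'] = {'email': 'user@example.com'}  # hypothetical user

    response = client.post(
        '/my_data_set/upload',  # hypothetical data set name
        data={'file': (io.BytesIO(b'_timestamp,value\n'), 'data.csv')},
        content_type='multipart/form-data',
    )
    # 400 renders upload_error.html; success renders upload_ok.html.
    print(response.status_code)
```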
2 changes: 1 addition & 1 deletion backdrop/admin/static/backdrop.css
@@ -48,7 +48,7 @@ form div {
list-style-type: disc;
}

-#bucket-list {
+#data_set-list {
list-style-type: none;
margin-left: 0;
}
2 changes: 1 addition & 1 deletion backdrop/admin/templates/error.html
@@ -5,7 +5,7 @@
<div class="span12 background-image">
<h1 class="masthead">Error</h1>
<p>{{name}}</p>
-<p>{{bucket_name}}</p>
+<p>{{data_set_name}}</p>
</div>
</div>
{% endblock %}
18 changes: 9 additions & 9 deletions backdrop/admin/templates/index.html
@@ -6,24 +6,24 @@
<h1 class="masthead">Performance Platform</h1>
<p>Welcome to Backdrop, the data collection API for the Performance Platform.</p>
{% if user_config %}
-<h2>Available buckets</h2>
-{% if user_config.buckets %}
+<h2>Available data_sets</h2>
+{% if user_config.data_sets %}
<p>
-If you can't see the bucket you want to upload data to in this list,
+If you can't see the data_set you want to upload data to in this list,
<a href="mailto:performance-platform@digital.cabinet-office.gov.uk">contact the Performance Platform team</a>.
</p>
<ul id="bucket-list">
{% for bucket_name in user_config.buckets %}
<ul id="data_set-list">
{% for data_set_name in user_config.data_sets %}
<li>
-<h3>{{ bucket_name }}</h3>
-<p><a href="{{ url_for('upload', bucket_name=bucket_name) }}">
-Upload a CSV to the {{ bucket_name }} bucket
+<h3>{{ data_set_name }}</h3>
+<p><a href="{{ url_for('upload', data_set_name=data_set_name) }}">
+Upload a CSV to the {{ data_set_name }} data_set
</a></p>
</li>
{% endfor %}
</ul>
{% else %}
-<p>You don't have permission to upload to any buckets yet.</p>
+<p>You don't have permission to upload to any data_sets yet.</p>
{% endif %}
{% else %}
<a class="btn btn-primary btn-large btn-block" href={{ url_for("oauth_sign_in") }}>Sign in</a>
2 changes: 1 addition & 1 deletion backdrop/admin/templates/upload_error.html
@@ -5,5 +5,5 @@
{% block body %}
<h1>There was an error with your upload</h1>
<p>{{ message }}</p>
<p><a href="{{ url_for('upload', bucket_name=bucket_name) }}">Back to upload page</a></p>
<p><a href="{{ url_for('upload', data_set_name=data_set_name) }}">Back to upload page</a></p>
{% endblock %}
4 changes: 2 additions & 2 deletions backdrop/admin/templates/upload_file.html
@@ -4,9 +4,9 @@

{% block body %}
<h1>Upload {{ upload_format }} data to the Performance Platform</h1>
-<h2>Bucket: {{ bucket_name }}</h2>
+<h2>Bucket: {{ data_set_name }}</h2>
<p>You can upload data to Performance Platform from your {{ upload_format }} files. For
-each row in a {{ upload_format }} file a document will be added to your bucket. You need
+each row in a {{ upload_format }} file a document will be added to your data_set. You need
to make sure they match our expected format (see example below). </p>
<p>Download example {{ upload_format }} file: <a href="{{ url_for('static', filename=example_filename) }}">example.{{ upload_extension }}</a></p>
<form action="upload" method="post" enctype="multipart/form-data">
6 changes: 3 additions & 3 deletions backdrop/core/bucket.py
@@ -3,7 +3,7 @@
from flask import logging
from backdrop.core import records
from backdrop.core.errors import ValidationError
-from backdrop.core.validation import bucket_is_valid
+from backdrop.core.validation import data_set_is_valid

import timeutils
import datetime
@@ -87,7 +87,7 @@ def __new__(cls, name, data_group, data_type, raw_queries_allowed=False,
                bearer_token=None, upload_format="csv", upload_filters=None,
                auto_ids=None, queryable=True, realtime=False,
                capped_size=5040, max_age_expected=2678400):
-        if not bucket_is_valid(name):
+        if not data_set_is_valid(name):
            raise ValueError("Bucket name is not valid: '{}'".format(name))

        if upload_filters is None:
@@ -104,5 +104,5 @@ def __new__(cls, name, data_group, data_type, raw_queries_allowed=False,

    @property
    def max_age(self):
-        """ Set cache-control header length based on type of bucket. """
+        """ Set cache-control header length based on type of data_set. """
        return 120 if self.realtime else 1800
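Grounded in the `__new__` signature above, a small sketch of constructing a config and reading `max_age`; the field values are illustrative, and the import path follows the renamed module used elsewhere in this commit:

```python
# Illustrative construction of a BucketConfig (mid-rename naming and all).
from backdrop.core.data_set import BucketConfig

config = BucketConfig(
    "my_data_set",               # name: must pass data_set_is_valid()
    data_group="transactions",   # illustrative
    data_type="volumetrics",     # illustrative
    realtime=False,
)
print(config.max_age)  # 1800; a realtime config would report 120

# A name starting with an underscore is internal, so validation rejects it:
BucketConfig("_secret", data_group="g", data_type="t")
# ValueError: Bucket name is not valid: '_secret'
```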
6 changes: 3 additions & 3 deletions backdrop/core/database.py
@@ -45,8 +45,8 @@ def _client_list(self, hosts, port):
    def alive(self):
        return self._mongo.alive()

-    def get_repository(self, bucket_name):
-        return Repository(self.get_collection(bucket_name))
+    def get_repository(self, data_set_name):
+        return Repository(self.get_collection(data_set_name))

    def get_collection(self, collection_name):
        return MongoDriver(self._mongo[self.name][collection_name])
@@ -146,7 +146,7 @@ def save(self, obj, tries=3):
            self._collection.save(obj)
        except AutoReconnect:
            logging.warning("AutoReconnect on save")
-            statsd.incr("db.AutoReconnect", bucket=self._collection.name)
+            statsd.incr("db.AutoReconnect", data_set=self._collection.name)
            if tries > 1:
                self.save(obj, tries - 1)
            else:
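For context, a minimal wiring sketch of the `get_repository` call above; the connection details are illustrative:

```python
# Illustrative: a data set name maps straight onto a Mongo collection.
from backdrop.core import database

db = database.Database(['localhost'], 27017, 'backdrop')  # hosts, port, db name
repository = db.get_repository('my_data_set')  # Repository over that collection
```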
4 changes: 2 additions & 2 deletions backdrop/core/flaskutils.py
@@ -1,9 +1,9 @@
from werkzeug.routing import BaseConverter, ValidationError
-from backdrop.core.validation import bucket_is_valid
+from backdrop.core.validation import data_set_is_valid


class BucketConverter(BaseConverter):
    def to_python(self, value):
-        if not bucket_is_valid(value):
+        if not data_set_is_valid(value):
            raise ValidationError()
        return value
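Putting the converter together with the registration shown in `backdrop/admin/app.py`, a minimal self-contained sketch (the app and route are illustrative):

```python
# Illustrative: invalid names never reach the view; werkzeug answers 404.
from flask import Flask

from backdrop.core.flaskutils import BucketConverter

app = Flask(__name__)
app.url_map.converters["data_set"] = BucketConverter


@app.route('/<data_set:data_set_name>')
def show(data_set_name):
    # Only names passing data_set_is_valid() match this rule;
    # e.g. "_internal" fails and falls through to a 404.
    return data_set_name
```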
14 changes: 7 additions & 7 deletions backdrop/core/repository.py
@@ -4,7 +4,7 @@

import requests

-from backdrop.core.bucket import BucketConfig
+from backdrop.core.data_set import BucketConfig
from backdrop.core.user import UserConfig

logger = logging.getLogger(__name__)
@@ -40,7 +40,7 @@ def find_first_instance_of(self, params):
        return self._create_model(doc)

    def get_all(self):
-        # Return a list of all bucket config instances
+        # Return a list of all data_set config instances
        return [self._create_model(doc) for doc in self.collection.find()]

    def _create_model(self, doc):
@@ -61,7 +61,7 @@ def get_all(self):
        json_response = _get_json_url(data_set_url, self._stagecraft_token)
        data_sets = _decode_json(json_response)

-        return [_make_bucket_config(data_set) for data_set in data_sets]
+        return [_make_data_set_config(data_set) for data_set in data_sets]

    def retrieve(self, name):
        if len(name) == 0:
@@ -78,9 +78,9 @@ def retrieve(self, name):
        else:
            raise

-        return _make_bucket_config(_decode_json(json_response))
+        return _make_data_set_config(_decode_json(json_response))

-    def get_bucket_for_query(self, data_group, data_type):
+    def get_data_set_for_query(self, data_group, data_type):
        empty_vars = []
        if len(data_group) == 0:
            empty_vars += ['Data Group']
@@ -99,12 +99,12 @@ def get_bucket_for_query(self, data_group, data_type):

        data_sets = _decode_json(json_response)
        if len(data_sets) > 0:
-            return _make_bucket_config(data_sets[0])
+            return _make_data_set_config(data_sets[0])

        return None


-def _make_bucket_config(stagecraft_dict):
+def _make_data_set_config(stagecraft_dict):
    if stagecraft_dict is None:
        return None
    return BucketConfig(**stagecraft_dict)
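A sketch of how the repository is consulted, mirroring the constructor arguments from `backdrop/admin/app.py`; the URL and token are illustrative:

```python
# Illustrative lookups against the Stagecraft-backed config repository.
from backdrop.core.repository import BucketConfigRepository

repository = BucketConfigRepository(
    'https://stagecraft.example.gov.uk',  # illustrative STAGECRAFT_URL
    'an-illustrative-query-token')        # illustrative query token

config = repository.retrieve('my_data_set')  # one config by name
by_query = repository.get_data_set_for_query('transactions', 'volumetrics')
all_configs = repository.get_all()           # every data set config
```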
6 changes: 3 additions & 3 deletions backdrop/core/upload/__init__.py
@@ -3,9 +3,9 @@
from .parse_excel import parse_excel


-def create_parser(bucket_config):
-    format_parser = load_format_parser(bucket_config.upload_format)
-    upload_filters = map(load_filter, bucket_config.upload_filters)
+def create_parser(data_set_config):
+    format_parser = load_format_parser(data_set_config.upload_format)
+    upload_filters = map(load_filter, data_set_config.upload_filters)

    def parser(file_stream):
        data = format_parser(file_stream)
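The hunk cuts off before `parser` finishes. Presumably the upload filters are applied in turn to the parsed data; a hedged sketch of that shape, not the verbatim continuation:

```python
# Assumed continuation of create_parser: fold each upload filter over
# the rows produced by the format parser.
def create_parser(data_set_config):
    format_parser = load_format_parser(data_set_config.upload_format)
    upload_filters = map(load_filter, data_set_config.upload_filters)

    def parser(file_stream):
        data = format_parser(file_stream)
        for upload_filter in upload_filters:
            data = upload_filter(data)  # each filter transforms the data
        return data

    return parser
```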
20 changes: 10 additions & 10 deletions backdrop/core/user.py
@@ -1,25 +1,25 @@
from collections import namedtuple


-def _bucket_list_is_valid(buckets):
-    if not isinstance(buckets, list):
+def _data_set_list_is_valid(data_sets):
+    if not isinstance(data_sets, list):
        return False

    is_string = lambda value: isinstance(value, basestring)

-    return all(map(is_string, buckets))
+    return all(map(is_string, data_sets))


_UserConfig = namedtuple(
    "_UserConfig",
-    "email buckets")
+    "email data_sets")


class UserConfig(_UserConfig):
-    def __new__(cls, email, buckets=None):
-        if buckets is None:
-            buckets = []
-        elif not _bucket_list_is_valid(buckets):
-            raise ValueError("buckets must be a list of bucket names")
+    def __new__(cls, email, data_sets=None):
+        if data_sets is None:
+            data_sets = []
+        elif not _data_set_list_is_valid(data_sets):
+            raise ValueError("data_sets must be a list of data_set names")

-        return super(UserConfig, cls).__new__(cls, email, buckets)
+        return super(UserConfig, cls).__new__(cls, email, data_sets)
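Grounded in the constructor above, a quick sketch of the validation behaviour; the email address is illustrative:

```python
# Illustrative: UserConfig validates its data_sets argument.
from backdrop.core.user import UserConfig

ok = UserConfig("user@example.com", data_sets=["my_data_set"])
print(ok.data_sets)   # ['my_data_set']

empty = UserConfig("user@example.com")  # data_sets defaults to []

UserConfig("user@example.com", data_sets="my_data_set")
# ValueError: data_sets must be a list of data_set names (a string is not a list)
```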
4 changes: 2 additions & 2 deletions backdrop/core/validation.py
@@ -63,8 +63,8 @@ def key_is_internal(key):
    return key.startswith('_')


-def bucket_is_valid(bucket_name):
-    if key_is_valid(bucket_name) and not key_is_internal(bucket_name):
+def data_set_is_valid(data_set_name):
+    if key_is_valid(data_set_name) and not key_is_internal(data_set_name):
        return True
    return False


0 comments on commit 69bcd08
