Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Switch database from Postgres to DynamoDB
The dataset is growing, so let's try using DynamoDB. For now, we're using both boto (for DynamoDB operations) and botocore (for querying spot price data). Eventually, we'll move everything to botocore and possibly use the pynamodb interface.

Also:

* Start collecting data for all EC2 "products":
  * Linux/UNIX
  * Linux/UNIX (Amazon VPC)
  * SUSE Linux
  * SUSE Linux (Amazon VPC)
  * Windows
  * Windows (Amazon VPC)

  Previously, we only collected data for Linux/UNIX.
* Handle next_token properly to pull spot price data that spans more than one page.
* Save the "end_time" timestamp of each run, so we know where to start from next time.
* Update to latest bootstrap, jquery, and nvd3.
* Tweak timestamp on graph X axis to include HH:mm.

Performance-wise, the app still feels about the same as when loading data from Postgres. Possibly it's a little bit slower. This can be tuned in a future change. We might even use memcache to reduce the number of DynamoDB operations.
- Loading branch information
Showing changed files with 28,967 additions and 25,579 deletions.
- +3 −0 .gitignore
- +11 −16 README.md
- +16 −73 ec2price/app.py
- +102 −62 ec2price/collector.py
- +76 −0 ec2price/model.py
- +28 −0 ec2price/static/css/base.css
- +1 −1 ec2price/static/js/main.js
- +0 −1,109 ec2price/static/vendor/bootstrap-2.3.0/css/bootstrap-responsive.css
- +0 −9 ec2price/static/vendor/bootstrap-2.3.0/css/bootstrap-responsive.min.css
- +0 −6,158 ec2price/static/vendor/bootstrap-2.3.0/css/bootstrap.css
- +0 −9 ec2price/static/vendor/bootstrap-2.3.0/css/bootstrap.min.css
- BIN ec2price/static/vendor/bootstrap-2.3.0/img/glyphicons-halflings-white.png
- BIN ec2price/static/vendor/bootstrap-2.3.0/img/glyphicons-halflings.png
- +0 −2,268 ec2price/static/vendor/bootstrap-2.3.0/js/bootstrap.js
- +0 −6 ec2price/static/vendor/bootstrap-2.3.0/js/bootstrap.min.js
- +347 −0 ec2price/static/vendor/bootstrap-3.1.1/css/bootstrap-theme.css
- +1 −0 ec2price/static/vendor/bootstrap-3.1.1/css/bootstrap-theme.css.map
- +5,785 −0 ec2price/static/vendor/bootstrap-3.1.1/css/bootstrap.css
- +1 −0 ec2price/static/vendor/bootstrap-3.1.1/css/bootstrap.css.map
- BIN ec2price/static/vendor/bootstrap-3.1.1/fonts/glyphicons-halflings-regular.eot
- +229 −0 ec2price/static/vendor/bootstrap-3.1.1/fonts/glyphicons-halflings-regular.svg
- BIN ec2price/static/vendor/bootstrap-3.1.1/fonts/glyphicons-halflings-regular.ttf
- BIN ec2price/static/vendor/bootstrap-3.1.1/fonts/glyphicons-halflings-regular.woff
- +1,951 −0 ec2price/static/vendor/bootstrap-3.1.1/js/bootstrap.js
- +0 −4 ec2price/static/vendor/d3-3.0.6/js/d3.min.js
- +6,960 −6,314 ec2price/static/vendor/{d3-3.0.6/js/d3.js → d3-3.1.5/js/d3.v3.js}
- +8,202 −7,462 ec2price/static/vendor/{jquery-1.9.1 → jquery-1.11.0}/js/jquery.js
- +0 −5 ec2price/static/vendor/jquery-1.9.1/js/jquery.min.js
- +0 −5 ec2price/static/vendor/nvd3-0.0.1a/js/nv.d3.min.js
- +151 −38 ec2price/static/vendor/{nvd3-0.0.1a → nvd3-1.1.15-beta}/css/nv.d3.css
- +4,991 −1,967 ec2price/static/vendor/{nvd3-0.0.1a → nvd3-1.1.15-beta}/js/nv.d3.js
- +6 −8 ec2price/templates/base.html
- +21 −7 ec2price/templates/main.html
- +51 −48 ec2price/web.py
- +7 −9 requirements.txt
- +1 −1 runtime.txt
- +26 −0 webassets.yml
@@ -1,80 +1,120 @@ | ||
""" | ||
Data collector | ||
""" | ||
import botocore.session | ||
|
||
import contextlib | ||
import datetime | ||
import decimal | ||
import logging | ||
import uuid | ||
|
||
import arrow | ||
import botocore.session | ||
|
||
_FMT = '%Y-%m-%dT%H:%M:%S.000Z' | ||
|
||
_SELECT_SPOT_PRICE = """ | ||
select price | ||
from spot_prices, availability_zones, instance_types | ||
where spot_prices.availability_zone_id = availability_zones.id | ||
and availability_zones.api_name = %s | ||
and spot_prices.instance_type_id = instance_types.id | ||
and instance_types.api_name = %s | ||
and spot_prices.ts = %s | ||
limit 1 | ||
""" | ||
_INSERT_SPOT_PRICE = """ | ||
with a as (select id from availability_zones where api_name = %s), | ||
i as (select id from instance_types where api_name = %s) | ||
insert into spot_prices (id, availability_zone_id, instance_type_id, ts, price) | ||
select %s, a.id, i.id, %s, %s | ||
from a, i | ||
""" | ||
_SELECT_INSTANCE_TYPES = """ | ||
select api_name | ||
from instance_types | ||
order by api_name | ||
""" | ||
_EXCLUDED_REGION_PREFIXES = ['cn-', 'us-gov-'] | ||
_FMT = 'YYYY-MM-DDTHH:mm:ss.000Z' | ||
|
||
|
||
logging.getLogger('boto').setLevel(logging.WARN) | ||
logging.getLogger('botocore').setLevel(logging.WARN) | ||
logging.getLogger('requests.packages.urllib3').setLevel(logging.WARN) | ||
|
||
|
||
def collect(db_conn, hours): | ||
def collect(model, hours): | ||
row = model.progress.get_item(name='end_time') | ||
start_time = arrow.get(row['timestamp']) | ||
#logging.debug('window: past %s hours', hours) | ||
#start_time = arrow.utcnow().replace(hours=-hours) | ||
logging.debug('start time: %s', start_time) | ||
|
||
end_time = arrow.utcnow() | ||
logging.debug('end time: %s', end_time) | ||
|
||
all_regions = set() | ||
all_product_descriptions = set() | ||
all_instance_types = set() | ||
all_instance_zones = set() | ||
|
||
session = botocore.session.get_session() | ||
ec2 = session.get_service('ec2') | ||
operation = ec2.get_operation('DescribeSpotPriceHistory') | ||
|
||
d = datetime.datetime.utcnow() - datetime.timedelta(hours=hours) | ||
start_time = d.strftime(_FMT) | ||
for region in ec2.region_names: | ||
if any(region.startswith(x) for x in _EXCLUDED_REGION_PREFIXES): | ||
continue | ||
all_regions.add(region) | ||
|
||
with contextlib.closing(db_conn.cursor()) as cursor: | ||
cursor.execute(_SELECT_INSTANCE_TYPES) | ||
rows = cursor.fetchall() | ||
instance_types = [r['api_name'] for r in rows] | ||
next_token = None | ||
while True: | ||
logging.debug('collecting spot prices from region: %s', region) | ||
endpoint = ec2.get_endpoint(region) | ||
if next_token: | ||
response, data = operation.call( | ||
endpoint, | ||
start_time=start_time.format(_FMT), | ||
end_time=end_time.format(_FMT), | ||
next_token=next_token, | ||
) | ||
else: | ||
response, data = operation.call( | ||
endpoint, | ||
start_time=start_time.format(_FMT), | ||
) | ||
next_token = data.get('NextToken') | ||
logging.debug('next_token: %s', next_token) | ||
spot_data = data.get('SpotPriceHistory', []) | ||
|
||
for region in ec2.region_names: | ||
logging.debug('collecting spot prices from region: %s', region) | ||
endpoint = ec2.get_endpoint(region) | ||
response, data = operation.call( | ||
endpoint, | ||
instance_types=instance_types, | ||
product_descriptions=['Linux/UNIX'], | ||
start_time=start_time, | ||
) | ||
for i in data.get('spotPriceHistorySet', []): | ||
with contextlib.closing(db_conn.cursor()) as cursor: | ||
cursor.execute(_SELECT_SPOT_PRICE, [ | ||
i['availabilityZone'], | ||
i['instanceType'], | ||
i['timestamp'], | ||
]) | ||
row = cursor.fetchone() | ||
if not row: | ||
logging.debug('inserting spot price: %s', i) | ||
cursor.execute(_INSERT_SPOT_PRICE, [ | ||
i['availabilityZone'], | ||
i['instanceType'], | ||
uuid.uuid4(), | ||
i['timestamp'], | ||
i['spotPrice'], | ||
]) | ||
db_conn.commit() | ||
#conn = boto.ec2.connect_to_region(r.name) | ||
#logging.debug('getting spot prices for region: %s', r.name) | ||
#data = conn.get_spot_price_history(start_time=start_time) | ||
|
||
logging.debug('saving %d spot prices for region: %s', | ||
len(spot_data), region) | ||
with model.spot_prices.batch_write() as batch: | ||
for d in spot_data: | ||
all_product_descriptions.add(d['ProductDescription']) | ||
all_instance_types.add(d['InstanceType']) | ||
all_instance_zones.add(( | ||
d['ProductDescription'], | ||
d['InstanceType'], | ||
d['AvailabilityZone'], | ||
)) | ||
batch.put_item(data={ | ||
'instance_zone_id': ':'.join([ | ||
d['ProductDescription'], | ||
d['InstanceType'], | ||
d['AvailabilityZone'], | ||
]), | ||
'timestamp': arrow.get(d['Timestamp']).timestamp, | ||
'price': decimal.Decimal(str(d['SpotPrice'])), | ||
}) | ||
if not next_token: | ||
break | ||
|
||
logging.debug('saving %d regions', len(all_regions)) | ||
with model.regions.batch_write() as batch: | ||
for i in all_regions: | ||
batch.put_item(data={'region': i}) | ||
|
||
logging.debug('saving %d product_descriptions', | ||
len(all_product_descriptions)) | ||
with model.product_descriptions.batch_write() as batch: | ||
for i in all_product_descriptions: | ||
batch.put_item(data={'product_description': i}) | ||
|
||
logging.debug('saving %d instance_types', len(all_instance_types)) | ||
with model.instance_types.batch_write() as batch: | ||
for i in all_instance_types: | ||
batch.put_item(data={'instance_type': i}) | ||
|
||
logging.debug('saving %d instance_zones', len(all_instance_zones)) | ||
with model.instance_zones.batch_write() as batch: | ||
for i in all_instance_zones: | ||
batch.put_item(data={ | ||
'instance_id': ':'.join([i[0], i[1]]), | ||
'zone': i[2], | ||
}) | ||
|
||
logging.debug('saving end_time') | ||
with model.progress.batch_write() as batch: | ||
batch.put_item(data={ | ||
'name': 'end_time', | ||
'timestamp': end_time.timestamp, | ||
}) |
Oops, something went wrong.