/
calculate.py
73 lines (58 loc) · 2.76 KB
/
calculate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from __future__ import division
from pprint import pprint
from googleapiclient import discovery
from oauth2client.client import GoogleCredentials
from gcloud import GenomicsOperation, OperationCostCalculator
from cromwell import Metadata
from collections import defaultdict
import json
import sys
import math
import argparse
class CromwellCostCalculator(object):
def __init__(self, pricelist):
credentials = GoogleCredentials.get_application_default()
self.service = discovery.build('genomics', 'v1', credentials=credentials)
self.calculator = OperationCostCalculator(pricelist)
def get_operation_metadata(self, name):
request = self.service.operations().get(name=name)
response = request.execute()
return response
@staticmethod
def dollars(raw_cost):
return math.ceil(raw_cost * 100) / 100
def calculate_cost(self, metadata_json):
metadata = Metadata(metadata_json)
total_cost = 0
max_samples = -1
summary_json = { 'tasks': [], 'total_cost': None, 'cost_per_shard': None }
for task, executions in metadata.calls().iteritems():
task_totals = defaultdict(int)
for e in executions:
if e.jobid() is None: continue
op = GenomicsOperation(self.get_operation_metadata(e.jobid()))
print 'operation: {}'.format(op)
task_totals[e.shard()] = task_totals[e.shard()] + self.dollars(self.calculator.cost(op))
total_cost += self.dollars(self.calculator.cost(op))
summary_json['tasks'].append({
'name': task,
'shards': len(task_totals),
'cost_per_shard': self.dollars(sum(task_totals.values())/len(task_totals)) if len(task_totals) != 0 else 0,
'total_cost': self.dollars(sum(task_totals.values()))
})
max_samples = max(max_samples, len(task_totals))
summary_json['total_cost'] = total_cost
summary_json['cost_per_shard'] = total_cost / max_samples
return summary_json
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('pricelist', type=argparse.FileType('r'), help='pricelist.json from Google containing cost information')
parser.add_argument('metadata', type=argparse.FileType('r'), help='metadata from a cromwell workflow from which to estimate cost')
args = parser.parse_args()
metadata = json.load(args.metadata)
pricelist = json.load(args.pricelist)
calc = CromwellCostCalculator(pricelist)
cost = calc.calculate_cost(metadata)
print json.dumps(cost, sort_keys=True, indent=4)
print 'Total: ${0}'.format(cost['total_cost'])
print 'Per Shard: ${0}'.format(cost['cost_per_shard'])