In [15]:
import json
import re

In [5]:
with open('./metrics.json') as jsonFile:
  data = json.load(jsonFile)

{'service': [{'name': 'actions',
   'launchStates': 'Metrics from Google Assistant Smart Home. Launch stages of these metrics: ALPHA BETA',
   'description': 'The "metric type" strings in this table must be prefixed with actions.googleapis.com/. That prefix has been omitted from the entries in the table.',
   'serviceIndex': 0},
  {'name': 'aiplatform',
   'launchStates': 'Metrics from Vertex AI. Launch stages of these metrics: BETA',
   'description': 'The "metric type" strings in this table must be prefixed with aiplatform.googleapis.com/. That prefix has been omitted from the entries in the table.',
   'serviceIndex': 1},
  {'name': 'alloydb',
   'launchStates': 'Metrics from AlloyDB for PostgreSQL. Launch stages of these metrics: BETA GA',
   'description': 'The "metric type" strings in this table must be prefixed with alloydb.googleapis.com/. That prefix has been omitted from the entries in the table.',
   'serviceIndex': 2},
  {'name': 'apigateway',
   'launchStates': 'Metrics fr

In [9]:
serviceData = data['service']

In [10]:
serviceDatum = serviceData[0]
serviceDatum

{'name': 'actions',
 'launchStates': 'Metrics from Google Assistant Smart Home. Launch stages of these metrics: ALPHA BETA',
 'description': 'The "metric type" strings in this table must be prefixed with actions.googleapis.com/. That prefix has been omitted from the entries in the table.',
 'serviceIndex': 0}

In [21]:
launchParts = serviceDatum['launchStates'].split(' Launch stages of these metrics: ')
launchParts

['Metrics from Google Assistant Smart Home.', 'ALPHA BETA']

In [22]:
source = re.match('Metrics from (.*)\.', launchParts[0])
source.group(1)

'Google Assistant Smart Home'

In [65]:

reorgedData = []
failedItems = []
for serviceDatum in serviceData:
    print(serviceDatum)

    try:

        lauchStates = serviceDatum['launchStates']
        launchParts = lauchStates.split(' Launch stages of these metrics: ')
        launchStages = launchParts[1]
        sourceMatch = re.match('Metrics from (.*)\.?', launchParts[0])
        source = sourceMatch.group(1)
        prefixMatch = re.match('The "metric type" strings in this table must be prefixed with ([a-z\.]*)/. That prefix has been omitted from the entries in the table.', serviceDatum['description'])
        prefix = prefixMatch.group(1)


        if len(launchParts) !=2 or not source or len(launchStages) ==0 or not prefix:
            raise Exception(f'invalid launch states: "{lauchStates}"')
        
    except:
        failedItems.append(serviceDatum)
        continue

    reorgedDatum = {
        'name': serviceDatum['name'],
        'source': source,
        'launchStages': launchStages,
        'prefix': prefix,
        'serviceIndex': serviceDatum['serviceIndex']
    }

    reorgedData.append(reorgedDatum)

{'name': 'actions', 'launchStates': 'Metrics from Google Assistant Smart Home. Launch stages of these metrics: ALPHA BETA', 'description': 'The "metric type" strings in this table must be prefixed with actions.googleapis.com/. That prefix has been omitted from the entries in the table.', 'serviceIndex': 0}
{'name': 'aiplatform', 'launchStates': 'Metrics from Vertex AI. Launch stages of these metrics: BETA', 'description': 'The "metric type" strings in this table must be prefixed with aiplatform.googleapis.com/. That prefix has been omitted from the entries in the table.', 'serviceIndex': 1}
{'name': 'alloydb', 'launchStates': 'Metrics from AlloyDB for PostgreSQL. Launch stages of these metrics: BETA GA', 'description': 'The "metric type" strings in this table must be prefixed with alloydb.googleapis.com/. That prefix has been omitted from the entries in the table.', 'serviceIndex': 2}
{'name': 'apigateway', 'launchStates': 'Metrics from API Gateway. Launch stages of these metrics: BETA

In [66]:
(len(reorgedData), len(failedItems))

(77, 7)

In [67]:
failedItems

[{'name': 'GA',
  'launchStates': '',
  'description': 'The "metric type" strings in this table must be prefixed with bigquery.googleapis.com/. That prefix has been omitted from the entries in the table.',
  'serviceIndex': 8},
 {'name': 'GA',
  'launchStates': '',
  'description': 'The "metric type" strings in this table must be prefixed with cloudtasks.googleapis.com/. That prefix has been omitted from the entries in the table.',
  'serviceIndex': 19},
 {'name': 'GA',
  'launchStates': '',
  'description': 'The "metric type" strings in this table must be prefixed with compute.googleapis.com/. That prefix has been omitted from the entries in the table.',
  'serviceIndex': 22},
 {'name': 'container',
  'launchStates': 'Metrics from Google Kubernetes Engine. For newer Kubernetes metrics, see the kubernetes.io metrics.',
  'description': 'The "metric type" strings in this table must be prefixed with container.googleapis.com/. That prefix has been omitted from the entries in the table.',


In [68]:
correctedItems = [
    {'name': 'bigquery',
     'source': 'BigQuery',
     'launchStages': 'BETA EARLY_ACCESS GA',
     'prefix': 'bigquery.googleapis.com',
     'serviceIndex': 8,
     },
    {'name': 'cloudtasks',
     'source': 'Cloud Tasks (formerly App Engine Task Queue)',
     'launchStages': 'BETA GA',
     'prefix': 'cloudtasks.googleapis.com',
     'serviceIndex': 19,
     },
    {'name': 'compute',
     'source': ' Compute Engine',
     'launchStages': 'ALPHA BETA GA',
     'prefix': 'compute.googleapis.com',
     'serviceIndex': 22,
     },
    {'name': 'container',
     'source': 'Google Kubernetes Engine',
     'launchStages': '',
     'prefix': 'container.googleapis.com',
     'serviceIndex': 24,
     },
    {'name': 'livestream',
     'source': 'Live Stream API',
     'launchStages': 'BETA',
     'prefix': 'livestream.googleapis.com',
     'serviceIndex': 52,
     },
    {'name': 'loadbalancing',
     'source': 'Cloud Load Balancing',
     'launchStages': 'BETA GA',
     'prefix': 'loadbalancing.googleapis.com',
     'serviceIndex': 53,
     },
    {'name': 'serviceruntime',
     'source': 'all Google Cloud APIs (resource type consumed_api) and to your APIs created with Cloud Endpoints (resource type api)',
     'launchStages': '',
     'prefix': 'serviceruntime.googleapis.com',
     'serviceIndex': 72,
     }
]


In [69]:
allReorgedData = reorgedData + correctedItems
allReorgedData.sort(key=lambda x: x['serviceIndex'])
allReorgedData

[{'name': 'actions',
  'source': 'Google Assistant Smart Home.',
  'launchStages': 'ALPHA BETA',
  'prefix': 'actions.googleapis.com',
  'serviceIndex': 0},
 {'name': 'aiplatform',
  'source': 'Vertex AI.',
  'launchStages': 'BETA',
  'prefix': 'aiplatform.googleapis.com',
  'serviceIndex': 1},
 {'name': 'alloydb',
  'source': 'AlloyDB for PostgreSQL.',
  'launchStages': 'BETA GA',
  'prefix': 'alloydb.googleapis.com',
  'serviceIndex': 2},
 {'name': 'apigateway',
  'source': 'API Gateway.',
  'launchStages': 'BETA',
  'prefix': 'apigateway.googleapis.com',
  'serviceIndex': 3},
 {'name': 'apigee',
  'source': 'Apigee.',
  'launchStages': 'ALPHA BETA',
  'prefix': 'apigee.googleapis.com',
  'serviceIndex': 4},
 {'name': 'appengine',
  'source': 'App Engine. Note: If you use App Engine flexible environment in combination with Cloud Load Balancing, then certain metrics in the following table are reported as metrics from the loadbalancing table instead. For more information, see External 

In [73]:
metricsData = data['serviceMetrics']

In [72]:
metricsData[0]

{'metricInfo': {'name': 'smarthome_action/camerastream/first_frame_latencies',
  'launchStatus': 'BETA',
  'displayName': 'First frame latency'},
 'dataInfo': {'kind': 'DELTA',
  'type': 'DISTRIBUTION',
  'unit': 'milliseconds',
  'monitoredResources': 'assistant_action_project',
  'description': 'Latency for first frame to arrive. Sampled every 60 seconds. After sampling, data is not visible for up to 180 seconds.',
  'labels': ['device_type :\n\n              Device type, one of [CAMERA, DOORBELL]',
   'stream_status :\n\n              The canonical code that represents the result, for example, "COMPLETED" or "FAILED".',
   'stream_protocol :\n\n              Underlying protocol used for camerastream, for example, "webrtc" or "hls".',
   'is_battery_powered :\n\n                (BOOL)\n\n              Whether the device is battery powered or not.']},
 'serviceIndex': 0}

In [98]:
def validateMetricDatum(metricsDatum):
    metricInfo=metricsDatum['metricInfo']
    dataInfo = metricsDatum['dataInfo']
    assert len(metricInfo['name'])>0, 'invalid name'
    assert len(metricInfo['displayName'])>0, 'invalid displayName'
    assert len(dataInfo['kind'])>0, 'invalid kind'
    assert len(dataInfo['type'])>0, 'invalid type'
    # assert len(dataInfo['unit'])>0, f'invalid unit "{dataInfo.unit}"'
    # assert len(dataInfo['monitoredResources'])>0, 'invalid monitoredResources'
    assert len(dataInfo['description'])>0, 'invalid description'



validateMetricDatum(metricsData[0])

In [132]:
validMetricsData = []
invalidMetricsData = []
for metricsDatum in metricsData:
    try:
        validateMetricDatum(metricsDatum)
        metricsDatum['dataInfo']['labels']=list(map(lambda x: re.sub('\s+', ' ', x), metricsDatum['dataInfo']['labels']))
        validMetricsData.append(metricsDatum)
    except Exception as e:
        print(e)
        invalidMetricsData.append(metricsDatum)


In [133]:
(len(validMetricsDatab), len(invalidMetricsData))

(1487, 0)

In [138]:
correctedJson = {
    'service': allReorgedData,
    'serviceMetrics': validMetricsData
}

In [139]:
with open('validatedMetrics.json', 'w+') as jsonFile:
    json.dump(correctedJson, jsonFile)

In [116]:
metricsData[0]['dataInfo']['labels'][3].replace(' :\n\n              ', ': ').replace('\n\n              ', ': ')

'is_battery_powered:   (BOOL)\n\n              Whether the device is battery powered or not.'

In [118]:
metricsData[0]['dataInfo']['labels'][3].replace('\n','', 10)

'is_battery_powered :                (BOOL)              Whether the device is battery powered or not.'

In [131]:
re.sub('\s+', ' ', metricsData[0]['dataInfo']['labels'][3]) #.replace('\n','', -1))

'is_battery_powered : (BOOL) Whether the device is battery powered or not.'