In [1]:
from google.cloud import monitoring_v3
from google.cloud.monitoring_v3.query import Query
import datetime
import time

In [2]:
# A single Cloud Monitoring client is shared by every cell in this notebook.
client = monitoring_v3.MetricServiceClient()
# Resource name of the project whose metrics are listed, written and deleted.
project_name = 'projects/jk-mlops-dev'
# Matches every metric type that begins with the custom gce/gpu prefix.
# NOTE(review): `filter` shadows the built-in; the name is kept because later cells read it.
filter = 'metric.type = starts_with("custom.googleapis.com/gce/gpu")'


In [3]:
# Show each metric descriptor matched by the current `filter`:
# first its resource name, then the full descriptor message.
gpu_descriptors = client.list_metric_descriptors(name=project_name, filter_=filter)
for descriptor in gpu_descriptors:
    print('Descriptor: {}'.format(descriptor.name))
    print(descriptor)

Descriptor: projects/jk-mlops-dev/metricDescriptors/custom.googleapis.com/gce/gpu/memory_utilization
name: "projects/jk-mlops-dev/metricDescriptors/custom.googleapis.com/gce/gpu/memory_utilization"
metric_kind: GAUGE
value_type: INT64
description: "GPU memory utilization"
type: "custom.googleapis.com/gce/gpu/memory_utilization"

Descriptor: projects/jk-mlops-dev/metricDescriptors/custom.googleapis.com/gce/gpu/utilization
name: "projects/jk-mlops-dev/metricDescriptors/custom.googleapis.com/gce/gpu/utilization"
metric_kind: GAUGE
value_type: INT64
description: "GPU utilization"
type: "custom.googleapis.com/gce/gpu/utilization"



In [37]:
def report_gpu_metric(value, seconds, nanos, type, instance_id, zone, project_id):
    """Write a single INT64 point of a custom metric for a GCE instance.

    Uses the notebook-level ``client`` and ``project_name``: the time series
    is created under ``project_name``, while ``project_id`` is only attached
    as a resource label.

    Args:
        value: Stored in the point's ``int64_value``.
        seconds: Seconds component of the point's end time.
        nanos: Nanoseconds component of the point's end time.
        type: Metric path appended to ``custom.googleapis.com/``.
        instance_id: Value of the ``instance_id`` resource label.
        zone: Value of the ``zone`` resource label.
        project_id: Value of the ``project_id`` resource label.
    """
    ts = monitoring_v3.types.TimeSeries()
    ts.metric.type = f'custom.googleapis.com/{type}'
    ts.resource.type = 'gce_instance'

    # Labels identifying the monitored gce_instance resource.
    resource_labels = (
        ('instance_id', instance_id),
        ('zone', zone),
        ('project_id', project_id),
    )
    for label_key, label_value in resource_labels:
        ts.resource.labels[label_key] = label_value

    # Only the end time is set on the point's interval.
    sample = ts.points.add()
    sample.value.int64_value = value
    sample.interval.end_time.seconds = seconds
    sample.interval.end_time.nanos = nanos

    client.create_time_series(project_name, [ts])

In [38]:
# Arguments passed to report_gpu_metric by the write cell:
# the resource labels of the target instance ...
project_id = 'jk-mlops-dev'
zone = 'us-west1-b'
instance_id = '284365999706661199'
# ... and the metric path that is appended to "custom.googleapis.com/".
metric_type = 'gce/gpu/utilization'

In [51]:
# Write a short ramp of synthetic GPU-utilization points (60, 61, ...).
#
# Each point is stamped with the wall-clock time at which it is written.
# The previous version backdated the first point by 60 s and advanced the
# timestamp in fixed 5 s steps regardless of the real write cadence; that run
# was rejected with "400 ... points were written more frequently than the
# maximum sampling period configured for the metric". Stamping with the real
# write time keeps timestamps strictly increasing (also across re-runs of this
# cell) and spaced by `wait_time`.
initial_util = 60
num_points = 5
# Seconds between consecutive writes to the same time series.
# NOTE(review): chosen to stay above the documented per-series write rate
# limit for custom metrics -- confirm the current limit before lowering it.
wait_time = 10

for i in range(num_points):
    now = time.time()
    seconds = int(now)
    nanos = int((now - seconds) * 10**9)

    report_gpu_metric(value=initial_util + i,
                      seconds=seconds,
                      nanos=nanos,
                      type=metric_type,
                      instance_id=instance_id,
                      zone=zone,
                      project_id=project_id)
    print('wrote metric')

    # No need to wait after the final point has been written.
    if i < num_points - 1:
        time.sleep(wait_time)

wrote metric


InvalidArgument: 400 One or more TimeSeries could not be written: One or more points were written more frequently than the maximum sampling period configured for the metric.: timeSeries[0]

In [33]:
# Select the GPU utilization metric and a query window covering the
# 240 seconds that end at the current time.
# Alternative filter that additionally restricts to one `device` label:
#filter = 'metric.type = "custom.googleapis.com/gce/gpu/utilization" AND metric.labels.device = "0"'
filter = 'metric.type = "custom.googleapis.com/gce/gpu/utilization"'

# Split the current time into whole seconds and the nanosecond remainder.
now = time.time()
seconds = int(now)
nanos = int((now - seconds) * 10**9)

window_end = {"seconds": seconds, "nanos": nanos}
window_start = {"seconds": seconds - 240, "nanos": nanos}
interval = monitoring_v3.types.TimeInterval(
    start_time=window_start,
    end_time=window_end,
)

In [34]:
# Fetch the time series matching `filter` inside `interval`, using the FULL view.
full_view = monitoring_v3.types.ListTimeSeriesRequest.TimeSeriesView.FULL
responses = client.list_time_series(
    name=project_name, filter_=filter, interval=interval, view=full_view
)
# Drain the returned iterator into a list so the result can be indexed.
results = [series for series in responses]

In [35]:
# For every point of the first returned series, print the end-time seconds
# followed by the point's value.
first_series = results[0]
for point in first_series.points:
    end_seconds = point.interval.end_time.seconds
    print(end_seconds, point.value)

1615935887 int64_value: 64

1615935882 int64_value: 63

1615935877 int64_value: 62

1615935872 int64_value: 61

1615935867 int64_value: 60

1615935857 int64_value: 60

1615935849 int64_value: 60

1615935838 int64_value: 60

1615935807 int64_value: 54

1615935802 int64_value: 53

1615935797 int64_value: 52

1615935792 int64_value: 51

1615935787 int64_value: 50



In [19]:
# Dump every point of the first returned series in full.
# NOTE(review): needs `results` to be indexable (a list); the TypeError recorded
# below this cell suggests it was once run against the raw iterator.
points = results[0].points
for point in points:
    print(point)

TypeError: 'GRPCIterator' object is not subscriptable

In [9]:
# Exploratory: list the names exposed by monitoring_v3.types.
dir(monitoring_v3.types)

['Aggregation',
 'AlertPolicy',
 'Any',
 'BasicSli',
 'BoolValue',
 'BytesValue',
 'CreateAlertPolicyRequest',
 'CreateGroupRequest',
 'CreateMetricDescriptorRequest',
 'CreateNotificationChannelRequest',
 'CreateServiceLevelObjectiveRequest',
 'CreateServiceRequest',
 'CreateTimeSeriesError',
 'CreateTimeSeriesRequest',
 'CreateTimeSeriesSummary',
 'CreateUptimeCheckConfigRequest',
 'DeleteAlertPolicyRequest',
 'DeleteGroupRequest',
 'DeleteMetricDescriptorRequest',
 'DeleteNotificationChannelRequest',
 'DeleteServiceLevelObjectiveRequest',
 'DeleteServiceRequest',
 'DeleteUptimeCheckConfigRequest',
 'Distribution',
 'DistributionCut',
 'DoubleValue',
 'DroppedLabels',
 'Duration',
 'Empty',
 'FieldMask',
 'FloatValue',
 'GetAlertPolicyRequest',
 'GetGroupRequest',
 'GetMetricDescriptorRequest',
 'GetMonitoredResourceDescriptorRequest',
 'GetNotificationChannelDescriptorRequest',
 'GetNotificationChannelRequest',
 'GetNotificationChannelVerificationCodeRequest',
 'GetNotificationChann

In [85]:
# DESTRUCTIVE: deletes every metric descriptor matched by `filter`.
# NOTE(review): `filter` is a notebook global that is reassigned by another
# cell, so what gets deleted depends on which cell ran last -- confirm its
# value before executing this cell.
for descriptor in client.list_metric_descriptors(filter_=filter, name=project_name):
    descriptor_name = descriptor.name
    print('Deleting: {}'.format(descriptor_name))
    client.delete_metric_descriptor(name=descriptor_name)

In [87]:
# Inputs for the Query built below: the full metric type to read ...
metric_type = 'custom.googleapis.com/opencensus/gce/gpu/utilization_distribution'
# ... and the project that is queried.
project_id = 'jk-mlops-dev'

In [88]:
# Bounds of the query window, parsed from ISO-8601 strings with a zero UTC offset.
START_TIME, END_TIME = map(
    datetime.datetime.fromisoformat,
    ('2021-01-22T04:00:00-00:00', '2021-01-22T05:00:00-00:00'),
)

In [91]:
# Build a query for `metric_type` in `project_id`, limited to the window
# between START_TIME and END_TIME. select_interval takes the end time first,
# so both bounds are passed by keyword to make the order explicit.
query = Query(client, project_id, metric_type=metric_type).select_interval(
    end_time=END_TIME,
    start_time=START_TIME,
)
# Optional narrowing by resource / metric label, left disabled:
#query = query.select_resources(instance_id='1297153725150009955')
#query = query.select_metrics(opencensus_task='py-13989@jk-test')

In [94]:
# Run the query and keep the result of as_dataframe() in `df`.
df = query.as_dataframe()
# Bare last expression so the notebook renders `df` as the cell output.
df

resource_type,gce_instance,gce_instance
project_id,jk-mlops-dev,jk-mlops-dev
zone,us-west1-b,us-west1-b
instance_id,5382047594078986292,5382047594078986292
device,0,1
2021-01-22 04:02:11.137948,count: 6\nbucket_options {\n explicit_buckets...,count: 6\nbucket_options {\n explicit_buckets...
2021-01-22 04:02:41.138088,count: 12\nbucket_options {\n explicit_bucket...,count: 12\nbucket_options {\n explicit_bucket...
2021-01-22 04:03:11.138221,count: 18\nbucket_options {\n explicit_bucket...,count: 18\nbucket_options {\n explicit_bucket...
2021-01-22 04:03:41.138345,count: 24\nbucket_options {\n explicit_bucket...,count: 24\nbucket_options {\n explicit_bucket...
2021-01-22 04:04:11.138576,count: 30\nbucket_options {\n explicit_bucket...,count: 30\nbucket_options {\n explicit_bucket...
2021-01-22 04:04:41.138740,count: 36\nbucket_options {\n explicit_bucket...,count: 36\nbucket_options {\n explicit_bucket...
2021-01-22 04:05:11.138878,count: 42\nbucket_options {\n explicit_bucket...,count: 42\nbucket_options {\n explicit_bucket...
