In [1]:
import logging
import datetime

def extract_last_value(machine_url):
    str = machine_url.split("/")
    return(str[-1])    

def extract_key_value(key_data,value):
    value_data = ''
    for i in range(len(key_data)):
        if key_data[i]['key'] == value:
            value_data = key_data[i]['value']
            break
        else:
            value_data = ''
    return value_data

def get_api(url):
    import google.auth
    credentials, project = google.auth.default()
    from google.auth.transport.requests import AuthorizedSession
    authed_session = AuthorizedSession(credentials)
    response = authed_session.get(url)
    return response

def get_createdby(instance_id):
    from google.cloud import logging
    client = logging.Client()
    createdby = ''
    
    filter_str = 'resource.type="gce_instance" \
    logName="projects/sb-bigdata-4985-da852265/logs/cloudaudit.googleapis.com%2Factivity" \
    protoPayload.methodName="v1.compute.instances.insert" \
    operation.first=true \
    protoPayload.request.disks.initializeParams.sourceImage:"projects/deeplearning-platform-release/global/images/family/" \
    resource.labels.instance_id="' + str(instance_id) + '"'
    
    for entry in client.list_entries(filter_=filter_str):  # API call(s)
        from chardet import detect
        payloaddata = entry.payload.value.decode('cp1254', errors='ignore').split('@accenture.com')[0]
        import re
        createdby = re.sub(r"[^a-z0-9.]","",payloaddata.lower())
    
    return createdby
    
def get_notebooks_df(notebook_data_df):
    framework = [] # list
    name = [] # list
    instance_id = []
    created_at = []
    label = []
    status = []
    machine_type = []
    sa_account = []
    proxy_mode = []
    proxy_user_email = []
    gpu_name = []
    zone=[]
    network = []
    subnetwork = []
    bootdisksize=[]
    bootdisktype=[]
    created_by=[]
    
    for i in range(0,len(notebook_data_df)):
         #print(notebook_data_df[i])
        notebook_data_df[i]['machineType']
        name.append(str(notebook_data_df[i]['name']))
        instance_id.append(notebook_data_df[i]['id'])
        created_at.append(notebook_data_df[i]['creationTimestamp'])
        created_by.append(get_createdby(notebook_data_df[i]['id']))
        machine_type.append(extract_last_value(notebook_data_df[i]['machineType']))
        zone.append(extract_last_value(notebook_data_df[i]['zone']))
        network.append(extract_last_value(notebook_data_df[i]['networkInterfaces'][0]['network']))
        subnetwork.append(extract_last_value(notebook_data_df[i]['networkInterfaces'][0]['subnetwork']))
        labels = notebook_data_df[i].get('labels')
        label.append(labels)
        status.append(notebook_data_df[i]['status'])
        sa_account.append(notebook_data_df[i]['serviceAccounts'][0]['email'])
        proxy_mode.append(extract_key_value(notebook_data_df[i]['metadata']['items'],'proxy-mode'))
        proxy_user_email.append(extract_key_value(notebook_data_df[i]['metadata']['items'],'proxy-user-mail'))
        framework.append(extract_key_value(notebook_data_df[i]['metadata']['items'],'framework'))
        #gpu_names = notebook_data_df[i].get('guestAccelerators')[0]['acceleratorType']
        notebook_gpu_name = ''
        gpu_names = ''
        gpu_names = notebook_data_df[i].get('guestAccelerators')
        if gpu_names is not None:
            notebook_gpu_name = gpu_names[0]['acceleratorType']
        gpu_name.append(extract_machine_type(notebook_gpu_name))
        bootdisk_url = notebook_data_df[i]['disks'][0]['source']
        response = get_api(bootdisk_url)
        import json
        data_df = json.loads(response.text)
        bootdisksize.append(data_df['sizeGb'])
        bootdisktype.append(extract_last_value(data_df['type']))
        
        
        import pandas as pd
        notebook_list = pd.DataFrame(
            {'name': name,
             'instanceid':instance_id,
             'zone': zone,
             'network': network,
             'subnetwork': subnetwork,
             'createdat':created_at,
             'createdby':created_by,
             'machinetype' : machine_type,
             'label': label,
             'status' : status,
             'serviceaccount' : sa_account,
             'proxymode':proxy_mode,
             'proxyuseremail':proxy_user_email,
             'framework': framework,
             'gpu':gpu_name,
             'bootdisksize':bootdisksize,
             'bootdisktype':bootdisktype
            },
            columns=['name', 'instanceid', 'zone','network','subnetwork', 'createdat','createdby','machinetype','label','status','serviceaccount','proxymode','proxyuseremail','framework','gpu', 'bootdisksize', 'bootdisktype']
        )
    return notebook_list

def write_to_bqtable(bq_tablename, bq_schemaname, datadf):
    import google.auth
    from google.cloud import bigquery
    credentials, project = google.auth.default()
    client = bigquery.Client(project)
    job_config = bigquery.LoadJobConfig()
    job_config.schema = bq_schemaname
    
    job = client.load_table_from_dataframe(datadf, bq_tablename, job_config=job_config)
    # Wait for the load job to complete.
    job.result()

def get_data(request): 
    notebook = []
    response = get_api('https://compute.googleapis.com/compute/v1/projects/sb-bigdata-4985-da852265/aggregated/instances?filter=tags.tag%20:%20deeplearning-vm')
    if response.status_code == 200:
        import json
        import pandas as pd
        data_df = pd.read_json(response.text)
        from pandas.io.json import json_normalize  
        json_norm = json_normalize(data_df['items'])
        na_data = json_norm[json_norm['instances'].notna()]
        for value in na_data['instances']:
            for instance_data in value:
                notebook.append(instance_data)
        print("AI platform notebook count : ",str(len(notebook)))
        notebook = get_notebooks_df(notebook)
        notebook.insert(0, 'date', str(datetime.datetime.utcnow()))
        from google.cloud import bigquery
        varbqschema=[
            bigquery.SchemaField(name="date", field_type="STRING"),
            bigquery.SchemaField(name="name", field_type="STRING"),
            bigquery.SchemaField(name="instanceid", field_type="STRING"),
            bigquery.SchemaField(name="zone", field_type="STRING"),
            bigquery.SchemaField(name="network", field_type="STRING"),
            bigquery.SchemaField(name="subnetwork", field_type="STRING"),
            bigquery.SchemaField(name="createdat", field_type="STRING"),
            bigquery.SchemaField(name="createdby", field_type="STRING"),
            bigquery.SchemaField(name="machinetype", field_type="STRING"),
            bigquery.SchemaField(name="label", field_type="STRING"),
            bigquery.SchemaField(name="status", field_type="STRING"),
            bigquery.SchemaField(name="serviceaccount", field_type="STRING"),
            bigquery.SchemaField(name="proxymode", field_type="STRING"),
            bigquery.SchemaField(name="proxyuseremail", field_type="STRING"),
            bigquery.SchemaField(name="framework", field_type="STRING"),
            bigquery.SchemaField(name="gpu", field_type="STRING"),
            bigquery.SchemaField(name="bootdisksize", field_type="STRING"),
            bigquery.SchemaField(name="bootdisktype", field_type="STRING")
        ]
        write_to_bqtable('modelmanagement.notebooks', varbqschema, notebook.astype(str))
    else:
        print(response.text)
    return "Done!"

In [4]:
notebook_list = get_data(None)
notebook_list

AI platform notebook count :  9


NameError: name 'extract_machine_type' is not defined