In [1]:
import datetime
from datetime import timedelta
import concurrent.futures
import json
import re
import logging
import os
from os import path
import sys
import warnings
from typing import List
import pickle

import numpy as np
import pandas as pd
import statistics
from math import isnan, pi

import google.auth
from google.cloud import bigquery

In [31]:
class QueryBQToMonitor:
    
    def __init__(self, billing_project, workflow_ids, days_back_upper_bound, days_back_lower_bound):
        
        self.logger = logging.getLogger()
        self.logger.setLevel(logging.INFO)

        h = logging.StreamHandler(sys.stderr)
        h.flush = sys.stderr.flush
        self.logger.addHandler(h)
        
        self.billing_project = billing_project
        
        self.formated_workflow_ids = "'"+ "', '".join(workflow_ids) + "'"
        
        self.days_back_upper_bound = days_back_upper_bound
        self.days_back_lower_bound = days_back_lower_bound
        
        # Explicitly create a credentials object. This allows you to use the same
        # credentials for both the BigQuery and BigQuery Storage clients, avoiding
        # unnecessary API calls to fetch duplicate authentication tokens.
        credentials, project_id = google.auth.default(
            scopes=["https://www.googleapis.com/auth/cloud-platform"]
        )

        # Make clients.
        self.bq_client = bigquery.Client(credentials=credentials, project=self.billing_project )
        
    def query(self):
        self._get_runtime_and_metadata()
        self._get_metrics()
    
    def _get_runtime_and_metadata(self):
        
        self._fetch_runtime()
        self._fetch_metadata()
        
        runtime_nrow, runtime_ncol = self.runtime.shape
        meta_nrow, meta_ncol = self.metadata.shape
        
        # basic QC
        if (meta_nrow != runtime_nrow):
            self.logger.warning('Metadata and runtime number of rows are different. You might want to check.')
        summary_msg = f"Nrows of runtime: {runtime_nrow}, Ncols of runtime: {runtime_ncol}, \nNrows of meta: {meta_nrow}, Ncols of meta: {meta_ncol}"
        self.logger.info(summary_msg)
        
        # merge the two
        self.metadata_runtime = pd.merge(self.metadata, self.runtime, left_on='meta_instance_name', right_on='runtime_instance_name')
        print()
        self.metadata_runtime.runtime_task_call_name.describe()

    def _fetch_runtime(self):
        # query runtime data
        runtime_sql = f"""

        SELECT

          runtime.attempt AS runtime_attempt,
          runtime.cpu_count AS runtime_cpu_count,
          runtime.cpu_platform AS runtime_cpu_platform,
          runtime.disk_mounts AS runtime_disk_mounts,
          runtime.disk_total_gb AS runtime_disk_total_gb,
          runtime.instance_id AS runtime_instance_id,
          runtime.instance_name AS runtime_instance_name,
          runtime.mem_total_gb AS runtime_mem_total_gb,
          runtime.preemptible AS runtime_preemptible,
          runtime.project_id AS runtime_project_id,
          runtime.shard AS runtime_shard,
          runtime.start_time AS runtime_start_time,
          runtime.task_call_name AS runtime_task_call_name,
          runtime.workflow_id AS runtime_workflow_id,
          runtime.zone AS runtime_zone

        FROM
          `{self.billing_project}.cromwell_monitoring.runtime`  runtime 

        WHERE
              DATE(runtime.start_time) >= DATE_SUB(CURRENT_DATE(), INTERVAL {self.days_back_upper_bound} DAY)
          AND DATE(runtime.start_time) <= DATE_SUB(CURRENT_DATE(), INTERVAL {self.days_back_lower_bound} DAY)

          AND runtime.workflow_id IN ({self.formated_workflow_ids})    
        """
        self.runtime = self.bq_client.query(query = runtime_sql).to_dataframe()
        self.logger.info("Fetched runtime table.")
    
    def _fetch_metadata(self):
        # query metadata table
        metadata_sql = f"""

        SELECT
          metadata.attempt AS meta_attempt,
          metadata.cpu_count AS meta_cpu,
          metadata.disk_mounts AS meta_disk_mounts,
          metadata.disk_total_gb AS meta_disk_total_gb,
          metadata.disk_types AS meta_disk_types,
          metadata.docker_image AS meta_docker_image,
          metadata.end_time AS meta_end_time,
          metadata.execution_status AS meta_execution_status,
          metadata.inputs AS meta_inputs,
          metadata.instance_name AS meta_instance_name,
          metadata.mem_total_gb AS meta_mem_total_gb,
          metadata.preemptible AS meta_preemptible,
          metadata.project_id AS meta_project_id,
          metadata.shard AS meta_shard,
          metadata.start_time AS meta_start_time,
          metadata.task_call_name AS meta_task_call_name,
          metadata.workflow_id AS meta_workflow_id,
          metadata.workflow_name AS meta_workflow_name,
          metadata.zone AS meta_zone,
          TIMESTAMP_DIFF(metadata.end_time, metadata.start_time, SECOND) meta_duration_sec

        FROM
          `{self.billing_project}.cromwell_monitoring.metadata` metadata

        WHERE
              DATE(metadata.start_time) >= DATE_SUB(CURRENT_DATE(), INTERVAL {self.days_back_upper_bound} DAY)
          AND DATE(metadata.start_time) <= DATE_SUB(CURRENT_DATE(), INTERVAL {self.days_back_lower_bound} DAY)

          AND metadata.workflow_id IN ({self.formated_workflow_ids})    
        """
        self.metadata = self.bq_client.query(query = metadata_sql).to_dataframe()
        self.logger.info("Fetched metadata table")
            
    def _get_metrics(self):
        
        b = datetime.datetime.now()
        start_time = b.strftime("%H:%M:%S")
        self.logger.info(f"Started querying metrics on {start_time}.")
        # provision jobs
        instance_ids = list(self.metadata_runtime.runtime_instance_id.unique())
        n = 8 # magic number based on experience
        cap = len(instance_ids) // 8

        ids_pool = dict()
        jobs_pool = dict()
        with concurrent.futures.ThreadPoolExecutor() as executor:

            for i in range(n+1):
                start = i * cap
                if (i != n):
                    end = (i+1) * cap - 1
                else:
                    end = len(instance_ids)
                ids_pool[i] = instance_ids[start:end]
                jobs_pool[i] = executor.submit(self._fetch_metrics_on_vms_batch, ids_pool[i])


        results_pool = dict()
        for i in range(n+1):
            results_pool[i] = jobs_pool[i].result()
        
        
        f = datetime.datetime.now()
        finish_time = f.strftime("%H:%M:%S")
        pf = f - b
        s = pf.seconds
        hours, remainder = divmod(s, 3600)
        minutes, seconds = divmod(remainder, 60)
        elapse = '{:02}:{:02}:{:02}'.format(int(hours), int(minutes), int(seconds))
        self.logger.info(f"Finished on {finish_time}.")
        self.logger.info(f"Totalling {elapse}.")
       
        
        l = list(results_pool.values())            
        self.metrics = pd.concat(l)
        
        # QC
        retries = 0
        d = set(self.metadata_runtime.runtime_instance_id.unique()) - set(self.metrics.metrics_instance_id.unique())
        while( (not d) and 10>retries ):
            self.logger.info(f"Retrieving metrics info on leftovers: {d}")
            left_over = self._fetch_metrics_on_vms_batch(d)
            if (not left_over.empty):
                self.metrics = pd.concat([self.metrics, left_over], axis=0)
            d = set(self.metadata_runtime.runtime_instance_id.unique()) - set(self.metrics.metrics_instance_id.unique())
            retries += 1
        if (0!=d):
            self.logger.warning(f"Not all VMs provisioned have their metrics sent over ({d} didn't).")

    def _fetch_metrics_on_vms_batch(self, vm_instance_ids):
        if len(vm_instance_ids) == 0:
            return pd.DataFrame()
        ids_string = ', '.join(map(str, vm_instance_ids))
        metrics_sql = f"""

        SELECT

          metrics.cpu_used_percent AS metrics_cpu_used_percent,
          metrics.disk_read_iops AS metrics_disk_read_iops,
          metrics.disk_used_gb AS metrics_disk_used_gb,
          metrics.disk_write_iops AS metrics_disk_write_iops,
          metrics.instance_id AS metrics_instance_id,
          metrics.mem_used_gb AS metrics_mem_used_gb,
          metrics.timestamp AS metrics_timestamp

        FROM
          `{self.billing_project}.cromwell_monitoring.metrics`  metrics

        WHERE

              DATE(metrics.timestamp) >= DATE_SUB(CURRENT_DATE(), INTERVAL {self.days_back_upper_bound} DAY)
          AND DATE(metrics.timestamp) <= DATE_SUB(CURRENT_DATE(), INTERVAL {self.days_back_lower_bound} DAY)
          AND metrics.instance_id IN ({ids_string})

        """
        return self.bq_client.query(metrics_sql).to_dataframe()
    
    def to_pickle(file_name: str):
        pickle.dump()
    
    def from_pickle(file_name: str):
        pass

In [3]:
credentials, project_id = google.auth.default()
print(credentials.__dict__.keys())

dict_keys(['token', 'expiry', '_quota_project_id', '_scopes', '_default_scopes', '_refresh_token', '_id_token', '_token_uri', '_client_id', '_client_secret', '_rapt_token'])


In [4]:
days_back_upper_bound = 5
days_back_lower_bound = 0
billing_project = 'broad-dsp-lrma'

In [13]:
def query_for_a_run(workflow_ids: List[str],
                    picket_files_prefix: str) -> QueryBQToMonitor:
    
    clean_workflow_ids = [wid.strip('"') for wid in workflow_ids]
    print(clean_workflow_ids)
    
    monitoring_dataset = QueryBQToMonitor(billing_project,
                                          clean_workflow_ids,
                                          days_back_upper_bound,
                                          days_back_lower_bound)
    monitoring_dataset.query()

    pickle.dump(monitoring_dataset.runtime,
                open( f"{picket_files_prefix}.runtime.pickle", "wb" ) )
    pickle.dump(monitoring_dataset.metadata,
                open( f"{picket_files_prefix}.metadata.pickle", "wb" ) )
    pickle.dump(monitoring_dataset.metadata_runtime,
                open( f"{picket_files_prefix}.metadata_runtime.pickle", "wb" ) )
    pickle.dump(monitoring_dataset.metrics,
                open( f"{picket_files_prefix}.metrics.pickle", "wb" ) )

# CCS_small

In [8]:
ccs_small_WIDs = list()
with open('slim_metadata/CCS_small.workflow-ids.all.txt') as f:
    ccs_small_WIDs= [l.strip() for l in f.readlines()]
print(ccs_small_WIDs)

['4b0ed486-076a-4fd6-ae4d-5919fc73d212', '4dec745a-577e-4fde-a897-6c13df261a34', 'c51669b8-a137-4890-98ae-fd061cb140b0']


In [14]:
query_for_a_run(ccs_small_WIDs, 'CCS_small')

['4b0ed486-076a-4fd6-ae4d-5919fc73d212', '4dec745a-577e-4fde-a897-6c13df261a34', 'c51669b8-a137-4890-98ae-fd061cb140b0']


Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched metadata table
Fetched metadata table
Fetched metadata table
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Nrows of runtime: 222, Ncols of runtime: 15, 
Nrows of meta: 546, Ncols of meta: 20
Nrows of runtime: 222, Ncols of runtime: 15, 
Nrows of meta: 546, Ncols of meta: 20
Nrows of runtime: 222, Ncols of runtime: 15, 
Nrows of meta: 546, Ncols of meta: 20
Started querying metrics on 20:54:24.
Started querying metrics on 20:54:24.
Started querying metrics on 20:54:24.





Finished on 20:54:54.
Finished on 20:54:54.
Finished on 20:54:54.
Totalling 00:00:30.
Totalling 00:00:30.
Totalling 00:00:30.
Not all VMs provisioned have their metrics sent over ({3932671187005565248, 3307748995851422918, 3457685651319753958, 5644029351665142443, 8520057775120803724, 6085224541387294674, 7397400968018593300, 8126928638149987893} didn't).
Not all VMs provisioned have their metrics sent over ({3932671187005565248, 3307748995851422918, 3457685651319753958, 5644029351665142443, 8520057775120803724, 6085224541387294674, 7397400968018593300, 8126928638149987893} didn't).
Not all VMs provisioned have their metrics sent over ({3932671187005565248, 3307748995851422918, 3457685651319753958, 5644029351665142443, 8520057775120803724, 6085224541387294674, 7397400968018593300, 8126928638149987893} didn't).


# ONT_small

In [29]:
ont_small_WIDs = list()
with open('slim_metadata/ONT_small.workflow-ids.all.txt') as f:
    ont_small_WIDs= [l.strip() for l in f.readlines()]
print(ont_small_WIDs)

['0413c5c9-e608-425e-af6b-412edec5d17f', 'df6cdae5-0acd-4909-af29-2558a6acd39b', 'eea12b44-506b-49a1-b17d-8b3e1f938e33']


In [32]:
query_for_a_run(ont_small_WIDs, 'ONT_small')

['0413c5c9-e608-425e-af6b-412edec5d17f', 'df6cdae5-0acd-4909-af29-2558a6acd39b', 'eea12b44-506b-49a1-b17d-8b3e1f938e33']


Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched metadata table
Fetched metadata table
Fetched metadata table
Fetched metadata table
Fetched metadata table
Fetched metadata table
Fetched metadata table
Fetched metadata table
Fetched metadata table
Fetched metadata table
Fetched metadata table
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows ar




Finished on 21:13:51.
Finished on 21:13:51.
Finished on 21:13:51.
Finished on 21:13:51.
Finished on 21:13:51.
Finished on 21:13:51.
Finished on 21:13:51.
Finished on 21:13:51.
Finished on 21:13:51.
Finished on 21:13:51.
Finished on 21:13:51.
Totalling 00:00:32.
Totalling 00:00:32.
Totalling 00:00:32.
Totalling 00:00:32.
Totalling 00:00:32.
Totalling 00:00:32.
Totalling 00:00:32.
Totalling 00:00:32.
Totalling 00:00:32.
Totalling 00:00:32.
Totalling 00:00:32.
Not all VMs provisioned have their metrics sent over ({5286474844691586146, 3758594032194882051, 7605131390616668516, 4919410464080857804, 8666979075784534543, 7738783716236922833, 1117134543235625654, 423902065469775355} didn't).
Not all VMs provisioned have their metrics sent over ({5286474844691586146, 3758594032194882051, 7605131390616668516, 4919410464080857804, 8666979075784534543, 7738783716236922833, 1117134543235625654, 423902065469775355} didn't).
Not all VMs provisioned have their metrics sent over ({5286474844691586146, 

# ONT_large

In [20]:
ont_large_WIDs = list()
with open('slim_metadata/ONT_large.workflow-ids.all.txt') as f:
    ont_large_WIDs= [l.strip() for l in f.readlines()]
print(ont_large_WIDs)

['20383457-6d38-43e5-adf3-33fabe800ae4', '841b09cc-250d-40de-b5ed-440585dbec0e', 'e1b4c89c-d8dc-4f87-a3e0-8539a39fe99e']


In [21]:
query_for_a_run(ont_large_WIDs, 'ONT_large')

['20383457-6d38-43e5-adf3-33fabe800ae4', '841b09cc-250d-40de-b5ed-440585dbec0e', 'e1b4c89c-d8dc-4f87-a3e0-8539a39fe99e']


Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched metadata table
Fetched metadata table
Fetched metadata table
Fetched metadata table
Fetched metadata table
Fetched metadata table
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Nrows of runtime: 250, Ncols of runtime: 15, 
Nrows of meta: 596, Ncols of meta: 20
Nrows of runtime: 250, Ncols of runtime: 15, 
Nrows of meta: 596, Ncols of meta: 20
Nrows of runtime: 250, Ncols of runtime: 15, 
Nrows of meta: 596, Ncols of meta: 20
Nrows of runtime




Finished on 21:05:28.
Finished on 21:05:28.
Finished on 21:05:28.
Finished on 21:05:28.
Finished on 21:05:28.
Finished on 21:05:28.
Totalling 00:01:08.
Totalling 00:01:08.
Totalling 00:01:08.
Totalling 00:01:08.
Totalling 00:01:08.
Totalling 00:01:08.
Not all VMs provisioned have their metrics sent over ({6204335748420771294, 816336637509299590, 5183444931517151784, 3220567095387396395, 1401359785074399419, 5878927564124234380, 5193676067602401398, 722571883620377403} didn't).
Not all VMs provisioned have their metrics sent over ({6204335748420771294, 816336637509299590, 5183444931517151784, 3220567095387396395, 1401359785074399419, 5878927564124234380, 5193676067602401398, 722571883620377403} didn't).
Not all VMs provisioned have their metrics sent over ({6204335748420771294, 816336637509299590, 5183444931517151784, 3220567095387396395, 1401359785074399419, 5878927564124234380, 5193676067602401398, 722571883620377403} didn't).
Not all VMs provisioned have their metrics sent over ({620

# CCS_large_96G_16cores

In [18]:
ccs_large_WIDs = list()
with open('slim_metadata/CCS_large_96G_16cores.workflow-ids.all.txt') as f:
    ccs_large_WIDs= [l.strip() for l in f.readlines()]
print(ccs_large_WIDs)

['18378026-20e0-40a2-a824-341e7db9017c', '6eaf152e-d48a-4f89-9b56-de0604654fbe', 'cf701424-8d5c-4acc-8e2a-69d89aba0597']


In [19]:
query_for_a_run(ccs_large_WIDs, 'CCS_large')

['18378026-20e0-40a2-a824-341e7db9017c', '6eaf152e-d48a-4f89-9b56-de0604654fbe', 'cf701424-8d5c-4acc-8e2a-69d89aba0597']


Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched metadata table
Fetched metadata table
Fetched metadata table
Fetched metadata table
Fetched metadata table
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Nrows of runtime: 210, Ncols of runtime: 15, 
Nrows of meta: 507, Ncols of meta: 20
Nrows of runtime: 210, Ncols of runtime: 15, 
Nrows of meta: 507, Ncols of meta: 20
Nrows of runtime: 210, Ncols of runtime: 15, 
Nrows of meta: 507, Ncols of meta: 20
Nrows of runtime: 210, Ncols of runtime: 15, 
Nrows of meta: 507, Ncols of meta: 20
Nrows of runtime: 210, Ncols of runtime: 15, 
Nrows of




Finished on 21:02:01.
Finished on 21:02:01.
Finished on 21:02:01.
Finished on 21:02:01.
Finished on 21:02:01.
Totalling 00:00:32.
Totalling 00:00:32.
Totalling 00:00:32.
Totalling 00:00:32.
Totalling 00:00:32.
Not all VMs provisioned have their metrics sent over ({5808648056482051204, 6631395865764301030, 3605648494437608553, 4258296434654197581, 344380703034587316, 1110783325986895414, 3745631825190463990, 7662976353756526134} didn't).
Not all VMs provisioned have their metrics sent over ({5808648056482051204, 6631395865764301030, 3605648494437608553, 4258296434654197581, 344380703034587316, 1110783325986895414, 3745631825190463990, 7662976353756526134} didn't).
Not all VMs provisioned have their metrics sent over ({5808648056482051204, 6631395865764301030, 3605648494437608553, 4258296434654197581, 344380703034587316, 1110783325986895414, 3745631825190463990, 7662976353756526134} didn't).
Not all VMs provisioned have their metrics sent over ({5808648056482051204, 6631395865764301030, 

# ASM_small_final

In [22]:
asm_small_WIDs = list()
with open('slim_metadata/ASM_small_final.workflow-ids.all.txt') as f:
    asm_small_WIDs= [l.strip() for l in f.readlines()]
print(asm_small_WIDs)

['358ae0a1-b76b-44ca-9714-64806724e868', 'c1cc1bde-4e34-49be-ad7c-ad2806887cc8', 'df3636ef-5815-45fa-aac6-4ba79c2a1c03']


In [24]:
query_for_a_run(asm_small_WIDs, 'ASM_small')

['358ae0a1-b76b-44ca-9714-64806724e868', 'c1cc1bde-4e34-49be-ad7c-ad2806887cc8', 'df3636ef-5815-45fa-aac6-4ba79c2a1c03']


Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched metadata table
Fetched metadata table
Fetched metadata table
Fetched metadata table
Fetched metadata table
Fetched metadata table
Fetched metadata table
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Nrows of runtime: 18, Ncols of runtime: 15, 
Nrows of meta: 22, Ncols of meta: 20
Nrows of runtime: 18, Ncols of runtime: 15, 
Nrows of meta: 22, 




Finished on 21:07:19.
Finished on 21:07:19.
Finished on 21:07:19.
Finished on 21:07:19.
Finished on 21:07:19.
Finished on 21:07:19.
Finished on 21:07:19.
Totalling 00:00:04.
Totalling 00:00:04.
Totalling 00:00:04.
Totalling 00:00:04.
Totalling 00:00:04.
Totalling 00:00:04.
Totalling 00:00:04.
Not all VMs provisioned have their metrics sent over ({1649443834895057536, 5717816116716257097, 967303496718141914, 6806849287010401975, 751105932643060186, 2900420353402653146, 7141424774057609690, 1654467413328059449} didn't).
Not all VMs provisioned have their metrics sent over ({1649443834895057536, 5717816116716257097, 967303496718141914, 6806849287010401975, 751105932643060186, 2900420353402653146, 7141424774057609690, 1654467413328059449} didn't).
Not all VMs provisioned have their metrics sent over ({1649443834895057536, 5717816116716257097, 967303496718141914, 6806849287010401975, 751105932643060186, 2900420353402653146, 7141424774057609690, 1654467413328059449} didn't).
Not all VMs prov

# ASM_medium_final

In [25]:
asm_medium_WIDs = list()
with open('slim_metadata/ASM_medium_final.workflow-ids.all.txt') as f:
    asm_medium_WIDs= [l.strip() for l in f.readlines()]
print(asm_medium_WIDs)

['01ed50b8-5d9e-4829-889d-07355190de1e', '6c407136-1e62-428f-97eb-d9d21d739625', 'fc49a989-24d2-46bc-a0e3-1fafcde14ef4']


In [26]:
query_for_a_run(asm_medium_WIDs, 'ASM_medium')

['01ed50b8-5d9e-4829-889d-07355190de1e', '6c407136-1e62-428f-97eb-d9d21d739625', 'fc49a989-24d2-46bc-a0e3-1fafcde14ef4']


Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched metadata table
Fetched metadata table
Fetched metadata table
Fetched metadata table
Fetched metadata table
Fetched metadata table
Fetched metadata table
Fetched metadata table
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Nrows of runtime: 19, Nc




Finished on 21:08:15.
Finished on 21:08:15.
Finished on 21:08:15.
Finished on 21:08:15.
Finished on 21:08:15.
Finished on 21:08:15.
Finished on 21:08:15.
Finished on 21:08:15.
Totalling 00:00:18.
Totalling 00:00:18.
Totalling 00:00:18.
Totalling 00:00:18.
Totalling 00:00:18.
Totalling 00:00:18.
Totalling 00:00:18.
Totalling 00:00:18.
Not all VMs provisioned have their metrics sent over ({8152887381832262429, 4534217327161403680, 5197538883878807840, 7866855011028855072, 8665367578285205792, 8255305537044900128, 5029900164217946089, 5309323327676929903} didn't).
Not all VMs provisioned have their metrics sent over ({8152887381832262429, 4534217327161403680, 5197538883878807840, 7866855011028855072, 8665367578285205792, 8255305537044900128, 5029900164217946089, 5309323327676929903} didn't).
Not all VMs provisioned have their metrics sent over ({8152887381832262429, 4534217327161403680, 5197538883878807840, 7866855011028855072, 8665367578285205792, 8255305537044900128, 5029900164217946089

# ASM_large_final

In [27]:
asm_large_WIDs = list()
with open('slim_metadata/ASM_large_final.workflow-ids.all.txt') as f:
    asm_large_WIDs= [l.strip() for l in f.readlines()]
print(asm_large_WIDs)

['1850eaea-4669-4453-9f83-9d6587975898', '22137af3-5b44-4743-b014-71ba101abe6b', 'c144b3c4-334c-48b3-9531-20feacb4eaa9']


In [28]:
query_for_a_run(asm_large_WIDs, 'ASM_large')

['1850eaea-4669-4453-9f83-9d6587975898', '22137af3-5b44-4743-b014-71ba101abe6b', 'c144b3c4-334c-48b3-9531-20feacb4eaa9']


Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched runtime table.
Fetched metadata table
Fetched metadata table
Fetched metadata table
Fetched metadata table
Fetched metadata table
Fetched metadata table
Fetched metadata table
Fetched metadata table
Fetched metadata table
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You might want to check.
Metadata and runtime number of rows are different. You




Finished on 21:08:53.
Finished on 21:08:53.
Finished on 21:08:53.
Finished on 21:08:53.
Finished on 21:08:53.
Finished on 21:08:53.
Finished on 21:08:53.
Finished on 21:08:53.
Finished on 21:08:53.
Totalling 00:00:19.
Totalling 00:00:19.
Totalling 00:00:19.
Totalling 00:00:19.
Totalling 00:00:19.
Totalling 00:00:19.
Totalling 00:00:19.
Totalling 00:00:19.
Totalling 00:00:19.
Not all VMs provisioned have their metrics sent over ({970310227228415178, 94250719772915564, 132530955828089686, 1421099313032876887, 1576052047921302936, 6931321790050358102, 7498956238531612056, 5464028212887795354} didn't).
Not all VMs provisioned have their metrics sent over ({970310227228415178, 94250719772915564, 132530955828089686, 1421099313032876887, 1576052047921302936, 6931321790050358102, 7498956238531612056, 5464028212887795354} didn't).
Not all VMs provisioned have their metrics sent over ({970310227228415178, 94250719772915564, 132530955828089686, 1421099313032876887, 1576052047921302936, 6931321790