# Imports

In [1]:
from google.oauth2 import service_account
from google.cloud import monitoring_v3
from google.cloud import bigquery
from google.cloud import storage
from google.oauth2 import service_account
from google.cloud.bigquery.job import ExtractJobConfig
import time
import matplotlib.dates as mdates
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
import itertools
import datetime
import sharedVariables
from sharedVariables import *

In [4]:
todaystring = datetime.datetime.today().strftime('%Y%m%d')
outdir = f"data/{todaystring}"
if not os.path.exists(outdir):
    os.mkdir(outdir)

# Network Logs

In [5]:

credentials = service_account.Credentials.from_service_account_file(
    '../infrastructure/credentials.json')

client = bigquery.Client(credentials=credentials)
today = datetime.datetime.today()
table_id = f"compute_googleapis_com_vpc_flows_{today.strftime('%Y%m%d')}"
# table_id = 'compute_googleapis_com_vpc_flows_20210720'
blob_name = "export.log"



extract_conf = ExtractJobConfig()
extract_conf.compression = 'NONE'
extract_conf.destination_format = 'NEWLINE_DELIMITED_JSON'

def getDataset(experiment): 
    bucket_name = experiment + "-log-bucket"

    destination_uri = "gs://{}/{}".format(bucket_name, blob_name)
    dataset_ref = bigquery.DatasetReference(project, experiment.replace("-", "_"))
    table_ref = dataset_ref.table(table_id)

    extract_job = client.extract_table(
        table_ref,
        destination_uri,
        # Location must match that of the source table.
        location="US",
        job_config=extract_conf
    )  # API request
    extract_job.result()  # Waits for job to complete.

    print(
        "Exported {}:{}.{} to {}".format(project, experiment, table_id, destination_uri)
    )


    storage_client = storage.Client(credentials=credentials)

    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(blob_name)
    blob.download_to_filename(f"{outdir}/{experiment}.log")

    print(
        "Blob downloaded successfully."
    )

for experiment in sharedVariables.experiments: 
    try: 
        getDataset(experiment)
    except Exception as e: 
        print(e)

Exported dspj-315716:experiment-baseline-with-latency.compute_googleapis_com_vpc_flows_20210720 to gs://experiment-baseline-with-latency-log-bucket/export.log
Blob downloaded successfully.
Exported dspj-315716:experiment-syncmesh-with-latency.compute_googleapis_com_vpc_flows_20210720 to gs://experiment-syncmesh-with-latency-log-bucket/export.log
Blob downloaded successfully.
Exported dspj-315716:experiment-syncmesh-with-latency-6.compute_googleapis_com_vpc_flows_20210720 to gs://experiment-syncmesh-with-latency-6-log-bucket/export.log
Blob downloaded successfully.


## Get TimeStamps

In [6]:
df_sync = loadData(f'{outdir}/experiment-syncmesh-with-latency.log')
df_base = loadData(f'{outdir}/experiment-baseline-with-latency.log')

def filterDataForSeperator(df): 
    df = df[df["jsonPayload.connection.src_ip"].isin([ip_seperator, ip_orchestrator])]
    df = df[df["jsonPayload.connection.dest_ip"].isin([ip_seperator, ip_orchestrator])]
    # df = df[df["jsonPayload.connection.dest_port"] == 443]
    return df

seperator_base = filterDataForSeperator(df_base)
seperator_sync = filterDataForSeperator(df_sync)

# df = df[df["jsonPayload.connection.dest_ip"]]
print(seperator_sync.index)
print(seperator_base.index)

DatetimeIndex(['2021-07-20 23:38:00.580331+00:00', '2021-07-20 23:38:00.580331+00:00'], dtype='datetime64[ns, UTC]', name='timestamp', freq=None)
DatetimeIndex(['2021-07-20 23:27:26.878197+00:00',
               '2021-07-20 23:27:26.878197+00:00',
               '2021-07-20 23:31:27.925317+00:00',
               '2021-07-20 23:31:27.925317+00:00'],
              dtype='datetime64[ns, UTC]', name='timestamp', freq=None)


# Monitoring

In [10]:
credentials = service_account.Credentials.from_service_account_file(
    '../infrastructure/credentials.json')

# Our project ID

client = monitoring_v3.MetricServiceClient(credentials=credentials)
project_name = f"projects/{project_id}"
now = time.time()
seconds = int(now)
nanos = int((now - seconds) * 10 ** 9)
interval = monitoring_v3.TimeInterval(
    {
        "end_time": {"seconds": seconds, "nanos": nanos},
        # 3600 = Get the last hour of metrics
        "start_time": {"seconds": (seconds - (3600 * 1)), "nanos": nanos},
    }
)

# Add Filter? metric.label.instance_name = "your-instance-id"
results_cpu = client.list_time_series(
    request={
        "name": project_name,
        "filter": 'metric.type =  "compute.googleapis.com/instance/cpu/utilization"',
        "interval": interval,
        "view": monitoring_v3.ListTimeSeriesRequest.TimeSeriesView.FULL,
    }
)
results_io_read = client.list_time_series(
    request={
        "name": project_name,
        "filter": 'metric.type = "compute.googleapis.com/instance/disk/read_bytes_count"',
        "interval": interval,
        "view": monitoring_v3.ListTimeSeriesRequest.TimeSeriesView.FULL,
    }
)
results_io_write = client.list_time_series(
    request={
        "name": project_name,
        "filter": 'metric.type = "compute.googleapis.com/instance/disk/write_bytes_count"',
        "interval": interval,
        "view": monitoring_v3.ListTimeSeriesRequest.TimeSeriesView.FULL,
    }
)
results_iops_read = client.list_time_series(
    request={
        "name": project_name,
        "filter": 'metric.type = "compute.googleapis.com/instance/disk/read_ops_count"',
        "interval": interval,
        "view": monitoring_v3.ListTimeSeriesRequest.TimeSeriesView.FULL,
    }
)
results_iops_write = client.list_time_series(
    request={
        "name": project_name,
        "filter": 'metric.type = "compute.googleapis.com/instance/disk/write_ops_count"',
        "interval": interval,
        "view": monitoring_v3.ListTimeSeriesRequest.TimeSeriesView.FULL,
    }
)

def loadMonitoringData(experiment): 
    df = pd.DataFrame()
    set_timestamp_column = True
    first_len = 0
    for ts_cpu, ts_io_read, ts_io_write, ts_iops_read, ts_iops_write in zip(results_cpu, results_io_read, results_io_write, results_iops_read, results_iops_write):
        # print(ts)
        label = ts_cpu.metric.labels["instance_name"]
        # print(ts_cpu)
        if "orchestrator" in label:
            # Skip Orchesrtator
            continue
        if "experiment-syncmesh-with-latency-6" in label:
            # Skip Orchesrtator
            continue
        if label.startswith(experiment):
            print(label + ": " + str(len(ts_cpu.points)))
            if set_timestamp_column:
                first_len=len(ts_cpu.points)-1
                print(first_len)
                df['timestamp'] = pd.to_datetime([p.interval.start_time.ToDatetime() for p in ts_cpu.points[:first_len]])
                set_timestamp_column = False
            # print(ts.points[0])
            # When deploying the vm they might take different amount of time leading to some values beeing available a minute early this leading to different length
            # We can trim off the last values as they are orderer from most recent to last
            df['cpu_util_' + label] = [p.value.double_value for p in ts_cpu.points[:first_len]]
            df['io_read_' + label] = [p.value.int64_value for p in ts_io_read.points[:first_len]]
            df['io_write_' + label] = [p.value.int64_value for p in ts_io_write.points[:first_len]]
            df['iops_read_' + label] = [p.value.int64_value for p in ts_iops_read.points[:first_len]]
            df['iops_write_' + label] = [p.value.int64_value for p in ts_iops_write.points[:first_len]]


    df.set_index('timestamp', inplace=True)
    df.index = pd.to_datetime(df.index)
    columns = df.columns
    df.to_csv(f"{outdir}/monitoring_{experiment}.csv")
    print(f"Gathered all Monitoring data for {experiment}")
    return df

for experiment in sharedVariables.experiments: 
    try: 
        loadMonitoringData(experiment)
    except Exception as e: 
        print(e)



experiment-baseline-with-latency-node-instance-2: 57
56
experiment-baseline-with-latency-node-instance-3: 57
experiment-baseline-with-latency-node-instance-1: 57
experiment-baseline-with-latency-central-server-instance: 57
experiment-baseline-with-latency-client-instance: 57
Gathered all Monitoring data for experiment-baseline-with-latency
experiment-syncmesh-with-latency-node-instance-2: 49
48
experiment-syncmesh-with-latency-node-instance-3: 48
experiment-syncmesh-with-latency-node-instance-1: 48
experiment-syncmesh-with-latency-client-instance: 48
Gathered all Monitoring data for experiment-syncmesh-with-latency
"None of ['timestamp'] are in the columns"
