In [1]:
# Following variables can be set here or via papermill.
# `ci` and `experiment` are looked up defensively: when neither has been
# injected (manual run), reading the bare names would stop the notebook here
# with a NameError before any of the manual-mode branches below can run.
if globals().get("ci") != "True":
    print("Running in non-CI mode")
    # experiment = "experiment-distributed-gundb-with-latency-3"

# Deliberately no fallback assignment to `experiment`: later cells test
# `'experiment' in locals()` to choose between manual and papermill mode.
print(globals().get("ci"))
print(globals().get("experiment"))

# Imports

In [24]:
from google.oauth2 import service_account
from google.cloud import monitoring_v3
from google.cloud import bigquery
from google.cloud import storage
from google.oauth2 import service_account
from google.cloud.bigquery.job import ExtractJobConfig
import time
import matplotlib.dates as mdates
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
import itertools
import os
from pathlib import Path
import datetime
import shutil
import subprocess
from subprocess import PIPE
import sharedVariables
from sharedVariables import *

# Network Logs

In [25]:

# Service-account credentials and the BigQuery client used by getDataset().
credentials = service_account.Credentials.from_service_account_file(
    '../infrastructure/credentials.json'
)
client = bigquery.Client(credentials=credentials)

# The flow-log table name ends in a date stamp; target the one for today.
today = datetime.datetime.today()
table_id = "compute_googleapis_com_vpc_flows_{}".format(today.strftime('%Y%m%d'))
# table_id = 'compute_googleapis_com_vpc_flows_20210720'

# Object name the table is exported to (and downloaded from) in the log bucket.
blob_name = "export.log"

# Extract job settings: newline-delimited JSON, no compression.
extract_conf = ExtractJobConfig()
extract_conf.destination_format = 'NEWLINE_DELIMITED_JSON'
extract_conf.compression = 'NONE'

def getDataset(experiment, outDir=None):
    """Export the experiment's flow-log table to its bucket and download it.

    The table `table_id` of the experiment's BigQuery dataset is extracted to
    "gs://<experiment>-log-bucket/<blob_name>" and that object is then
    downloaded to "<outDir>/gcp-flow-network.log".

    Parameters
    ----------
    experiment : str
        Experiment name. The dataset id is the name with "-" replaced by "_".
    outDir : str, optional
        Target directory. Defaults to the current "YYYYmmdd-HH" stamp, taken
        when the function is called (not when it is defined).

    Raises
    ------
    Exception
        Errors from the BigQuery / Storage clients are propagated unchanged,
        e.g. a "Not found" error when the dataset does not exist.
    """
    if outDir is None:
        # Computed per call; a default in the signature would be frozen at
        # definition time and go stale in a long-lived kernel.
        outDir = datetime.datetime.today().strftime('%Y%m%d-%H')
    ensureDirectory(outDir)

    bucket_name = experiment + "-log-bucket"
    dataset_id = experiment.replace("-", "_")
    destination_uri = "gs://{}/{}".format(bucket_name, blob_name)

    table_ref = bigquery.DatasetReference(project, dataset_id).table(table_id)

    extract_job = client.extract_table(
        table_ref,
        destination_uri,
        # Location must match that of the source table.
        location="US",
        job_config=extract_conf
    )  # API request
    extract_job.result()  # Waits for job to complete.

    # Report the dataset id that was actually queried, not the raw experiment name.
    print(
        "Exported {}:{}.{} to {}".format(project, dataset_id, table_id, destination_uri)
    )

    storage_client = storage.Client(credentials=credentials)
    blob = storage_client.bucket(bucket_name).blob(blob_name)
    blob.download_to_filename(f"{outDir}/gcp-flow-network.log")

    print(
        "Blob downloaded successfully."
    )

# Manual mode only: without an injected `experiment`, export the flow logs of
# every experiment listed in sharedVariables.
if 'experiment' not in locals():
    # The loop variable must not be named `experiment`: at notebook top level
    # it would become a global and make every later
    # `'experiment' in locals()` check behave as if papermill had injected it.
    for experiment_name in sharedVariables.experiments:
        try:
            getDataset(experiment_name)
        except Exception as e:
            # Best effort: report the failure and continue with the next experiment.
            print(e)

# PCAPs

In [26]:
def download_pcap(ip, outDir=None):
    """Copy /captures.zip from the orchestrator host via scp and unpack it.

    Parameters
    ----------
    ip : str
        Address of the orchestrator host. Surrounding whitespace (such as a
        trailing newline from a text file) is ignored.
    outDir : str, optional
        Directory the archive is unpacked into. Defaults to the current
        "YYYYmmdd-HH" stamp, taken when the function is called.

    Raises
    ------
    Exception
        With scp's stderr as message when scp exits with a non-zero status.
    """
    if outDir is None:
        # Computed per call; a default in the signature would be frozen at
        # definition time.
        outDir = datetime.datetime.today().strftime('%Y%m%d-%H')
    ensureDirectory(outDir)
    print("Dowloading File")
    print(os.path.join(Path.cwd(), "..", "cert"))

    # Pass scp an argument list instead of a shell string: the host value is
    # never interpreted by a shell, and stray whitespace cannot split the command.
    host = str(ip).strip()
    command = [
        "scp",
        "-i", "../infrastructure/orchestrator.pem",
        "-o", "StrictHostKeyChecking=no",
        f"orchestrator@{host}:/captures.zip",
        "./captures.zip",
    ]
    result = subprocess.run(command, stdout=PIPE, stderr=PIPE, universal_newlines=True)
    if result.returncode != 0:
        raise Exception(result.stderr)
    print(result.stdout)

    shutil.unpack_archive("./captures.zip", outDir)

# Manual mode only (no `experiment` variable defined): download the captures
# from a fixed orchestrator address.
# NOTE(review): the IP is hard-coded — presumably it has to be updated for
# every new deployment of the orchestrator; confirm.
if 'experiment' not in locals():
    download_pcap("35.224.133.98")


# Monitoring

In [27]:
# Service-account credentials and client for the monitoring API.
credentials = service_account.Credentials.from_service_account_file(
    '../infrastructure/credentials.json'
)
monitoring_client = monitoring_v3.MetricServiceClient(credentials=credentials)

# Resource name of the project whose metrics are queried.
project_name = "projects/{}".format(project_id)

# Current time split into whole seconds and the nanosecond remainder.
now = time.time()
seconds = int(now)
nanos = int((now - seconds) * 10 ** 9)

# Look-back window in seconds: three hours when `experiment` is defined,
# one hour otherwise (manual run).
if 'experiment' in locals():
    timeframe = 3600 * 3
else:
    print("set timeframe to one hour")
    timeframe = 3600 * 1
print("Timeframe:", timeframe)

# Query window covering the last `timeframe` seconds up to now.
interval = monitoring_v3.TimeInterval(
    {
        "start_time": {"seconds": (seconds - timeframe), "nanos": nanos},
        "end_time": {"seconds": seconds, "nanos": nanos},
    }
)


def _list_time_series(metric_filter):
    """Request the FULL view of all time series matching `metric_filter`.

    Uses the module-level `monitoring_client`, `project_name` and `interval`.
    Returns whatever `monitoring_client.list_time_series` returns.
    """
    return monitoring_client.list_time_series(
        request={
            "name": project_name,
            "filter": metric_filter,
            "interval": interval,
            "view": monitoring_v3.ListTimeSeriesRequest.TimeSeriesView.FULL,
        }
    )


# Add Filter? metric.label.instance_name = "your-instance-id"
# One request per instance metric; only the filter differs between them.
results_cpu = _list_time_series(
    'metric.type =  "compute.googleapis.com/instance/cpu/utilization"'
)
results_io_read = _list_time_series(
    'metric.type = "compute.googleapis.com/instance/disk/read_bytes_count"'
)
results_io_write = _list_time_series(
    'metric.type = "compute.googleapis.com/instance/disk/write_bytes_count"'
)
results_iops_read = _list_time_series(
    'metric.type = "compute.googleapis.com/instance/disk/read_ops_count"'
)
results_iops_write = _list_time_series(
    'metric.type = "compute.googleapis.com/instance/disk/write_ops_count"'
)

# Agent memory metrics are requested once per memory state, first as absolute
# bytes and then as percentages; results are keyed by state.
mem_states = ["buffered", "cached", "free", "slab", "used"]

mutli_results_memory = {}
for state in mem_states:
    mutli_results_memory[state] = _list_time_series(
        f'metric.type = "agent.googleapis.com/memory/bytes_used" AND metric.labels.state = "{state}"'
    )

mutli_results_memory_percentage = {}
for state in mem_states:
    mutli_results_memory_percentage[state] = _list_time_series(
        f'metric.type = "agent.googleapis.com/memory/percent_used" AND metric.labels.state = "{state}"'
    )



# The query results are not aligned with each other, so collect them in one
# list together with a parallel list of column-name prefixes: entry i of
# `time_series_list_list_labels` names the metric held at entry i of
# `time_series_list_list`.
time_series_list_list_labels = (
    ["cpu_util_", "io_read_", "io_write_", "iops_read_", "iops_write_"]
    # Memory in bytes, one entry per state
    + [f"mem_{state}_" for state in mem_states]
    # Memory in percent, one entry per state
    + [f"mem_perc_{state}_" for state in mem_states]
)
time_series_list_list = (
    [results_cpu, results_io_read, results_io_write, results_iops_read, results_iops_write]
    + [mutli_results_memory[state] for state in mem_states]
    + [mutli_results_memory_percentage[state] for state in mem_states]
)

print(time_series_list_list_labels)

# Group every fetched series by instance name: instance_logs[name][i] holds
# the series of metric i (same order as `time_series_list_list`), or None when
# that metric was not reported for the instance.

# Materialise each result once so it can be traversed repeatedly below.
# NOTE(review): the query results are presumably paged iterators that are not
# guaranteed to replay from the start on a second pass — confirm.
time_series_list_list = [list(series_list) for series_list in time_series_list_list]

# instance_id -> instance_name, taken from the first metric (index 0), whose
# series carry the name as a metric label. Series of other metrics that lack
# the label are matched to a name through their instance id.
instance_names_by_id = {}
for compare_item in time_series_list_list[0]:
    compare_name = compare_item.metric.labels["instance_name"]
    if compare_name:
        instance_names_by_id[compare_item.resource.labels["instance_id"]] = compare_name

instance_logs = {}

for index, time_series_list in enumerate(time_series_list_list):
    for time_series in time_series_list:
        label = time_series.metric.labels["instance_name"]
        if not label:
            label = instance_names_by_id.get(time_series.resource.labels["instance_id"])
        if not label:
            # Previously the label of the preceding series was silently reused
            # here, filing the data under the wrong instance.
            print(
                "Skipping series without resolvable instance name, instance_id:",
                time_series.resource.labels["instance_id"],
            )
            continue
        if label not in instance_logs:
            instance_logs[label] = np.empty(len(time_series_list_list), dtype=object)
        instance_logs[label][index] = time_series

def _point_value(point):
    """Return the numeric value of one monitoring point.

    Disk byte/op counters are integer-valued (NOTE(review): per the Cloud
    Monitoring metric list — confirm), so reading only `double_value` yields
    0.0 for them. NOTE(review): this relies on the unset member of the value
    oneof reading as zero — confirm.
    """
    typed_value = point.value
    return typed_value.double_value or typed_value.int64_value


def loadMonitoringData(experiment, outDir=None):
    """Build one DataFrame of all monitoring series of an experiment and save it.

    Selects the entries of `instance_logs` whose instance name starts with
    `experiment`, trims every series to the shortest one, and writes the
    result to "<outDir>/monitoring.csv". Columns are named
    "<metric prefix><instance name>"; the index is the timestamp.

    Parameters
    ----------
    experiment : str
        Instance-name prefix selecting the experiment's instances.
    outDir : str, optional
        Target directory. Defaults to the current "YYYYmmdd-HH" stamp, taken
        when the function is called.

    Returns
    -------
    pandas.DataFrame
        The frame that was written to disk.

    Raises
    ------
    ValueError
        If no monitoring series exists for the experiment.
    """
    if outDir is None:
        # Computed per call; a default in the signature would be frozen at
        # definition time.
        outDir = datetime.datetime.today().strftime('%Y%m%d-%H')
    ensureDirectory(outDir)

    # Filter instances for our experiment
    filteredInstances = {
        key: instance
        for key, instance in instance_logs.items()
        if key.startswith(experiment)
    }

    # Slots of metrics that were never reported for an instance hold None.
    available = [
        series
        for instances in filteredInstances.values()
        for series in instances
        if series is not None
    ]
    if not available:
        raise ValueError(f"No monitoring data found for instances starting with '{experiment}'")

    # Instances may start reporting at different times, so the series differ
    # in length. Every series is cut to the shortest one, keeping the leading
    # points.
    first_len = min(len(series.points) for series in available)
    for key, instances in filteredInstances.items():
        print(key, len(instances[0].points) if instances[0] is not None else 0)

    print("Overall min len:", first_len)
    print("SET min len:", first_len)

    # Timestamps come from the first available series of the first instance.
    first_instance = next(iter(filteredInstances.values()))
    reference = next(series for series in first_instance if series is not None)
    columns = {
        'timestamp': pd.to_datetime(
            [p.interval.start_time.ToDatetime() for p in reference.points[:first_len]]
        )
    }

    for key, instances in filteredInstances.items():
        for index, series in enumerate(instances):
            column = time_series_list_list_labels[index] + key
            if series is None:
                print("No data for", column)
                continue
            columns[column] = [_point_value(p) for p in series.points[:first_len]]

    # Build the frame in one go instead of inserting column by column.
    df = pd.DataFrame(columns).set_index('timestamp')
    df.to_csv(f"{outDir}/monitoring.csv")
    print(f"Gathered all Monitoring data for {experiment}")
    return df


# Make sure your experiment name is included in the experiments list, otherwise only errors will occur.
# Manual mode only: without an injected `experiment`, gather the monitoring
# data of every experiment listed in sharedVariables.
if 'experiment' not in locals():
    # The loop variable must not be named `experiment`: at notebook top level
    # it would become a global and make the following
    # `'experiment' in locals()` checks behave as if papermill had injected it.
    for experiment_name in sharedVariables.experiments:
        try:
            loadMonitoringData(experiment_name)
        except Exception as e:
            # Best effort: report the failure and continue with the next experiment.
            print(e)
# loadMonitoringData("experiment-syncmesh-with-latency-9")


Timeframe: 21600
['cpu_util_', 'io_read_', 'io_write_', 'iops_read_', 'iops_write_', 'mem_buffered_', 'mem_cached_', 'mem_free_', 'mem_slab_', 'mem_used_', 'mem_perc_buffered_', 'mem_perc_cached_', 'mem_perc_free_', 'mem_perc_slab_', 'mem_perc_used_']


# Get TimeStamps

In [28]:
# # Workaround for now
# # df_sync3 = pd.read_csv(f'{outdir}/experiment-syncmesh-with-latency-3.csv')
# # df_base3 = pd.read_csv(f'{outdir}/experiment-baseline-with-latency-3.csv')
# df_sync3 = loadData(f'{outdir}/experiment-syncmesh-with-latency-3.log')
# df_base3 = loadData(f'{outdir}/experiment-baseline-with-latency-3.log')
# # df_base3.set_index('timestamp', inplace=True)
# # df_sync3.set_index('timestamp', inplace=True)

# # df_sync6 = loadData(f'{outdir}/experiment-syncmesh-with-latency-6.log')
# # df_base6 = loadData(f'{outdir}/experiment-baseline-with-latency-6.log')

# def filterDataForSeperator(df): 
#     df = df[df["jsonPayload.connection.src_ip"].isin([ip_seperator, ip_orchestrator])]
#     df = df[df["jsonPayload.connection.dest_ip"].isin([ip_seperator, ip_orchestrator])]
#     df = df[df["jsonPayload.connection.dest_port"] == 443]
#     return df

# seperator_base3 = filterDataForSeperator(df_base3)
# seperator_sync3 = filterDataForSeperator(df_sync3)

# # df = df[df["jsonPayload.connection.dest_ip"]]
# print(seperator_sync3.index)
# print(seperator_base3.index)
# # df_base3.info()

In [29]:
# Script for automatic data retrieval via papermill
# Runs only when `experiment` is defined; all artefacts of the run go to
# data/<YYYYmmdd-HH>-<experiment>.
if 'experiment' in locals():
    todaystring = datetime.datetime.today().strftime('%Y%m%d-%H')
    outdir = f"data/{todaystring}-{experiment}"

    # Read the orchestrator address. The context manager closes the file, and
    # strip() removes a trailing newline that would otherwise end up inside
    # the scp target.
    with open(os.path.join(Path.cwd(), "..", "infrastructure", "orchestrator.txt"), "r") as f:
        ip = f.read().strip()
    download_pcap(ip, outDir=outdir)
    loadMonitoringData(experiment, outDir=outdir)
    getDataset(experiment, outDir=outdir)

    

experiment-distributed-gundb-with-latency-3-node-instance-2 188
experiment-distributed-gundb-with-latency-3-node-instance-1 188
experiment-distributed-gundb-with-latency-3-node-instance-3 187
experiment-distributed-gundb-with-latency-3-client-instance 188
experiment-distributed-gundb-with-latency-3-test-orchestrator 185
Overall min len: 185
SET min len: 185
Gathered all Monitoring data for experiment-distributed-gundb-with-latency-3


NotFound: 404 POST https://bigquery.googleapis.com/bigquery/v2/projects/dspj-315716/jobs?prettyPrint=false: Not found: Dataset dspj-315716:experiment_distributed_gundb_with_latency_3