In [1]:
from google.cloud import bigquery
import os
import json
import pandas as pd
import numpy as np
from pathlib import Path
import datetime

from importlib import reload

import src.table_stats
reload(src.table_stats)

from src.table_stats import print_stats

# Initialize BigQuery client
# NOTE(review): the client is commented out, yet the query cells below call
# `client.query(...)` - re-enable this line before running them.
# client = bigquery.Client()

# Table view limits: maximum column width and maximum number of rows
pd.options.display.max_colwidth = 60
pd.options.display.max_rows = 200
# Route DataFrame.plot(...) through plotly
pd.set_option("plotting.backend", "plotly")

# Root of the data tree, located under the directory named by $WORKDIR
DATA_FOLDER = Path(os.getenv("WORKDIR")) / "data"

In [2]:
# Configure query by run id
# PROJECT_ID / DATASET_ID / TABLE_ID are interpolated into the SQL below as
# `project.dataset.table`. RUN_ID filters the rows of that table and is also
# used as the name of the per-run folder under DATA_FOLDER.

PROJECT_ID = "symphony-dev-2"
DATASET_ID = "log_dataset_default"
TABLE_ID = "logs-2"
RUN_ID = "test-bigscale-14"


In [None]:
# List every distinct run id stored in the log table
QUERY = f"""
SELECT DISTINCT run from `{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}`
"""

query_job = client.query(QUERY)
rows = query_job.result()
df = rows.to_dataframe()
runs = df["run"].to_list()

# Cell output: True when the configured RUN_ID is present in the table
RUN_ID in runs

In [None]:
# Fetch every log row of the configured run that is newer than the cut-off.
# The two predicates are combined with AND: SQL allows a single WHERE clause
# per SELECT, so a second `WHERE` makes the query invalid.
QUERY = """
SELECT * from `{project}.{dataset}.{table}`
WHERE run = "{run_id}"
  AND time > "2025-10-01T10:08:58+00:00"
ORDER BY time DESC
-- LIMIT 1000
-- Optionally limit the query when dealing with too big datasets...
""".format(
    project = PROJECT_ID,
    dataset = DATASET_ID,
    table = TABLE_ID,
    run_id = RUN_ID
)

query_job = client.query(QUERY)
rows = query_job.result()
df = rows.to_dataframe()

# Parse detail json string (rows without detail keep None)
df.detail = df.detail.transform(lambda x: json.loads(x) if x is not None else None)

# Sort by time (ascending), keeping `time` as a regular column
df = df.set_index("time").sort_index().reset_index()

# Optionally identify when grr changed: compare each cli:grr_* detail with the
# previous event of the same kind. Rows of other events get NaN.
df["grr_shift_out"] = df[df.event == "cli:grr_out"].detail != df[df.event == "cli:grr_out"].shift().detail
df["grr_shift_in"] = df[df.event == "cli:grr_in"].detail != df[df.event == "cli:grr_in"].shift().detail

print_stats(df)


In [None]:
# Count the node:create events logged for the configured run
QUERY = f"""
SELECT COUNT(event) from `{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}`
WHERE (
    run = "{RUN_ID}" AND
    event = "node:create"
)
-- LIMIT 1000 
-- Optionally limit the query when dealing with too big datasets...
"""

partial_query_job = client.query(QUERY)
partial_rows = partial_query_job.result()
partial_df = partial_rows.to_dataframe()

partial_df.head()

In [4]:
# Reference instants used to express plots as "seconds after the request".
# Later cells accept None for either of them and then fall back to absolute
# timestamps.
# NOTE(review): RETURN_TIMESTAMP (2025-09-16) is earlier than
# REQUEST_TIMESTAMP (2025-10-01) - confirm it belongs to this run.
REQUEST_TIMESTAMP = None; RETURN_TIMESTAMP = None

REQUEST_TIMESTAMP = "2025-10-01T09:08:58+00:00"
RETURN_TIMESTAMP = "2025-09-16T13:59:08+00:00"
REQUEST_TIMESTAMP = datetime.datetime.fromisoformat(REQUEST_TIMESTAMP).astimezone(datetime.UTC)
RETURN_TIMESTAMP = datetime.datetime.fromisoformat(RETURN_TIMESTAMP).astimezone(datetime.UTC)

# Per-run data folder
test_folder = DATA_FOLDER.joinpath(RUN_ID)

cloud_hosts_path = test_folder.joinpath("cloud_hosts.csv")

# Both names start as None so that later cells can test them safely when the
# CSV is absent.
cloud_hosts_df = None
cloud_hosts = None
if os.path.isfile(cloud_hosts_path):
    cloud_hosts_df = pd.read_csv(
        cloud_hosts_path,
        index_col=0,
        converters={
            "releaseTime": datetime.datetime.fromisoformat,
            "launchTime": datetime.datetime.fromisoformat
        },
    )
    # Downstream cells gate all cloud-host processing on
    # `cloud_hosts is not None`; without this assignment the loaded data was
    # never used.
    cloud_hosts = cloud_hosts_df

# Parse data from systemd resource watcher 

In [None]:
# Folder holding one CSV per resource snapshot; the file name is used as the
# dictionary key and is later parsed as an integer epoch timestamp.
resources_path = test_folder / "resources"

resources_dfs = (
    {
        snapshot_name: pd.read_csv(resources_path / snapshot_name)
        for snapshot_name in os.listdir(resources_path)
    }
    if os.path.isdir(resources_path)
    else None
)

def transform_maxmem(row):
    """Replace ``row.maxmem`` by its integer value, dropping the last character.

    The trailing character is discarded before the conversion
    (presumably a unit suffix - confirm against the watcher output).
    On failure the row's status and raw maxmem are printed and the
    original exception is re-raised.

    Returns the same (mutated) row.
    """
    try:
        numeric_part = row.maxmem[:-1]
        row["maxmem"] = int(numeric_part)
    except Exception:
        # Show the offending values before propagating the error
        print(row.status, row.maxmem)
        raise
    return row

def transform_status(row):
    """Mark a row as "bootstraping" when any resource field is still "-".

    The fields inspected are maxmem, nprocs, ncores and nthreads. Rows
    without a "-" placeholder are returned untouched.

    Returns the same (possibly mutated) row.
    """
    watched_fields = ["maxmem", "nprocs", "ncores", "nthreads"]
    if "-" not in row[watched_fields].to_list():
        return row
    row["status"] = "bootstraping"
    return row


if resources_dfs is not None:
    summed_fields = ["maxmem", "nprocs", "ncores", "nthreads"]
    resources = []

    for timestamp, resources_df in resources_dfs.items():
        # Categorize items with "-" as bootstraping
        resources_df = resources_df.transform(transform_status, axis=1)

        # Transform maxmem to integer (only meaningful for rows in "ok" status)
        is_ok = resources_df.status == "ok"
        resources_df = pd.concat([
            resources_df.loc[~is_ok],
            resources_df.loc[is_ok].transform(transform_maxmem, axis=1)
        ])

        # One record per snapshot: UTC time, totals over the "ok" rows and
        # the number of rows found in each status
        snapshot_time = datetime.datetime.fromtimestamp(int(timestamp)).astimezone(tz=datetime.UTC)
        ok_totals = resources_df[resources_df.status == "ok"][summed_fields].astype(int).sum().to_dict()
        status_counts = resources_df.value_counts("status").to_dict()
        resources.append({
            "timestamp": snapshot_time,
            ** ok_totals,
            ** status_counts
        })

    resources = pd.DataFrame.from_records(resources)

    # Index by seconds since the request when known, else by absolute time
    if REQUEST_TIMESTAMP is None:
        resources = resources.set_index("timestamp").sort_index()
    else:
        resources["request_delta"] = resources.timestamp.transform(lambda x: (x - REQUEST_TIMESTAMP).total_seconds())
        resources = resources.set_index("request_delta").sort_index()
    




In [None]:
# Persist the aggregated resource usage next to the other run artifacts
resources.to_csv(test_folder / "resources.csv")

# Pod Parsing and back propagation

In [None]:
# TODO: Add preemption analysis with `dfc_pivot_preempted` and `cloud_hosts`

# Events that take part in the pod life-cycle analysis
pod_lifecycle_events = [
    "cli:rm_out",
    "pod:create",
    "pod:scheduled",
    "container:started",
    "node:preempted",
    "cli:grs_out",
    "cli:rrm_in",
    "pod:delete",
    "scale_up",
]

# Work on a deep copy so the raw event frame `df` stays untouched
dfc = df.copy(deep=True)
dfc = dfc[dfc.event.isin(pod_lifecycle_events)]

# ========= Parse Node Preemption =========

# This transforms the `node:preempted` into multiple `pod:node_preempted` for
# all pods that have been scheduled to the node before its preemption.
# This is later used to remove lines from the pivoted table

def parse_preemption(row):
    """Attach to a `node:preempted` row the pods scheduled on that node.

    Looks up, in the raw frame `df`, every `pod:scheduled` event on
    ``row.node`` whose time is not later than ``row.time``; the list of
    their pods is stored in ``row.pod`` and the event is renamed to
    "pod:node_preempted". Returns the mutated row.
    """
    scheduled_before = (
        (df.event == "pod:scheduled")
        & (df.node == row.node)
        & (df.time <= row.time)
    )
    row.pod = df[scheduled_before].pod.to_list()
    row.event = "pod:node_preempted"
    return row


# Replace each node:preempted row by one pod:node_preempted row per pod
is_node_preempted = dfc.event == "node:preempted"
preempted_per_pod = (
    dfc[is_node_preempted]
    .transform(parse_preemption, axis=1)
    .explode("pod")
)
dfc = pd.concat([dfc.loc[~is_node_preempted], preempted_per_pod]).reset_index(drop=True)

# ========= Parse Pod Preemption =========

# Checks if the pod:delete event contains data
# informing that the pod was deleted by the scheduler
# due to preemption

def parse_pod_preemption(row):
    """Return `row` renamed to "pod:preempted" if it is a scheduler preemption.

    A row qualifies when the first entry of ``row.detail["conditions"]``
    has ``reason == "PreemptionByScheduler"``. Rows whose detail is
    missing (None / not a dict), that carry no conditions, or whose
    conditions list is empty are not preemptions.

    Returns:
        The mutated row for a scheduler preemption, otherwise None.
    """
    detail = row.detail
    # The detail column holds None for events logged without a payload
    if not isinstance(detail, dict):
        return None
    conditions = detail.get("conditions")
    # Covers both a missing key and an empty list
    if not conditions:
        return None
    if conditions[0].get("reason") == "PreemptionByScheduler":
        row.event = "pod:preempted"
        return row
    return None

# Append a `pod:preempted` copy of every `pod:delete` row recognised by
# parse_pod_preemption. Rows are classified one by one into a boolean mask
# instead of using DataFrame.apply: when the function returns None for the
# first row, apply builds an object Series rather than a DataFrame, and the
# concat would then add a stray column instead of the preempted rows.
pod_deletes = dfc[dfc.event == "pod:delete"]
is_sched_preempted = np.array(
    [parse_pod_preemption(row) is not None for _, row in pod_deletes.iterrows()],
    dtype=bool,
)

if is_sched_preempted.any():
    dfc = pd.concat(
        [dfc, pod_deletes.loc[is_sched_preempted].assign(event="pod:preempted")],
        ignore_index=True,
    )
else:
    # Nothing to append; still renumber the rows as the concat would
    dfc = dfc.reset_index(drop=True)


# ========= Parse RM =========

# This transforms a single cli:rm_out into multiple cli:rm
# extracting the pod name

def parse_rm_out(row):
    """Attach to a `cli:rm_out` row the pods created for its request.

    The request id is read from ``row.detail["payload"]["requestId"]``;
    every row of `dfc` whose ``sym_request`` equals it contributes its pod.
    The event is renamed to "cli:rm". Returns the mutated row.
    """
    request_id = row.detail["payload"]["requestId"]
    # Get associated pods
    row.pod = dfc.loc[dfc.sym_request == request_id, "pod"].to_list()
    # Change event name
    row.event = "cli:rm"
    return row

# Replace each cli:rm_out row by one cli:rm row per associated pod
is_rm_out = dfc.event == "cli:rm_out"
rm_per_pod = dfc[is_rm_out].transform(parse_rm_out, axis=1).explode("pod")
dfc = pd.concat([dfc.loc[~is_rm_out], rm_per_pod]).reset_index(drop=True)

# ========= Parse RRM =========

# This transforms a single cli:rrm_in into multiple cli:rrm
# extracting the pod name

def parse_rrm_in(row):
    """Attach to a `cli:rrm_in` row the names of the machines it lists.

    Names come from ``row.detail["payload"]["machines"][i]["name"]`` and
    are stored as a list in ``row.pod``; the event is renamed to "cli:rrm".
    Returns the mutated row.
    """
    pod_names = []
    for machine in row.detail["payload"]["machines"]:
        pod_names.append(machine["name"])
    row.pod = pod_names
    row.event = "cli:rrm"
    return row

# Replace each cli:rrm_in row by one cli:rrm row per returned machine
is_rrm_in = dfc.event == "cli:rrm_in"
rrm_per_pod = dfc[is_rrm_in].transform(parse_rrm_in, axis=1).explode("pod")
dfc = pd.concat([dfc.loc[~is_rrm_in], rrm_per_pod]).reset_index(drop=True)

# ========= Parse GRS Output =========

# This extracts from the `cli:grs_out` when a machine
# was first recognized as running by Symphony.

def parse_grs_out(row):
    """Expand a `cli:grs_out` row into parallel per-machine lists.

    Every machine of every request in ``row.detail["payload"]["requests"]``
    contributes one entry to three lists of equal length:

    * ``row.pod``    - the machine "name"
    * ``row.detail`` - the machine "status"
    * ``row.event``  - "cli:grs_<result>" built from the machine "result"

    The lists are empty when no request lists a machine. Returns the
    mutated row.
    """
    machines = [
        machine
        for request in row.detail["payload"]["requests"]
        for machine in request["machines"]
    ]

    row.pod = [machine["name"] for machine in machines]
    row.detail = [machine["status"] for machine in machines]
    # Single quotes inside the f-string: reusing the enclosing double quote
    # is only accepted from Python 3.12 on.
    row.event = [f"cli:grs_{machine['result']}" for machine in machines]

    return row

# Replace each cli:grs_out row by one cli:grs_<result> row per machine;
# the three list columns are exploded together so they stay aligned
is_grs_out = dfc.event == "cli:grs_out"
grs_per_pod = (
    dfc[is_grs_out]
    .transform(parse_grs_out, axis=1)
    .explode(["pod","detail", "event"])
)
dfc = pd.concat([dfc.loc[~is_grs_out], grs_per_pod]).reset_index(drop=True)

# ========= Load Cloud Hosts if existing =========

if cloud_hosts is not None:
    # Keep the three columns of interest, name the index "pod" and relabel
    # the timestamps like the other event columns
    cloud_hosts = (
        cloud_hosts_df.copy(deep=True)[["hostname", "releaseTime", "launchTime"]]
        .rename_axis(index="pod")
        .rename(columns={
            "launchTime": "hf:launched",
            "releaseTime": "hf:released",
        })
    )

# Chronological order, with `time` kept as a regular column
dfc = dfc.set_index("time").sort_index().reset_index()

# ========= Pivot table =========

# This step pivots the table to have pods as indexes, events as columns
# and values as time. Both the first and the last occurrence are kept.

dfc_pivot = dfc.pivot_table(
    index="pod",
    columns="event",
    values="time",
    aggfunc=["first", "last"]
)

# Remove pods that lived on a preempted node (kept aside for inspection)
preempted_pods = None
dfc_pivot_preempted = None
if "pod:node_preempted" in dfc_pivot["first"].columns:
    preempted_pods = dfc_pivot.index[dfc_pivot["first"]["pod:node_preempted"].notna()]
    dfc_pivot_preempted = dfc_pivot.loc[preempted_pods]
    dfc_pivot = dfc_pivot.drop(index=preempted_pods)

# Remove pods preempted by the scheduler (kept aside for inspection)
preempted_pods_sched = None
dfc_pivot_preempted_sched = None
if "pod:preempted" in dfc_pivot["first"].columns:
    preempted_pods_sched = dfc_pivot.index[dfc_pivot["first"]["pod:preempted"].notna()]
    dfc_pivot_preempted_sched = dfc_pivot.loc[preempted_pods_sched]
    dfc_pivot = dfc_pivot.drop(index=preempted_pods_sched)

pod_schedule = dfc_pivot.copy(deep=True)

# ========= Calculate deltas =========

def _delta_seconds(deltas):
    """Convert a Series of time differences to seconds, element by element."""
    return deltas.transform(lambda x: x.total_seconds())

first_seen = dfc_pivot["first"]
last_seen = dfc_pivot["last"]
summary_percentiles = [0.25, 0.5, 0.75, 0.99]

# Scaleup Delta
pod_scale_up = pd.DataFrame()
pod_scale_up["cli:rm->pod:create"] = _delta_seconds(first_seen["pod:create"] - first_seen["cli:rm"])
if cloud_hosts is not None:
    pod_scale_up["hf:launched->pod:scheduled"] = _delta_seconds(last_seen["pod:scheduled"] - cloud_hosts["hf:launched"])
pod_scale_up["pod:create->pod:scheduled"] = _delta_seconds(first_seen["pod:scheduled"] - first_seen["pod:create"])
pod_scale_up["pod:create->cli:grs_executing"] = _delta_seconds(first_seen["cli:grs_executing"] - first_seen["pod:create"])
pod_scale_up["pod:schedule->cli:grs_succeed"] = _delta_seconds(first_seen["cli:grs_succeed"] - first_seen["pod:scheduled"])

# Scaledown Delta
pod_scale_down = pd.DataFrame()
pod_scale_down["cli:rrm->pod:delete"] = _delta_seconds(last_seen["pod:delete"] - first_seen["cli:rrm"])

if cloud_hosts is not None:
    # NOTE(review): unlike the other deltas this one is (start - end) with
    # respect to its label - confirm the intended sign.
    pod_scale_down["pod:delete->hf:released"] = _delta_seconds(last_seen["pod:delete"] - cloud_hosts["hf:released"])

print(f"Run ID: {RUN_ID}")

if preempted_pods is not None:
    print(f"Number of preempted pods: {len(preempted_pods)}")

# From here on both names hold summary statistics, not per-pod values
pod_scale_down = pod_scale_down.describe(percentiles=summary_percentiles)
pod_scale_up = pod_scale_up.describe(percentiles=summary_percentiles)

print(pod_scale_down)
pod_scale_up

In [None]:
# Persist the per-pod first/last event timestamps
dfc_pivot.to_csv(test_folder / "pod_events.csv")

In [None]:
# Export two of the scale-up summary columns under readable titles
scaleup_titles = {
    "cli:rm->pod:create": "RequestMachine to Pod Created",
    "pod:create->pod:scheduled": "Pod Created to Scheduled",
}
scaleup_stats = pod_scale_up.rename(columns=scaleup_titles)[list(scaleup_titles.values())]
scaleup_stats.to_csv(test_folder / "scaleup_stats.csv")

# Pod Scale Up Plot

In [None]:
# First occurrence of each scale-up event, one column per event
first_seen = dfc_pivot["first"]

pod_scale_up_plot = pd.DataFrame()
for event_name in (
    "cli:rm",
    "pod:create",
    "cli:grs_executing",
    "pod:scheduled",
    "cli:grs_succeed",
):
    pod_scale_up_plot[event_name] = first_seen[event_name]

if cloud_hosts is not None:
    pod_scale_up_plot["hf:launched"] = cloud_hosts["hf:launched"]

# Long format sorted by time, then a running count per event type
pod_scale_up_plot = pod_scale_up_plot.melt().set_index("value").sort_index().reset_index()
pod_scale_up_plot["count"] = pod_scale_up_plot.groupby("variable").cumcount()
scale_up_index = pod_scale_up_plot["value"]

# Back to wide format: one column of counts per event type
pod_scale_up_plot = pod_scale_up_plot.pivot(
    columns="variable",
    values="count"
)

scale_up_events = dfc[dfc.event == "scale_up"].time

if REQUEST_TIMESTAMP is None:
    pod_scale_up_plot["time"] = scale_up_index
else:
    def _seconds_after_request(moment):
        """Seconds elapsed between REQUEST_TIMESTAMP and `moment`."""
        return (moment - REQUEST_TIMESTAMP).total_seconds()

    pod_scale_up_plot["time"] = scale_up_index.transform(_seconds_after_request)
    scale_up_events = scale_up_events.transform(_seconds_after_request)

pod_scale_up_plot = pod_scale_up_plot.set_index("time")


# ============= Configure Plot =============

# Reorder columns (hf:launched is intentionally left out of the figure)
pod_scale_up_plot = pod_scale_up_plot[[
    "cli:rm",
    "pod:create",
    "cli:grs_executing",
    "pod:scheduled",
    "cli:grs_succeed",
]].rename(
    columns={
        "cli:rm": "CLI Request Machine",
        "pod:create": "Pod Created",
        "cli:grs_executing": "CLI Get Request Status: Executing",
        "pod:scheduled": "Pod Scheduled",
        "cli:grs_succeed": "CLI Get Request Status: Succeded",
    }
)

# Legend title and x-axis title are taken from the axis names
pod_scale_up_plot.columns.name = "Event Count"
pod_scale_up_plot.index.name = (
    "Time after HostFactory API request (seconds)"
    if REQUEST_TIMESTAMP is not None
    else "Timestamp"
)

pod_scale_up_fig = pod_scale_up_plot.plot(
    kind="scatter",
    title="Pod Scale Up"
)

# One dashed vertical line per scale_up event
for scale_up_timestamp in scale_up_events.to_list():
    pod_scale_up_fig = pod_scale_up_fig.add_vline(
        scale_up_timestamp,
        line_dash="dash", line_color="green"
    )

pod_scale_up_fig.show()


In [None]:
# Persist the data behind the pod scale-up figure
pod_scale_up_plot.to_csv(test_folder / "pod_scale_up.csv")

In [None]:
# Static (SVG) and interactive (HTML) exports of the pod scale-up figure
pod_scale_up_svg = test_folder / "pod_scale_up.svg"
pod_scale_up_fig.write_image(file=pod_scale_up_svg, format="svg", width=900, height=500)

pod_scale_up_html = test_folder / "pod_scale_up.html"
pod_scale_up_fig.write_html(file=pod_scale_up_html)

# Pod Scale Down Plot

In [None]:

# First occurrence of each scale-down event, one column per event
pod_scale_down_plot = pd.DataFrame()
pod_scale_down_plot["cli:rrm"] = dfc_pivot["first"]["cli:rrm"]
pod_scale_down_plot["pod:delete"] = dfc_pivot["first"]["pod:delete"]

if cloud_hosts is not None:
    pod_scale_down_plot["hf:released"] = cloud_hosts["hf:released"]

# Long format sorted by time, then a running count per event type
pod_scale_down_plot = pod_scale_down_plot.melt().set_index("value").sort_index().reset_index()
pod_scale_down_plot["count"] = pod_scale_down_plot.groupby("variable").cumcount()
scale_down_index = pod_scale_down_plot["value"]

# Back to wide format: one column of counts per event type
pod_scale_down_plot = pod_scale_down_plot.pivot(
    columns="variable",
    values="count"
)

# x values: seconds after the return request when known, else raw timestamps
if RETURN_TIMESTAMP is not None:
    pod_scale_down_plot["time"] = scale_down_index.transform(
        lambda x: (x - RETURN_TIMESTAMP).total_seconds()
    )
else:
    pod_scale_down_plot["time"] = scale_down_index

pod_scale_down_plot = pod_scale_down_plot.set_index("time")


# ============= Configure Plot =============

# Reorder columns (hf:released is left out of the figure)
pod_scale_down_plot = pod_scale_down_plot[[
    "cli:rrm",
    "pod:delete",
]].rename(
    columns={
        "cli:rrm": "CLI Request Return Machines",
        "hf:released": "Host Factory API Timestamp: Released",
        "pod:delete": "Pod Deleted"
    }
)

pod_scale_down_plot.columns.name = "Event Count"

# The axis title must follow the same switch as the x values above: they are
# relative to RETURN_TIMESTAMP, not REQUEST_TIMESTAMP.
if RETURN_TIMESTAMP is not None:
    pod_scale_down_plot.index.name = "Time after return API request (seconds)"
else:
    pod_scale_down_plot.index.name = "Timestamp"

pod_scale_down_fig = pod_scale_down_plot.plot(
    kind="scatter",
    title="Pod Scale Down"
)

pod_scale_down_fig.show()




In [None]:
# Static (SVG) and interactive (HTML) exports of the pod scale-down figure
pod_scale_down_svg = test_folder / "pod_scale_down.svg"
pod_scale_down_fig.write_image(file=pod_scale_down_svg, format="svg", width=900, height=500)

pod_scale_down_html = test_folder / "pod_scale_down.html"
pod_scale_down_fig.write_html(file=pod_scale_down_html)

# Node General Analysis

In [None]:

# Start again from the raw events; `dfc` is re-purposed for node analysis
dfc = df.copy(deep=True)

scale_up_events = dfc[dfc.event == "scale_up"].time

# Filter pertinent events
dfc = dfc[
    dfc.event.isin([
            "pod:scheduled",
            "node:preempted",
            "node:create",
            "node:ready_patch",
            "pod:delete",
            "node:delete",
            "pod:create"
    ])
]

# First node recorded for each pod
first_node_by_pod = dfc.pivot_table(
    index="pod",
    values="node",
    aggfunc="first"
)["node"]

# Per node: first occurrence of every event ...
dfc_first = dfc.pivot_table(
    index="node",
    columns="event",
    values="time",
    aggfunc="first"
)

# ... and last occurrence of every event, except pod:scheduled for which the
# first occurrence is kept
dfc = dfc.pivot_table(
    index="node",
    columns="event",
    values="time",
    aggfunc="last"
).drop(
    columns=[
        "pod:scheduled",
    ]
).join(dfc_first[["pod:scheduled"]])


# Global reference instants, taken from the pod pivot of the previous section
first_pod_create_timestamp = dfc_pivot["first"]["pod:create"].min()
first_pod_delete_timestamp = dfc_pivot["last"]["pod:delete"].min()

# Removes nodes without pods
nodes_without_pods = dfc[dfc["pod:scheduled"].isna()].index
dfc = dfc.drop(index=nodes_without_pods)

# Remove preempted nodes
if "node:preempted" in dfc.columns:
    preempted_nodes = dfc[~dfc["node:preempted"].isna()].index
    dfc = dfc.drop(index=preempted_nodes)

# Calculate delta
dfc["first-pod-create->node:create"] = (
    dfc["node:create"] - first_pod_create_timestamp
).apply(lambda x: x.total_seconds())

dfc["pod:delete->node:delete"] = (dfc["node:delete"] - dfc["pod:delete"]).apply(lambda x: x.total_seconds())


# Inner single quotes keep these f-strings valid before Python 3.12
print(f"Run ID: {RUN_ID}")
print(f"Number of node create events: {(df.event == 'node:create').sum()}")
print(f"Number of node preemption events: {(df.event == 'node:preempted').sum()}")
print(f"Number of unique nodes preempted: {len(df[df.event == 'node:preempted'].node.unique())}")
print(f"Number of unique nodes: {len(df.node.dropna().unique())}")
print(f"Number of nodes without pods scheduled: {len(nodes_without_pods)}")

# Same quartiles as the pod summaries (the third quartile is 0.75)
dfc_desc = dfc.describe(
    percentiles=[
        0.25, 0.5, 0.75, 0.99
    ]
)

# NOTE(review): pod:delete holds the *last* pod deletion per node (aggfunc
# "last" above) while the title says "First" - confirm the wording.
dfc_desc = dfc_desc.rename(columns={
    "first-pod-create->node:create": "First Pod Created to Node Created",
    "pod:delete->node:delete": "First Pod Deleted to Node Deleted"
})

dfc_desc

# First (overall pod creation) 
# 

In [None]:
# Persist the per-node event timestamps and deltas
dfc.to_csv(test_folder / "node_events.csv")

# Node Scale Up Plot

In [None]:

# Per-node timestamps of the events shown in the scale-up figure
node_scale_up = dfc.copy(deep=True)[[
    "node:create",
    "node:ready_patch",
    "pod:scheduled",
]]

# Long format sorted by time, then a running count per event type
node_scale_up = node_scale_up.melt().set_index("value").sort_index().reset_index()
node_scale_up["count"] = node_scale_up.groupby("event").cumcount()

# Absolute timestamps are used on the x axis
node_scale_up_index = node_scale_up["value"]

# Back to wide format: one column of counts per event type
node_scale_up = node_scale_up.pivot(
    columns="event",
    values="count"
)

node_scale_up["time"] = node_scale_up_index
node_scale_up = node_scale_up.set_index("time")


# =========== Format plot ===========

node_scale_up.columns.name = "Event Count"
node_scale_up.index.name = "Timestamp"

node_scale_up = node_scale_up.rename(
    columns={
        "node:create": "Node Created",
        "pod:scheduled": "First Pod Scheduled",
        "pod:create": "First Pod Created",
        "node:ready_patch": "Node Ready"
    }
)

node_scale_up_fig = node_scale_up.plot(
    kind="scatter",
    title="Node Scaleup",
)

node_scale_up_fig.update_traces(connectgaps=True)

# Mark the request instant; skipped when no request timestamp is configured,
# as the other plotting cells do.
if REQUEST_TIMESTAMP is not None:
    node_scale_up_fig.add_vline(
        REQUEST_TIMESTAMP,
        line_dash="dash", line_color="green"
    )

node_scale_up_fig.show()

In [None]:
# Persist the data behind the node scale-up figure
node_scale_up.to_csv(DATA_FOLDER / RUN_ID / "node_scale_up.csv")

In [None]:
# Static (SVG) and interactive (HTML) exports of the node scale-up figure
node_scale_up_svg = test_folder / "node_scale_up.svg"
node_scale_up_fig.write_image(file=node_scale_up_svg, format="svg", width=900, height=500)

node_scale_up_fig.write_html(test_folder / "node_scale_up.html")

# Node Scale Down Plot

In [None]:
# Per-node deletion timestamps (pod side and node side)
node_scale_down = dfc.copy(deep=True)[["pod:delete", "node:delete"]]

# Long format sorted by time, then a running count per event type
node_scale_down = (
    node_scale_down
    .melt()
    .set_index("value")
    .sort_index()
    .reset_index()
)
node_scale_down["count"] = node_scale_down.groupby("event").cumcount()

node_scale_down.head()

In [None]:

def _seconds_after_first_delete(moment):
    """Seconds elapsed between first_pod_delete_timestamp and `moment`."""
    return (moment - first_pod_delete_timestamp).total_seconds()

# x values, computed before the pivot drops the "value" column
node_scale_down_index = node_scale_down["value"].apply(_seconds_after_first_delete)

# Wide format: one column of counts per event type
node_scale_down = node_scale_down.pivot(
    columns="event",
    values="count"
)

node_scale_down["time"] = node_scale_down_index
node_scale_down = node_scale_down.set_index("time")

# =========== Format plot ===========

node_scale_down.columns.name = "Event Count"
node_scale_down.index.name = "Time after first intentional pod deletion (seconds)"

node_scale_down = node_scale_down[[
    "pod:delete",
    "node:delete"
]].rename(
    columns={
        "pod:delete": "Last pod deleted",
        "node:delete": "Node deleted"
    }
)

node_scale_down_fig = node_scale_down.plot(
    kind="scatter",
    title="Node Scaledown",
)

node_scale_down_fig.show()


In [None]:
# Persist the per-node timings and their summary statistics
for timing_frame, timing_file in (
    (dfc, "node_timing.csv"),
    (dfc_desc, "node_timing_description.csv"),
):
    timing_frame.to_csv(test_folder / timing_file)

# Post Analysis

Parse .csv data

In [6]:
from dateutil import parser



# Header-less read of the CPU export; the first two lines are skipped
cpu_csv_path = test_folder / "gke_cpus.csv"
cpu_df = pd.read_csv(cpu_csv_path, header=None, skiprows=2)
cpu_df = cpu_df.rename(columns={0: "time", 1: "cpus"})


def _cpu_sample_offset(raw_time):
    """Seconds between REQUEST_TIMESTAMP and a sample, shifted back by 60 s.

    Anything from the first " (" on is dropped before parsing the date.
    NOTE(review): the reason for the 60 s shift is not visible here - confirm.
    """
    sample_time = parser.parse(raw_time.split(" (")[0])
    return (sample_time - REQUEST_TIMESTAMP).total_seconds() - 60


cpu_df.time = cpu_df.time.transform(_cpu_sample_offset)


In [7]:
# NOTE(review): this reads from a "parsed" sub-folder, whereas the export
# cell above writes node_events.csv directly into the run folder - confirm
# where the file is expected to live.
# `df` is rebound here: from now on it holds node events, not the raw log.
node_event_columns = [
    "node:create",
    "node:delete",
    "node:ready_patch",
    "pod:delete",
    "pod:scheduled",
]
df = pd.read_csv(
    test_folder / "parsed" / "node_events.csv",
    parse_dates=node_event_columns,
).set_index("node")


def _seconds_after_request(moment):
    """Seconds elapsed between REQUEST_TIMESTAMP and `moment`."""
    return (moment - REQUEST_TIMESTAMP).total_seconds()


df["node:ready_patch"] = df["node:ready_patch"].transform(_seconds_after_request)
df["pod:scheduled"] = df["pod:scheduled"].transform(_seconds_after_request)

# {rank: seconds after request}, ranks following ascending time
nodes = df["node:ready_patch"].sort_values().reset_index(drop=True).to_dict()
pods = df["pod:scheduled"].sort_values().reset_index(drop=True).to_dict()


In [34]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Single plot with a secondary y axis for the core count
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Traces are added in this order: nodes, cores (secondary axis), pods
nodes_trace = go.Scatter(
    x=list(nodes.values()),
    y=list(nodes.keys()),
    mode="lines",
    name="GKE - Number of nodes",
)
cores_trace = go.Scatter(
    x=cpu_df.time,
    y=cpu_df.cpus,
    mode="lines",
    name="GKE - Number of cores",
)
pods_trace = go.Scatter(
    x=list(pods.values()),
    y=list(pods.keys()),
    mode="lines",
    name="Number of Symphony Pods",
)

fig.add_trace(nodes_trace)
fig.add_trace(cores_trace, secondary_y=True)
fig.add_trace(pods_trace)

fig.update_layout(
    title="Scaling performance of IBM Spectrum Symphony connector for GKE",
    plot_bgcolor="white",
    xaxis_range=[0,600],
    legend={
        "x": 0.005,
        "y": 0.95,
        "bordercolor": 'black',
        "borderwidth": 1,
    },
)

# x axis: one tick per minute over the first ten minutes
minute_marks = range(1, 11)
fig.update_xaxes(
    mirror=True,
    ticks='outside',
    showline=True,
    linecolor='black',
    gridcolor='lightgrey',
    tickvals=[60 * minute for minute in minute_marks],
    ticktext=[f"{minute} min" for minute in minute_marks],
    title_text="Time After Symphony Request",
)

# Applied to every y axis first ...
fig.update_yaxes(
    mirror=True,
    ticks='outside',
    showline=True,
    linecolor='black',
    gridcolor='lightgrey',
    tickcolor="lightgrey",
    zerolinecolor='lightgrey',
    title_text="Number of Pods and Nodes",
)

# ... then overridden for the secondary (cores) axis only
fig.update_yaxes(
    mirror=True,
    ticks='outside',
    showline=False,
    showgrid=False,
    linecolor=None,
    gridcolor=None,
    title_text="Number of Cores",
    secondary_y=True,
)

fig.show()

In [36]:
# Static PNG export of the combined figure
scale_up_png = test_folder / "scale_up.png"
fig.write_image(file=scale_up_png, format="png", width=800, height=500)


In [205]:
# Interactive HTML export of the combined figure
fig.write_html(test_folder / "scaleup.html")

In [39]:
import json

# Dump the {rank: seconds after request} mappings, pods first then nodes
for counts_file, counts in (
    ("pods_ready_count.json", pods),
    ("nodes_ready_count.json", nodes),
):
    with open(test_folder / counts_file,"w") as fh:
        json.dump(counts, fh)