In [None]:
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [None]:

# Locate the directory this code lives in. A notebook kernel defines no
# __file__, so the current working directory is used in that case.
if "__file__" in globals():
    SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
else:
    SCRIPT_DIR = os.getcwd()
PROJECT_ROOT = os.path.dirname(SCRIPT_DIR)
DATA_DIR = os.path.join(PROJECT_ROOT, "data")
CHARTS_DIR = os.path.join(SCRIPT_DIR, "charts")
# Run files are looked up as 1.csv .. NUM_RUNS.csv.
NUM_RUNS = 10

# One entry per (scenario folder, metric sub-folder, display label, pod count).
METRICS = [
    (scenario_folder, metric_dir, metric_label, pod_count)
    for scenario_folder, pod_count in (("../data/pod_10", 10), ("../data/pod_20", 20))
    for metric_dir, metric_label in (
        ("response_time", "Response Time (ms)"),
        ("replica", "Replica Count"),
        ("cpu", "CPU Usage"),
        ("memory", "Memory Usage"),
    )
]

# Per-metric chart settings, keyed by the metric sub-folder name.
# "threshold" is an optional horizontal reference line (None = no line).
PLOT_CONFIG = {
    metric_dir: {
        "metric_name": metric_name,
        "ylabel": ylabel,
        "ylim": ylim,
        "threshold": threshold,
    }
    for metric_dir, metric_name, ylabel, ylim, threshold in (
        ("cpu", "CPU Usage", "CPU (%)", 100, None),
        ("memory", "Memory Usage", "Memory (%)", 100, None),
        ("response_time", "Response Time", "Response Time (ms)", 1300, 1000),
        ("replica", "Desired Replicas", "Replicas", 20, None),
    )
}



In [None]:
def normalize_folder(folder: str) -> str:
    """Return ``folder`` with forward slashes, no leading ``../`` and no edge slashes.

    Backslashes are converted to ``/`` first, then every leading ``../``
    segment is dropped, and finally leading/trailing ``/`` are stripped.
    """
    unified = folder.replace("\\", "/")
    # Advance past each leading "../" (3 characters) without re-slicing.
    start = 0
    while unified.startswith("../", start):
        start += 3
    return unified[start:].strip("/")


def load_influx_csv(path: str) -> pd.DataFrame:
    """Read one run's CSV export and coerce its time/value columns.

    The first three lines are skipped and ``#``-prefixed content is treated
    as comments (presumably the InfluxDB annotated-CSV preamble; confirm
    against the exporter). Columns whose name contains ``Unnamed`` are
    dropped. ``_time`` is parsed to datetimes and ``_value`` to numbers;
    unparseable entries become NaT/NaN rather than raising.

    Raises:
        KeyError: if the file has no ``_time`` or ``_value`` column.
    """
    raw = pd.read_csv(path, skiprows=3, comment="#")
    unnamed_cols = [col for col in raw.columns if "Unnamed" in str(col)]
    cleaned = raw.drop(columns=unnamed_cols, errors="ignore")
    cleaned["_time"] = pd.to_datetime(cleaned["_time"], errors="coerce")
    cleaned["_value"] = pd.to_numeric(cleaned["_value"], errors="coerce")
    return cleaned


def load_all_runs(base_path, metric_folder, num_runs):
    """Load ``1.csv`` .. ``<num_runs>.csv`` for one scenario/metric folder.

    Args:
        base_path: Scenario folder relative to ``DATA_DIR``; a leading
            ``data/`` prefix is removed before joining.
        metric_folder: Metric sub-folder inside the scenario folder.
        num_runs: Highest run number to look for (inclusive).

    Returns:
        Dict mapping run number to the loaded DataFrame. Missing files are
        skipped silently; files that fail to load are reported via ``print``
        and skipped.
    """
    data_prefix = "data/"
    if base_path.startswith(data_prefix):
        relative = base_path[len(data_prefix):]
    else:
        relative = base_path

    loaded = {}
    for run_no in range(1, num_runs + 1):
        csv_path = os.path.join(DATA_DIR, relative, metric_folder, f"{run_no}.csv")
        if not os.path.exists(csv_path):
            continue
        try:
            loaded[run_no] = load_influx_csv(csv_path)
        except Exception as exc:
            # Best effort: one unreadable run must not abort the whole batch.
            print(f"Error loading {csv_path}: {exc}")
    return loaded


def calculate_run_statistics(runs, metric_name):
    """Summarise HPA vs. RL values for every run into one DataFrame.

    Args:
        runs: Mapping of run id to a DataFrame with ``deployment`` and
            ``_value`` columns.
        metric_name: Label stored in the ``Metric`` column of each row.

    Returns:
        DataFrame with one row per usable run (mean/std/max per deployment,
        relative improvement of RL over HPA in percent, row count, and a
        stability score). Runs lacking the required columns, or lacking rows
        for either deployment, are omitted.
    """
    required_columns = ("deployment", "_value")
    records = []

    for run_id, frame in runs.items():
        if any(col not in frame.columns for col in required_columns):
            continue

        values = frame["_value"]
        hpa_values = values[frame["deployment"] == "hpa-flask-app"]
        rl_values = values[frame["deployment"] == "test-flask-app"]
        if hpa_values.empty or rl_values.empty:
            continue

        hpa_avg, rl_avg = hpa_values.mean(), rl_values.mean()
        hpa_sd, rl_sd = hpa_values.std(), rl_values.std()

        # Positive improvement means the RL mean is below the HPA mean.
        if hpa_avg != 0:
            gain_pct = (hpa_avg - rl_avg) / hpa_avg * 100
        else:
            gain_pct = 0

        records.append(
            {
                "Run": run_id,
                "Metric": metric_name,
                "HPA Mean": hpa_avg,
                "RL Mean": rl_avg,
                "HPA Std": hpa_sd,
                "RL Std": rl_sd,
                "HPA Max": hpa_values.max(),
                "RL Max": rl_values.max(),
                "Improvement (%)": gain_pct,
                "Data Points": len(frame),
                # Shrinks towards 0 as the combined spread grows; 1 when both are flat.
                "Stability": 1 / (hpa_sd + rl_sd + 1),
            }
        )

    return pd.DataFrame(records)


def get_recommendation_score(stats_df, metric_type):
    """Rank runs by a weighted mix of normalised improvement and stability.

    Args:
        stats_df: Per-run statistics with ``Improvement (%)`` and
            ``Stability`` columns. It is not modified.
        metric_type: ``"Response Time"``, ``"CPU"`` or ``"Memory"`` weight
            improvement 0.7 / stability 0.3; anything else uses 0.5 / 0.5.

    Returns:
        A copy of ``stats_df`` with ``Improvement_norm``, ``Stability_norm``
        and ``Score`` columns added, sorted by ``Score`` descending.
    """
    scored = stats_df.copy()

    def _min_max(column):
        # The 0.001 in the denominator avoids division by zero when all
        # values are equal.
        series = scored[column]
        return (series - series.min()) / (series.max() - series.min() + 0.001)

    if len(scored) <= 1:
        # Nothing to compare against: use a neutral midpoint.
        scored["Improvement_norm"] = 0.5
        scored["Stability_norm"] = 0.5
    else:
        scored["Improvement_norm"] = _min_max("Improvement (%)")
        scored["Stability_norm"] = _min_max("Stability")

    improvement_heavy = metric_type in ("Response Time", "CPU", "Memory")
    w_improvement, w_stability = (0.7, 0.3) if improvement_heavy else (0.5, 0.5)
    scored["Score"] = (w_improvement * scored["Improvement_norm"]) + (
        w_stability * scored["Stability_norm"]
    )

    return scored.sort_values("Score", ascending=False)


def metric_type_from_label(metric_label: str) -> str:
    """Map a display label to its metric type.

    The first of ``"Response Time"``, ``"CPU"``, ``"Memory"`` found as a
    substring of ``metric_label`` is returned; labels matching none of them
    are classified as ``"Replica"``.
    """
    # Order matters: earlier keywords win when a label contains several.
    for keyword in ("Response Time", "CPU", "Memory"):
        if keyword in metric_label:
            return keyword
    return "Replica"


def extract_recommended_runs(stats_df, metric_type):
    """Return run ids ordered from best to worst recommendation score.

    An empty list is returned when ``stats_df`` has no rows or no ``Run``
    column; otherwise the order comes from ``get_recommendation_score``.
    """
    usable = (not stats_df.empty) and ("Run" in stats_df.columns)
    if not usable:
        return []
    by_score = get_recommendation_score(stats_df, metric_type)
    run_ids = by_score["Run"].astype(int)
    return run_ids.tolist()


def plot_single_run_high_quality(
    df,
    run_id,
    metric_name,
    ylabel,
    ylim,
    pod_label,
    recommendation_tag="",
    threshold=None,
    save_path=None,
):
    """Draw the HPA vs. RL Agent time series of one run and optionally save it.

    Args:
        df: Run data with ``_time``, ``_value`` and ``deployment`` columns.
        run_id: Run number shown in the title.
        metric_name: Metric name shown in the title. When it contains
            ``"Replicas"`` the y axis is restricted to integer ticks.
        ylabel: Y-axis label.
        ylim: Upper y-axis limit (the lower limit is fixed at 0).
        pod_label: Scenario label shown in the title.
        recommendation_tag: Accepted for call compatibility; it does not
            affect the rendered figure.
        threshold: Optional y value for a dashed red reference line.
            ``None`` draws no line; ``0`` is a valid threshold.
        save_path: Optional PNG path. Parent directories are created when
            the path has a directory component.

    The figure is always closed before returning, including when plotting
    or saving raises, so repeated calls in a loop do not accumulate open
    figures.
    """
    deploy_a = "hpa-flask-app"
    deploy_b = "test-flask-app"
    colors = {deploy_a: "#0072B2", deploy_b: "#D55E00"}

    # Work on a copy so the caller's frame does not gain the helper column.
    plot_df = df.copy()
    min_time = plot_df["_time"].min()
    plot_df["_elapsed_min"] = (plot_df["_time"] - min_time).dt.total_seconds() / 60

    fig, ax = plt.subplots(1, 1, figsize=(12, 6))
    try:
        for deploy in [deploy_a, deploy_b]:
            subset = plot_df[plot_df["deployment"] == deploy].sort_values("_elapsed_min")
            label = "HPA" if deploy == deploy_a else "RL Agent"
            ax.plot(
                subset["_elapsed_min"],
                subset["_value"],
                label=label,
                color=colors[deploy],
                linewidth=2,
                alpha=0.9,
            )

        ax.set_ylabel(ylabel, fontsize=13)
        ax.set_xlabel("Elapsed Time (minutes)", fontsize=13)
        ax.set_title(
            f"{metric_name} - Run #{run_id} ({pod_label})",
            fontsize=15,
            fontweight="bold",
        )
        ax.set_ylim(0, ylim)

        if "Replicas" in metric_name:
            ax.yaxis.set_major_locator(plt.MaxNLocator(integer=True))

        # Explicit None check: a threshold of 0 must still be drawn.
        if threshold is not None:
            ax.axhline(
                y=threshold,
                color="red",
                linestyle="--",
                linewidth=2,
                alpha=0.7,
                label=f"Threshold ({threshold}ms)",
            )

        ax.legend(loc="upper right", fontsize=11)
        ax.grid(True, alpha=0.3)
        fig.tight_layout()

        if save_path:
            # os.makedirs("") raises, so only create a directory when the
            # path actually has one (a bare filename saves into the cwd).
            target_dir = os.path.dirname(save_path)
            if target_dir:
                os.makedirs(target_dir, exist_ok=True)
            fig.savefig(save_path, dpi=300, bbox_inches="tight")
            print(f"  Saved: {save_path}")
    finally:
        # Close this specific figure, not whichever one happens to be current.
        plt.close(fig)

In [None]:
# Results of the loading pass, all keyed by (pod count, metric sub-folder).
runs_by_key = {}
stats_by_key = {}
recommendations_by_key = {}

# Pass 1: load every run, compute per-run statistics and rank the runs.
print("Loading test runs from all steps...")
for folder, metric_dir, metric_label, pod in METRICS:
    # Strip the leading "../" so the folder can be resolved by load_all_runs.
    scenario_folder = normalize_folder(folder)
    key = (pod, metric_dir)
    runs = load_all_runs(scenario_folder, metric_dir, NUM_RUNS)
    stats_df = calculate_run_statistics(runs, metric_label)
    metric_type = metric_type_from_label(metric_label)
    recommendations = extract_recommended_runs(stats_df, metric_type)

    runs_by_key[key] = runs
    stats_by_key[key] = stats_df
    recommendations_by_key[key] = recommendations

    print(f"  pod_{pod}/{metric_dir}: {len(runs)} runs")

# Pass 2: render one chart per loaded run under CHARTS_DIR/pod_<pod>/<metric>/.
print("\nCreating charts for all runs (recommended runs are tagged)...")
for folder, metric_dir, metric_label, pod in METRICS:
    key = (pod, metric_dir)
    runs = runs_by_key.get(key, {})
    # Only the three best-ranked runs count as "recommended".
    top_runs = recommendations_by_key.get(key, [])[:3]
    recommended_set = set(top_runs)

    # Nothing was loaded for this scenario/metric: skip it without creating a folder.
    if not runs:
        continue

    output_dir = os.path.join(CHARTS_DIR, f"pod_{pod}", metric_dir)
    os.makedirs(output_dir, exist_ok=True)

    config = PLOT_CONFIG[metric_dir]
    for run_id in sorted(runs.keys()):
        # Recommended runs are marked through a "_recommended" file-name suffix.
        recommendation_tag = "recommended" if run_id in recommended_set else ""
        recommendation_suffix = "_recommended" if recommendation_tag == "recommended" else ""
        save_path = os.path.join(
            output_dir,
            f"deskriptif_{metric_dir}_run_{run_id}{recommendation_suffix}.png",
        )
        plot_single_run_high_quality(
            runs[run_id],
            run_id,
            config["metric_name"],
            config["ylabel"],
            config["ylim"],
            f"Pod {pod}",
            recommendation_tag,
            config["threshold"],
            save_path,
        )

print("\nDeskriptif chart generation completed.")

In [None]:
def _latex_table_lines(caption, label, column_spec, header, body_lines):
    """Wrap already-rendered body rows in a booktabs table/tabular skeleton."""
    return [
        "\\begin{table}[ht]",
        "  \\centering",
        f"  \\caption{{{caption}}}\\label{{{label}}}",
        f"  \\begin{{tabular}}{{{column_spec}}}",
        "    \\toprule",
        f"    {header} " + r"\\",
        "    \\midrule",
        *body_lines,
        "    \\bottomrule",
        "  \\end{tabular}",
        "\\end{table}",
    ]


def _format_mean(value):
    """Render a mean with three decimals, or ``-`` when it is missing."""
    return "-" if pd.isna(value) else f"{value:.3f}"


def generate_deskriptif_summary(stats_by_key, out_dir):
    """Write LaTeX tables of mean HPA/RL values per metric and per pod.

    Two kinds of files are written into ``out_dir`` (created if needed):

    * ``analisis_deskriptif_rangkuman.tex``: one row per metric label,
      averaged over every run of every pod.
    * ``analisis_deskriptif_<metric>.tex``: one file per metric label with
      one row per pod, averaged over that pod's runs.

    Args:
        stats_by_key: Mapping of ``(pod, metric_dir)`` to a per-run
            statistics DataFrame with ``Metric``, ``HPA Mean`` and
            ``RL Mean`` columns. ``None`` or empty frames are skipped.
        out_dir: Output directory for the ``.tex`` files.

    Returns:
        Path of the overall summary file, or ``None`` when there is nothing
        to summarise (in which case no file or directory is created).

    Note:
        Metric labels are written into the tables verbatim, so they must not
        contain unescaped LaTeX special characters. Only the underscore in
        the generated ``pod_<n>`` cell is escaped here.
    """
    rows = []
    for (pod, metric_dir), df in stats_by_key.items():
        if df is None or df.empty:
            continue
        # Escape the underscore once per frame; it is the same for every row.
        pod_cell = f"pod_{pod}".replace("_", "\\_")
        for _, r in df.iterrows():
            rows.append(
                {
                    "pod": pod_cell,
                    "metric_dir": metric_dir,
                    "metric_label": r.get("Metric", metric_dir),
                    "hpa_mean": r.get("HPA Mean", np.nan),
                    "rl_mean": r.get("RL Mean", np.nan),
                }
            )

    if not rows:
        print("No descriptive stats available to summarize.")
        return None

    df_all = pd.DataFrame(rows)
    value_cols = ["hpa_mean", "rl_mean"]

    # Mean over runs for each (pod, metric) pair.
    summary = (
        df_all.groupby(["pod", "metric_label"])[value_cols]
        .mean()
        .reset_index()
        .sort_values(["pod", "metric_label"])
    )

    # Mean over every run of every pod for each metric.
    overall = (
        df_all.groupby(["metric_label"])[value_cols]
        .mean()
        .reset_index()
        .sort_values("metric_label")
    )

    os.makedirs(out_dir, exist_ok=True)

    # --- Overall per-metric table ---
    overall_path = os.path.join(out_dir, "analisis_deskriptif_rangkuman.tex")
    overall_body = [
        f"    {r['metric_label']} & {_format_mean(r['hpa_mean'])} & {_format_mean(r['rl_mean'])} "
        + r"\\"
        for _, r in overall.iterrows()
    ]
    overall_lines = _latex_table_lines(
        "Rangkuman Rata-rata per Metrik (seluruh skenario)",
        "tab:deskriptif-summary-per-metric",
        "lcc",
        "Metrik & Rata-rata HPA & Rata-rata RL",
        overall_body,
    )
    # Explicit encoding so the output does not depend on the platform locale.
    with open(overall_path, "w", encoding="utf-8") as f:
        f.write("\n".join(overall_lines))

    print(f"Saved overall per-metric LaTeX: {overall_path}")

    # --- Per-metric tables with one row per pod ---
    for metric_label, group_df in summary.groupby("metric_label"):
        safe = (
            metric_label.replace(" ", "_").replace("(", "").replace(")", "").replace("/", "_")
        )
        path = os.path.join(out_dir, f"analisis_deskriptif_{safe}.tex")

        body = []
        prev_pod = None
        for _, r in group_df.iterrows():
            pod = r["pod"]
            # Visually separate consecutive rows that belong to different pods.
            if prev_pod is not None and pod != prev_pod:
                body.append("    \\midrule")
            prev_pod = pod
            body.append(
                f"    {pod} & {r['metric_label']} & {_format_mean(r['hpa_mean'])} & {_format_mean(r['rl_mean'])} "
                + r"\\"
            )

        # Four columns (Pod, Metrik, HPA, RL): the column spec must match,
        # otherwise the booktabs rules extend past the table content.
        detail_lines = _latex_table_lines(
            f"Rata-rata {metric_label} per Skenario",
            f"tab:deskriptif-{safe}",
            "llcc",
            "Pod & Metrik & Rata-rata HPA & Rata-rata RL",
            body,
        )
        with open(path, "w", encoding="utf-8") as f:
            f.write("\n".join(detail_lines))

        print(f"Saved per-metric LaTeX: {path}")

    return overall_path

# Write the LaTeX summary tables next to the charts. Anchoring the folder to
# SCRIPT_DIR (as CHARTS_DIR is) keeps the output location independent of the
# working directory when this code runs as a script; inside a notebook
# SCRIPT_DIR is the working directory, so the location is unchanged.
try:
    SUMMARY_OUT = os.path.join(SCRIPT_DIR, "tables")
    generate_deskriptif_summary(stats_by_key, SUMMARY_OUT)
except Exception as e:
    # Best effort: a failed summary must not abort the rest of the notebook.
    print(f"Failed to generate descriptive summary: {e}")