In [11]:
# =========================
# CELL 1 — Config & Paths (clean + judge-friendly)
# - Clear separation: DATA/MODEL vs AUTOSCALING/SIM
# - Window-aware + Metric-aware (buffer/capacity)
# - Does not break the helpers already used by later cells
# =========================

import os, re, json, math
from datetime import datetime, timezone
from typing import Dict, Any, List, Tuple, Optional

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Seed NumPy's global random generator with a fixed value.
np.random.seed(42)

# -------------------------
# Paths
# -------------------------
from pathlib import Path
import os, shutil

# Root is always the notebooks directory (the notebook itself lives in notebooks/)
PROJECT_ROOT = Path.cwd()   # => .../AUTOSCALING-ANALYSIS/notebooks

# (Optional) If the data lives in ../data, copy it into notebooks/data so that everything sits under notebooks/
src_data = (PROJECT_ROOT / ".." / "data").resolve()
dst_data = (PROJECT_ROOT / "data").resolve()

# Copy only when the local raw/ folder is missing and the sibling ../data/raw exists.
if not (dst_data / "raw").exists() and (src_data / "raw").exists():
    dst_data.mkdir(parents=True, exist_ok=True)
    shutil.copytree(src_data, dst_data, dirs_exist_ok=True)
    print(f"✅ Copied data from {src_data} -> {dst_data}")

PROJECT_ROOT = str(PROJECT_ROOT)  # keep it a string for the existing code
OUT_02 = os.path.join(PROJECT_ROOT, "outputs", "02_eda")
OUT_03 = os.path.join(PROJECT_ROOT, "outputs", "03_features")
OUT_04 = os.path.join(PROJECT_ROOT, "outputs", "04_models")
OUT_04P = os.path.join(OUT_04, "predictions")
OUT_05 = os.path.join(PROJECT_ROOT, "outputs", "05_scaling")

# Create every output directory up front (no-op when it already exists).
for p in [OUT_02, OUT_03, OUT_04, OUT_04P, OUT_05]:
    os.makedirs(p, exist_ok=True)

# -------------------------
# Core helpers (keep as-is for other cells)
# -------------------------
def tag_minutes(tag: str) -> int:
    """Return the bucket width in minutes for a window tag.

    Args:
        tag: One of "1m", "5m" or "15m".

    Raises:
        KeyError: If ``tag`` is not one of the supported tags.
    """
    minutes_by_tag = {"1m": 1, "5m": 5, "15m": 15}
    return minutes_by_tag[tag]

def steps_per_day(tag: str) -> int:
    """Return how many buckets of width ``tag`` fit into one day (24 * 60 minutes)."""
    minutes_in_day = 24 * 60
    return int(minutes_in_day / tag_minutes(tag))

def steps_per_hour(tag: str) -> int:
    """Return how many buckets of width ``tag`` fit into one hour (60 minutes)."""
    minutes_in_hour = 60
    return int(minutes_in_hour / tag_minutes(tag))

def resolve_roll_windows(tag: str, roll_windows: List[str]) -> Dict[str, int]:
    """Translate rolling-window labels into a number of steps for a window tag.

    Args:
        tag: Window tag understood by ``steps_per_hour`` / ``steps_per_day``.
        roll_windows: Labels to resolve; only "1h", "6h" and "1d" are supported.

    Returns:
        Mapping from each requested label to its length in steps.

    Raises:
        ValueError: If a label is not one of the supported ones.
    """
    per_hour = steps_per_hour(tag)
    per_day = steps_per_day(tag)
    steps_by_label = {
        "1h": 1 * per_hour,
        "6h": 6 * per_hour,
        "1d": 1 * per_day,
    }

    resolved = {}
    for label in roll_windows:
        if label not in steps_by_label:
            raise ValueError(f"Unsupported roll window: {label}")
        resolved[label] = steps_by_label[label]
    return resolved

# -------------------------
# CFG (one source of truth)
# -------------------------
CFG: Dict[str, Any] = {
    # ===== Dataset =====
    "RAW_LOG_PATH": os.path.join(PROJECT_ROOT, "data", "access_log.txt"),  # optional
    "TAGS": ["1m", "5m", "15m"],
    "TIME_COL_RAW": "timestamp",
    "TIME_COL_BUCKET": "bucket_start",

    # Storm gap (problem statement)
    "STORM_START": pd.Timestamp("1995-08-01 14:52:01"),
    "STORM_END":   pd.Timestamp("1995-08-03 04:36:13"),

    # ===== Feature engineering =====
    "LAG_DAYS": [1,2,3,4,5,6,7],
    "ROLL_WINDOWS": ["1h","6h","1d"],
    "ROLL_USE_STD": True,
    "USE_CYCLIC": True,
    "HORIZON_STEPS": 1,
    "KEEP_RAW_EXTRA": [
        "unique_hosts","err_4xx","err_5xx","error_rate",
        "is_missing_bucket","is_gap_storm","is_gap_unknown"
    ],
    "REQUIRE_COLS": ["bucket_start","hits","bytes_sum","is_gap"],

    # ===== Modeling =====
    "TARGETS": ["hits", "bytes_sum"],
    "XGB_PARAMS": dict(
        booster="gbtree",
        n_estimators=5000,
        early_stopping_rounds=50,
        objective="reg:squarederror",
        max_depth=6,
        learning_rate=0.05,
        subsample=0.9,
        colsample_bytree=0.9,
        reg_lambda=1.0,
        random_state=42,
    ),
    "CV_SPLITS": 5,
    "CV_TEST_DAYS": 2,
    "CV_GAP_STEPS": 1,

    # ==========================================================
    # AUTOSCALING / SIMULATION CONFIG (Window-aware + Metric-aware)
    # ==========================================================
    "SCALING": {
        # bounds
        "min_instances": 2,
        "max_instances": 50,

        # unit cost
        "cost_per_instance_per_hour": 0.05,

        # window -> minutes
        "window_minutes": {"1m": 1, "5m": 5, "15m": 15},

        # --- Metric-aware safety buffer (avoids under-provisioning bytes_sum)
        # hits is usually fine with a moderate buffer; bytes_sum is bursty => higher buffer
        # NOTE(review): both metrics are currently set to the same value (0.3).
        "safety_buffer_by_metric": {"hits": 0.3, "bytes_sum": 0.3},

        # --- Per-instance capacity (tuned so required_instances shows visible variation)
        # NOTE: to demo a predictive policy that visibly reacts, lower the bytes_sum capacity
        # Keys are (metric, window) tuples.
        "capacity_per_instance": {
            ("hits","1m"): 20, ("hits","5m"): 100, ("hits","15m"): 350,
            ("bytes_sum","1m"): 350_000, ("bytes_sum","5m"): 1_200_000, ("bytes_sum","15m"): 3_500_000,
        },

        # --- Step change per window (15m should not jump too much, for cleaner plots)
        "max_step_change_by_window": {"1m": 6, "5m": 10, "15m": 15},

        # --- Hysteresis per window (1m is noisy => larger high/low)
        # high: number of consecutive windows above the threshold before scaling out
        # low : number of consecutive windows below the threshold before scaling in
        "hysteresis_by_window": {
            "1m": {"high": 2, "low": 6, "in_margin": 0.18},
            "5m": {"high": 1, "low": 4, "in_margin": 0.15},
            "15m":{"high": 1, "low": 2, "in_margin": 0.12},
        },

        "predictive_deadband_by_window": {"1m": 0.5, "5m": 0.5, "15m": 0.5},

        # --- cooldown (expressed in minutes, converted in code)
        "cooldown_minutes": {"base": 8, "spike": 15},

        # --- provisioning per window
        "provisioning_by_window": {
            "1m": {"warmup_windows": 1, "min_uptime_windows": 6},
            "5m": {"warmup_windows": 1, "min_uptime_windows": 4},
            "15m":{"warmup_windows": 0, "min_uptime_windows": 2},
        },

        # --- Reactive (rescue) knobs
        "reactive": {
            "enabled": True,
            "overload_scale_out_immediate": True,
            "rescue_extra_instances": 3,
            "queue_low_fraction": 0.05,
            "queue_high_multiplier": 4.0,  # higher to reduce false rescues => cleaner demo
        },

        # --- SLO / latency model (simplified)
        "slo": {
            "base_latency_ms": 80.0,
            "alpha_latency_per_unit_queue": 0.15,
            "p95_latency_target_ms": 300.0,
        },

        # --- Anomaly detection (MAD) over a lookback given in hours (converted in code)
        "anomaly": {
            "enabled": True,
            "method": "mad",
            "lookback_hours": 2,
            "mad_k": 6.0,
            "min_points": 10,
            "max_flag_rate": 0.30,
        },

        # --- DDoS mode (force step per window)
        "ddos_mode": {
            "enabled": True,
            "force_scale_out_step_by_window": {"1m": 6, "5m": 10, "15m": 12},
            "max_instances_during_ddos": 50,
        },
    }
}

print("✅ Cell 1 done — paths ready + CFG ready (CFG['SCALING'] exists)")


✅ Cell 1 done — paths ready + CFG ready (CFG['SCALING'] exists)


In [13]:

# 06_dashboard_preview.ipynb
# Generates streamlit_app.py (dashboard) consistent with artifacts in outputs/*

import os
from pathlib import Path
import pandas as pd

# The notebook lives in notebooks/ => the dashboard root is notebooks/ as well.
# Falls back to the current working directory when __file__ is not defined.
PROJECT_ROOT = Path(__file__).resolve().parent if "__file__" in globals() else Path.cwd()
PROJECT_ROOT = PROJECT_ROOT.resolve()

# NOTE: PROJECT_ROOT and OUT_05 are bound to Path objects here.
OUT_05 = PROJECT_ROOT / "outputs" / "05_scaling"
OUT_05.mkdir(parents=True, exist_ok=True)

# Location of the Streamlit app script.
app_path = PROJECT_ROOT / "streamlit_app.py"


# JSON object keys must be strings, so flatten each (metric, window) tuple key
# of capacity_per_instance into "metric__window" on a shallow copy of the config.
SC = CFG["SCALING"].copy()
cap_map = {
    f"{m}__{w}": v
    for (m, w), v in SC["capacity_per_instance"].items()
}
SC["capacity_per_instance"] = cap_map

# Write the flattened scaling config next to the other scaling outputs.
cfg_scaling_path = OUT_05 / "_cfg_scaling.json"
pd.Series(SC).to_json(cfg_scaling_path)


APP = '''
# streamlit_app.py
# ============================================================
# Streamlit Dashboard — Forecast + Autoscaling (consistent with notebook cells)
# - Reads artifacts in outputs/*
# - Sidebar controls WORK (form + session_state)
# - Has KPI cards + p95 latency + queue/util charts
# - Fixes KeyError: ('hits','5m') by normalizing capacity_per_instance keys
# ============================================================

import os, math
import numpy as np
import pandas as pd
import streamlit as st
import matplotlib.pyplot as plt

# -----------------------------
# Basic page config
# -----------------------------
# Configure the page title and wide layout.
st.set_page_config(page_title="Autoscaling Analysis", layout="wide")
# Seed NumPy's global random generator with a fixed value.
np.random.seed(42)

from pathlib import Path
import os, math, shutil
import numpy as np
import pandas as pd
import streamlit as st
import matplotlib.pyplot as plt


# Root = directory containing streamlit_app.py  => .../AUTOSCALING-ANALYSIS/notebooks
PROJECT_ROOT = Path(__file__).resolve().parent

# (Optional) If the data lives in ../data, copy it into notebooks/data
src_data = (PROJECT_ROOT / ".." / "data").resolve()
dst_data = (PROJECT_ROOT / "data").resolve()
# Copy only when the local raw/ folder is missing and the sibling ../data/raw exists.
if not (dst_data / "raw").exists() and (src_data / "raw").exists():
    dst_data.mkdir(parents=True, exist_ok=True)
    shutil.copytree(src_data, dst_data, dirs_exist_ok=True)

# Artifact locations, kept as plain strings for os.path-based helpers.
OUT_04P = str(PROJECT_ROOT / "outputs" / "04_models" / "predictions")
OUT_04  = str(PROJECT_ROOT / "outputs" / "04_models")
OUT_05  = str(PROJECT_ROOT / "outputs" / "05_scaling")
METRICS_PATH = str(PROJECT_ROOT / "outputs" / "04_models" / "metrics_forecast.csv")


# ============================================================
# CFG (copy from CELL 1 — keep consistent)
# NOTE: tuple keys in capacity_per_instance will be normalized for Streamlit.
# ============================================================
# Dashboard defaults. These values mirror the SCALING section of the notebook's
# CELL 1 config so the dashboard and the notebook simulate with the same knobs.
CFG = {
    "TAGS": ["1m", "5m", "15m"],
    "TARGETS": ["hits", "bytes_sum"],
    "SCALING": {
        # bounds
        "min_instances": 2,
        "max_instances": 50,
        # unit cost
        "cost_per_instance_per_hour": 0.05,
        # window -> minutes
        "window_minutes": {"1m": 1, "5m": 5, "15m": 15},
        "safety_buffer_by_metric": {"hits": 0.3, "bytes_sum": 0.3},
        # keys are (metric, window) tuples
        "capacity_per_instance": {
            ("hits","1m"): 20, ("hits","5m"): 100, ("hits","15m"): 350,
            ("bytes_sum","1m"): 350_000, ("bytes_sum","5m"): 1_200_000, ("bytes_sum","15m"): 3_500_000,
        },
        "max_step_change_by_window": {"1m": 6, "5m": 10, "15m": 15},
        # high/low: consecutive windows required before scale-out / scale-in
        "hysteresis_by_window": {
            "1m": {"high": 2, "low": 6, "in_margin": 0.18},
            "5m": {"high": 1, "low": 4, "in_margin": 0.15},
            "15m": {"high": 1, "low": 2, "in_margin": 0.12},
        },
        # minutes; converted to a number of windows by the simulation
        "cooldown_minutes": {"base": 8, "spike": 15},
        "provisioning_by_window": {
            "1m": {"warmup_windows": 1, "min_uptime_windows": 6},
            "5m": {"warmup_windows": 1, "min_uptime_windows": 4},
            "15m":{"warmup_windows": 0, "min_uptime_windows": 2},
        },
        "slo": {
            "base_latency_ms": 80.0,
            "alpha_latency_per_unit_queue": 0.15,
            "p95_latency_target_ms": 300.0,
        },
        "anomaly": {
            "enabled": True,
            "lookback_hours": 2,
            "mad_k": 6.0,
            "min_points": 10,
        },
        "ddos_mode": {
            "enabled": True,
            "force_scale_out_step_by_window": {"1m": 6, "5m": 10, "15m": 12},
            "max_instances_during_ddos": 50,
        },
    }
}

# Alias to the scaling section; it is the same dict object, so writes through SC also change CFG.
SC = CFG["SCALING"]

# ============================================================
# Normalizers / helpers (fix tuple-key issues)
# ============================================================
def normalize_capacity_keys(cap_dict):
    """
    Return a copy of a capacity mapping whose keys are (metric, window) tuples.

    Accepted key formats:
      - a 2-tuple, e.g. ("hits", "5m")
      - the text form of such a tuple, e.g. "('hits', '5m')"
      - a string joined by "|", "," or "__", e.g. "hits|5m", "hits,5m",
        "hits__5m" (the "__" form is what the JSON export of the scaling
        config uses for its capacity keys)

    Keys in any other format are ignored. Values are converted with float(),
    so a non-numeric value raises ValueError/TypeError.
    A non-dict argument yields an empty dict.
    """
    if not isinstance(cap_dict, dict):
        return {}

    out = {}
    for k, v in cap_dict.items():
        if isinstance(k, tuple) and len(k) == 2:
            out[(str(k[0]), str(k[1]))] = float(v)
            continue
        if not isinstance(k, str):
            continue  # ignore unknown key formats

        ks = k.strip()
        if ks.startswith("(") and "," in ks:
            # Text form of a tuple: small, safe parse (no eval).
            a, b = ks.strip("()").split(",", 1)
            a = a.strip().strip("'").strip('"')
            b = b.strip().strip("'").strip('"')
            out[(a, b)] = float(v)
            continue

        # Plain "metric<sep>window" strings; the first separator found wins.
        for sep in ("|", ",", "__"):
            if sep in ks:
                a, b = ks.split(sep, 1)
                out[(a.strip(), b.strip())] = float(v)
                break
        # strings without a known separator are ignored
    return out

# Replace the capacity table with its normalized form; a missing table becomes an empty dict.
SC["capacity_per_instance"] = normalize_capacity_keys(SC.get("capacity_per_instance", {}))

def win_minutes(window: str) -> int:
    """Return the length of ``window`` in minutes, read from SC["window_minutes"]."""
    minutes_by_window = SC["window_minutes"]
    return int(minutes_by_window[window])

def win_hours(window: str) -> float:
    """Return the length of ``window`` in hours (minutes divided by 60)."""
    minutes = win_minutes(window)
    return minutes / 60.0

def clamp_instances(x: int) -> int:
    """Clamp ``x`` into the range given by SC["min_instances"] and SC["max_instances"]."""
    lower = int(SC["min_instances"])
    upper = int(SC["max_instances"])
    capped = min(upper, int(x))
    # The lower bound is applied last, so it wins if the bounds are inverted.
    return max(lower, capped)

def cap(metric: str, window: str) -> float:
    """Return the per-instance capacity configured for (metric, window).

    Raises:
        KeyError: If SC["capacity_per_instance"] has no entry for the pair.
    """
    table = SC["capacity_per_instance"]
    key = (str(metric), str(window))
    if key in table:
        return float(table[key])
    raise KeyError(f"capacity_per_instance missing key={key}. Please check CFG['SCALING']['capacity_per_instance'].")

def buffer(metric: str) -> float:
    """Return the safety buffer fraction for ``metric`` (0.2 when none is configured)."""
    buffers = SC["safety_buffer_by_metric"]
    return float(buffers.get(metric, 0.2))

def step_limit(window: str) -> int:
    """Return the maximum instance change per decision for ``window`` (10 when none is configured)."""
    limits = SC["max_step_change_by_window"]
    return int(limits.get(window, 10))

def required_instances(demand: float, metric: str, window: str) -> int:
    """Return the clamped instance count needed to serve ``demand``.

    The demand is floored at zero, divided by the per-instance capacity,
    inflated by the metric's safety buffer, rounded up, then clamped.
    """
    load = max(0.0, float(demand))
    per_instance = max(cap(metric, window), 1e-9)  # guard against division by zero
    raw_need = (load / per_instance) * (1.0 + buffer(metric))
    whole_instances = int(math.ceil(raw_need))
    return clamp_instances(whole_instances)

def _apply_step_towards(inst, target, max_step):
    """Move ``inst`` towards ``target`` by at most ``max_step`` and clamp the result.

    When ``inst`` already equals ``target`` it is returned unchanged (not clamped).
    """
    current = int(inst)
    gap = int(target) - current
    if gap == 0:
        return current
    magnitude = min(abs(gap), int(max_step))
    direction = int(np.sign(gap))
    return clamp_instances(current + direction * magnitude)

def daily_event_counts(ev_df: pd.DataFrame):
    """Count scale-out and scale-in events per calendar day.

    Returns a frame with columns date, scale_out, scale_in and total, sorted
    by date. ``None`` or an empty frame yields an empty frame with those columns.
    An action counts as scale-out / scale-in when its text contains
    "scale_out" / "scale_in" (case-insensitive).
    """
    if ev_df is None or ev_df.empty:
        return pd.DataFrame(columns=["date","scale_out","scale_in","total"])

    stamps = pd.to_datetime(ev_df["timestamp"])
    actions = ev_df["action"].astype(str)
    flags = pd.DataFrame({
        "date": stamps.dt.date.astype(str),
        "scale_out": actions.str.contains("scale_out", case=False, na=False).astype(int),
        "scale_in": actions.str.contains("scale_in", case=False, na=False).astype(int),
    })

    per_day = flags.groupby("date")[["scale_out","scale_in"]].sum().reset_index()
    per_day["total"] = per_day["scale_out"] + per_day["scale_in"]
    return per_day.sort_values("date").reset_index(drop=True)

def instance_distribution(sim_df: pd.DataFrame):
    """Return how often each instance count occurs in ``sim_df``.

    The result has columns instances, count and pct_time (count divided by
    the total number of rows), ordered by instance count.
    """
    counts = sim_df["instances"].astype(int).value_counts().sort_index()
    table = pd.DataFrame({"instances": counts.index, "count": counts.values})
    total = table["count"].sum()
    table["pct_time"] = table["count"] / total
    return table

def mad_anomaly_flags(series: pd.Series, window_pts: int, k: float, min_points: int = 10):
    """Score each point by its deviation from a rolling median, scaled by a rolling MAD.

    Returns:
        (score, is_spike): ``score`` is |x - rolling median| / rolling MAD with
        undefined values replaced by 0.0; ``is_spike`` is 1 where the score
        exceeds ``k`` and 0 elsewhere. A rolling MAD of zero yields no score.
    """
    values = series.astype(float).copy()
    width = int(window_pts)
    # Minimum observations per window: at least 1, never more than the window itself.
    needed = max(1, min(width, max(int(min_points), width // 2)))

    center = values.rolling(width, min_periods=needed).median()
    abs_dev = (values - center).abs()
    spread = abs_dev.rolling(width, min_periods=needed).median()

    score = abs_dev / spread.replace(0, np.nan)
    is_spike = (score > k).fillna(False).astype(int)
    return score.fillna(0.0), is_spike

def ddos_flag(is_spike: pd.Series, consec: int):
    """Flag (1/0) each point that ends a run of ``consec`` consecutive spikes."""
    streak = is_spike.rolling(consec, min_periods=consec).sum()
    sustained = streak.ge(consec)
    return sustained.fillna(False).astype(int)

def simulate_queue_latency(sim_df: pd.DataFrame, lat_cfg: dict):
    """Add queue, utilization, latency and SLO columns to a simulation frame.

    Rows are processed in timestamp order. At each step the backlog is the
    demand plus the previous backlog minus the total capacity (floored at 0),
    then reduced by the ``queue_decay`` fraction.

    Args:
        sim_df: Frame with "timestamp", "y_true" and "capacity_total" columns.
        lat_cfg: Dict with "base_ms", "alpha_ms_per_queue_unit",
            "p95_target_ms" and "queue_decay".

    Returns:
        A sorted copy of ``sim_df`` with "queue_len", "utilization"
        (capped at 2.0, and 0.0 when capacity is ~0), "p95_latency_ms"
        (base + alpha * backlog) and "slo_violation" (latency above target).
    """
    df = sim_df.sort_values("timestamp").reset_index(drop=True).copy()

    # Parse the loop-invariant configuration once.
    base_ms = float(lat_cfg["base_ms"])
    alpha = float(lat_cfg["alpha_ms_per_queue_unit"])
    target_ms = float(lat_cfg["p95_target_ms"])
    keep_fraction = 1.0 - float(lat_cfg["queue_decay"])

    q = 0.0
    ql, util, p95, slo = [], [], [], []
    for load, cap_total in zip(df["y_true"].astype(float), df["capacity_total"].astype(float)):
        q = max(0.0, (load + q) - cap_total)
        q = max(0.0, q * keep_fraction)
        u = 0.0 if cap_total <= 1e-9 else min(2.0, load / cap_total)
        p = base_ms + alpha * q
        ql.append(float(q))
        util.append(float(u))
        p95.append(float(p))
        slo.append(bool(p > target_ms))

    df["queue_len"] = ql
    df["utilization"] = util
    df["p95_latency_ms"] = p95
    df["slo_violation"] = pd.Series(slo, dtype=bool)
    return df

def summarize(sim_df: pd.DataFrame, ev_df: pd.DataFrame) -> dict:
    """Collapse one simulation run into a flat dict of KPIs.

    Metric, window and policy are taken from the first row of ``sim_df``.
    ``slo_violation_rate`` is NaN when the frame has no "slo_violation"
    column, and ``ev_df`` may be None (counted as zero events).
    """
    window = str(sim_df["window"].iloc[0])
    n_points = int(len(sim_df))
    n_events = 0 if ev_df is None else int(len(ev_df))

    if "slo_violation" in sim_df.columns:
        slo_rate = float(sim_df["slo_violation"].mean())
    else:
        slo_rate = np.nan

    # Simulated duration in hours; the divisor below is floored to avoid dividing by zero.
    simulated_hours = (n_points * win_minutes(window)) / 60.0

    return {
        "metric": str(sim_df["metric"].iloc[0]),
        "window": window,
        "policy_mode": str(sim_df["policy_mode"].iloc[0]),
        "estimated_total_cost": float(sim_df["cost_step"].sum()),
        "total_server_hours": float(sim_df["server_hours_step"].sum()),
        "avg_instances": float(sim_df["instances"].mean()),
        "peak_instances": int(sim_df["instances"].max()),
        "sla_violation_rate": float(sim_df["sla_violation"].mean()),
        "slo_violation_rate": float(slo_rate),
        "total_under_provision": float(sim_df["under_provision"].sum()),
        "max_under_provision": float(sim_df["under_provision"].max()),
        "num_scale_events": n_events,
        "events_per_hour": float(n_events / max(simulated_hours, 1e-9)),
        "num_points": n_points,
    }

# ============================================================
# Data loaders (cache MUST include params)
# ============================================================
@st.cache_data(show_spinner=False)
def load_pred_case(metric: str, window: str, model_tag: str) -> pd.DataFrame:
    """Load one prediction CSV and return it as a (timestamp, y_true, y_pred) frame.

    The file is pred_{metric}_{window}_{model_tag}.csv under OUT_04P.
    Timestamps are parsed as UTC and made timezone-naive; rows whose timestamp
    cannot be parsed are dropped. Non-numeric values become 0.0.

    Raises:
        FileNotFoundError: If the prediction file does not exist.
    """
    csv_path = os.path.join(OUT_04P, f"pred_{metric}_{window}_{model_tag}.csv")
    if not os.path.exists(csv_path):
        raise FileNotFoundError(csv_path)

    raw = pd.read_csv(csv_path)
    parsed = pd.to_datetime(raw["bucket_start"], utc=True, errors="coerce").dt.tz_convert(None)
    raw = raw.assign(timestamp=parsed)
    valid = raw.loc[raw["timestamp"].notna()].copy()

    def _numeric(column: str) -> pd.Series:
        # Coerce to numbers; anything unparsable counts as zero.
        return pd.to_numeric(valid[column], errors="coerce").fillna(0.0)

    case = pd.DataFrame({
        "timestamp": valid["timestamp"],
        "y_true": _numeric(metric),
        "y_pred": _numeric("pred"),
    })
    return case.sort_values("timestamp").reset_index(drop=True)

@st.cache_data(show_spinner=False)
def load_metrics_long() -> pd.DataFrame:
    """Read the forecast metrics CSV, or return an empty frame with the expected columns if it is missing."""
    if os.path.exists(METRICS_PATH):
        return pd.read_csv(METRICS_PATH)
    expected_columns = ["model","target","window","split","metric","value"]
    return pd.DataFrame(columns=expected_columns)

def get_forecast_kpis(metrics_df: pd.DataFrame, model: str, target: str, window: str):
    """Look up the test-split RMSE, MAE and MAPE for one model/target/window.

    The split name is matched case-insensitively against "test". Any KPI
    without a matching row is returned as NaN; when several rows match, the
    first one is used.
    """
    kpi_names = ["RMSE","MAE","MAPE"]
    if metrics_df.empty:
        return {name: np.nan for name in kpi_names}

    is_match = (
        (metrics_df["split"].astype(str).str.lower() == "test")
        & (metrics_df["model"].astype(str) == model)
        & (metrics_df["target"].astype(str) == target)
        & (metrics_df["window"].astype(str) == window)
    )
    selected = metrics_df[is_match]
    if selected.empty:
        return {name: np.nan for name in kpi_names}

    kpis = {}
    for name in kpi_names:
        values = selected.loc[selected["metric"] == name, "value"]
        kpis[name] = float(values.iloc[0]) if len(values) else np.nan
    return kpis

# ============================================================
# Simulation (consistent with CELL10)
# ============================================================
def simulate_static(df_case: pd.DataFrame, metric: str, window: str, static_n: int):
    """Simulate a fixed-size fleet over ``df_case``.

    The fleet size is ``static_n`` clamped to the configured bounds and never
    changes, so the returned events frame is always empty.

    Returns:
        (sim_df, ev_df): one simulation row per input row (in timestamp
        order), and an empty events frame with the standard event columns.
    """
    ordered = df_case.sort_values("timestamp").reset_index(drop=True).copy()
    hours_per_window = win_hours(window)
    hourly_price = float(SC["cost_per_instance_per_hour"])
    fleet = clamp_instances(static_n)

    def _row(ts, actual, forecast):
        # Capacity is judged against the actual load; the required count uses the forecast.
        total_capacity = fleet * cap(metric, window)
        slack = total_capacity - actual
        shortfall = max(0.0, -slack)
        surplus = max(0.0, slack)
        return {
            "timestamp": ts, "metric": metric, "window": window, "policy_mode": "static",
            "y_true": actual, "y_pred": forecast,
            "required_instances": int(required_instances(forecast, metric, window)),
            "instances": int(fleet), "effective_instances": int(fleet),
            "capacity_total": float(total_capacity),
            "headroom": float(slack),
            "under_provision": float(shortfall), "over_provision": float(surplus),
            "sla_violation": bool(shortfall > 0.0),
            "cost_step": float(fleet * hours_per_window * hourly_price),
            "server_hours_step": float(fleet * hours_per_window),
            "cost_rate_per_hour": float(fleet * hourly_price),
        }

    records = [
        _row(ts, float(actual), float(forecast))
        for ts, actual, forecast in zip(ordered["timestamp"], ordered["y_true"], ordered["y_pred"])
    ]
    sim_df = pd.DataFrame(records)
    ev_df = pd.DataFrame(columns=["timestamp","metric","window","policy_mode","action","from_instances","to_instances","delta","reason"])
    return sim_df, ev_df

def simulate_predictive(df_case: pd.DataFrame, metric: str, window: str,
                        hysteresis_high: int, hysteresis_low: int, in_margin: float,
                        cooldown_minutes: float, max_step: int,
                        warmup_windows: int, min_uptime_windows: int,
                        enable_anom: bool, lookback_hours: float, mad_k: float, min_points: int,
                        enable_ddos: bool, ddos_consec: int, ddos_force_step: int, ddos_max: int,
                        lat_cfg: dict):
    """Simulate the forecast-driven autoscaling policy over ``df_case``.

    Each window, the required instance count is derived from ``y_pred``.
    The fleet scales out after ``hysteresis_high`` consecutive windows with
    required > current, and scales in after ``hysteresis_low`` consecutive
    windows with current > required * (1 + in_margin), provided the uptime
    guard has expired. Both are blocked while a cooldown is running and are
    limited to ``max_step`` instances per decision. When DDoS mode fires, the
    fleet is forced up by ``ddos_force_step`` (capped at ``ddos_max``)
    regardless of cooldown; it never lowers the fleet.

    Args:
        df_case: Frame with "timestamp", "y_true" and "y_pred" columns.
        metric, window: Select the per-instance capacity and window length.
        cooldown_minutes: Cooldown after any change, converted to windows (rounded up).
        warmup_windows, min_uptime_windows: Timers started after any change.
        enable_anom, lookback_hours, mad_k, min_points: MAD spike detection on ``y_true``.
        enable_ddos, ddos_consec: DDoS mode triggers after ``ddos_consec`` consecutive spikes
            (only evaluated when anomaly detection is enabled).
        lat_cfg: Passed to ``simulate_queue_latency``.

    Returns:
        (sim_pred, ev_pred): per-window simulation rows with queue/latency
        columns added, and the scaling events. The events frame always has
        the standard event columns, even when no event occurred.
    """
    event_columns = ["timestamp","metric","window","policy_mode","action",
                     "from_instances","to_instances","delta","reason"]

    df = df_case.sort_values("timestamp").reset_index(drop=True).copy()
    wh = win_hours(window)
    unit_cost = float(SC["cost_per_instance_per_hour"])

    cooldown_w = int(math.ceil(float(cooldown_minutes) / win_minutes(window)))

    inst = int(SC["min_instances"])
    above_count = below_count = 0
    cooldown_left = warmup_left = uptime_guard = 0
    rows, events = [], []

    # precompute anomaly/ddos flags from actual load (demo)
    if enable_anom:
        lookback_pts = max(5, int((lookback_hours * 60) / win_minutes(window)))
        an_score, is_spike = mad_anomaly_flags(df["y_true"], lookback_pts, mad_k, min_points)
        is_ddos = ddos_flag(is_spike, ddos_consec) if enable_ddos else pd.Series(np.zeros(len(df), dtype=int))
    else:
        an_score = pd.Series(np.zeros(len(df)))
        is_spike = pd.Series(np.zeros(len(df), dtype=int))
        is_ddos  = pd.Series(np.zeros(len(df), dtype=int))

    # The index was reset above, so the iterrows label ``i`` is also the position.
    for i, r in df.iterrows():
        ts = r["timestamp"]
        y_true = float(r["y_true"])
        y_pred = float(r["y_pred"])

        req = required_instances(y_pred, metric, window)

        # hysteresis counters
        above_count = (above_count + 1) if (req > inst) else 0
        below_count = (below_count + 1) if (inst > req * (1.0 + float(in_margin))) else 0

        # tick timers
        cooldown_left = max(0, cooldown_left - 1)
        warmup_left = max(0, warmup_left - 1)
        uptime_guard = max(0, uptime_guard - 1)

        action, reason = "hold", "hold"
        new_inst = inst

        ddos_on = bool(enable_ddos and int(is_ddos.iloc[i]) == 1)

        # ddos mode: force scale-out (bypasses cooldown and hysteresis)
        if ddos_on:
            forced = min(int(ddos_max), inst + max(1, int(ddos_force_step)))
            # Never let the DDoS cap shrink the fleet: if inst already exceeds
            # ddos_max, hold instead of recording a "scale_out" with a negative delta.
            new_inst = max(inst, forced)
            action, reason = "scale_out", "ddos_mode(force_step)"

        # normal predictive scaling
        elif cooldown_left == 0:
            if above_count >= int(hysteresis_high):
                new_inst = _apply_step_towards(inst, req, int(max_step))
                action, reason = "scale_out", f"req>inst for {int(hysteresis_high)} window(s)"
            elif below_count >= int(hysteresis_low) and uptime_guard == 0:
                new_inst = _apply_step_towards(inst, req, int(max_step))
                action, reason = "scale_in", f"inst>req*(1+margin) for {int(hysteresis_low)} window(s)"

        if new_inst != inst:
            events.append({
                "timestamp": ts, "metric": metric, "window": window, "policy_mode": "predictive",
                "action": action, "from_instances": int(inst), "to_instances": int(new_inst),
                "delta": int(new_inst - inst), "reason": reason
            })
            inst = int(new_inst)
            # Any change restarts the cooldown and (re)arms the warmup / uptime timers.
            cooldown_left = cooldown_w
            warmup_left = max(warmup_left, int(warmup_windows))
            uptime_guard = max(uptime_guard, int(min_uptime_windows))

        # NOTE(review): this subtracts the remaining warmup *window count* from the
        # instance count, and it also applies after a scale-in — confirm that this
        # simplification is intended.
        effective_inst = max(0, inst - warmup_left)
        capacity_total = effective_inst * cap(metric, window)
        headroom = capacity_total - y_true
        under = max(0.0, -headroom)
        over = max(0.0, headroom)
        cost_step = inst * wh * unit_cost

        rows.append({
            "timestamp": ts, "metric": metric, "window": window, "policy_mode": "predictive",
            "y_true": y_true, "y_pred": y_pred,
            "required_instances": int(req),
            "instances": int(inst),
            "effective_instances": int(effective_inst),
            "warmup_left_windows": int(warmup_left),
            "blocked_by_cooldown": bool(cooldown_left > 0),
            "capacity_total": float(capacity_total),
            "headroom": float(headroom),
            "under_provision": float(under), "over_provision": float(over),
            "sla_violation": bool(under > 0.0),
            "cost_step": float(cost_step),
            "server_hours_step": float(inst * wh),
            "cost_rate_per_hour": float(inst * unit_cost),
            "anomaly_score": float(an_score.iloc[i]),
            "is_spike": int(is_spike.iloc[i]),
            "is_ddos": int(is_ddos.iloc[i]),
        })

    sim_pred = pd.DataFrame(rows)
    # Fixed columns keep the events frame usable (e.g. ev["action"]) when no event occurred.
    ev_pred = pd.DataFrame(events, columns=event_columns)
    sim_pred = simulate_queue_latency(sim_pred, lat_cfg)
    return sim_pred, ev_pred


# ============================================================
# Sidebar (WORKS): use form -> Apply/Run
# ============================================================
st.title("Autoscaling Analysis — Forecast + Policy Simulation")

# Sidebar: all inputs live in one form; widget defaults are read from SC.
with st.sidebar:
    st.header("Controls")

    st.caption("Dataset split (read-only)")
    st.write("Train = Jul + 1–22 Aug")
    st.write("Test  = 23–31 Aug")

    with st.form("cfg_form"):
        st.subheader("A) Data / Range")
        # default test range as per spec
        test_start = st.date_input("Test start (inclusive)", value=pd.Timestamp("1995-08-23").date(), key="test_start")
        test_end   = st.date_input("Test end (exclusive)", value=pd.Timestamp("1995-09-01").date(), key="test_end")
        include_gaps = st.checkbox("Include gaps", value=False, key="include_gaps")

        st.subheader("B) Forecast")
        metric = st.selectbox("Target (metric)", CFG["TARGETS"], index=0, key="metric")
        window = st.selectbox("Window", CFG["TAGS"], index=1, key="window")
        model_tag = st.selectbox("Model", ["xgb", "seasonal_naive"], index=0, key="model")

        st.subheader("C) Autoscaling policy")
        policy_mode = st.selectbox("Policy mode", ["static_vs_predictive"], index=0, key="policy_mode")

        # allow user adjust core knobs
        # (defaults below depend on the metric / window selected above)
        buf = st.slider("Safety buffer", 0.0, 1.0, float(SC["safety_buffer_by_metric"].get(metric, 0.3)), 0.01, key="buf")
        min_ins = st.number_input("Min instances", 1, 500, int(SC["min_instances"]), key="min_ins")
        max_ins = st.number_input("Max instances", 1, 2000, int(SC["max_instances"]), key="max_ins")

        st.caption("Hysteresis / cooldown / step")
        hhigh = st.number_input("high (scale-out consecutive windows)", 1, 20, int(SC["hysteresis_by_window"][window]["high"]), key="hhigh")
        hlow  = st.number_input("low (scale-in consecutive windows)", 1, 50, int(SC["hysteresis_by_window"][window]["low"]), key="hlow")
        in_margin = st.slider("in_margin", 0.0, 1.0, float(SC["hysteresis_by_window"][window]["in_margin"]), 0.01, key="in_margin")
        cooldown_m = st.number_input("Cooldown minutes", 0.0, 120.0, float(SC["cooldown_minutes"]["base"]), step=1.0, key="cooldown_m")
        max_step = st.number_input("Step limit (max change per decision)", 1, 200, int(SC["max_step_change_by_window"][window]), key="max_step")

        st.caption("Provisioning")
        warmup_w = st.number_input("Warmup windows", 0, 50, int(SC["provisioning_by_window"][window]["warmup_windows"]), key="warmup_w")
        min_uptime_w = st.number_input("Min uptime windows", 0, 200, int(SC["provisioning_by_window"][window]["min_uptime_windows"]), key="min_uptime_w")

        st.subheader("D) Anomaly / DDoS")
        enable_anom = st.checkbox("Enable anomaly (MAD)", value=bool(SC["anomaly"]["enabled"]), key="enable_anom")
        lookback_h = st.number_input("lookback_hours", 0.5, 24.0, float(SC["anomaly"]["lookback_hours"]), step=0.5, key="lookback_h")
        mad_k = st.number_input("mad_k", 1.0, 20.0, float(SC["anomaly"]["mad_k"]), step=0.5, key="mad_k")
        min_pts = st.number_input("min_points", 1, 200, int(SC["anomaly"]["min_points"]), key="min_pts")

        enable_ddos = st.checkbox("Enable DDoS mode", value=bool(SC["ddos_mode"]["enabled"]), key="enable_ddos")
        # The default of 3 consecutive windows is hard-coded here; it is not read from SC.
        ddos_consec = st.number_input("consecutive_windows", 1, 20, 3, key="ddos_consec")
        ddos_force_step = st.number_input("force_step", 1, 200, int(SC["ddos_mode"]["force_scale_out_step_by_window"][window]), key="ddos_force_step")
        ddos_max = st.number_input("max_instances_during_ddos", 1, 5000, int(SC["ddos_mode"]["max_instances_during_ddos"]), key="ddos_max")

        run = st.form_submit_button("Apply / Run simulation")

# ============================================================
# Apply sidebar -> update SC
# ============================================================
# Write the sidebar values back into SC so the helpers that read SC
# (clamp_instances, buffer, ...) use the user's settings.
# Only the entries for the selected metric / window are overwritten.
SC["safety_buffer_by_metric"][metric] = float(buf)
SC["min_instances"] = int(min_ins)
SC["max_instances"] = int(max_ins)

SC["hysteresis_by_window"][window]["high"] = int(hhigh)
SC["hysteresis_by_window"][window]["low"]  = int(hlow)
SC["hysteresis_by_window"][window]["in_margin"] = float(in_margin)
SC["cooldown_minutes"]["base"] = float(cooldown_m)
SC["max_step_change_by_window"][window] = int(max_step)
SC["provisioning_by_window"][window]["warmup_windows"] = int(warmup_w)
SC["provisioning_by_window"][window]["min_uptime_windows"] = int(min_uptime_w)

SC["anomaly"]["enabled"] = bool(enable_anom)
SC["anomaly"]["lookback_hours"] = float(lookback_h)
SC["anomaly"]["mad_k"] = float(mad_k)
SC["anomaly"]["min_points"] = int(min_pts)

SC["ddos_mode"]["enabled"] = bool(enable_ddos)
SC["ddos_mode"]["force_scale_out_step_by_window"][window] = int(ddos_force_step)
SC["ddos_mode"]["max_instances_during_ddos"] = int(ddos_max)

# Latency-model settings in the key names simulate_queue_latency expects.
# queue_decay is fixed here; it has no entry in SC["slo"].
LAT_CFG = {
    "base_ms": float(SC["slo"]["base_latency_ms"]),
    "alpha_ms_per_queue_unit": float(SC["slo"]["alpha_latency_per_unit_queue"]),
    "p95_target_ms": float(SC["slo"]["p95_latency_target_ms"]),
    "queue_decay": 0.02,
}

# ============================================================
# Run simulation (only when button pressed OR first load)
# ============================================================
# Create the result slots once; "sim_pred" acts as the marker for all four.
if "sim_pred" not in st.session_state:
    st.session_state["sim_pred"] = None
    st.session_state["sim_static"] = None
    st.session_state["ev_pred"] = None
    st.session_state["summary_df"] = None

def run_all():
    """Run the static and predictive simulations and cache the results.

    Uses the sidebar selections visible at module level (metric, window,
    model_tag, the test date range and the policy knobs). Steps:
      1. load the prediction frame and keep rows in [test_start, test_end);
      2. size the static baseline at the 95th percentile of required
         instances over the baseline ("train") slice;
      3. simulate the static and the predictive policy and summarize both;
      4. write the CSV artifacts to OUT_05 and store the frames in
         st.session_state.

    Raises:
        ValueError: if the selected date range contains no rows, or the
            static baseline cannot be derived because no finite
            required-instance value is available.
    """
    df_case = load_pred_case(metric, window, model_tag)

    # Filter by the selected date range. The end bound is exclusive because
    # the date widget yields a date, i.e. midnight at the start of that day.
    range_start = pd.Timestamp(test_start)
    range_end = pd.Timestamp(test_end)
    in_range = (df_case["timestamp"] >= range_start) & (df_case["timestamp"] < range_end)
    df_case = df_case[in_range].copy()
    df_case = df_case.sort_values("timestamp").reset_index(drop=True)
    if df_case.empty:
        # Fail with an actionable message instead of the NaN-to-int
        # conversion error that an empty slice would trigger further down.
        raise ValueError(
            f"No prediction rows between {range_start} (inclusive) and "
            f"{range_end} (exclusive); widen the test date range."
        )

    # NOTE: the include_gaps toggle is not applied here; all rows of the
    # prediction file are kept (the files are generally already filtered).

    # Baseline ("train") slice for the static policy: rows of this view that
    # precede TRAIN_END. If you want the exact spec baseline (Jul + 1-22 Aug),
    # load the TRAIN prediction file separately. For the demo, fall back to
    # the first 70% of the view when no such rows exist.
    train_end = pd.Timestamp("1995-08-23 00:00:00")
    df_train = df_case[df_case["timestamp"] < train_end].copy()
    if df_train.empty:
        # Keep at least one row so very short slices still give a baseline.
        cut = max(1, int(len(df_case) * 0.7))
        df_train = df_case.iloc[:cut].copy()

    static_req_train = df_train["y_true"].apply(lambda x: required_instances(x, metric, window))
    p95_required = np.nanpercentile(static_req_train.values, 95)
    if not np.isfinite(p95_required):
        raise ValueError(
            "Cannot derive the static baseline: no finite required-instance "
            "values in the baseline slice."
        )
    static_n = clamp_instances(int(p95_required))

    sim_static, ev_static = simulate_static(df_case, metric, window, static_n)
    sim_static = simulate_queue_latency(sim_static, LAT_CFG)

    sim_pred, ev_pred = simulate_predictive(
        df_case, metric, window,
        hysteresis_high=int(hhigh),
        hysteresis_low=int(hlow),
        in_margin=float(in_margin),
        cooldown_minutes=float(cooldown_m),
        max_step=int(max_step),
        warmup_windows=int(warmup_w),
        min_uptime_windows=int(min_uptime_w),
        enable_anom=bool(enable_anom),
        lookback_hours=float(lookback_h),
        mad_k=float(mad_k),
        min_points=int(min_pts),
        enable_ddos=bool(enable_ddos),
        ddos_consec=int(ddos_consec),
        ddos_force_step=int(ddos_force_step),
        ddos_max=int(ddos_max),
        lat_cfg=LAT_CFG,
    )

    summary_static = summarize(sim_static, ev_static)
    summary_pred = summarize(sim_pred, ev_pred)
    summary_df = pd.DataFrame([summary_static, summary_pred])

    # Save artifacts (optional): both policies concatenated into one file each.
    os.makedirs(OUT_05, exist_ok=True)
    sim_all = pd.concat([sim_static, sim_pred], ignore_index=True)
    events_all = pd.concat([ev_static, ev_pred], ignore_index=True)
    sim_all.to_csv(os.path.join(OUT_05, "sim_timeseries_all.csv"), index=False)
    events_all.to_csv(os.path.join(OUT_05, "scaling_events_all.csv"), index=False)
    summary_df.to_csv(os.path.join(OUT_05, "summary_cost_perf.csv"), index=False)

    # Cache the results so reruns that do not resubmit the form can reuse them.
    st.session_state["sim_pred"] = sim_pred
    st.session_state["sim_static"] = sim_static
    st.session_state["ev_pred"] = ev_pred
    st.session_state["summary_df"] = summary_df
    st.session_state["static_n"] = int(static_n)

# Simulate when the form was submitted or when nothing is cached yet; on
# failure show the error and halt this script run.
needs_run = bool(run) or st.session_state["sim_pred"] is None
if needs_run:
    try:
        run_all()
    except Exception as exc:
        st.error(f"Run failed: {exc}")
        st.stop()

# Read the cached results back into module-level names for the tabs below.
sim_pred, sim_static, ev_pred, summary_df = (
    st.session_state[name]
    for name in ("sim_pred", "sim_static", "ev_pred", "summary_df")
)
static_n = st.session_state.get("static_n")

# ============================================================
# Metrics + KPIs
# ============================================================
# Forecast-accuracy KPIs for the selected model / metric / window.
metrics_long = load_metrics_long()
fk = get_forecast_kpis(metrics_long, model_tag, metric, window)

# Row selectors for the two policies in the summary table.
_is_static = summary_df["policy_mode"] == "static"
_is_pred = summary_df["policy_mode"] == "predictive"

# Headline numbers: first matching row of each policy.
cost_static = float(summary_df.loc[_is_static, "estimated_total_cost"].iloc[0])
cost_pred = float(summary_df.loc[_is_pred, "estimated_total_cost"].iloc[0])
events_per_hour = float(summary_df.loc[_is_pred, "events_per_hour"].iloc[0])
sla_rate = float(summary_df.loc[_is_pred, "sla_violation_rate"].iloc[0])
slo_rate = float(summary_df.loc[_is_pred, "slo_violation_rate"].iloc[0])

# ============================================================
# Tabs
# ============================================================
tab_overview, tab_forecast, tab_scale, tab_cost, tab_anom = st.tabs(
    ["Overview", "Forecast", "Autoscaling", "Cost vs Reliability", "Anomaly/DDoS"]
)

# -----------------------------
# Overview
# -----------------------------
# Figures are built on explicit (fig, ax) objects and closed after rendering:
# clear_figure only clears a figure, while pyplot keeps it registered until
# it is closed, so unclosed figures pile up across script reruns.
with tab_overview:
    st.subheader("KPI Summary")

    # Forecast accuracy (shown as "NA" when the KPI is NaN) and total cost.
    c1, c2, c3, c4, c5 = st.columns(5)
    c1.metric("RMSE (test)", "NA" if np.isnan(fk["RMSE"]) else f"{fk['RMSE']:.3f}")
    c2.metric("MAE (test)",  "NA" if np.isnan(fk["MAE"])  else f"{fk['MAE']:.3f}")
    c3.metric("MAPE (test)", "NA" if np.isnan(fk["MAPE"]) else f"{fk['MAPE']:.2f}%")
    c4.metric("Total cost (Static)", f"${cost_static:.2f}")
    # A lower cost is the better outcome, so invert the delta colouring:
    # a negative delta (predictive cheaper than static) renders as good.
    c5.metric(
        "Total cost (Predictive)",
        f"${cost_pred:.2f}",
        delta=f"{(cost_pred-cost_static):.2f}",
        delta_color="inverse",
    )

    # Reliability and scaling-activity KPIs of the predictive policy.
    c6, c7, c8, c9 = st.columns(4)
    c6.metric("SLA violation rate", f"{sla_rate*100:.2f}%")
    c7.metric("SLO violation rate", f"{slo_rate*100:.2f}%")
    c8.metric("# events (predictive)", f"{len(ev_pred) if ev_pred is not None else 0}")
    c9.metric("Events/hour", f"{events_per_hour:.2f}")

    st.caption(f"Static baseline instances (p95 train-style): {static_n}")

    # Overview charts
    ts = sim_pred["timestamp"]

    colA, colB = st.columns(2)
    with colA:
        fig, ax = plt.subplots(figsize=(12, 3))
        ax.plot(ts, sim_pred["y_true"], label="Actual")
        ax.plot(ts, sim_pred["y_pred"], label="Forecast")
        ax.set_title("Actual vs Forecast")
        ax.set_xlabel("time")
        ax.grid(True)
        ax.legend()
        st.pyplot(fig, clear_figure=True)
        plt.close(fig)

    with colB:
        fig, ax = plt.subplots(figsize=(12, 3))
        ax.plot(ts, sim_pred["required_instances"], label="Desired")
        ax.plot(ts, sim_pred["instances"], label="InService")
        ax.set_title("Desired vs InService")
        ax.set_xlabel("time")
        ax.grid(True)
        ax.legend()
        st.pyplot(fig, clear_figure=True)
        plt.close(fig)

    # Running total of the per-step cost for both policies.
    fig, ax = plt.subplots(figsize=(14, 3))
    ax.plot(sim_static["timestamp"], sim_static["cost_step"].cumsum(), label="STATIC cumulative cost")
    ax.plot(sim_pred["timestamp"], sim_pred["cost_step"].cumsum(), label="PREDICTIVE cumulative cost")
    ax.set_title("Cumulative Cost")
    ax.set_xlabel("time")
    ax.grid(True)
    ax.legend()
    st.pyplot(fig, clear_figure=True)
    plt.close(fig)

# -----------------------------
# Forecast
# -----------------------------
# Figures are built on explicit (fig, ax) objects and closed after rendering:
# clear_figure only clears a figure, while pyplot keeps it registered until
# it is closed, so unclosed figures pile up across script reruns.
with tab_forecast:
    st.subheader("Forecast diagnostics")

    ts = sim_pred["timestamp"]
    # Forecast error per step; positive means the forecast was too low.
    resid = sim_pred["y_true"] - sim_pred["y_pred"]

    col1, col2 = st.columns(2)
    with col1:
        fig, ax = plt.subplots(figsize=(12, 3))
        ax.plot(ts, sim_pred["y_true"], label="Actual")
        ax.plot(ts, sim_pred["y_pred"], label="Forecast")
        ax.set_title("Actual vs Forecast")
        ax.set_xlabel("time")
        ax.grid(True)
        ax.legend()
        st.pyplot(fig, clear_figure=True)
        plt.close(fig)

        fig, ax = plt.subplots(figsize=(12, 3))
        ax.plot(ts, resid)
        ax.set_title("Residual (y_true - y_pred)")
        ax.set_xlabel("time")
        ax.grid(True)
        st.pyplot(fig, clear_figure=True)
        plt.close(fig)

    with col2:
        fig, ax = plt.subplots(figsize=(12, 3))
        ax.hist(resid.values, bins=60)
        ax.set_title("Residual distribution")
        ax.set_xlabel("residual")
        ax.set_ylabel("count")
        ax.grid(True)
        st.pyplot(fig, clear_figure=True)
        plt.close(fig)

        fig, ax = plt.subplots(figsize=(12, 3))
        ax.scatter(sim_pred["y_true"], sim_pred["y_pred"], s=8)
        ax.set_title("y_true vs y_pred")
        ax.set_xlabel("y_true")
        ax.set_ylabel("y_pred")
        ax.grid(True)
        st.pyplot(fig, clear_figure=True)
        plt.close(fig)

    st.subheader("Benchmark table (from metrics_forecast.csv)")
    if metrics_long.empty:
        st.info("metrics_forecast.csv not found yet.")
    else:
        # Keep the test split only and spread the models across columns, one
        # row per (target, window, metric).
        test_m = metrics_long[metrics_long["split"].astype(str).str.lower().eq("test")].copy()
        bench = test_m.pivot_table(
            index=["target","window","metric"],
            columns=["model"],
            values="value",
            aggfunc="first"
        ).reset_index()
        st.dataframe(bench.sort_values(["target","window","metric"]), use_container_width=True)

# -----------------------------
# Autoscaling
# -----------------------------
# Figures are built on explicit (fig, ax) objects and closed after rendering:
# clear_figure only clears a figure, while pyplot keeps it registered until
# it is closed, so unclosed figures pile up across script reruns.
with tab_scale:
    st.subheader("Policy simulation")

    ts = sim_pred["timestamp"]

    fig, ax = plt.subplots(figsize=(14, 3))
    ax.plot(ts, sim_pred["required_instances"], label="Desired")
    ax.plot(ts, sim_pred["instances"], label="InService")
    ax.set_title("Desired vs InService (NO event lines)")
    ax.set_xlabel("time")
    ax.set_ylabel("# instances")
    ax.grid(True)
    ax.legend()
    st.pyplot(fig, clear_figure=True)
    plt.close(fig)

    fig, ax = plt.subplots(figsize=(14, 3))
    ax.plot(ts, sim_pred["headroom"], label="Headroom = capacity - load")
    # Zero line: points below it are where load exceeds capacity.
    ax.axhline(0, linewidth=1)
    ax.set_title("Capacity Headroom (negative => SLA risk)")
    ax.set_xlabel("time")
    ax.grid(True)
    ax.legend()
    st.pyplot(fig, clear_figure=True)
    plt.close(fig)

    col1, col2 = st.columns(2)
    with col1:
        dist = instance_distribution(sim_pred)
        fig, ax = plt.subplots(figsize=(12, 3))
        # Instance counts are cast to str so they plot as categories.
        ax.bar(dist["instances"].astype(str), dist["pct_time"])
        ax.set_title("Instance distribution (% time)")
        ax.set_xlabel("# instances")
        ax.set_ylabel("% time")
        ax.grid(True, axis="y")
        st.pyplot(fig, clear_figure=True)
        plt.close(fig)

    with col2:
        counts = daily_event_counts(ev_pred)
        if counts.empty:
            st.info("No scaling events in this slice.")
        else:
            counts["date"] = pd.to_datetime(counts["date"])
            fig, ax = plt.subplots(figsize=(12, 3))
            # Stacked bars: scale-in counts sit on top of scale-out counts.
            ax.bar(counts["date"], counts["scale_out"], label="scale-out/day")
            ax.bar(counts["date"], counts["scale_in"], bottom=counts["scale_out"], label="scale-in/day")
            ax.set_title("Scaling frequency (events/day)")
            ax.set_xlabel("date")
            ax.set_ylabel("# events/day")
            ax.grid(True, axis="y")
            ax.legend()
            st.pyplot(fig, clear_figure=True)
            plt.close(fig)

    st.subheader("Scaling events")
    if ev_pred is None or ev_pred.empty:
        st.info("No events.")
    else:
        st.dataframe(
            ev_pred.sort_values("timestamp").reset_index(drop=True),
            use_container_width=True
        )

# -----------------------------
# Cost vs Reliability
# -----------------------------
# Figures are built on explicit (fig, ax) objects and closed after rendering:
# clear_figure only clears a figure, while pyplot keeps it registered until
# it is closed, so unclosed figures pile up across script reruns.
with tab_cost:
    st.subheader("Cost vs Reliability")

    # Running total of the per-step cost for both policies.
    fig, ax = plt.subplots(figsize=(14, 3))
    ax.plot(sim_static["timestamp"], sim_static["cost_step"].cumsum(), label="STATIC cumulative cost")
    ax.plot(sim_pred["timestamp"], sim_pred["cost_step"].cumsum(), label="PREDICTIVE cumulative cost")
    ax.set_title("Cumulative cost")
    ax.set_xlabel("time")
    ax.set_ylabel("$")
    ax.grid(True)
    ax.legend()
    st.pyplot(fig, clear_figure=True)
    plt.close(fig)

    # Simulated p95 latency against the configured SLO target.
    p95_target = float(LAT_CFG["p95_target_ms"])
    fig, ax = plt.subplots(figsize=(14, 3))
    ax.plot(sim_pred["timestamp"], sim_pred["p95_latency_ms"], label="p95 latency (ms)")
    ax.axhline(p95_target, linewidth=1, label="SLO target")
    ax.set_title("p95 latency vs SLO target")
    ax.set_xlabel("time")
    ax.set_ylabel("ms")
    ax.grid(True)
    ax.legend()
    st.pyplot(fig, clear_figure=True)
    plt.close(fig)

    col1, col2 = st.columns(2)
    with col1:
        fig, ax = plt.subplots(figsize=(12, 3))
        ax.plot(sim_pred["timestamp"], sim_pred["queue_len"])
        ax.set_title("Queue length")
        ax.set_xlabel("time")
        ax.grid(True)
        st.pyplot(fig, clear_figure=True)
        plt.close(fig)

    with col2:
        fig, ax = plt.subplots(figsize=(12, 3))
        ax.plot(sim_pred["timestamp"], sim_pred["utilization"])
        ax.set_title("Utilization")
        ax.set_xlabel("time")
        ax.grid(True)
        st.pyplot(fig, clear_figure=True)
        plt.close(fig)

# -----------------------------
# Anomaly / DDoS
# -----------------------------
# Figures are built on explicit (fig, ax) objects and closed after rendering:
# clear_figure only clears a figure, while pyplot keeps it registered until
# it is closed, so unclosed figures pile up across script reruns.
with tab_anom:
    st.subheader("Spike / DDoS flags (bonus)")

    ts = sim_pred["timestamp"]
    fig, ax = plt.subplots(figsize=(14, 3))
    ax.plot(ts, sim_pred["y_true"], label="Actual load")
    # Rows flagged as spike / DDoS are overlaid as markers on the load curve.
    sp = sim_pred[sim_pred["is_spike"] == 1]
    dd = sim_pred[sim_pred["is_ddos"] == 1]
    if not sp.empty:
        ax.scatter(sp["timestamp"], sp["y_true"], s=18, label="Spike (MAD)")
    if not dd.empty:
        ax.scatter(dd["timestamp"], dd["y_true"], s=26, label="DDoS (consecutive spikes)")
    ax.set_title("Actual load + spike/ddos markers")
    ax.set_xlabel("time")
    ax.set_ylabel(f"{metric}/{window}")
    ax.grid(True)
    ax.legend()
    st.pyplot(fig, clear_figure=True)
    plt.close(fig)

    fig, ax = plt.subplots(figsize=(14, 3))
    ax.plot(ts, sim_pred["anomaly_score"])
    ax.set_title("Anomaly score (MAD z-like)")
    ax.set_xlabel("time")
    ax.grid(True)
    st.pyplot(fig, clear_figure=True)
    plt.close(fig)

    # The 50 rows with the highest anomaly score, with their scaling context.
    st.subheader("Top anomalies")
    topa = sim_pred.sort_values("anomaly_score", ascending=False).head(50)[
        ["timestamp","y_true","y_pred","anomaly_score","is_spike","is_ddos","instances","required_instances","headroom"]
    ]
    st.dataframe(topa, use_container_width=True)

# ============================================================
# Export buttons
# ============================================================
st.divider()
colx, coly, colz = st.columns(3)

# One download button per column; a missing event log is exported as an
# empty frame. CSVs are encoded as UTF-8 with a BOM ("utf-8-sig").
_exports = (
    (colx, "summary_cost_perf.csv", summary_df),
    (coly, "sim_timeseries_predictive.csv", sim_pred),
    (colz, "scaling_events_predictive.csv", pd.DataFrame() if ev_pred is None else ev_pred),
)
for _column, _file_name, _frame in _exports:
    with _column:
        st.download_button(
            f"Download {_file_name}",
            data=_frame.to_csv(index=False).encode("utf-8-sig"),
            file_name=_file_name,
            mime="text/csv",
        )
'''

# Write the generated Streamlit source to disk, then report the artifact paths.
with open(app_path, "w", encoding="utf-8") as f:
    f.write(APP)
print("✅ generated:")
print("-", str(app_path))
print("-", str(cfg_scaling_path))
# The command stays a raw string so the Windows path backslash is literal.
# The blank separator line is printed on its own: inside a raw string the
# two characters "\n" are emitted verbatim instead of producing a newline.
print()
print(r"Run: streamlit run notebooks\streamlit_app.py")




✅ generated:
- C:\Users\PC\OneDrive - National Economics University\Máy tính\SC\AUTOSCALING-ANALYSIS\notebooks\streamlit_app.py
- C:\Users\PC\OneDrive - National Economics University\Máy tính\SC\AUTOSCALING-ANALYSIS\notebooks\outputs\05_scaling\_cfg_scaling.json
\nRun: streamlit run notebooks\streamlit_app.py
