In [19]:
import pandas as pd
from pathlib import Path

# Location of the k6 CSV export analysed in this notebook cell.
path = Path('../k6/out/stg-cloud-fe-stress.csv')
if not path.exists():
    raise FileNotFoundError(f"[ERROR] File not found: {path}")

try:
    df = pd.read_csv(path)
except Exception as e:
    # Chain explicitly so the underlying parser error is reported as the
    # direct cause instead of an incidental "during handling" context.
    raise ValueError(f"[ERROR] Failed to parse CSV: {e}") from e

# Normalize the key column types; values that cannot be parsed become NaN.
df["metric_value"] = pd.to_numeric(df["metric_value"], errors="coerce")
df["timestamp"] = pd.to_numeric(df["timestamp"], errors="coerce")

# Drop rows that are missing any of the key columns.
df = df.dropna(subset=["metric_name", "metric_value", "timestamp"])

# Columns of interest for the analysis.
keep_columns = [
    "metric_name",      # which metric the row belongs to (http_req_duration, ...)
    "timestamp",        # UNIX time (in seconds)
    "metric_value",     # numeric value of the sample
    "check",            # name of the check
    "url",              # request URL
    "status",           # HTTP status code
    "error"             # error message
]
# Column pruning is currently disabled; re-enable the next line to keep only
# the columns listed in keep_columns.
# df = df[[col for col in keep_columns if col in df.columns]]
metric = 'data_sent'

# Keep only the rows of the selected metric; the bare expression on the last
# line makes the notebook render the resulting frame.
df_filtered = df[(df["metric_name"] == metric)]
df_filtered


Unnamed: 0,metric_name,timestamp,metric_value,check,error,error_code,expected_response,group,method,name,proto,scenario,service,status,subproto,tls_version,url,extra_tags,metadata
3012,data_sent,1745230243,195764.0,,,,,,,,,default,,,,,,,
5602,data_sent,1745230293,151403.0,,,,,,,,,default,,,,,,,
8108,data_sent,1745230328,98627.0,,,,,,,,,default,,,,,,,
10655,data_sent,1745230369,78730.0,,,,,,,,,default,,,,,,,
11527,data_sent,1745230377,63239.0,,,,,,,,,default,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
287202,data_sent,1745231674,23199.0,,,,,,,,,default,,,,,,,
287278,data_sent,1745231676,22609.0,,,,,,,,,default,,,,,,,
287311,data_sent,1745231677,33874.0,,,,,,,,,default,,,,,,,
287333,data_sent,1745231679,27394.0,,,,,,,,,default,,,,,,,


In [14]:
import pandas as pd
import numpy as np

def calculate_stats(df: pd.DataFrame, metric: str = "http_req_duration") -> pd.DataFrame:
    """
    Compute per-URL summary statistics for one metric.

    Rows whose ``metric_name`` equals ``metric`` are grouped by ``url`` and
    their ``metric_value`` is aggregated.

    Args:
        df: Frame with at least the columns ``metric_name``, ``url`` and
            ``metric_value``.
        metric: Value of ``metric_name`` to analyse.

    Returns:
        One row per URL with the columns ``url``, ``count``, ``avg``, ``min``,
        ``max``, ``p50``, ``p90``, ``p95`` and ``p99``. When no row matches
        ``metric`` a warning is printed and an empty frame with those same
        columns is returned.
    """
    aggregations = [
        ("count", "count"),
        ("avg", "mean"),
        ("min", "min"),
        ("max", "max"),
    ]
    # Series.quantile uses the same linear interpolation as np.percentile but
    # skips NaN, matching how count/mean/min/max treat missing samples.
    # ``q=q`` binds the loop value now (avoids the late-binding closure trap).
    aggregations += [
        (f"p{q}", lambda values, q=q: values.quantile(q / 100))
        for q in (50, 90, 95, 99)
    ]
    columns = ["url"] + [name for name, _ in aggregations]

    samples = df[df["metric_name"] == metric]

    if samples.empty:
        print(f"[WARN] No data for metric: {metric}")
        # Keep the schema stable so callers can rely on the column names.
        return pd.DataFrame(columns=columns)

    # Grouping key: URL
    per_url = samples.groupby("url")["metric_value"]

    return per_url.agg(aggregations).reset_index()


def calculate_error_summary(df: pd.DataFrame) -> pd.DataFrame:
    """
    Count how often each error value occurs.

    Only rows whose ``metric_name`` is ``"error"`` and whose ``error`` column
    is not null are considered.

    Returns:
        A frame with the columns ``error_type`` and ``count``, ordered as
        ``value_counts`` returns them (most frequent first); it is empty when
        no row qualifies.
    """
    is_error_metric = df["metric_name"] == "error"
    has_error_text = df["error"].notna()
    matching = df[is_error_metric & has_error_text]

    if not matching.empty:
        return (
            matching["error"]
            .value_counts()
            .rename_axis("error_type")
            .reset_index(name="count")
        )

    return pd.DataFrame(columns=["error_type", "count"])


def calculate_failure_rate(df: pd.DataFrame) -> pd.DataFrame:
    """
    Total the ``http_req_failed`` samples per URL.

    Rows whose ``metric_name`` is ``"http_req_failed"`` are grouped by ``url``
    and their ``metric_value`` is summed.

    Returns:
        A frame with the columns ``url`` and ``failures`` (the per-URL sum);
        it is empty, with those columns, when no such rows exist. Note that
        the result is a sum, not a ratio.
    """
    is_failed_metric = df["metric_name"] == "http_req_failed"
    failed_rows = df[is_failed_metric]

    if failed_rows.empty:
        return pd.DataFrame(columns=["url", "failures"])

    per_url_total = failed_rows.groupby("url")["metric_value"].sum()
    return per_url_total.rename("failures").reset_index()
    return summary