# Failure Landscape Notebook (B-v2-step2-2)

목적: **Stage-structured failure를 회복하기 위한 Diff-Formatter 실험 관찰**
> 아래 구조를 통해 git apply 가능한 unified diff로 정규화(normalize)하는 시도

- single sLM
- 1차: Generator
- 2차: Formatter (동일 모델 2-call)

### P0 : 
- formatter 성공률 -> git_apply_check_failed 가 얼마나 발생 / corrupt_patch?!

In [1]:
import pandas as pd
import numpy as np
import re
from pathlib import Path

# ---- load ----
# 이미 results_csv 변수가 있으면 그걸 쓰고, 없으면 latest를 찾음
def find_latest_results_csv(runs_dir: Path) -> Path:
    cands = sorted(runs_dir.glob('*/results.csv'), key=lambda p: p.stat().st_mtime, reverse=True)
    if not cands:
        raise FileNotFoundError(f'No results.csv found under: {runs_dir}')
    return cands[0]

RUNS_DIR = Path('..').resolve()
RESULTS_CSV = RUNS_DIR / "results.csv"
ANALYSIS_DIR = RUNS_DIR / "analysis"

print("Using results:", RESULTS_CSV)

df = pd.read_csv(RESULTS_CSV)
print("rows:", len(df), "cols:", len(df.columns))

Using results: /home/dibaeck/workspace/project_IR_sLM_MAS/runs_archive/exp1_B-v2-step2-3_Always-Formatter_p1/results.csv
rows: 200 cols: 25


In [2]:
# ---- safe normalize ----
for c in ["format_used","format_ok","format_reason","signature","error_type"]:
    if c not in df.columns:
        df[c] = np.nan

def to_bool_series(s):
    if s.dtype == bool:
        return s.fillna(False)
    return s.astype(str).str.strip().str.lower().isin(["1","true","t","yes","y"])

df["format_used"] = to_bool_series(df["format_used"].fillna(False))
df["format_ok"] = to_bool_series(df["format_ok"].fillna(False))

df["format_reason"] = df["format_reason"].fillna("").astype(str)
df["signature"] = df["signature"].fillna("").astype(str)
df["error_type"] = df["error_type"].fillna("").astype(str)


In [3]:
# ---- metrics ----
total = len(df)
fmt_used = int(df["format_used"].sum())
fmt_used_ratio = fmt_used / total if total else 0.0

fmt_ok = int((df["format_used"] & df["format_ok"]).sum())
fmt_ok_ratio = fmt_ok / fmt_used if fmt_used else 0.0

summary = pd.Series({
    "total_rows": total,
    "formatter_used_count": fmt_used,
    "formatter_used_ratio": round(fmt_used_ratio, 4),
    "formatter_success_count": fmt_ok,
    "formatter_success_rate_given_used": round(fmt_ok_ratio, 4),
})
display(summary)

total_rows                           200.0000
formatter_used_count                 191.0000
formatter_used_ratio                   0.9550
formatter_success_count              186.0000
formatter_success_rate_given_used      0.9738
dtype: float64

In [4]:
# ---- formatter 전 reason top-k ----
# formatter가 호출된 row에서 format_reason이 "전 invalid reason" 역할
pre_reason = (
    df.loc[df["format_used"], "format_reason"]
      .replace("", "UNKNOWN_PRE_REASON")
)
print("\n[Top-K] Pre (trigger) invalid reasons (format_reason) among formatter_used:")
display(pre_reason.value_counts().head(10))



[Top-K] Pre (trigger) invalid reasons (format_reason) among formatter_used:


format_reason
git_apply_corrupt_patch:error: corrupt patch at line 13    42
git_apply_corrupt_patch:error: corrupt patch at line 12    28
git_apply_corrupt_patch:error: corrupt patch at line 26    11
git_apply_corrupt_patch:error: corrupt patch at line 14    11
git_apply_corrupt_patch:error: corrupt patch at line 20     8
git_apply_corrupt_patch:error: corrupt patch at line 15     7
git_apply_corrupt_patch:error: corrupt patch at line 11     7
git_apply_corrupt_patch:error: corrupt patch at line 19     6
git_apply_corrupt_patch:error: corrupt patch at line 10     6
git_apply_corrupt_patch:error: corrupt patch at line 22     5
Name: count, dtype: int64

In [5]:
# ---- formatter 후 invalid top-k ----
# "후 invalid"는 formatter까지 했는데도 최종적으로 GEN_FAIL(invalid/empty)로 남은 것들
# 판단 기준: format_used=True AND error_type==GEN_FAIL AND signature in {invalid_diff_format, empty_diff}
post_mask = (
    df["format_used"]
    & (df["error_type"] == "GEN_FAIL")
    & (df["signature"].isin(["invalid_diff_format", "empty_diff"]))
)

# 후 reason은 가능한 한 구체적으로:
# 1) format_reason에 formatter_exception / 원인 문자열이 있으면 그걸 사용
# 2) 아니면 signature로 fallback
post_reason = df.loc[post_mask, "format_reason"].copy()
post_reason = post_reason.where(post_reason.str.len() > 0, df.loc[post_mask, "signature"])
post_reason = post_reason.replace("", "UNKNOWN_POST_REASON")

print("\n[Top-K] Post invalid reasons among formatter_used BUT still GEN_FAIL:")
display(post_reason.value_counts().head(10))


[Top-K] Post invalid reasons among formatter_used BUT still GEN_FAIL:


format_reason
git_apply_corrupt_patch:error: corrupt patch at line 13    42
git_apply_corrupt_patch:error: corrupt patch at line 12    28
git_apply_corrupt_patch:error: corrupt patch at line 26    11
git_apply_corrupt_patch:error: corrupt patch at line 14    11
git_apply_corrupt_patch:error: corrupt patch at line 20     8
git_apply_corrupt_patch:error: corrupt patch at line 15     7
git_apply_corrupt_patch:error: corrupt patch at line 11     7
git_apply_corrupt_patch:error: corrupt patch at line 19     6
git_apply_corrupt_patch:error: corrupt patch at line 10     6
git_apply_corrupt_patch:error: corrupt patch at line 21     5
Name: count, dtype: int64

In [6]:
# ---- bonus: formatter 성공 케이스에서 pre_reason이 뭐였는지 top-k ----
ok_mask = df["format_used"] & df["format_ok"]
ok_pre_reason = (
    df.loc[ok_mask, "format_reason"]
      .replace("", "UNKNOWN_PRE_REASON")
)
print("\n[Top-K] Pre reasons for which formatter succeeded:")
display(ok_pre_reason.value_counts().head(10))


[Top-K] Pre reasons for which formatter succeeded:


format_reason
git_apply_corrupt_patch:error: corrupt patch at line 13    42
git_apply_corrupt_patch:error: corrupt patch at line 12    28
git_apply_corrupt_patch:error: corrupt patch at line 26    11
git_apply_corrupt_patch:error: corrupt patch at line 14    11
git_apply_corrupt_patch:error: corrupt patch at line 20     8
git_apply_corrupt_patch:error: corrupt patch at line 11     7
git_apply_corrupt_patch:error: corrupt patch at line 15     7
git_apply_corrupt_patch:error: corrupt patch at line 10     6
git_apply_corrupt_patch:error: corrupt patch at line 19     6
git_apply_corrupt_patch:error: corrupt patch at line 22     5
Name: count, dtype: int64

### 추가 분석 : formatter가 왜 안불렸지??

In [7]:
df0 = df.copy()
for c in ["stage","error_type","signature","format_reason"]:
    if c in df0.columns:
        df0[c] = df0[c].fillna("").astype(str)

def to_bool(s):
    if s.name not in df0.columns: 
        return pd.Series([False]*len(df0))
    if pd.api.types.is_bool_dtype(s):
        return s.fillna(False)
    return s.astype(str).str.strip().str.lower().isin(["1","true","t","yes","y"])

for c in ["format_used","format_ok"]:
    df0[c] = to_bool(df0[c]) if c in df0.columns else False

N = len(df0)

# 1) formatter trigger 분포
used_df = df0[df0["format_used"]]
print("formatter_used:", len(used_df), "/", N)

print("\n=== formatter trigger reasons ===")
display(used_df["format_reason"].replace("", "MISSING").value_counts())

# 2) PATCH_FAIL signature 분포(전체)
patch = df0[(df0["stage"]=="PATCH") & (df0["error_type"]=="PATCH_FAIL")]
print("\nPATCH_FAIL rows:", len(patch))
print("\n=== PATCH_FAIL signature top ===")
display(patch["signature"].value_counts().head(10))

# 3) formatter가 불린 케이스의 최종 stage/error/signature는?
print("\n=== formatter-used rows final outcomes ===")
cols = [c for c in ["task_id","stage","error_type","signature","format_used","format_ok","format_reason"] if c in df0.columns]
display(used_df[cols].head(20))

# 4) 핵심 진단: corrupt_patch가 formatter trigger가 아닌지 확인
corrupt = patch[patch["signature"].eq("git_apply_corrupt_patch")]
print("\ncorrupt_patch:", len(corrupt), "/", len(patch) if len(patch) else 0)

corrupt_used = corrupt["format_used"].sum() if "format_used" in corrupt.columns else 0
print("corrupt_patch with formatter_used:", int(corrupt_used))

formatter_used: 191 / 200

=== formatter trigger reasons ===


format_reason
git_apply_corrupt_patch:error: corrupt patch at line 13                                                                                                                                                                                                                                                    42
git_apply_corrupt_patch:error: corrupt patch at line 12                                                                                                                                                                                                                                                    28
git_apply_corrupt_patch:error: corrupt patch at line 26                                                                                                                                                                                                                                                    11
git_apply_corrupt_patch:error: corrupt patch at line 14                         


PATCH_FAIL rows: 0

=== PATCH_FAIL signature top ===


Series([], Name: count, dtype: int64)


=== formatter-used rows final outcomes ===


Unnamed: 0,task_id,stage,error_type,signature,format_used,format_ok,format_reason
0,astropy__astropy-12907,GEN,GEN_FAIL,invalid_diff_format,True,True,git_apply_corrupt_patch:error: corrupt patch a...
1,astropy__astropy-14182,GEN,GEN_FAIL,invalid_diff_format,True,True,git_apply_corrupt_patch:error: corrupt patch a...
2,astropy__astropy-14365,GEN,GEN_FAIL,invalid_diff_format,True,True,git_apply_corrupt_patch:error: corrupt patch a...
3,astropy__astropy-14995,GEN,GEN_FAIL,invalid_diff_format,True,True,git_apply_corrupt_patch:error: corrupt patch a...
4,astropy__astropy-6938,GEN,GEN_FAIL,invalid_diff_format,True,True,git_apply_corrupt_patch:error: corrupt patch a...
5,astropy__astropy-7746,GEN,GEN_FAIL,invalid_diff_format,True,True,git_apply_corrupt_patch:error: corrupt patch a...
6,django__django-10914,GEN,GEN_FAIL,invalid_diff_format,True,True,git_apply_corrupt_patch:error: corrupt patch a...
7,django__django-10924,GEN,GEN_FAIL,invalid_diff_format,True,True,git_apply_corrupt_patch:error: corrupt patch a...
8,django__django-11001,GEN,GEN_FAIL,invalid_diff_format,True,True,git_apply_corrupt_patch:error: corrupt patch a...
9,django__django-11019,GEN,GEN_FAIL,invalid_diff_format,True,True,git_apply_corrupt_patch:error: corrupt patch a...



corrupt_patch: 0 / 0
corrupt_patch with formatter_used: 0


### P0

In [8]:
# format_reason 정규화
s = df["format_reason"].fillna("").astype(str).str.strip()

# check-triggered bucket prefix 정의
PREFIXES = (
    "git_apply_corrupt_patch",
    "git_apply_hunk_failed",
    "git_apply_path_missing",
    "git_apply_other",
    "git_apply_check_failed",  # 혹시 남아있을 경우 대비
)

# "check-triggered" 판정: prefix로 시작하는지
mask_check = s.str.startswith(PREFIXES)

print("check-triggered count:")
print(int(mask_check.sum()))

print("\n[Breakdown] check-triggered buckets (prefix only):")
bucket = (
    s[mask_check]
    .str.split(":", n=1)   # "bucket:hint" -> "bucket"
    .str[0]
    .replace("", "EMPTY")
)
display(bucket.value_counts())

print("\n[Top-20] check-triggered hints (after bucket removed):")
hint = (
    s[mask_check]
    .str.split(":", n=1)
    .str[1]
    .fillna("")
    .replace("", "NO_HINT")
)
display(hint.value_counts().head(20))


check-triggered count:
186

[Breakdown] check-triggered buckets (prefix only):


format_reason
git_apply_corrupt_patch    175
git_apply_hunk_failed        9
git_apply_path_missing       2
Name: count, dtype: int64


[Top-20] check-triggered hints (after bucket removed):


format_reason
error: corrupt patch at line 13    42
error: corrupt patch at line 12    28
error: corrupt patch at line 26    11
error: corrupt patch at line 14    11
error: corrupt patch at line 20     8
error: corrupt patch at line 11     7
error: corrupt patch at line 15     7
error: corrupt patch at line 10     6
error: corrupt patch at line 19     6
error: corrupt patch at line 22     5
error: corrupt patch at line 21     5
error: corrupt patch at line 30     4
error: corrupt patch at line 9      4
error: corrupt patch at line 33     3
error: corrupt patch at line 25     3
error: corrupt patch at line 32     3
error: corrupt patch at line 38     2
error: corrupt patch at line 23     2
error: corrupt patch at line 36     2
error: corrupt patch at line 16     2
Name: count, dtype: int64