In [3]:
# env setup
!pip install pandas pyarrow tqdm regex seaborn matplotlib

import os
from pathlib import Path

# BASE_DIR = Path(".").resolve()
# DATA_DIR = BASE_DIR / "parquet"
# RESULTS_DIR = BASE_DIR / "results2"
# CONTEXT_DIR = RESULTS_DIR / "contexts2"
# LLM_DIR = RESULTS_DIR / "llm_results2"

# for d in [RESULTS_DIR, CONTEXT_DIR, LLM_DIR]:
#     d.mkdir(exist_ok = True)

# print("Directories ready:")
# print(BASE_DIR, DATA_DIR, RESULTS_DIR)


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [4]:
import pandas as pd
from pathlib import Path
import random

RESULTS_DIR = Path("results2")
CONTEXT_DIR = RESULTS_DIR / "contexts2"

# Fixed seed so a fresh "Restart & Run All" draws the same sample of PRs.
SAMPLE_SEED = 42
# sample 20 for a first test
SAMPLE_SIZE = 20

failed_prs = pd.read_csv(RESULTS_DIR / "failed_prs_refined.csv")

# Figure out which PR IDs have context files (PR_12345.txt -> 12345).
# Only names of the exact form PR_<digits>.txt are accepted, because the
# labelling loop later opens exactly f"PR_{pr_id}.txt"; anything else
# (e.g. PR_123_v2.txt) could never be read back and is ignored here.
available_ids = set()
for p in CONTEXT_DIR.glob("PR_*.txt"):
    id_part = p.stem[len("PR_"):]
    if id_part.isascii() and id_part.isdigit():
        available_ids.add(int(id_part))

failed_ids = set(failed_prs["id"].astype(int))

candidate_ids = sorted(failed_ids & available_ids)
print(f"Failed PRs with context files: {len(candidate_ids)}")

if len(candidate_ids) < SAMPLE_SIZE:
    # Fewer candidates than requested: just take all of them.
    sample_ids = candidate_ids
else:
    # A dedicated Random instance keeps the draw reproducible without
    # touching the global random state used elsewhere.
    sample_ids = random.Random(SAMPLE_SEED).sample(candidate_ids, SAMPLE_SIZE)

print("Sample PR IDs:", sample_ids)


Failed PRs with context files: 7657
Sample PR IDs: [3123127952, 3070955999, 3071006200, 3034597936, 3081057621, 3219603517, 3070937073, 3075266937, 3104618544, 3037965543, 2914387943, 3070146158, 3056288832, 2876487591, 2990048496, 2893661373, 3131654577, 3158731964, 2874576529, 3222537930]


In [5]:
# Prompt template sent to the model. The two placeholders {pr_id} and
# {pr_context} are filled in by build_prompt(); every other brace in the text
# (the JSON schema shown to the model) is literal and must stay as-is.
BASE_PROMPT = r"""
You are an assistant that classifies why GitHub pull requests failed.

A "failed" pull request in this task is one that was CLOSED WITHOUT being merged, or has remained OPEN and STALE for a long time.

Your goal is to read the pull request context and decide the SINGLE most likely primary reason why it did not get merged, according to the taxonomy below.

If there is genuinely not enough information in the context to infer a reason, you MUST use the UNKNOWN label (U1_UNKNOWN).
Do not guess when the evidence is missing or extremely weak.

TAXONOMY (reason_label → meaning):

- T1_CI_OR_TEST_FAILURE
  The PR was not merged because its tests, build, or CI checks failed, and those failures were never resolved.

- T2_CODE_QUALITY_OR_CORRECTNESS
  The PR was not merged because reviewers identified code-level problems such as logic bugs, unsafe changes, missing tests, poor design, or style violations.

- F1_REQUIREMENT_MISMATCH_OR_NOT_NEEDED
  The PR was not merged because the change did not match project requirements or direction (feature not desired, out of scope, wrong approach), even if technically possible.

- P1_LOW_PRIORITY_OR_STALE
  The PR was not merged because it was not prioritized or reviewed, became stale or stuck in the backlog, and eventually was closed mainly due to inactivity or lack of attention.

- P2_AUTHOR_UNRESPONSIVE_OR_WITHDREW
  The PR was not merged because the author stopped responding to feedback, did not address required changes, or explicitly abandoned or closed the PR.

- P3_REVIEW_OR_SOCIAL_CONFLICT
  The PR was not merged because of unresolved disagreement, unclear ownership, or other social/coordination issues in the review process, not primarily technical problems.

- P4_POLICY_OR_COMPLIANCE_ISSUE
  The PR was not merged because it violated repository or process requirements (e.g., wrong branch, missing CLA, missing required metadata or templates), not because of the technical content.

- R1_DUPLICATE_OR_ALREADY_FIXED
  The PR was not merged because another PR or commit already implemented the same change or fix, making this PR redundant.

- R2_OBSOLETE_DUE_TO_PROJECT_CHANGE
  The PR was not merged because the codebase or project direction changed so much that the PR became outdated or irrelevant (e.g., major refactor, feature removed).

- S1_INVALID_OR_SPAM
  The PR was not merged because it is not a legitimate contribution (spam, nonsense, accidental or meaningless changes).

- U1_UNKNOWN
  The PR context does not provide enough information to infer a plausible reason. Use this when there is no clear signal.

TASK:

1. Read the PR context carefully.
2. Decide the SINGLE most likely primary reason_label from the taxonomy above.
3. Write a short reason_summary (1–2 sentences) explaining why you chose that label.
4. Estimate a numeric confidence between 0 and 1 for how sure you are.

IMPORTANT:
- Choose exactly ONE reason_label.
- Do NOT invent details that are not supported by the context.
- If the signals are mixed, choose the reason that best explains why the PR ultimately failed.
- If there is not enough information, use U1_UNKNOWN.

OUTPUT FORMAT (JSON ONLY):

You MUST output exactly one JSON object with this schema:

{
  "pr_id": <integer>,
  "reason_label": "<one of T1_CI_OR_TEST_FAILURE, T2_CODE_QUALITY_OR_CORRECTNESS, F1_REQUIREMENT_MISMATCH_OR_NOT_NEEDED, P1_LOW_PRIORITY_OR_STALE, P2_AUTHOR_UNRESPONSIVE_OR_WITHDREW, P3_REVIEW_OR_SOCIAL_CONFLICT, P4_POLICY_OR_COMPLIANCE_ISSUE, R1_DUPLICATE_OR_ALREADY_FIXED, R2_OBSOLETE_DUE_TO_PROJECT_CHANGE, S1_INVALID_OR_SPAM, U1_UNKNOWN>",
  "reason_summary": "<1–2 sentence natural-language explanation>",
  "confidence": <float between 0 and 1>
}

- pr_id MUST match the pr_id given below.
- Do NOT include any extra keys.
- Do NOT include any text before or after the JSON.

Now read the PR context below and produce the JSON output.

PR_ID: {pr_id}

PR CONTEXT:
<<<
{pr_context}
>>>
"""

def build_prompt(pr_id: int, pr_context: str) -> str:
    """
    Insert the PR id and context into the base prompt.

    Args:
        pr_id: Identifier of the pull request; rendered with str().
        pr_context: Free-form text describing the pull request. It is
            inserted verbatim, so braces or placeholder-like text inside it
            are left untouched.

    Returns:
        The full prompt text with both placeholders substituted.
    """
    # str.format() cannot be used here: the template contains the literal JSON
    # schema braces, which format() would try to parse as replacement fields
    # and fail with KeyError. Plain replacement only touches the two markers.
    # The context is substituted last so that its content is never rescanned.
    prompt = BASE_PROMPT.replace("{pr_id}", str(pr_id))
    return prompt.replace("{pr_context}", pr_context)


In [None]:
!pip install --quiet openai
from pathlib import Path
import pandas as pd
import random
import json
import re

from openai import OpenAI


In [None]:
# IMPORTANT: the model name must match what LM Studio shows in its UI
# (for example "Llama-3.2-3B-Instruct-Q4_K_S" or similar).
# Copy that EXACT name from LM Studio and paste it below.
LMSTUDIO_MODEL_NAME = "llama-3.2-3b-instruct"  # <-- change if needed

# Connect to the LM Studio local server through the OpenAI client.
# LM Studio default endpoint: http://localhost:1234/v1
client = OpenAI(
    api_key="lm-studio",  # any non-empty string works
    base_url="http://localhost:1234/v1",
)


def run_llm(prompt: str, temperature: float = 0.2, max_tokens: int = 512) -> str:
    """
    Call your LM Studio model with the given prompt.

    Args:
        prompt: Full prompt text, sent as a single user message.
        temperature: Sampling temperature passed to the chat completion call.
        max_tokens: Upper bound on generated tokens passed to the call.

    Returns:
        The raw text output from the model. An empty string is returned when
        the response carries no choices or no text content, so callers can
        always treat the result as a string.
    """
    response = client.chat.completions.create(
        model=LMSTUDIO_MODEL_NAME,
        messages=[
            {"role": "user", "content": prompt}
        ],
        temperature=temperature,
        max_tokens=max_tokens,
    )
    # Guard against an empty choices list instead of failing with IndexError.
    if not response.choices:
        return ""
    # message.content is optional in the chat completion schema; normalise a
    # missing value to "" to honour the declared str return type.
    content = response.choices[0].message.content
    return content if content is not None else ""


In [None]:
import json
import re

def extract_json_obj(text: str) -> dict:
    """
    Extract the first JSON object { ... } from the text and parse it.

    The text is scanned for opening braces from left to right and the first
    position from which a complete JSON object can be decoded wins. Anything
    after that object (extra prose, code fences, further braces or objects)
    is ignored.

    Args:
        text: Raw model output that should contain a JSON object.

    Returns:
        The decoded JSON object as a dict.

    Raises:
        ValueError: If text is not a string, contains no "{", or no JSON
            object can be decoded from any opening brace.
    """
    if not isinstance(text, str):
        raise ValueError("No JSON object found in model output.")

    start = text.find("{")
    if start == -1:
        raise ValueError("No JSON object found in model output.")

    decoder = json.JSONDecoder()
    last_error = None
    while start != -1:
        try:
            # raw_decode parses one JSON value starting at `start` and stops at
            # its end, so trailing text cannot break the parse the way a
            # greedy "first { to last }" match would.
            obj, _end = decoder.raw_decode(text, start)
        except json.JSONDecodeError as exc:
            last_error = exc
            start = text.find("{", start + 1)
        else:
            return obj

    raise ValueError(
        f"Could not parse a JSON object from model output: {last_error}"
    ) from last_error


In [None]:
import tqdm

# Label every sampled PR with the LLM and collect one row per parsed answer.
rows = []

for pr_id in tqdm.tqdm(sample_ids):
    path = CONTEXT_DIR / f"PR_{pr_id}.txt"
    if not path.exists():
        print(f"Missing context for PR {pr_id}, skipping.")
        continue

    pr_context = path.read_text(encoding="utf-8")
    prompt = build_prompt(pr_id, pr_context)

    # One failed request (server not running, timeout, oversized context)
    # must not abort the run and throw away the rows collected so far.
    try:
        raw_output = run_llm(prompt) or ""
    except Exception as e:
        print(f"LLM call failed for PR {pr_id}: {e}")
        continue

    try:
        obj = extract_json_obj(raw_output)
    except Exception as e:
        print(f"Failed to parse JSON for PR {pr_id}: {e}")
        print("Raw output was:\n", raw_output[:500])
        continue

    # Always key the row by the id we asked about: the model's echoed pr_id
    # may be missing, malformed, or simply wrong, and it is not needed.
    obj["pr_id"] = int(pr_id)

    # coerce confidence defensively; anything unparseable becomes 0.0
    try:
        obj["confidence"] = float(obj.get("confidence", 0.0))
    except (TypeError, ValueError, OverflowError):
        obj["confidence"] = 0.0

    rows.append(obj)

labels_sample_df = pd.DataFrame(rows)
print("Got labels for", len(labels_sample_df), "PRs")

out_parquet = RESULTS_DIR / "failed_pr_labels_sample.parquet"
out_csv     = RESULTS_DIR / "failed_pr_labels_sample.csv"

labels_sample_df.to_parquet(out_parquet, index=False)
labels_sample_df.to_csv(out_csv, index=False)

labels_sample_df.head()
