<a href="https://colab.research.google.com/github/an-311/LLM_Pruning_strategy/blob/main/Grp_Proj.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

# Install or upgrade the packages this notebook relies on (the OpenAI and
# Anthropic client SDKs plus tqdm); -q keeps pip's own output quiet.
!pip install -q --upgrade openai anthropic tqdm


from google.colab import drive
import os, json, statistics

# Attach Google Drive under /content/drive so the dataset can be read from it.
print(" Mounting Google Drive...")
drive.mount('/content/drive')
print("✓ Drive mounted successfully!\n")


# Explain what is expected and show two example locations before prompting.
for hint in (
    "Please provide the path to your JSONL file in Google Drive.",
    "Example paths:",
    "  - /content/drive/MyDrive/response_raw.jsonl",
    "  - /content/drive/MyDrive/datasets/response_raw.jsonl\n",
):
    print(hint)

input_file = input("Enter the full path to your JSONL file: ").strip()


# Guard clause: abort right away when the given path does not exist.
if not os.path.exists(input_file):
    raise FileNotFoundError(f"File not found: {input_file}")

# Report the size in mebibytes as a quick sanity check for the user.
file_size_mb = os.path.getsize(input_file) / (1024 * 1024)
print(f"✓ Found file: {input_file} ({file_size_mb:.1f} MB)")


# Count the non-blank lines of the JSONL file (one record per line).
# The context manager closes the handle deterministically instead of leaving
# it to the garbage collector, and the explicit UTF-8 keeps the read
# independent of the machine's locale default.
with open(input_file, encoding="utf-8") as dataset:
    sample_count = sum(1 for line in dataset if line.strip())
print(f"\n Dataset contains {sample_count} samples")


from getpass import getpass

# Show the menu of judge back-ends.
for menu_line in (
    "\n Choose your LLM provider:",
    "1 = OpenAI (recommended: gpt-4o-mini)",
    "2 = Anthropic (claude-3-5-sonnet-20241022)",
    "3 = Mock (for testing without API costs)",
):
    print(menu_line)

provider_choice = input("Enter choice (1/2/3): ").strip()

# Hosted back-ends: menu choice -> (provider, model, env var, key prompt).
hosted_backends = {
    "1": ("openai", "gpt-4o-mini",
          "OPENAI_API_KEY", "Enter your OpenAI API key: "),
    "2": ("anthropic", "claude-3-5-sonnet-20241022",
          "ANTHROPIC_API_KEY", "Enter your Anthropic API key: "),
}

if provider_choice in hosted_backends:
    provider, model, key_env_var, key_prompt = hosted_backends[provider_choice]
    # getpass keeps the key from being echoed into the notebook output; the
    # key is then exported through the environment variable for this provider.
    api_key = getpass(key_prompt)
    os.environ[key_env_var] = api_key
elif provider_choice == "3":
    # The mock back-end needs no credentials.
    provider = model = "mock"
else:
    raise ValueError("Invalid choice")

print(f" Using provider: {provider}, model: {model}")

# Start from a clean checkout: delete any previous clone, clone the project
# repository again, and make it the working directory for the rest of the cell.
!rm -rf LLM_Pruning_strategy
!git clone https://github.com/an-311/LLM_Pruning_strategy/
%cd LLM_Pruning_strategy

# Make sure a `judges` directory exists and (re)write its __init__.py so the
# directory is importable as a package; "w" mode overwrites any existing file.
!mkdir -p judges
with open("judges/__init__.py", "w") as f: f.write("# package init\n")

# Source text of judges/openai_judge.py. It is kept as a raw string so the
# regex backslashes below reach the generated file unchanged.
judge_code = r'''
import os, json, math, re, time, random
from typing import Dict, Any
from openai import OpenAI
from openai import RateLimitError, APIStatusError

class OpenAIJudge:
    """
    Minimal judge that uses the OpenAI SDK Responses API.
    Exposes .score(question, answer) -> Dict[str, Any].
    """

    # Numeric criteria every returned payload is guaranteed to contain.
    _SCORE_KEYS = ("helpfulness", "factuality", "completeness", "adherence")

    def __init__(self, model: str):
        """Create a client for `model`.

        Raises RuntimeError when OPENAI_API_KEY is missing or empty.
        """
        self.model = model
        api_key = os.environ.get("OPENAI_API_KEY")
        if not api_key:
            raise RuntimeError("OPENAI_API_KEY not set")
        self.client = OpenAI(api_key=api_key)

    def _call_with_retries(self, prompt: str, max_retries: int = 6) -> str:
        """Send `prompt` and return the response text.

        Rate-limit errors and 5xx statuses are retried with jittered
        exponential back-off (capped at 30 s); any other API status error is
        re-raised. Returns "" when every attempt failed.
        """
        backoff = 1.0
        for attempt in range(max_retries):
            try:
                resp = self.client.responses.create(model=self.model, input=prompt)
                return getattr(resp, "output_text", "") or ""
            except RateLimitError:
                pass  # retryable: fall through to the shared back-off below
            except APIStatusError as e:
                if not 500 <= e.status_code < 600:
                    raise
            # Sleeping after the last attempt would only delay the "" result.
            if attempt < max_retries - 1:
                time.sleep(backoff + random.uniform(0, 0.5))
                backoff = min(backoff * 2, 30)
        return ""

    @staticmethod
    def _zero_scores(rationale: str) -> Dict[str, Any]:
        """Return an all-zero payload carrying `rationale`."""
        return {
            "helpfulness": 0, "factuality": 0, "completeness": 0, "adherence": 0,
            "rationale": rationale
        }

    def score(self, question: str, answer: str) -> Dict[str, Any]:
        """Ask the model to grade `answer` for `question`.

        Returns a dict with helpfulness, factuality, completeness and
        adherence (each clamped to 0..10) plus a rationale. Output that
        contains no JSON object, or an invalid one, yields zero scores.
        """
        prompt = f"""
You are an impartial LLM judge.
Score the candidate answer on 0–10 for:
- helpfulness
- factuality
- completeness
- adherence (to the user's ask)

Return ONLY a JSON object with keys:
helpfulness, factuality, completeness, adherence, rationale

# Question
{question}

# Candidate Answer
{answer}
"""
        text = self._call_with_retries(prompt)

        # Greedy match from the first "{" to the last "}" so any text the
        # model adds around the JSON object is ignored.
        m = re.search(r"\{.*\}", text, flags=re.S)
        if not m:
            return self._zero_scores("Judge returned unparsable output.")
        try:
            payload = json.loads(m.group(0))
        except (ValueError, RecursionError):
            payload = self._zero_scores("Judge returned invalid JSON.")
        for k in self._SCORE_KEYS:
            try:
                value = float(payload.get(k, 0))
            except (TypeError, ValueError, OverflowError):
                value = 0.0
            # NaN compares false against everything, so the clamp below would
            # turn it into the top score; treat it as "no score" instead.
            if math.isnan(value):
                value = 0.0
            payload[k] = max(0.0, min(10.0, value))
        if "rationale" not in payload:
            payload["rationale"] = "No rationale."
        return payload
'''
# The judge source contains a non-ASCII character (the en dash in "0–10").
# Python decodes source files as UTF-8 by default, so write the module as
# UTF-8 explicitly rather than relying on the platform's default encoding.
with open("judges/openai_judge.py", "w", encoding="utf-8") as f:
    f.write(judge_code)
print("Injected judges/OpenAIJudge")


!python Prune.py \
    --in "{input_file}" \
    --out_scored scored.jsonl \
    --out_top top1k.jsonl \
    --provider {provider} \
    --model {model} \
    --seed 42 \
    --max_concurrency 1

print("\n Processing complete!")


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.0 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m31.7 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/357.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m357.5/357.5 kB[0m [31m22.4 MB/s[0m eta [36m0:00:00[0m
[?25h📁 Mounting Google Drive...
Mounted at /content/drive
✓ Drive mounted successfully!

Please provide the path to your JSONL file in Google Drive.
Example paths:
  - /content/drive/MyDrive/response_raw.jsonl
  - /content/drive/MyDrive/datasets/response_raw.jsonl

Enter the full path to your JSONL file: /content/drive/MyDrive/response_raw.jsonl
✓ Found file: /content/drive/MyDrive/response_raw.jsonl (67.0 MB)

📊 Dataset contains 5000 samples

🔑 Choose your LLM provider:
1 = OpenAI (recommended: gpt-4o-mini)
2 = Anthropic (cl

KeyError: 'question'