In [None]:
!pip install -r requirements.txt

In [None]:
import json
import random

from datasets import load_dataset
from tqdm import tqdm

from models.model_wrappers import LocalLLM
from evaluation.metrics import is_memorized

In [None]:
DATASET_SIZE = 2000  # small enough to run comfortably on a laptop

dataset = load_dataset(
    "code_search_net",
    "python",
    split="train"
)

# Cap the request at the split length so an oversized DATASET_SIZE cannot make
# select() index past the end of the dataset.
sample_count = min(DATASET_SIZE, len(dataset))
dataset = dataset.shuffle(seed=42).select(range(sample_count))

# The Hub `code_search_net` loader names its columns `func_documentation_string`
# and `func_code_string`; plain "docstring" / "code" keys raise KeyError there.
# Fall back to the short names for variants of the dataset that do use them.
available_columns = set(dataset.column_names)
DOC_FIELD = (
    "func_documentation_string"
    if "func_documentation_string" in available_columns
    else "docstring"
)
CODE_FIELD = (
    "func_code_string"
    if "func_code_string" in available_columns
    else "code"
)

# Normalise every row to the three keys the rest of the notebook reads.
data = [{
    "function_name": x["func_name"],
    "docstring": x[DOC_FIELD],
    "code": x[CODE_FIELD]
} for x in dataset]

print(f"Loaded {len(data)} samples")

In [None]:
# Read the prompt template that is later filled in per function.
# The encoding is pinned so the template decodes identically on every platform
# instead of depending on the OS locale default.
# NOTE(review): assumes the template file is UTF-8 -- confirm.
with open("prompts/function_prompt.txt", encoding="utf-8") as f:
    FUNCTION_PROMPT = f.read()

In [None]:
# Instantiate the project's LocalLLM wrapper with the TinyLlama 1.1B chat model identifier.
model = LocalLLM("TinyLlama/TinyLlama-1.1B-Chat-v1.0")

In [None]:
# Both accumulators are reset here, so re-running this cell starts from scratch.
results = []
memorized = 0

for sample in tqdm(data):
    name = sample["function_name"]

    # Substitute the function name into the prompt template and query the model.
    output = model.generate(FUNCTION_PROMPT.replace("{FUNCTION_NAME}", name))

    # Compare the generation against the reference docstring of the sample.
    hit = is_memorized(pred=output, gold=sample["docstring"])

    results.append({"function_name": name, "memorized": hit})

    # Count one per truthy hit, exactly as an `if hit:` increment would.
    memorized += 1 if hit else 0

In [None]:
# Fraction of evaluated samples flagged as memorized.
# The denominator is the number of samples actually scored (len(results)), so
# the ratio stays correct if the loop was interrupted or `data` was reloaded
# afterwards; an empty run reports 0.0 instead of raising ZeroDivisionError.
evaluated = len(results)
coverage = memorized / evaluated if evaluated else 0.0
print(f"Memorization Coverage: {coverage:.4f}")

In [None]:
# Persist the per-function outcomes as 2-space-indented JSON.
with open("results.json", "w") as out_file:
    out_file.write(json.dumps(results, indent=2))

print("Results saved to results.json")

In [None]:
# Coin-flip baseline: every sample is marked "memorized" with probability 0.5.
# A dedicated, seeded generator makes the printed number reproducible across
# Restart & Run All and leaves the global `random` state untouched.
BASELINE_SEED = 42
baseline_rng = random.Random(BASELINE_SEED)

n_baseline = len(data)
random_hits = sum(baseline_rng.choice([True, False]) for _ in range(n_baseline))

# Guard the empty case so a zero-sized run does not raise ZeroDivisionError.
random_coverage = random_hits / n_baseline if n_baseline else 0.0
print("Random baseline coverage:", random_coverage)