# geDIG Attention Update (Modern LLMs)

Goal: sample attention maps from newer LLMs (Llama 3, Phi-3) and check whether geDIG F keeps the same sign and delta relative to the random baseline.

Notes:
- Llama 3 requires an HF token and license acceptance.
- Phi-3 is open and should run without a token.


In [26]:
from google.colab import drive

# Mount Google Drive at /content/drive; the final cell copies result files
# into a folder under this mount point.
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [27]:
!git clone https://github.com/miyauchikazuyoshi/InsightSpike-AI.git
%cd InsightSpike-AI
!pip -q install -U pip
!pip -q install transformers datasets accelerate sentencepiece networkx scipy huggingface_hub


Cloning into 'InsightSpike-AI'...
remote: Enumerating objects: 4103, done.[K
remote: Counting objects: 100% (851/851), done.[K
remote: Compressing objects: 100% (678/678), done.[K
remote: Total 4103 (delta 242), reused 364 (delta 162), pack-reused 3252 (from 3)[K
Receiving objects: 100% (4103/4103), 142.61 MiB | 15.93 MiB/s, done.
Resolving deltas: 100% (1289/1289), done.
/content/InsightSpike-AI/InsightSpike-AI/InsightSpike-AI/InsightSpike-AI


In [28]:
from huggingface_hub import login
from google.colab import userdata

# Step 1: read the token from Colab Secrets. Only the lookup is guarded, so
# the "Could not access Colab Secrets" message is printed solely when the
# lookup itself fails.
try:
    hf_token = userdata.get('HF_TOKEN')
    secrets_available = True
except Exception:
    hf_token = None
    secrets_available = False
    print("Could not access Colab Secrets. Please log in interactively:")

# Step 2: try the stored token. A rejected token is reported as such instead
# of being mistaken for a Secrets problem.
logged_in = False
if hf_token:
    try:
        login(token=hf_token)
        logged_in = True
        print("Successfully logged in using Colab Secret 'HF_TOKEN'.")
    except Exception as exc:
        print(
            "Login with Colab Secret 'HF_TOKEN' failed "
            f"({type(exc).__name__}: {exc}). Please log in interactively:"
        )
elif secrets_available:
    print("Secret 'HF_TOKEN' not found. Please log in interactively:")

# Step 3: fall back to the interactive prompt exactly once; an error raised
# by the prompt itself is allowed to surface.
if not logged_in:
    login()

Successfully logged in using Colab Secret 'HF_TOKEN'.


In [29]:
from huggingface_hub import HfApi
from IPython.display import display, HTML

LLAMA3_ID = "meta-llama/Meta-Llama-3-8B"
PHI3_ID = "microsoft/Phi-3-mini-4k-instruct"

def show_status(ok: bool, title: str, detail: str = ""):
    """Render a coloured status box in the notebook output.

    Args:
        ok: True draws the box in green (#1b8a5a), False in red (#b02a37).
        title: Headline, rendered in bold.
        detail: Optional text rendered on a second line below the title.

    Both strings are HTML-escaped before being embedded. Callers pass raw
    exception text as ``detail``, and that text has to be shown literally
    rather than interpreted as markup.
    """
    import html  # local import keeps this cell self-contained

    color = "#1b8a5a" if ok else "#b02a37"
    msg = f"<b>{html.escape(str(title))}</b>"
    if detail:
        msg += f"<br/>{html.escape(str(detail))}"
    display(HTML(f"<div style='border:1px solid {color};padding:10px;border-radius:6px;color:{color};margin:8px 0;'>{msg}</div>"))

api = HfApi()
llama3_ok = False
try:
    # Confirms the repo id resolves for the current login.
    api.model_info(LLAMA3_ID)
    # Also fetch one small file so the probe exercises file access, which is
    # what loading the model needs.
    # NOTE(review): repo metadata of a gated model may be readable before the
    # licence is accepted, in which case model_info() alone would report a
    # false "OK" — confirm against the Hub's gating behaviour.
    api.hf_hub_download(repo_id=LLAMA3_ID, filename="config.json")
    show_status(True, "Llama 3 access OK", f"Model: {LLAMA3_ID}")
    llama3_ok = True
except Exception as exc:
    # Any failure (no licence, no token, network) leaves llama3_ok False so
    # later cells skip the Llama 3 run instead of failing midway.
    show_status(False, "Llama 3 access NOT available", f"{type(exc).__name__}: {exc}")
    print("If you need Llama 3, accept the license on HF and run login().")


In [30]:
# Run configuration. Each value is forwarded by run_model() to
# experiments/transformer_gedig/extract_and_score.py as a command-line flag.
DTYPE = "bfloat16"  # use "float16" for T4, "float32" for CPU
ATTN_IMPL = "eager"  # to ensure attentions are returned
TEXT_COUNT = 16  # forwarded as --text-count
LAYER_CAP = 4  # forwarded as --layer-cap
ATTN_MAX_LEN = 256  # forwarded as --attn-max-len


In [31]:
import json
import numpy as np
from pathlib import Path

# Registry of result files: run tag (e.g. "phi3") -> path of the JSON written
# for that run. Filled by run_model(); re-running this cell empties it.
outputs = {}


def run_model(model_id: str, tag: str):
    """Run extract_and_score.py for one model and register its result file.

    Args:
        model_id: Hugging Face model id passed to the script as ``--model``.
        tag: Short label used in the output file name and as the key in the
            module-level ``outputs`` registry.

    The script is launched as a child process with an argument list (no shell
    string is built), and its combined stdout/stderr is echoed line by line.
    The output path is added to ``outputs`` only when the script exits with
    status 0, so later cells never pick up a failed or stale run.
    """
    import shlex
    import subprocess
    import sys

    out_path = f"results/transformer_gedig/score_llm_{tag}.json"
    # Ensure the target directory exists before the script tries to write.
    Path(out_path).parent.mkdir(parents=True, exist_ok=True)

    argv = [
        # Same interpreter as the kernel, i.e. the one the packages were
        # installed into.
        sys.executable,
        "experiments/transformer_gedig/extract_and_score.py",
        "--model", model_id,
        "--text-count", str(TEXT_COUNT),
        "--layer-cap", str(LAYER_CAP),
        "--attn-max-len", str(ATTN_MAX_LEN),
        "--percentile", "0.90",
        "--out", out_path,
        "--device", "auto",
        "--dtype", DTYPE,
        "--device-map", "auto",
        "--trust-remote-code",
        "--attn-implementation", ATTN_IMPL,
    ]
    # Show a copy-pasteable form of the command for reproducibility.
    print(shlex.join(argv))

    # Merge stderr into stdout and relay it so progress is visible in the
    # notebook while the script runs.
    with subprocess.Popen(
        argv,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        bufsize=1,
    ) as proc:
        for line in proc.stdout:
            print(line, end="")

    # Leaving the `with` block waits for the child, so returncode is set here.
    if proc.returncode != 0:
        print(
            f"[{tag}] extract_and_score.py exited with status {proc.returncode}; "
            f"not registering {out_path}"
        )
        return
    outputs[tag] = out_path


# Phi-3 is scored unconditionally.
run_model(PHI3_ID, "phi3")

# Llama 3 is scored only when the earlier access check set llama3_ok.
if not llama3_ok:
    print("Skipping Llama 3 run (no access).")
else:
    run_model(LLAMA3_ID, "llama3")

python experiments/transformer_gedig/extract_and_score.py --model 'microsoft/Phi-3-mini-4k-instruct' --text-count 16 --layer-cap 4 --attn-max-len 256 --percentile 0.90 --out 'results/transformer_gedig/score_llm_phi3.json' --device auto --dtype bfloat16 --device-map auto --trust-remote-code --attn-implementation eager
`torch_dtype` is deprecated! Use `dtype` instead!
2025-12-28 11:44:14.411596: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1766922254.432037    9466 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1766922254.438795    9466 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1766922254.454302    9466 computation_placer.cc:177] com

In [32]:
def summarize(path):
    """Summarise geDIG F against the random baseline for one results file.

    Args:
        path: Path to a UTF-8 JSON file containing a list of row dicts.

    Returns:
        ``None`` when no usable row remains, otherwise a dict with:
        ``rows`` (rows used), ``rows_skipped`` (non-subgraph rows dropped
        because a value was missing or non-finite), ``F_real_mean``,
        ``F_rand_mean``, ``delta_mean``, ``delta_std`` (population standard
        deviation) and ``delta_positive_ratio``, where delta is
        ``F - baseline_F_random`` per row.

    Rows with a truthy ``"subgraph"`` entry are excluded. A row whose
    ``"baseline_F_random"`` is absent or null, or whose values are NaN or
    infinite, is dropped as well; otherwise one such row would turn every
    reported statistic into NaN.
    """
    data = json.loads(Path(path).read_text(encoding="utf-8"))
    rows = [r for r in data if not r.get("subgraph")]
    if not rows:
        return None

    # dtype=float maps a missing baseline (None) to NaN, which the mask below
    # then removes together with any NaN/inf already present in the file.
    f_real = np.array([r["F"] for r in rows], dtype=float)
    f_rand = np.array([r.get("baseline_F_random") for r in rows], dtype=float)
    usable = np.isfinite(f_real) & np.isfinite(f_rand)
    if not usable.any():
        return None

    f_real = f_real[usable]
    f_rand = f_rand[usable]
    delta = f_real - f_rand
    return {
        "rows": int(usable.sum()),
        "rows_skipped": int((~usable).sum()),
        "F_real_mean": float(f_real.mean()),
        "F_rand_mean": float(f_rand.mean()),
        "delta_mean": float(delta.mean()),
        "delta_std": float(delta.std()),
        "delta_positive_ratio": float((delta > 0).mean()),
    }

# Build one summary per registered run, keyed by its tag, then print the
# whole mapping as indented JSON.
summary = {}
for tag, path in outputs.items():
    summary[tag] = summarize(path)
print(json.dumps(summary, indent=2))


{
  "phi3": null,
  "llama3": null
}


In [33]:
import shutil

# Copy every registered result file into a folder on the mounted Drive.
save_dir = Path("/content/drive/MyDrive/insightspike/gedig_attention")
save_dir.mkdir(parents=True, exist_ok=True)

for tag, path in outputs.items():
    src = Path(path)
    if not src.exists():
        # Report the gap instead of skipping silently, so a run that never
        # produced its file is visible in the cell output.
        print(f"Missing, not saved ({tag}):", src)
        continue
    dst = save_dir / src.name
    shutil.copy2(src, dst)
    print("Saved:", dst)


Saved: /content/drive/MyDrive/insightspike/gedig_attention/score_llm_phi3.json
Saved: /content/drive/MyDrive/insightspike/gedig_attention/score_llm_llama3.json
