In [None]:
# Credentials handling: prefer environment variables or secret managers.
# This cell will NOT prompt interactively. If `PROVIDER_API_KEY` is not set,
# the notebook proceeds without it (safer for reproducibility).

import os

# Look the provider key up in the environment only; its value is never echoed.
api_key = os.environ.get('PROVIDER_API_KEY')

# Report which path was taken. A missing (or empty) key is not an error: the
# notebook stays non-interactive and simply carries on without one.
print(
    'PROVIDER_API_KEY found in environment — using it for this session.'
    if api_key
    else 'No PROVIDER_API_KEY found. Skipping interactive prompt and proceeding without API key.'
)

# Note: to provide a key locally, either export PROVIDER_API_KEY before launching
# the notebook, or use a .env file with python-dotenv (ensure .env is in .gitignore).

## Batch scoring performante

Questo esempio mostra come processare in batch una lista di URL in modo efficiente usando `joblib` per l'esecuzione parallela con fallback sequenziale.
Usare la funzione `compute_url_risk` del modulo `ai_threat_forensics_toolkit.core.url_risk_scorer` garantisce che la logica euristica rimanga spiegabile e riutilizzabile.


In [None]:
# Esempio: batch scoring performante (usa joblib se disponibile)
from ai_threat_forensics_toolkit.core.url_risk_scorer import compute_url_risk
import pathlib
import pandas as pd

# Load the URLs to score: one per line from the data file when it exists,
# otherwise fall back to a short hard-coded sample list.
data_file = pathlib.Path('data/example_urls.txt')
if not data_file.exists():
    urls = [
        'http://login-paypal-security-check.example.com/update',
        'https://www.google.com',
        'http://secure-account-verification-update-login.xyz/reset',
    ]
else:
    # Strip surrounding whitespace from every line and drop the blank ones.
    urls = [
        entry
        for entry in map(str.strip, data_file.read_text(encoding='utf-8').splitlines())
        if entry
    ]

def score_url(u: str) -> dict:
    """Score a single URL and return the outcome as a plain dict.

    Calls ``compute_url_risk`` on ``u`` and copies the ``url``, ``score`` and
    ``verdict`` attributes of its result into a dict under those same keys.
    """
    risk = compute_url_risk(u)
    return {field: getattr(risk, field) for field in ('url', 'score', 'verdict')}

# Score every URL: in parallel when joblib is installed, sequentially otherwise.
# The optional import is handled separately from the parallel run, so a missing
# joblib and a runtime failure are not conflated under one broad `except`.
results = None
try:
    from joblib import Parallel, delayed
except ImportError:
    # joblib is an optional dependency; the sequential path below covers this.
    pass
else:
    try:
        results = Parallel(n_jobs=-1)(delayed(score_url)(u) for u in urls)
    except Exception as exc:
        # Keep the best-effort fallback, but say why the parallel run was
        # abandoned instead of swallowing the error silently. A genuine scoring
        # error will surface again (uncaught) in the sequential pass.
        print(f'Parallel scoring failed ({exc!r}); falling back to sequential scoring.')

if results is None:
    results = [score_url(u) for u in urls]

# Convert the results to a DataFrame for quick analysis, highest score first.
# Columns are named explicitly so that an empty `results` list still yields a
# frame with a 'score' column; otherwise sort_values('score') raises KeyError.
# The sort is chained rather than done in place, so re-running the cell always
# rebuilds `df` from `results`.
df = pd.DataFrame(results, columns=['url', 'score', 'verdict']).sort_values(
    'score', ascending=False
)
df.head(20)
