# Block C3: Evaluation of generated architectures (Bootstrap)

In [None]:
# ============================================================
# BLOCK C3 (part 1) — .632 Bootstrap: 1 replicate + initial validation
# ------------------------------------------------------------
# Goal:
#   - Perform ONE stratified bootstrap replicate (≈63% train, ≈37% OOB).
#   - Mine catalogs (FP-Growth) ONLY on the bootstrap (two supports: 0.06 and 0.03).
#   - Generate |OOB| architectures using each catalog (configs '006' and '003').
#   - Validate SIZE ONLY (#services) between OOB and generated:
#       * KS test (D statistic and p-value)
#       * Wasserstein (Earth Mover’s distance)
# Requirements:
#   - df_all in memory with columns ['services','tipo_arquitectura'] (from A0).
#   - Function generate_architecture(seed, rules_df, frequent_itemsets, ...)
#     defined in A2. If not defined, a minimal version is activated.
#   - mlxtend installed (fpgrowth/association_rules).
# Output:
#   - Summary prints: set sizes, KS/Wasserstein per configuration.
# ============================================================

import numpy as np
import pandas as pd
import random, math
from collections import Counter
from sklearn.utils import resample
from scipy.stats import ks_2samp, wasserstein_distance
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth, association_rules

# ---------- 0) Basic utilities ----------

def _sizes_from_transactions(transactions):
    """Count the distinct services of every architecture (one int per transaction)."""
    # Duplicated services inside one transaction are collapsed by the set().
    return list(map(len, map(set, transactions)))

def stratified_bootstrap(df, label_col='tipo_arquitectura', random_state=123):
    """
    Draw one stratified bootstrap sample and its out-of-bag complement.

    Every group of `label_col` (missing labels form their own group) is
    resampled WITH replacement to its original size, so class proportions
    are preserved in the bootstrap sample.

    Parameters:
      df           : source DataFrame; rows are addressed by index label.
      label_col    : column used for stratification.
      random_state : seed for the RandomState shared by all strata.

    Returns:
      (df_boot, df_oob), both with a fresh 0..n-1 index. df_oob holds the rows
      of df whose index label was never drawn into df_boot.
    """
    rng = np.random.RandomState(random_state)

    # The same generator object is handed to every stratum, so strata draw
    # consecutive (not identical) random streams.
    boot_parts = [
        df.loc[resample(stratum.index, replace=True,
                        n_samples=len(stratum), random_state=rng)]
        for _, stratum in df.groupby(label_col, dropna=False)
    ]
    df_boot = pd.concat(boot_parts, axis=0)

    # Out-of-bag = original index labels that never appear in the bootstrap.
    oob_labels = sorted(set(df.index).difference(df_boot.index.unique()))
    df_oob = df.loc[oob_labels].copy()

    return df_boot.reset_index(drop=True), df_oob.reset_index(drop=True)

def mine_catalogs(transactions, supports=(0.06, 0.03), conf_min=0.60, lift_min=1.05):
    """
    One-hot encode the transactions and mine one catalog per support threshold.

    Parameters:
      transactions : list of service lists (one list per architecture).
      supports     : minimum supports to mine with FP-Growth.
      conf_min     : minimum confidence passed to association_rules.
      lift_min     : rules are kept only if lift > lift_min (strict).

    Returns a dict keyed by round(support, 2):
      catalogs[0.06] = (frequent_itemsets_df, rules_df)
      catalogs[0.03] = (frequent_itemsets_df, rules_df)
    rules_df is sorted by confidence, lift and support (all descending) and
    re-indexed from 0. Supports that round to the same 2-decimal key overwrite
    each other.
    """
    te = TransactionEncoder()
    basket = te.fit(transactions).transform(transactions)
    # Keep the encoding boolean: mlxtend warns about (and is slower on)
    # non-bool one-hot frames such as uint8.
    df_basket = pd.DataFrame(basket, columns=te.columns_).astype(bool)

    catalogs = {}
    for s in supports:
        fis = fpgrowth(df_basket, min_support=s, use_colnames=True)
        if fis.empty:
            # association_rules rejects an empty itemset table; return an empty
            # rule table carrying the columns this notebook reads instead.
            rules = pd.DataFrame(
                columns=['antecedents', 'consequents', 'support', 'confidence', 'lift']
            )
        else:
            rules = association_rules(fis, metric="confidence", min_threshold=conf_min)
            rules = rules[rules['lift'] > lift_min].copy()
            rules.sort_values(['confidence', 'lift', 'support'], ascending=False, inplace=True)
            rules.reset_index(drop=True, inplace=True)
        catalogs[round(s, 2)] = (fis, rules)
    return catalogs

def select_seeds(df_boot, k, mix_ratio=0.7, random_state=123):
    """
    Build exactly k generation seeds from a bootstrap sample.

    Seeds are produced in this order:
      - round(k * mix_ratio) single-service seeds, each drawn uniformly from the
        (up to) 10 services present in the most rows of df_boot;
      - the remaining k - round(k * mix_ratio) seeds, each made of 1-2 services
        sampled without replacement from a randomly chosen row of df_boot.

    'S3' is the fallback service when df_boot has no services at all, when a
    sampled row is empty, and for the row-based seeds of an empty df_boot, so
    callers that index seeds[0..k-1] never run short.

    Parameters:
      df_boot      : DataFrame with a 'services' column of service lists.
      k            : number of seeds to return.
      mix_ratio    : share of seeds taken from the top-10 services.
      random_state : seed of the private RandomState (reproducible output).

    Returns:
      list of k seeds, each a list of services.
    """
    rng = np.random.RandomState(random_state)
    tx_boot = df_boot['services'].tolist()

    # Prevalence = number of architectures containing the service.
    prevalence = Counter()
    for tx in tx_boot:
        prevalence.update(set(tx))
    top10 = [s for s, _ in prevalence.most_common(10)] or ['S3']

    n_top = int(round(k * mix_ratio))
    n_real = k - n_top

    # Unitary seeds taken from the most prevalent services.
    seeds = [[rng.choice(top10)] for _ in range(n_top)]

    if len(df_boot) == 0:
        # No rows to sample from: pad with the fallback service so that the
        # caller still receives k seeds.
        seeds.extend(['S3'] for _ in range(n_real))
        return seeds

    # Seeds taken from real bootstrap rows (1-2 random services per row).
    for i in rng.choice(len(df_boot), size=n_real, replace=True):
        services = tx_boot[i]
        if len(services) == 0:
            seeds.append(['S3'])
        else:
            picked = rng.choice(services, size=min(2, len(services)), replace=False)
            seeds.append(picked.tolist())
    return seeds

# ---------- 0-bis) Fallback: minimal generator (if A2 is not in memory) ----------
if 'generate_architecture' not in globals():
    def generate_architecture(seed, rules_df, frequent_itemsets,
                              max_size=10, n_candidates=40, random_state=None):
        """
        Fallback rule-based generator, used only when no generate_architecture
        is already defined in the notebook namespace.

        Starting from `seed`, performs up to n_candidates expansion steps (or
        stops once max_size services are reached). Each step adds one service:
          - if some rule's antecedents are all present, a rule is drawn with
            weight confidence * support * max(1e-6, log2(lift)) and one of its
            consequents is picked at random;
          - otherwise a random item from the frequent itemsets is picked.
        A pick that is already present leaves the architecture unchanged.

        NOTE: a non-None random_state re-seeds the global `random` module.

        Returns the services as a sorted list.
        """
        if random_state is not None:
            random.seed(random_state)

        services = set(seed)
        for _step in range(n_candidates):
            if len(services) >= max_size:
                break

            fires = rules_df['antecedents'].apply(lambda ants: ants.issubset(services))
            applicable = rules_df[fires]

            if applicable.empty:
                # No rule fires: fall back to any item seen in a frequent itemset.
                pool = [item for itemset in frequent_itemsets['itemsets'] for item in itemset]
                if not pool:
                    break
                new_service = random.choice(pool)
            else:
                lift_term = applicable['lift'].map(lambda l: max(1e-6, math.log2(l)))
                rule_weights = applicable['confidence'] * applicable['support'] * lift_term
                rule_id = random.choices(applicable.index, weights=rule_weights, k=1)[0]
                consequents = list(applicable.loc[rule_id, 'consequents'])
                new_service = random.choice(consequents)

            services.add(new_service)
        return sorted(services)

# ---------- 1) .632 Bootstrap — 1 replicate ----------

# Single stratified bootstrap replicate of df_all (fixed seed 2025).
df_boot, df_oob = stratified_bootstrap(df_all, label_col='tipo_arquitectura', random_state=2025)

# Report the sizes of the three sets and the class mix of the OOB set.
print("=== .632 Bootstrap — 1 replicate ===")
print("Total size:", len(df_all))
print("Bootstrap train (≈63% uniques + repeats):", len(df_boot))
print("OOB (unseen):", len(df_oob))
print("Class distribution in OOB:")
print(df_oob['tipo_arquitectura'].value_counts(dropna=False).to_string())

# ---------- 2) Mining ONLY on bootstrap ----------

# OOB rows are never passed to the miner.
tx_boot = df_boot['services'].tolist()
catalogs = mine_catalogs(tx_boot, supports=(0.06, 0.03), conf_min=0.60, lift_min=1.05)

# Per catalog: number of itemsets/rules and the first three rules
# (mine_catalogs returns them sorted by confidence, lift, support).
for s, (fis, rules) in catalogs.items():
    print(f"\nCatalog support={s:.2f}  -> itemsets={len(fis)} | rules={len(rules)}")
    print(rules[['antecedents','consequents','support','confidence','lift']].head(3))

# ---------- 3) Generation (two configurations) ----------

N_target = len(df_oob)                             # generate as many as OOB
# One seed list shared by both configurations; mix_ratio=0.5 overrides the
# select_seeds default of 0.7.
seeds = select_seeds(df_boot, k=N_target, mix_ratio=0.5, random_state=2025)

def generate_batch(config_key):
    """
    Generate one architecture per seed with the catalog of one configuration.

    config_key '006' selects catalogs[0.06]; any other value selects
    catalogs[0.03]. Reads the module-level `catalogs`, `seeds` and `N_target`.
    Architecture i is generated from seeds[i] with random_state 2025 + i;
    empty results are dropped, so the returned list may be shorter than
    N_target.
    """
    support_key = 0.06 if config_key == '006' else 0.03
    fis, rules = catalogs[support_key]

    attempts = (
        generate_architecture(seeds[i], rules, fis,
                              max_size=10, n_candidates=100, random_state=2025 + i)
        for i in range(N_target)
    )
    return [arch for arch in attempts if arch]

# Generate one batch per catalog; both batches start from the same seeds.
gen_006 = generate_batch('006')
gen_003 = generate_batch('003')

# Generated counts can be below N_target because generate_batch drops empty results.
print(f"\nGenerated (006): {len(gen_006)} | Generated (003): {len(gen_003)} | OOB: {N_target}")

# ---------- 4) Initial validation: size (#services) ----------

# Real (out-of-bag) architectures used as the reference distribution.
tx_oob = df_oob['services'].tolist()

def eval_size_metrics(real_tx, gen_tx, label):
    """
    Print size-distribution agreement between real and generated architectures.

    Sizes are the number of distinct services per architecture. Prints, under
    a header built from `label`: the two-sample KS statistic and p-value, the
    Wasserstein distance, and mean±std of both size samples. Returns None.
    """
    real_sizes = _sizes_from_transactions(real_tx)
    gen_sizes = _sizes_from_transactions(gen_tx)

    ks_result = ks_2samp(real_sizes, gen_sizes, alternative='two-sided', mode='auto')
    emd = wasserstein_distance(real_sizes, gen_sizes)

    real_mu, real_sd = np.mean(real_sizes), np.std(real_sizes)
    gen_mu, gen_sd = np.mean(gen_sizes), np.std(gen_sizes)

    print(f"\n— {label} —")
    print(f"  KS D = {ks_result.statistic:.4f}  |  p-value = {ks_result.pvalue:.4g}")
    print(f"  Wasserstein = {emd:.4f}")
    print(f"  Real (mean±std) = {real_mu:.2f}±{real_sd:.2f} | "
          f"Gen (mean±std) = {gen_mu:.2f}±{gen_sd:.2f}")
# Compare the OOB size distribution against each generated batch.
eval_size_metrics(tx_oob, gen_006, "Size comparison OOB vs Generated [support 0.06]")
eval_size_metrics(tx_oob, gen_003, "Size comparison OOB vs Generated [support 0.03]")

# (Optional) save seeds and first examples for paper traceability:
# pd.DataFrame({'seed': ['|'.join(s) for s in seeds],
#               'gen_006': ['|'.join(g) for g in gen_006[:10]],
#               'gen_003': ['|'.join(g) for g in gen_003[:10]]}).to_csv("trace_bootstrap_replica1.csv", index=False)


=== Bootstrap .632 — 1 réplica ===
Tamaño total: 396
Train bootstrap (≈63% únicas + repetidas): 396
OOB (no visto): 148
Distribución por clase en OOB:
tipo_arquitectura
None        107
Edge         35
HPC           4
Edge+HPC      2





Catálogo soporte=0.06  -> itemsets=117 | reglas=46
  antecedents consequents   support  confidence      lift
0    (Athena)        (S3)  0.093434         1.0  1.596774
1      (Glue)        (S3)  0.083333         1.0  1.596774
2  (RedShift)        (S3)  0.068182         1.0  1.596774

Catálogo soporte=0.03  -> itemsets=338 | reglas=176
  antecedents consequents   support  confidence      lift
0    (Athena)        (S3)  0.093434         1.0  1.596774
1      (Glue)        (S3)  0.083333         1.0  1.596774
2  (RedShift)        (S3)  0.068182         1.0  1.596774

Generadas (006): 148 | Generadas (003): 148 | OOB: 148

— Comparación de tamaños OOB vs Generadas [soporte 0.06] —
  KS D = 0.8311  |  p-value = 4.667e-52
  Wasserstein = 4.4189
  Real (mean±std) = 7.60±2.10 | Gen (mean±std) = 3.18±1.05

— Comparación de tamaños OOB vs Generadas [soporte 0.03] —
  KS D = 0.7838  |  p-value = 2.731e-45
  Wasserstein = 4.2905
  Real (mean±std) = 7.60±2.10 | Gen (mean±std) = 3.31±1.21


In [None]:
# ============================================================
# PATCH G1 — Size-Aware Generator (aligns synthetic sizes with real ones)
# ------------------------------------------------------------
# Idea:
#   1) Start with the architecture generated by rules (your current generate_architecture).
#   2) Define a target_size ~ real size distribution (#services) from the fold/bootstrap.
#   3) If the generated architecture is smaller, FILL it with plausible services:
#        - sampled with probability proportional to the observed support (frequency) in train
#        - without repeating already present services
#        - respecting an optional "forbidden" set
#        - with the option to require at least one Edge service if desired
#   4) Retry with a different seed if the result is still too small.
#
# Requirements:
#   - 'mine_catalogs' must return "fis" (frequent itemsets) and "rules".
#   - fis is a DataFrame with columns: ['support','itemsets'] (from mlxtend).
#   - df_train is the DF of the fold/bootstrap (to estimate real size distribution).
#   - Your generate_architecture(seed, rules, fis, ...) must already exist.
# ============================================================

import numpy as np
from collections import Counter

# --- (Optional) list of services considered "Edge"
# Service names treated as "Edge" by the require_edge option of the filler.
EDGE_SERVICES = {
    # Snow family
    "Snowball", "Snowcone", "Snowmobile", "SnowFamily",
    # Edge ML / monitoring
    "SageMakerNeo", "SageMakerEdgeManager", "Monitron", "Panorama",
    # Robotics, CDN, IoT
    "RoboMaker", "CloudFront", "Greengrass", "FreeRTOS", "IoTCore", "IoTSiteWise",
    # Local / hybrid infrastructure
    "AlexaForBusiness", "LocalZones", "Wavelength", "Outpost", "StorageGateway",
    # User / company edge endpoints
    "UserConsumerCamera", "UserCompanyEdge", "UserConsumerEdge",
    "UserConsumerIOT", "UserConsumerPOS", "UserCompanyDrone", "UserConsumerFarmer",
    "UserConsumerAlexaGoogleHome", "UserCompanyElementalLiveDevice", "UserConsumerTV",
    "LambdaAtEdge",
}

def _real_size_sampler(df_train, rng, clip=(5, 11)):
    """
    Sample a target architecture size from the sizes observed in df_train.

    Sizes (distinct services per row of df_train['services']) define a normal
    distribution with their mean and sample std (floored at 0.8). Up to 10
    draws are rounded to integers and the first one inside `clip` is returned.
    If none qualifies, the median size is returned, bounded to `clip` so the
    result always honours the requested range.

    Parameters:
      df_train : DataFrame with a 'services' column of service lists.
      rng      : numpy RandomState used for the draws.
      clip     : inclusive (low, high) bounds for the returned size.

    Returns:
      int in [clip[0], clip[1]].
    """
    sizes = [len(set(s)) for s in df_train["services"]]
    low, high = clip
    mu = float(np.mean(sizes))
    # The 0.8 floor keeps some spread even when all sizes are identical.
    sd = max(0.8, float(np.std(sizes, ddof=1)))

    # Rejection sampling: a "soft" truncated normal with a bounded number of tries.
    for _ in range(10):
        t = int(round(rng.normal(mu, sd)))
        if low <= t <= high:
            return t

    # Fallback: median size, kept inside the clip range like every accepted draw.
    return int(min(high, max(low, np.median(sizes))))

def _item_weights_from_fis(fis):
    """
    Map each service to a sampling weight taken from the frequent itemsets.

    If `fis` contains any single-item itemsets, the result holds only those
    services, weighted by their support. Otherwise every service found in any
    itemset is weighted by the largest support among the itemsets containing it.

    `fis` must expose 'support' and 'itemsets' columns. Returns a plain dict.
    """
    pairs = list(zip(fis["support"], fis["itemsets"]))

    singleton_weights = {
        next(iter(items)): sup for sup, items in pairs if len(items) == 1
    }
    if singleton_weights:
        return singleton_weights

    # No singletons available: aggregate the best support seen per service.
    best_support = {}
    for sup, items in pairs:
        for service in items:
            best_support[service] = max(best_support.get(service, 0), sup)
    return best_support

def _complete_with_items(current, weights, target_size, rng,
                         forbid=frozenset(), require_edge=False):
    """
    Complete 'current' up to target_size with services drawn by roulette
    selection proportional to `weights`.

    - Services already present or listed in `forbid` are never drawn.
    - The result keeps the order of `current` (duplicates removed) followed by
      the drawn services in draw order, so it is reproducible for a given rng
      regardless of string hash randomisation.
    - Drawn services are the original keys of `weights` (no numpy scalars).
    - If only zero-weight candidates remain, they are drawn uniformly.
    - If require_edge=True and no service is in EDGE_SERVICES, the
      highest-weight available Edge service replaces the lowest-weight
      non-Edge service (first one on ties).

    Parameters:
      current      : iterable of services already in the architecture.
      weights      : dict service -> non-negative weight.
      target_size  : desired number of distinct services.
      rng          : numpy RandomState used for the draws.
      forbid       : services that must not be added.
      require_edge : enforce at least one Edge service when possible.

    Returns:
      list of distinct services; shorter than target_size if candidates run out.
    """
    chosen = list(dict.fromkeys(current))   # ordered, de-duplicated
    taken = set(chosen)
    pool = [s for s in weights if s not in taken and s not in forbid]
    if not pool:
        return chosen

    # Probability vector proportional to weights (uniform if all are zero).
    w = np.array([weights[s] for s in pool], dtype=float)
    if w.sum() <= 0:
        w = np.ones_like(w)
    p = w / w.sum()

    # Draw without replacement until the target size is reached.
    while len(chosen) < target_size and pool:
        idx = int(rng.choice(len(pool), p=p))   # draw a position, not the label
        picked = pool.pop(idx)
        chosen.append(picked)
        taken.add(picked)
        p = np.delete(p, idx)
        if p.size:
            total = p.sum()
            # Renormalise; fall back to uniform when only zero weights remain.
            p = p / total if total > 0 else np.full(p.size, 1.0 / p.size)

    # Optional Edge guarantee: swap in the most plausible Edge service.
    if require_edge and not taken.intersection(EDGE_SERVICES):
        candidates = [s for s in weights
                      if s in EDGE_SERVICES and s not in taken and s not in forbid]
        if candidates:
            s_edge = max(candidates, key=lambda s: weights.get(s, 0))
            non_edge = [s for s in chosen if s not in EDGE_SERVICES]
            if non_edge:
                s_out = min(non_edge, key=lambda s: weights.get(s, 0))
                chosen[chosen.index(s_out)] = s_edge

    return chosen

def generate_architecture_size_aware(seed, rules, fis, df_train,
                                     max_size=10, n_candidates=100,
                                     require_edge=False, random_state=2025):
    """
    Rule-based generation followed by size-aware filling.

      1) Sample a target size from the sizes in df_train (clipped to [5, 11],
         then capped at max_size).
      2) Generate an architecture with 'generate_architecture'.
      3) Fill it up to the target size with services weighted by the frequent
         itemsets.
      4) If the filled result has fewer than 5 distinct services, generate once
         more with the SAME seed services but random_state + 13, and fill that
         second attempt instead (it replaces the first result).

    Returns the services as a list without duplicates, in the order produced
    by the filling step.
    """
    rng = np.random.RandomState(random_state)
    target_size = min(max_size, _real_size_sampler(df_train, rng, clip=(5, 11)))

    first_try = generate_architecture(seed, rules, fis,
                                      max_size=max_size, n_candidates=n_candidates,
                                      random_state=random_state)
    base = [] if first_try is None else first_try

    weights = _item_weights_from_fis(fis)
    filled = _complete_with_items(base, weights, target_size, rng,
                                  forbid=frozenset(), require_edge=require_edge)

    MIN_SIZE = 5
    if len(set(filled)) >= MIN_SIZE:
        return list(dict.fromkeys(filled))

    # Too short even after filling: one retry with a shifted random_state.
    retry_seed = list(seed) if isinstance(seed, (list, tuple)) else [seed]
    second_try = generate_architecture(retry_seed, rules, fis,
                                       max_size=max_size, n_candidates=n_candidates,
                                       random_state=random_state + 13)
    filled = _complete_with_items(second_try or [], weights, target_size, rng,
                                  forbid=frozenset(), require_edge=require_edge)
    return list(dict.fromkeys(filled))


In [None]:
# === Utilities used by workers ===
import numpy as np
from scipy.stats import entropy
from math import log

def _service_universe(transactions):
    """Return the sorted list of all distinct services found in the transactions."""
    return sorted(set().union(*transactions))

def _freq_vector(transactions, universe):
    """
    Relative prevalence of each universe service across the transactions.

    A service counts once per transaction containing it; services outside
    `universe` are ignored (OOV-safe). Counts are divided by their total
    (or by 1.0 when the total is below 1), in universe order.
    """
    known = set(universe)
    tally = {service: 0 for service in universe}
    for tx in transactions:
        for service in known.intersection(tx):
            tally[service] += 1

    v = np.array([tally[service] for service in universe], dtype=float)
    return v / max(1.0, v.sum())

def jensen_shannon(p, q, base=np.e):
    """
    Jensen-Shannon divergence between two non-negative vectors.

    Both inputs are floored at 1e-12 and renormalised to sum to 1 before the
    two KL terms against the midpoint distribution are averaged.
    """
    eps = 1e-12
    p = np.maximum(np.asarray(p, dtype=float), eps)
    q = np.maximum(np.asarray(q, dtype=float), eps)
    p = p / p.sum()
    q = q / q.sum()

    midpoint = 0.5 * (p + q)
    kl_p = entropy(p, midpoint, base=base)
    kl_q = entropy(q, midpoint, base=base)
    return 0.5 * kl_p + 0.5 * kl_q

def _cooccurrence_matrix(transactions, universe):
    """
    Symmetric co-occurrence frequencies of universe services.

    Entry (a, b) is the fraction of transactions containing both services;
    the diagonal holds each service's own prevalence. Services outside
    `universe` are ignored. The divisor is len(transactions), or 1.0 when the
    list is empty.
    """
    position = {service: i for i, service in enumerate(universe)}
    size = len(universe)
    M = np.zeros((size, size), dtype=float)

    for tx in transactions:
        # Distinct in-universe services of this transaction, as matrix positions.
        present = sorted({position[s] for s in tx if s in position})
        if not present:
            continue
        # Positions are unique, so this adds 1 to every (a, b) cell exactly once.
        M[np.ix_(present, present)] += 1.0

    return M / max(1.0, len(transactions))

def frobenius_norm(A, B):
    """Frobenius norm of A - B (square root of the sum of squared differences)."""
    diff = A - B
    return float(np.sqrt(np.sum(diff * diff)))

def _confidence_of_rule(rule, transactions):
    """
    Empirical confidence of rule = (antecedent, consequent) on the transactions.

    Returns (#transactions containing antecedent and consequent) divided by
    (#transactions containing the antecedent), or NaN when the antecedent is
    empty or never occurs.
    """
    antecedent, consequent = set(rule[0]), set(rule[1])
    if not antecedent:
        return np.nan

    covering = [items for items in map(set, transactions) if antecedent <= items]
    if not covering:
        return np.nan

    hits = sum(1 for items in covering if consequent <= items)
    return hits / len(covering)

def rules_delta_confidence(rules_df, tx_test, tx_gen, top_k=50):
    """
    Absolute confidence gap between test and generated data for the top rules.

    The top_k rules by lift are re-evaluated on tx_test and tx_gen; rules whose
    confidence is undefined on either side are skipped. Returns a dict with
    delta_mean, delta_median, delta_p95 and n_eval (number of rules compared);
    the three statistics are NaN and n_eval is 0 when nothing could be compared.
    """
    def _nothing_evaluated():
        return dict(delta_mean=np.nan, delta_median=np.nan, delta_p95=np.nan, n_eval=0)

    if len(rules_df) == 0:
        return _nothing_evaluated()

    top_rules = rules_df.sort_values('lift', ascending=False).head(top_k)

    gaps = []
    for ants, cons in zip(top_rules['antecedents'], top_rules['consequents']):
        rule = (tuple(sorted(ants)), tuple(sorted(cons)))
        conf_test = _confidence_of_rule(rule, tx_test)
        conf_gen = _confidence_of_rule(rule, tx_gen)
        if np.isfinite(conf_test) and np.isfinite(conf_gen):
            gaps.append(abs(conf_test - conf_gen))

    if not gaps:
        return _nothing_evaluated()

    gaps = np.asarray(gaps, dtype=float)
    return dict(delta_mean=float(gaps.mean()),
                delta_median=float(np.median(gaps)),
                delta_p95=float(np.percentile(gaps, 95)),
                n_eval=int(len(gaps)))

def jaccard(a, b):
    """Jaccard similarity of two service collections; two empty inputs give 1.0."""
    left, right = set(a), set(b)
    union = left | right
    if not union:
        return 1.0
    return len(left & right) / max(1.0, len(union))

def nn_jaccard_stats(tx_gen, tx_real):
    """
    Nearest-neighbour Jaccard similarity of generated vs real architectures.

    For each generated architecture the best Jaccard similarity against any
    real architecture is taken; mean, median and 95th percentile of those
    values are returned (all NaN when either input is empty).
    """
    if len(tx_gen) == 0 or len(tx_real) == 0:
        return dict(mean=np.nan, median=np.nan, p95=np.nan)

    nearest = np.asarray(
        [max((jaccard(g, r) for r in tx_real), default=0.0) for g in tx_gen],
        dtype=float,
    )
    return dict(mean=float(nearest.mean()),
                median=float(np.median(nearest)),
                p95=float(np.percentile(nearest, 95)))

def intra_jaccard_mean(tx):
    """Mean Jaccard similarity over all unordered pairs of architectures (NaN below 2)."""
    n = len(tx)
    if n < 2:
        return np.nan

    total = 0.0
    # Plain running sum, pairs visited in (i, j>i) order.
    for i in range(n - 1):
        left = tx[i]
        for j in range(i + 1, n):
            total += jaccard(left, tx[j])

    n_pairs = n * (n - 1) // 2
    return float(total / n_pairs)

def coverage_ratio(tx, universe):
    """
    Fraction of the universe that appears in at least one transaction.

    Only services belonging to `universe` are counted (OOV-safe, like the other
    distributional metrics), so the ratio stays within [0, 1] for a universe of
    distinct services. An empty universe yields 0.0.

    Parameters:
      tx       : iterable of service collections.
      universe : collection of reference services.
    """
    known = set(universe)
    seen = set()
    for t in tx:
        seen.update(s for s in t if s in known)
    # max(1.0, ...) guards against division by zero for an empty universe.
    return len(seen) / max(1.0, len(universe))

def normalized_entropy(transactions, universe):
    """
    Shannon entropy of service prevalence, normalised by log(len(universe)).

    A service counts once per transaction containing it; services outside
    `universe` are ignored. The result is 1.0 for a perfectly uniform
    prevalence and 0.0 when a single service carries all the mass.

    Returns:
      NaN when no universe service occurs at all,
      0.0 when the universe has fewer than two services (no uncertainty),
      otherwise entropy / log(len(universe)).
    """
    counts = dict.fromkeys(universe, 0)
    U = set(universe)
    for t in transactions:
        for s in set(t):
            if s in U:
                counts[s] += 1
    v = np.array([counts[s] for s in universe], dtype=float)
    if v.sum() == 0:
        return np.nan

    n_categories = len(universe)
    if n_categories < 2:
        # log(1) == 0: the maximum entropy is zero, so is the normalised value.
        return 0.0

    p = v / v.sum()
    h = entropy(p, base=np.e)
    # Divide by the true maximum entropy; flooring it at 1.0 would leave a
    # two-service universe (log 2 < 1) un-normalised.
    return float(h / log(n_categories))

def _sizes_from_transactions(transactions):
    """Return the number of distinct services of each transaction, in order."""
    sizes = []
    for tx in transactions:
        sizes.append(len(set(tx)))
    return sizes

def _agg_ci(values):
    """
    Mean and 2.5th/97.5th percentiles of `values`.

    Returns a dict with keys 'mean', 'p2_5', 'p97_5'; all three are NaN when
    `values` is empty.
    """
    arr = np.array(values, dtype=float)
    if arr.size == 0:
        return dict.fromkeys(("mean", "p2_5", "p97_5"), np.nan)

    lower = np.percentile(arr, 2.5)
    upper = np.percentile(arr, 97.5)
    return {"mean": float(arr.mean()), "p2_5": float(lower), "p97_5": float(upper)}


In [None]:
# ============================================================
# BLOCK B1 — Bootstrap .632 (B replicas, parallel)
# With full metrics (size, prevalence, co-occurrences,
# rules, local realism, diversity).
# ------------------------------------------------------------
# Prerequisites:
#   - df_all[['services','tipo_arquitectura']]
#   - Predefined functions: mine_catalogs, select_seeds,
#     generate_architecture_size_aware (PATCH G1),
#     plus utilities (_sizes_from_transactions, _freq_vector, etc.).
# ============================================================

import numpy as np
import pandas as pd
from joblib import Parallel, delayed
from scipy.stats import ks_2samp, wasserstein_distance

# ---------- Parameters ----------
B = 200                     # number of bootstrap replicas
N_JOBS = 20                 # parallel joblib workers
SUPPORTS = (0.06, 0.03)     # FP-Growth supports; run_bootstrap reads the catalogs by the literal keys 0.06 / 0.03
CONF_MIN  = 0.60            # minimum rule confidence
LIFT_MIN  = 1.05            # rules are kept only above this lift
MAX_SIZE  = 10              # maximum services per generated architecture
N_CANDS   = 100             # expansion attempts per generated architecture
SEED_BASE = 8000   # different from CV to ensure independence

# ============================================================
# Single bootstrap replica function
# ============================================================
def run_bootstrap(rep_idx):
    """
    Run one .632 bootstrap replica and score both catalog configurations.

      1) Stratified split of df_all by tipo_arquitectura -> df_boot, df_oob
      2) Catalog mining on df_boot only
      3) Size-aware generation of one architecture per OOB row
      4) Metrics of the generated set against the OOB set

    Replica-level randomness uses SEED_BASE + rep_idx; architecture i uses
    SEED_BASE + rep_idx*100000 + i. Both configurations share the same seeds.

    Returns a list of dicts, one per config ("006", "003"), or [] when the
    OOB set is empty.
    """
    rep_seed = SEED_BASE + rep_idx
    df_boot, df_oob = stratified_bootstrap(
        df_all, label_col="tipo_arquitectura", random_state=rep_seed
    )
    tx_test = df_oob["services"].tolist()
    n_test = len(tx_test)
    if n_test == 0:
        return []

    tx_boot = df_boot["services"].tolist()

    # Catalogs and metric universe come from the bootstrap sample only.
    catalogs = mine_catalogs(
        tx_boot, supports=SUPPORTS, conf_min=CONF_MIN, lift_min=LIFT_MIN
    )
    universe = _service_universe(tx_boot)
    seeds = select_seeds(df_boot, k=n_test, mix_ratio=0.5, random_state=rep_seed)

    # OOB-side quantities are identical for both configurations.
    real_sizes = _sizes_from_transactions(tx_test)
    real_mean = float(np.mean(real_sizes))
    p_real = _freq_vector(tx_test, universe)
    C_real = _cooccurrence_matrix(tx_test, universe)

    support_of = {"006": 0.06, "003": 0.03}
    rows_this_rep = []
    for cfg, support in support_of.items():
        fis, rules = catalogs[support]

        # Size-aware generation; empty architectures are discarded.
        attempts = (
            generate_architecture_size_aware(
                seeds[i], rules, fis, df_boot,
                max_size=MAX_SIZE, n_candidates=N_CANDS,
                require_edge=False,
                random_state=SEED_BASE + rep_idx*100000 + i
            )
            for i in range(n_test)
        )
        gen = [arch for arch in attempts if arch]

        # Sizes: KS and Wasserstein
        gen_sizes = _sizes_from_transactions(gen)
        ks = ks_2samp(real_sizes, gen_sizes, alternative="two-sided", mode="auto")
        wd = wasserstein_distance(real_sizes, gen_sizes)

        # Prevalence: Jensen-Shannon divergence
        jsd = float(jensen_shannon(p_real, _freq_vector(gen, universe), base=np.e))

        # Co-occurrences: Frobenius distance
        frob = frobenius_norm(C_real, _cooccurrence_matrix(gen, universe))

        # Rules: confidence gap on the top-50 rules by lift
        delta_rules = rules_delta_confidence(rules, tx_test, gen, top_k=50)

        # Local realism: nearest-neighbour Jaccard
        nn = nn_jaccard_stats(gen, tx_test)

        # Diversity
        intra = intra_jaccard_mean(gen)
        cov = coverage_ratio(gen, universe)
        Hn = normalized_entropy(gen, universe)

        rows_this_rep.append({
            "replica": rep_idx, "config": cfg,
            "ks_D": ks.statistic, "ks_p": ks.pvalue,
            "wasserstein": wd,
            "real_mean": real_mean,
            "gen_mean": float(np.mean(gen_sizes)),
            "jsd": jsd, "frob": frob,
            "rules_delta_mean": delta_rules["delta_mean"],
            "rules_delta_median": delta_rules["delta_median"],
            "rules_delta_p95": delta_rules["delta_p95"],
            "rules_n_eval": delta_rules["n_eval"],
            "nn_jacc_mean": nn["mean"],
            "nn_jacc_median": nn["median"],
            "nn_jacc_p95": nn["p95"],
            "intra_jacc_mean": intra,
            "coverage": cov,
            "entropy_norm": Hn,
            "n_test": n_test, "n_gen": len(gen),
        })
    return rows_this_rep

# ============================================================
# Run all replicas in parallel
# ============================================================
# One task per replica index; each task returns a list of metric rows.
results_nested = Parallel(n_jobs=N_JOBS, backend="loky", verbose=10)(
    delayed(run_bootstrap)(b) for b in range(B)
)

# flatten
rows = [row for sublist in results_nested for row in sublist]
# NOTE: this rebinds the global `df_boot` (a bootstrap sample in the
# single-replicate cell) to the per-replica metrics table.
df_boot = pd.DataFrame(rows)

# ============================================================
# Summary by configuration (mean + 95% CI)
# ============================================================
def summarize_cfg(df, cfg):
    """
    Summarise the replica metrics of one configuration.

    For every metric column, NaNs are dropped and the mean plus the 2.5th and
    97.5th percentiles across replicas are reported as '<col>_mean',
    '<col>_p2.5' and '<col>_p97.5'. 'N_reps' is the number of rows of `df`
    with config == cfg. Returns a single pandas Series.
    """
    sub = df[df["config"] == cfg]
    metric_cols = [
        "gen_mean", "jsd", "frob",
        "rules_delta_mean", "rules_delta_median", "rules_delta_p95",
        "nn_jacc_mean", "nn_jacc_median", "nn_jacc_p95",
        "intra_jacc_mean", "coverage", "entropy_norm",
    ]

    pieces = []
    for col in metric_cols:
        stats = _agg_ci(sub[col].dropna().values)
        pieces.append(pd.Series({
            f"{col}_mean": stats["mean"],
            f"{col}_p2.5": stats["p2_5"],
            f"{col}_p97.5": stats["p97_5"],
        }))
    pieces.append(pd.Series({"N_reps": len(sub)}))
    return pd.concat(pieces)

# One column per configuration; rows are the '<metric>_<stat>' labels from summarize_cfg.
summary = pd.concat({
    "support_0.06": summarize_cfg(df_boot, "006"),
    "support_0.03": summarize_cfg(df_boot, "003"),
}, axis=1)

print("=== Bootstrap .632 summary — B replicas ===")
print(summary.round(4).to_string())

# Save per-replica metrics and the rounded summary (always executed).
# NOTE: the file names hard-code "B200" and do not follow the value of B.
df_boot.to_csv("bootstrap_B200_full_metrics.csv", index=False)
summary.round(4).to_csv("bootstrap_B200_full_summary.csv")


[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done   1 tasks      | elapsed:  1.0min
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:  1.2min
[Parallel(n_jobs=20)]: Done  21 tasks      | elapsed:  2.4min
[Parallel(n_jobs=20)]: Done  32 tasks      | elapsed:  2.6min
[Parallel(n_jobs=20)]: Done  45 tasks      | elapsed:  4.0min
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:  4.2min
[Parallel(n_jobs=20)]: Done  73 tasks      | elapsed:  5.6min
[Parallel(n_jobs=20)]: Done  88 tasks      | elapsed:  7.0min
[Parallel(n_jobs=20)]: Done 105 tasks      | elapsed:  8.4min
[Parallel(n_jobs=20)]: Done 122 tasks      | elapsed:  9.8min
[Parallel(n_jobs=20)]: Done 141 tasks      | elapsed: 11.0min
[Parallel(n_jobs=20)]: Done 160 tasks      | elapsed: 11.9min
[Parallel(n_jobs=20)]: Done 182 out of 200 | elapsed: 13.5min remaining:  1.3min


=== Resumen Bootstrap .632 — B réplicas ===
                          support_0.06  support_0.03
gen_mean_mean                   7.6280        7.6380
gen_mean_p2.5                   7.3241        7.3494
gen_mean_p97.5                  7.9643        7.9654
jsd_mean                        0.0767        0.0601
jsd_p2.5                        0.0624        0.0484
jsd_p97.5                       0.0927        0.0727
frob_mean                       1.7485        1.5876
frob_p2.5                       1.4253        1.2629
frob_p97.5                      2.0532        1.8748
rules_delta_mean_mean           0.2281        0.2821
rules_delta_mean_p2.5           0.1453        0.1689
rules_delta_mean_p97.5          0.3071        0.4864
rules_delta_median_mean         0.2183        0.2448
rules_delta_median_p2.5         0.1161        0.1186
rules_delta_median_p97.5        0.3167        0.5000
rules_delta_p95_mean            0.4880        0.6849
rules_delta_p95_p2.5            0.3373        0.4351
ru

[Parallel(n_jobs=20)]: Done 200 out of 200 | elapsed: 14.0min finished


In [None]:
# 1) From existing labels
# 1) Edge subset selected from the existing labels (exact match on "Edge").
df_edge = df_all[df_all["tipo_arquitectura"] == "Edge"].copy()

# 2) (Alternative) If labels are not available, mark Edge by services:
# edge_services = [...]  # your list of Edge services used earlier
# df_edge = df_all[df_all["services"].apply(lambda ss: any(s in edge_services for s in ss))].copy()

print(len(df_edge))  # should return ~101


101


In [None]:
# ============================================================
# BLOCK B1-EDGE — Bootstrap .632 (B replicas, parallel)
# Subset: Edge architectures (df_edge)
# Metrics: size, prevalence (JSD), co-occurrence (Frobenius),
#          rules (Δ-confidence), local realism (NN-Jaccard),
#          diversity (intra-Jaccard, coverage, entropy).
# ------------------------------------------------------------
# Environment prerequisites:
#   - df_edge[['services']]  (101 Edge architectures)
#   - Utility functions already defined in the notebook:
#       mine_catalogs, select_seeds, generate_architecture_size_aware,
#       _sizes_from_transactions, _service_universe, _freq_vector,
#       jensen_shannon, _cooccurrence_matrix, frobenius_norm,
#       rules_delta_confidence, nn_jaccard_stats, intra_jaccard_mean,
#       coverage_ratio, normalized_entropy, _agg_ci
# ============================================================

import numpy as np
import pandas as pd
from joblib import Parallel, delayed
from scipy.stats import ks_2samp, wasserstein_distance

# ---------- Parameters ----------
B = 200                  # number of bootstrap replicas
N_JOBS   = 20            # parallel joblib workers
SUPPORTS = (0.06, 0.03)  # FP-Growth minimum supports
CONF_MIN = 0.60          # minimum rule confidence
LIFT_MIN = 1.05          # rules are kept only above this lift
MAX_SIZE = 10            # maximum services per generated architecture
N_CANDS  = 100           # expansion attempts per generated architecture
SEED_BASE = 9100         # different from other experiments
REQUIRE_EDGE = True      # enforce at least one edge service in generated architectures

# ---------- Bootstrap .632 on a single stratum (Edge) ----------
def bootstrap_split_edge(df, random_state=None):
    """
    Plain (non-stratified) bootstrap split of `df`.

    Returns (df_boot, df_oob):
      - df_boot: len(df) rows drawn by position with replacement, in draw order
      - df_oob : the rows whose position was never drawn, in original order
    Both are copies and keep the original index labels.
    """
    rng = np.random.RandomState(random_state)
    n_rows = len(df)
    drawn = rng.randint(0, n_rows, size=n_rows)      # positions, with replacement

    # A row is in-bag if its position was drawn at least once.
    in_bag = np.isin(np.arange(n_rows), drawn)
    return df.iloc[drawn].copy(), df.iloc[~in_bag].copy()

# ---------- One bootstrap replica (Edge) ----------
def run_bootstrap_edge(rep_idx):
    """
    Run one .632 bootstrap replica on df_edge and score both mined catalogs.

    Steps:
      1) Perform .632 bootstrap on df_edge -> df_boot, df_oob
      2) Mine frequent itemsets in df_boot (supports 0.06 and 0.03)
      3) Generate |OOB| architectures using the size-aware generator
      4) Compute metrics and return one row per config ("006" and "003")

    Parameters:
      - rep_idx: replica index; combined with SEED_BASE to seed the split,
                 the seed selection and every generator call.

    Returns:
      - list of metric dicts, one per config. The list is empty when the
        replica has no out-of-bag rows. When the generator yields no
        architecture for a config, that row carries NaN metrics and
        n_gen == 0 instead of raising inside the distance computations
        (which would abort the whole parallel run).
    """
    df_boot, df_oob = bootstrap_split_edge(df_edge, random_state=SEED_BASE + rep_idx)
    tx_test = df_oob["services"].tolist()
    n_test  = len(tx_test)
    if n_test == 0:
        return []

    # Frequent itemset mining (Edge) on the bootstrap sample
    catalogs = mine_catalogs(
        df_boot["services"].tolist(),
        supports=SUPPORTS, conf_min=CONF_MIN, lift_min=LIFT_MIN
    )

    # Service universe for JSD / co-occurrence metrics
    universe = _service_universe(df_boot["services"].tolist())

    # Seeds (50% real + 50% hybrid), drawn from df_boot
    seeds = select_seeds(df_boot, k=n_test, mix_ratio=0.5,
                         random_state=SEED_BASE + rep_idx)

    # OOB sizes do not depend on the config, so compute them once.
    real_sizes = _sizes_from_transactions(tx_test)
    nan = float("nan")

    rows_this_rep = []
    for cfg in ("006", "003"):
        fis, rules = catalogs[0.06] if cfg == "006" else catalogs[0.03]

        # ---- Size-aware generation for |OOB|
        # The per-call seed is the same for both configs, so they are
        # generated from identical seeds and RNG states.
        gen = []
        for i in range(n_test):
            arch = generate_architecture_size_aware(
                seeds[i], rules, fis, df_boot,
                max_size=MAX_SIZE, n_candidates=N_CANDS,
                require_edge=REQUIRE_EDGE,                  # <- key for Edge synthesis
                random_state=SEED_BASE + rep_idx*100000 + i
            )
            if arch:
                gen.append(arch)

        # ====== METRICS ======
        if gen:
            # Size comparison (additional KS/Wasserstein)
            gen_sizes = _sizes_from_transactions(gen)
            ks = ks_2samp(real_sizes, gen_sizes, alternative="two-sided", mode="auto")
            ks_D, ks_p = ks.statistic, ks.pvalue
            wd = wasserstein_distance(real_sizes, gen_sizes)
            gen_mean = float(np.mean(gen_sizes))

            # Prevalence — JSD
            p_real = _freq_vector(tx_test, universe)
            p_gen  = _freq_vector(gen,      universe)
            jsd = float(jensen_shannon(p_real, p_gen, base=np.e))

            # Co-occurrence — Frobenius norm
            C_real = _cooccurrence_matrix(tx_test, universe)
            C_gen  = _cooccurrence_matrix(gen,      universe)
            frob = frobenius_norm(C_real, C_gen)

            # Rules — Δ-confidence (evaluates top-K mined rules)
            delta_rules = rules_delta_confidence(rules, tx_test, gen, top_k=50)

            # Local realism — NN-Jaccard
            nn = nn_jaccard_stats(gen, tx_test)

            # Diversity
            intra = intra_jaccard_mean(gen)
            cov   = coverage_ratio(gen, universe)
            Hn    = normalized_entropy(gen, universe)
        else:
            # Nothing was generated: the two-sample distances are undefined
            # on an empty sample, so mark every metric as missing. The
            # summary step drops NaN values before aggregating.
            ks_D = ks_p = wd = gen_mean = jsd = frob = nan
            delta_rules = {"delta_mean": nan, "delta_median": nan,
                           "delta_p95": nan, "n_eval": 0}
            nn = {"mean": nan, "median": nan, "p95": nan}
            intra = cov = Hn = nan

        # Key order is kept stable because it defines the CSV column order.
        rows_this_rep.append({
            "replica": rep_idx, "config": cfg,
            "ks_D": ks_D, "ks_p": ks_p, "wasserstein": wd,
            "real_mean": float(np.mean(real_sizes)),
            "gen_mean":  gen_mean,
            "jsd": jsd, "frob": frob,
            "rules_delta_mean": delta_rules["delta_mean"],
            "rules_delta_median": delta_rules["delta_median"],
            "rules_delta_p95": delta_rules["delta_p95"],
            "rules_n_eval": delta_rules["n_eval"],
            "nn_jacc_mean": nn["mean"],
            "nn_jacc_median": nn["median"],
            "nn_jacc_p95": nn["p95"],
            "intra_jacc_mean": intra,
            "coverage": cov,
            "entropy_norm": Hn,
            "n_test": n_test, "n_gen": len(gen),
        })
    return rows_this_rep

# ---------- Run in parallel ----------
# Fan the B replica indices out to the loky workers; each call returns the
# list of metric rows produced by that replica.
results_nested = Parallel(n_jobs=N_JOBS, backend="loky", verbose=10)(
    map(delayed(run_bootstrap_edge), range(B))
)

# Concatenate the per-replica lists into one flat list of row dicts and
# build the final metrics DataFrame (one row per replica/config).
rows = [metrics for replica_rows in results_nested for metrics in replica_rows]
df_boot_edge = pd.DataFrame(rows)

# ---------- Summary (mean + 95% CI) per configuration ----------
def summarize_cfg(df, cfg):
    """
    Aggregate the per-replica metrics of one configuration.

    Parameters:
      - df  : per-replica metrics table with a "config" column.
      - cfg : configuration label to keep (e.g. "006" or "003").

    Returns a Series holding, for every summarized metric, the entries
    "<metric>_mean", "<metric>_p2.5" and "<metric>_p97.5" taken from
    _agg_ci, followed by "N_reps" (number of rows for this config).
    """
    metric_cols = (
        "gen_mean", "jsd", "frob",
        "rules_delta_mean", "rules_delta_median", "rules_delta_p95",
        "nn_jacc_mean", "nn_jacc_median", "nn_jacc_p95",
        "intra_jacc_mean", "coverage", "entropy_norm",
    )
    cfg_rows = df.loc[df["config"] == cfg]

    pieces = []
    for metric in metric_cols:
        # Missing values are dropped before aggregation.
        stats = _agg_ci(cfg_rows[metric].dropna().values)
        pieces.append(pd.Series({
            f"{metric}_mean":  stats["mean"],
            f"{metric}_p2.5":  stats["p2_5"],
            f"{metric}_p97.5": stats["p97_5"],
        }))

    pieces.append(pd.Series({"N_reps": len(cfg_rows)}))
    return pd.concat(pieces)

# Summary table: one column per support configuration.
summary_edge = pd.concat({
    "support_0.06": summarize_cfg(df_boot_edge, "006"),
    "support_0.03": summarize_cfg(df_boot_edge, "003"),
}, axis=1)

print("=== Bootstrap .632 (Edge) — B replicas summary ===")
print(summary_edge.round(4).to_string())

# Save results (for paper appendices). The file names embed the actual
# replica count so that changing B cannot leave a stale "B200" label on
# freshly computed results.
metrics_csv = f"edge_bootstrap_B{B}_full_metrics.csv"
summary_csv = f"edge_bootstrap_B{B}_full_summary.csv"
df_boot_edge.to_csv(metrics_csv, index=False)
summary_edge.round(4).to_csv(summary_csv)
print(f"Files saved:\n- {metrics_csv}\n- {summary_csv}")


[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done   1 tasks      | elapsed:   11.9s
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:  1.0min
[Parallel(n_jobs=20)]: Done  21 tasks      | elapsed:  2.5min
[Parallel(n_jobs=20)]: Done  32 tasks      | elapsed:  3.9min
[Parallel(n_jobs=20)]: Done  45 tasks      | elapsed:  5.0min
[Parallel(n_jobs=20)]: Done  58 tasks      | elapsed:  6.3min
[Parallel(n_jobs=20)]: Done  73 tasks      | elapsed:  7.5min
[Parallel(n_jobs=20)]: Done  88 tasks      | elapsed:  9.6min
[Parallel(n_jobs=20)]: Done 105 tasks      | elapsed: 13.4min
[Parallel(n_jobs=20)]: Done 122 tasks      | elapsed: 16.7min
[Parallel(n_jobs=20)]: Done 141 tasks      | elapsed: 19.5min
[Parallel(n_jobs=20)]: Done 160 tasks      | elapsed: 22.1min
[Parallel(n_jobs=20)]: Done 182 out of 200 | elapsed: 24.5min remaining:  2.4min


=== Bootstrap .632 (Edge) — B replicas summary ===
                          support_0.06  support_0.03
gen_mean_mean                   8.3901        8.8878
gen_mean_p2.5                   7.7812        8.1626
gen_mean_p97.5                  9.1319        9.5386
jsd_mean                        0.1121        0.1131
jsd_p2.5                        0.0802        0.0802
jsd_p97.5                       0.1552        0.1689
frob_mean                       3.0163        3.1117
frob_p2.5                       2.1525        2.2508
frob_p97.5                      4.8103        4.3770
rules_delta_mean_mean           0.4931        0.5841
rules_delta_mean_p2.5           0.2433        0.0000
rules_delta_mean_p97.5          0.9122        1.0000
rules_delta_median_mean         0.4835        0.5907
rules_delta_median_p2.5         0.1667        0.0000
rules_delta_median_p97.5        1.0000        1.0000
rules_delta_p95_mean            0.8882        0.8001
rules_delta_p95_p2.5            0.5708        0.

[Parallel(n_jobs=20)]: Done 200 out of 200 | elapsed: 55.3min finished
