In [1]:
# === Inspect 6 / 7 / 8 and build Top-20 tables with % and Cumulative % (fixed) ===
import pandas as pd, re, os
from pathlib import Path

BASE = "/content/drive/MyDrive/DataInBrief-2025"
PATH_6 = f"{BASE}/6-High-Capacity-Set.txt"
PATH_7 = f"{BASE}/7-High-Capacity-Set-Freq.txt"
PATH_8 = f"{BASE}/8-High-Capaity-Freq2.txt"   # keep spelling as in your Drive

OUTDIR = Path(BASE) / "outputs"
OUTDIR.mkdir(parents=True, exist_ok=True)

RID_RE = re.compile(r"([A-Za-z0-9\-\~\+\/]{43}=|[A-Za-z0-9\-\~\+\/]{44})")

def sniff(path, n=5):
    print(f"\n--- Sniff: {path} ---")
    if not os.path.exists(path):
        print("  (missing)")
        return
    with open(path, "r", errors="ignore") as f:
        for i, ln in enumerate(f):
            if i >= n: break
            print(f"{i+1:02d}: {ln.rstrip()}")

def load_hicap_set(path):
    """Parse 6-High-Capacity-Set.txt to extract router IDs."""
    ids = []
    if not os.path.exists(path):
        print(f"[6] missing: {path}")
        return pd.DataFrame(columns=["node_id"])
    with open(path, "r", errors="ignore") as f:
        for ln in f:
            m = RID_RE.search(ln)
            if m: ids.append(m.group(0))
    sr = pd.Series(ids, name="node_id").dropna().astype(str).str.strip()
    uniq = sr.drop_duplicates()
    print(f"[6] Extracted IDs: {len(sr)} | Unique: {len(uniq)}")
    return uniq.to_frame()

def load_freq(path):
    """Read whitespace-delimited (node_id, frequency)."""
    if not os.path.exists(path):
        print(f"[freq] missing: {path}")
        return pd.DataFrame(columns=["node_id","frequency"])
    df = pd.read_csv(path, sep=r"\s+", header=None, names=["node_id","frequency"], engine="python")
    df["frequency"] = pd.to_numeric(df["frequency"], errors="coerce")
    df = df.dropna(subset=["node_id","frequency"]).copy()
    df["node_id"] = df["node_id"].astype(str).str.strip()
    df["frequency"] = df["frequency"].astype(int)
    df = df.sort_values("frequency", ascending=False, kind="mergesort").reset_index(drop=True)
    print(f"[freq] {os.path.basename(path)} -> rows={len(df)} | unique IDs={df['node_id'].nunique()} | top={df['frequency'].iloc[0] if len(df) else 'NA'} | total={df['frequency'].sum()}")
    return df

def maybe_filter_to_hc(df_freq, hc_ids, tag):
    """Filter to IDs present in 6-High-Capacity-Set, with a safe fallback."""
    if df_freq.empty or not hc_ids:
        return df_freq
    filtered = df_freq[df_freq["node_id"].isin(hc_ids)].reset_index(drop=True)
    if filtered.empty:
        print(f"[{tag}] Warning: no overlap with set 6; leaving unfiltered.")
        return df_freq
    if len(filtered) < len(df_freq):
        print(f"[{tag}] Filtered to High-Capacity membership: {len(filtered)} rows (from {len(df_freq)})")
    return filtered

def make_table2(df_freq, top_k=20, tag="file"):
    """Compute % of total and cumulative %, print a Word-ready top_k table and save CSV."""
    if df_freq.empty:
        print(f"[{tag}] No data.")
        return None
    total = df_freq["frequency"].sum()
    df = df_freq.copy()
    df["% of Total"] = 100.0 * df["frequency"] / total
    df["Cumulative %"] = df["% of Total"].cumsum()
    top = df.head(top_k).copy()
    top.insert(0, "Node ID (8 chars)", top["node_id"].str[:8])
    top = top.rename(columns={"frequency":"Frequency"})
    top[["% of Total","Cumulative %"]] = top[["% of Total","Cumulative %"]].round(2)

    print(f"\n[{tag}] Top-{top_k} table (sorted by Frequency desc):")
    print(top[["Node ID (8 chars)","Frequency","% of Total","Cumulative %"]].to_string(index=False))

    top_k_share = float(top["Cumulative %"].iloc[-1])
    print(f"\n[{tag}] Top-{top_k} cumulative share of total selections: {top_k_share:.2f}%")

    # Save archive CSV with full node_id
    out_csv = OUTDIR / f"table2_top{top_k}_{tag}.csv"
    top[["node_id","Frequency","% of Total","Cumulative %"]].to_csv(out_csv, index=False)
    print(f"[{tag}] Saved: {out_csv}")
    return top

# --- Run ---
sniff(PATH_6, n=5)
sniff(PATH_7, n=5)
sniff(PATH_8, n=5)

df6 = load_hicap_set(PATH_6)
df7 = load_freq(PATH_7)
df8 = load_freq(PATH_8)

hc_ids = set(df6["node_id"]) if not df6.empty else set()
df7 = maybe_filter_to_hc(df7, hc_ids, tag="7")
df8 = maybe_filter_to_hc(df8, hc_ids, tag="8")

t7 = make_table2(df7, top_k=20, tag="7-High-Capacity-Set-Freq")
t8 = make_table2(df8, top_k=20, tag="8-High-Capaity-Freq2")



--- Sniff: /content/drive/MyDrive/DataInBrief-2025/6-High-Capacity-Set.txt ---
01: 
02: High-Capacity	21:59:00		Node	ID:	IojVH~dnHmdHpG6ws6-MqFQ9NfIYDIPyT5pYrXH5BS8=
03: High-Capacity	21:59:00		Node	ID:	KQZPx5UMW8hElPMDGA1CQalD8-K~zV0XIurE5aQLnUI=
04: High-Capacity	21:59:00		Node	ID:	ccMgCyreHWdI55Hj1ehU30cZ5G0d9YVa~6Tr9Iky7j0=
05: High-Capacity	21:59:00		Node	ID:	tJFdO58cakuNTpGg1uJVL2AJLatabtX8aBHnRCeIiZ4=

--- Sniff: /content/drive/MyDrive/DataInBrief-2025/7-High-Capacity-Set-Freq.txt ---
01: tXuneoZNBl-QZ3O0ceo4b8vsEGiUrplEdELmb0Ou8RU=	906
02: TU4AVivTZ6DfGf~npsox8OwzaQHEU8U04gZEvKwGL~I=	686
03: hA~tqWxfetuEhuYXq1uJ3yHKTYmcYypGLp9OTR3Z7vg=	583
04: N7T9mVbu6g3jb0zEbEfe0qhrh453OVClakvDLlV4h3M=	425
05: x-eBfsgOxUi0KreCiFKohMzh2T0nhoRz7v3oe44ROyk=	414

--- Sniff: /content/drive/MyDrive/DataInBrief-2025/8-High-Capaity-Freq2.txt ---
01: tXuneoZNBl-QZ3O0ceo4b8vsEGiUrplEdELmb0Ou8RU=	906
02: TU4AVivTZ6DfGf~npsox8OwzaQHEU8U04gZEvKwGL~I=	686
03: hA~tqWxfetuEhuYXq1uJ3yHKTYmcYypGLp9OTR3Z7vg=	5

**NOTE - Next**

In [2]:
# STEP 6 ONLY: Parse High-Capacity membership list from "6-High-Capacity-Set.txt"
import re, os
import pandas as pd
from pathlib import Path

BASE = "/content/drive/MyDrive/DataInBrief-2025"
PATH_6 = f"{BASE}/6-High-Capacity-Set.txt"
OUTDIR = Path(BASE) / "outputs"
OUTDIR.mkdir(parents=True, exist_ok=True)

# Router ID pattern: 44 chars (base64-like, with -, ~, +, /) and optional '=' padding at end.
RID_RE = re.compile(r"([A-Za-z0-9\-\~\+\/]{43}=|[A-Za-z0-9\-\~\+\/]{44})")

def sniff(path, n=5):
    print(f"\n--- Sniff: {path} ---")
    if not os.path.exists(path):
        print("  (missing)"); return
    with open(path, "r", errors="ignore") as f:
        for i, ln in enumerate(f):
            if i >= n: break
            print(f"{i+1:02d}: {ln.rstrip()}")

# 1) Quick sniff
sniff(PATH_6, n=5)

# 2) Extract router IDs from file 6
ids = []
lines_total = 0
with open(PATH_6, "r", errors="ignore") as f:
    for ln in f:
        lines_total += 1
        m = RID_RE.search(ln)
        if m:
            ids.append(m.group(0).strip())

sr_ids = pd.Series(ids, name="node_id")
uniq_ids = sr_ids.dropna().drop_duplicates().reset_index(drop=True)

print(f"\n[6] lines read: {lines_total}")
print(f"[6] extracted IDs: {len(sr_ids)}")
print(f"[6] unique node_ids: {len(uniq_ids)}")

# 3) (Diagnostic) counts of appearances within file 6 itself
#    This is NOT the selection frequency used in Table 2; it's just how many times an ID appears in this text file.
counts = (sr_ids.value_counts()
          .rename_axis("node_id")
          .reset_index(name="appearances_in_file6"))

print("\n[6] Top 10 appearance counts in file 6 (diagnostic):")
print(counts.head(10).to_string(index=False))

# 4) Save outputs for inspection
unique_csv = OUTDIR / "hc_set_unique_ids_from_file6.csv"
counts_csv = OUTDIR / "hc_set_file6_appearance_counts.csv"
uniq_ids.to_frame().to_csv(unique_csv, index=False)
counts.to_csv(counts_csv, index=False)

print(f"\n[Saved] Unique IDs: {unique_csv}")
print(f"[Saved] Diagnostic counts: {counts_csv}")

# 5) Show a small sample so you can check formatting
print("\nSample unique IDs:")
print(uniq_ids.head(5).to_string(index=False))



--- Sniff: /content/drive/MyDrive/DataInBrief-2025/6-High-Capacity-Set.txt ---
01: 
02: High-Capacity	21:59:00		Node	ID:	IojVH~dnHmdHpG6ws6-MqFQ9NfIYDIPyT5pYrXH5BS8=
03: High-Capacity	21:59:00		Node	ID:	KQZPx5UMW8hElPMDGA1CQalD8-K~zV0XIurE5aQLnUI=
04: High-Capacity	21:59:00		Node	ID:	ccMgCyreHWdI55Hj1ehU30cZ5G0d9YVa~6Tr9Iky7j0=
05: High-Capacity	21:59:00		Node	ID:	tJFdO58cakuNTpGg1uJVL2AJLatabtX8aBHnRCeIiZ4=

[6] lines read: 629962
[6] extracted IDs: 629934
[6] unique node_ids: 2132

[6] Top 10 appearance counts in file 6 (diagnostic):
                                     node_id  appearances_in_file6
Z1bh-f140eBV-XG4GVbx7zlB0xCCOLa~3AsBk9Vw~Dc=                  3926
srLHIzJ8mfEd90JEsaWUDed2OTpaXUWTkbV4f64rXzI=                  3799
moCfkmqXPxMoXWtIWMndXifKaTS5j-AK-g3NeI5J1HA=                  3576
TDQO~djJ4hRVb26rrKpTzqNnGmlyZyvbEKxxEFfY8UQ=                  3369
IojVH~dnHmdHpG6ws6-MqFQ9NfIYDIPyT5pYrXH5BS8=                  3186
onjR2Et2hLSvokqE7b7LtThEgD~GhKo6q0raO9zoITI=           

In [3]:
# STEP: File 6 only — Top-20 IDs by appearance within file 6 (diagnostic)
import re, os
import pandas as pd
from pathlib import Path

BASE = "/content/drive/MyDrive/DataInBrief-2025"
PATH_6 = f"{BASE}/6-High-Capacity-Set.txt"
OUTDIR = Path(BASE) / "outputs"
OUTDIR.mkdir(parents=True, exist_ok=True)

# Router ID pattern (44 chars base64-like incl -,~, +, /), optional '=' padding
RID_RE = re.compile(r"([A-Za-z0-9\-\~\+\/]{43}=|[A-Za-z0-9\-\~\+\/]{44})")

def sniff(path, n=5):
    print(f"\n--- Sniff: {path} ---")
    if not os.path.exists(path):
        print("  (missing)"); return
    with open(path, "r", errors="ignore") as f:
        for i, ln in enumerate(f):
            if i >= n: break
            print(f"{i+1:02d}: {ln.rstrip()}")

# 1) Quick sniff
sniff(PATH_6, n=5)

# 2) Extract IDs
ids = []
lines_total = 0
with open(PATH_6, "r", errors="ignore") as f:
    for ln in f:
        lines_total += 1
        m = RID_RE.search(ln)
        if m:
            ids.append(m.group(0).strip())

sr_ids = pd.Series(ids, name="node_id")
uniq_ids = sr_ids.dropna().drop_duplicates().reset_index(drop=True)

print(f"\n[6] lines read: {lines_total}")
print(f"[6] extracted IDs: {len(sr_ids)}")
print(f"[6] unique node_ids: {len(uniq_ids)}")

# 3) Diagnostic appearance counts within file 6
counts = (sr_ids.value_counts()
          .rename_axis("node_id")
          .reset_index(name="appearances_in_file6"))

# Compute % and cumulative % within file 6 (diagnostic)
total_appearances = counts["appearances_in_file6"].sum()
counts["% of File6"] = 100.0 * counts["appearances_in_file6"] / total_appearances
counts["Cumulative %"] = counts["% of File6"].cumsum()
counts[["% of File6","Cumulative %"]] = counts[["% of File6","Cumulative %"]].round(2)

# Top-20 table
top20 = counts.head(20).copy()
top20.insert(0, "Node ID (8 chars)", top20["node_id"].str[:8])
print("\n[6] Top-20 by appearance within file 6 (diagnostic):")
print(top20[["Node ID (8 chars)","appearances_in_file6","% of File6","Cumulative %"]]
      .rename(columns={"appearances_in_file6":"Appearances"})
      .to_string(index=False))

# 4) Save CSVs
unique_csv = OUTDIR / "hc_set_unique_ids_from_file6.csv"
counts_csv = OUTDIR / "hc_set_file6_appearance_counts.csv"
top20_csv  = OUTDIR / "hc_set_file6_top20_diagnostic.csv"

uniq_ids.to_frame().to_csv(unique_csv, index=False)
counts.to_csv(counts_csv, index=False)
top20.to_csv(top20_csv, index=False)

print(f"\n[Saved] Unique IDs: {unique_csv}")
print(f"[Saved] All appearance counts (diagnostic): {counts_csv}")
print(f"[Saved] Top-20 (diagnostic): {top20_csv}")



--- Sniff: /content/drive/MyDrive/DataInBrief-2025/6-High-Capacity-Set.txt ---
01: 
02: High-Capacity	21:59:00		Node	ID:	IojVH~dnHmdHpG6ws6-MqFQ9NfIYDIPyT5pYrXH5BS8=
03: High-Capacity	21:59:00		Node	ID:	KQZPx5UMW8hElPMDGA1CQalD8-K~zV0XIurE5aQLnUI=
04: High-Capacity	21:59:00		Node	ID:	ccMgCyreHWdI55Hj1ehU30cZ5G0d9YVa~6Tr9Iky7j0=
05: High-Capacity	21:59:00		Node	ID:	tJFdO58cakuNTpGg1uJVL2AJLatabtX8aBHnRCeIiZ4=

[6] lines read: 629962
[6] extracted IDs: 629934
[6] unique node_ids: 2132

[6] Top-20 by appearance within file 6 (diagnostic):
Node ID (8 chars)  Appearances  % of File6  Cumulative %
         Z1bh-f14         3926        0.62          0.62
         srLHIzJ8         3799        0.60          1.23
         moCfkmqX         3576        0.57          1.79
         TDQO~djJ         3369        0.53          2.33
         IojVH~dn         3186        0.51          2.83
         onjR2Et2         2838        0.45          3.29
         tlVPuTPT         2760        0.44          3.72
 

**File 7 - High Capacity and Cummulative Percentages**

In [4]:
# Build Table 2 from file 7 (fallback: file 8), constrained to IDs in file 6
import re, os
import pandas as pd
from pathlib import Path

BASE = "/content/drive/MyDrive/DataInBrief-2025"
PATH6 = f"{BASE}/6-High-Capacity-Set.txt"
PATH7 = f"{BASE}/7-High-Capacity-Set-Freq.txt"
PATH8 = f"{BASE}/8-High-Capaity-Freq2.txt"
OUT  = Path(BASE) / "outputs"
OUT.mkdir(parents=True, exist_ok=True)

RID_RE = re.compile(r"([A-Za-z0-9\-\~\+\/]{43}=|[A-Za-z0-9\-\~\+\/]{44})")

def load_set_ids(path):
    ids = []
    with open(path, "r", errors="ignore") as f:
        for ln in f:
            m = RID_RE.search(ln)
            if m: ids.append(m.group(0).strip())
    return pd.Series(ids, name="node_id").drop_duplicates().reset_index(drop=True)

def load_freq(path):
    # two-column TSV: node_id \t freq
    df = pd.read_csv(path, sep=r"\s+", header=None, names=["node_id","freq"], engine="python")
    # keep only sensible rows
    df["node_id"] = df["node_id"].astype(str).str.strip()
    df["freq"]    = pd.to_numeric(df["freq"], errors="coerce")
    df = df.dropna(subset=["node_id","freq"])
    return df

# 1) membership (file 6)
set_ids = load_set_ids(PATH6)
hc_set = set_ids.to_frame()

# 2) frequencies (file 7, else file 8)
freq_path = PATH7 if os.path.exists(PATH7) else PATH8
df_freq = load_freq(freq_path)

# 3) keep only IDs that are in the high-capacity set
df_freq = df_freq[df_freq["node_id"].isin(hc_set["node_id"])].copy()

# 4) rank + percentages
df_freq = df_freq.sort_values("freq", ascending=False).reset_index(drop=True)
total = df_freq["freq"].sum()
df_freq["% of Total"]   = (100.0 * df_freq["freq"] / total).round(2)
df_freq["Cumulative %"] = df_freq["% of Total"].cumsum().round(2)
df_freq.insert(0, "Node ID (8 chars)", df_freq["node_id"].str[:8])

# 5) Top-20 view for Table 2
top20 = df_freq.head(20).copy()
print(top20[["Node ID (8 chars)", "freq", "% of Total", "Cumulative %"]]
      .rename(columns={"freq":"Frequency"}).to_string(index=False))

# Save an export you can paste from
top20_out = OUT / "table2_highcap_top20_with_cumulative.csv"
top20.rename(columns={"freq":"Frequency"}).to_csv(top20_out, index=False)
print(f"\nSaved: {top20_out}")


Node ID (8 chars)  Frequency  % of Total  Cumulative %
         tXuneoZN        906        6.99          6.99
         TU4AVivT        686        5.29         12.28
         x-eBfsgO        414        3.19         15.47
         aTdMtnxQ        394        3.04         18.51
         ZDz09qEx        270        2.08         20.59
         g5xHgSjt        221        1.70         22.29
         IiN-TYoM        217        1.67         23.96
         CpMHp1xj        176        1.36         25.32
         3UavGAjy        172        1.33         26.65
         ~Go801IA        168        1.30         27.95
         ZB~nvHJj        148        1.14         29.09
         SLIdpSD6        144        1.11         30.20
         F9577L-s        140        1.08         31.28
         bUJ0DwK8        138        1.06         32.34
         SC7~pNJt        137        1.06         33.40
         z9DJ6Fjl        133        1.03         34.43
         RaGUK5Aq        132        1.02         35.45
         V

CHeck file 6 for top 20 and check fiel 7 for their frequencies

In [5]:
# VALIDATE: top-20 from file 6 vs frequencies in file 7
import re, os
import pandas as pd
from pathlib import Path

BASE = "/content/drive/MyDrive/DataInBrief-2025"
PATH6 = f"{BASE}/6-High-Capacity-Set.txt"
PATH7 = f"{BASE}/7-High-Capacity-Set-Freq.txt"   # or 8-High-Capaity-Freq2.txt

RID_RE = re.compile(r"([A-Za-z0-9\-\~\+\/]{43}=|[A-Za-z0-9\-\~\+\/]{44})")

# --- extract all IDs from file 6 (appearance counts are only diagnostic) ---
ids6 = []
with open(PATH6, "r", errors="ignore") as f:
    for ln in f:
        m = RID_RE.search(ln)
        if m: ids6.append(m.group(0).strip())

counts6 = (pd.Series(ids6, name="node_id")
             .value_counts()
             .rename_axis("node_id")
             .reset_index(name="appearances_in_file6"))

top20_from6 = counts6.head(20).copy()

# --- load file 7 as the authoritative frequency table ---
df7 = pd.read_csv(PATH7, sep=r"\s+|\t+", engine="python",
                  header=None, names=["node_id","freq7"])

total7 = df7["freq7"].sum()

# --- join & report ---
j = top20_from6.merge(df7, on="node_id", how="left")
j["% of Total 7"] = 100 * j["freq7"] / total7
j["Cumulative % (7)"] = j["% of Total 7"].cumsum()
j_out = (j.assign(**{"Node (8)": j["node_id"].str[:8]})
           .loc[:, ["Node (8)","appearances_in_file6","freq7","% of Total 7","Cumulative % (7)"]]
           .rename(columns={"appearances_in_file6":"Appearances in file6",
                            "freq7":"Frequency (file7)"})
           .round({"% of Total 7":2,"Cumulative % (7)":2}))

print("\nTOP-20 from file 6 mapped to frequencies in file 7:")
print(j_out.sort_values("Frequency (file7)", ascending=False).to_string(index=False))

# coverage check
missing = j[j["freq7"].isna()]
print(f"\nCoverage: {len(j)-missing.shape[0]}/20 from file 6 found in file 7.")
if not missing.empty:
    print("Missing IDs (present in 6, absent in 7):")
    print("\n".join(missing["node_id"].tolist()))

print(f"\nTotal selections in file 7: {total7:,}")
print(f"Top-20 cumulative share (by file 7 frequencies): {j['% of Total 7'].sum():.2f}%")



TOP-20 from file 6 mapped to frequencies in file 7:
Node (8)  Appearances in file6  Frequency (file7)  % of Total 7  Cumulative % (7)
RaGUK5Aq                  2424              132.0          0.59              2.26
fgZvKWSc                  2511               84.0          0.38              1.51
opSZwy-w                  2751               73.0          0.33              1.10
tlVPuTPT                  2760               55.0          0.25              0.74
moCfkmqX                  3576               43.0          0.19              0.26
YXEAXlOW                  2350               36.0          0.16              2.45
TDQO~djJ                  3369               36.0          0.16              0.44
reNXIMuD                  2324               31.0          0.14              2.60
BHIY1YQz                  2429               22.0          0.10              1.66
IO~OIVIJ                  2260               18.0          0.08              2.68
Z1bh-f14                  3926               

**FINAL NEW TABLE 2 - INcludes Cummulative %**

In [6]:
# Table 2 (Top-20 High-Capacity nodes) from file 7 with % and Cumulative %
import pandas as pd
from pathlib import Path

BASE = "/content/drive/MyDrive/DataInBrief-2025"
PATH_FREQ = f"{BASE}/7-High-Capacity-Set-Freq.txt"   # authoritative (node_id \t frequency)
OUTDIR = Path(BASE) / "outputs"
OUTDIR.mkdir(parents=True, exist_ok=True)

# Load frequency table
df = pd.read_csv(PATH_FREQ, sep=r"\s+|\t+", header=None, names=["node_id","frequency"], engine="python")
df["node_id"]   = df["node_id"].astype(str).str.strip()
df["frequency"] = pd.to_numeric(df["frequency"], errors="coerce").fillna(0).astype(int)

# Rank + shares + cumulative
df = df.sort_values("frequency", ascending=False, kind="mergesort").reset_index(drop=True)
total = df["frequency"].sum()
df["% of Total"]   = (100.0 * df["frequency"] / total).round(2)
df["Cumulative %"] = df["% of Total"].cumsum().clip(upper=100).round(2)

# Top-20 for the manuscript
top20 = df.head(20).copy()
top20.insert(0, "Rank", range(1, len(top20)+1))
top20.insert(1, "Node ID (8 chars)", top20["node_id"].str[:8])

# Pretty print for quick paste into Word
print(top20[["Rank","Node ID (8 chars)","frequency","% of Total","Cumulative %"]]
      .rename(columns={"frequency":"Frequency"}).to_string(index=False))

# Save CSV for Table 2
out_csv = OUTDIR / "table2_top20_from_file7_with_cumulative.csv"
top20.rename(columns={"frequency":"Frequency"})[
    ["Rank","node_id","Frequency","% of Total","Cumulative %"]
].to_csv(out_csv, index=False)
print(f"\nSaved: {out_csv}")

# Text snippet you can paste under the table (shows overall concentration)
cum20 = float(top20["Cumulative %"].iloc[-1])
print(f"\nTop-20 cumulative share of total selections: {cum20:.2f}%")


 Rank Node ID (8 chars)  Frequency  % of Total  Cumulative %
    1          tXuneoZN        906        4.06          4.06
    2          TU4AVivT        686        3.07          7.13
    3          hA~tqWxf        583        2.61          9.74
    4          N7T9mVbu        425        1.90         11.64
    5          x-eBfsgO        414        1.86         13.50
    6          aTdMtnxQ        394        1.77         15.27
    7          WwNGsm99        306        1.37         16.64
    8          dz3ON-1U        301        1.35         17.99
    9          ZDz09qEx        270        1.21         19.20
   10          Fu-8GZY8        260        1.17         20.37
   11          IwOkup7K        255        1.14         21.51
   12          g5xHgSjt        221        0.99         22.50
   13          IiN-TYoM        217        0.97         23.47
   14          twbWgUGo        196        0.88         24.35
   15          CpMHp1xj        176        0.79         25.14
   16          3UavGAjy 