In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from google.colab import drive
import re

# 1. Mount Google Drive so the dataset folder becomes readable from Colab.
print("Mounting Google Drive...")
drive.mount('/content/drive')

# Dataset locations: every input file lives under the same Drive folder.
BASE_PATH = '/content/drive/MyDrive/DataInBrief-2025/'
HICAP_FREQ_FILE = f'{BASE_PATH}7-High-Capacity-Set-Freq.txt'
HICAP_TIME_FILE = f'{BASE_PATH}6-High-Capacity-Set.txt'
PROFILES_FILE = f'{BASE_PATH}10-Profiles-By-Country-anonymized.csv'

# --- 2. Metric Functions ---
def calculate_entropy(counts):
    """Return the Shannon entropy (in bits) of a frequency distribution.

    Non-positive entries are ignored; the remaining values are normalised to
    probabilities before the entropy is computed. An input with no positive
    entries yields 0.0.
    """
    values = np.asarray(counts, dtype=float)
    positive = values[values > 0]
    if positive.size == 0:
        return 0.0
    # The lower clip keeps log2 finite if a probability underflows to zero.
    probs = np.clip(positive / positive.sum(), 1e-300, 1.0)
    return float(-np.sum(probs * np.log2(probs)))

def calculate_gini(counts):
    """Return the Gini coefficient of a frequency distribution.

    Non-positive entries are dropped first, so the coefficient describes the
    inequality among the entries that were actually observed. An input with
    no positive entries yields 0.0.
    """
    values = np.asarray(counts, dtype=float)
    ordered = np.sort(values[values > 0])
    n = ordered.size
    total = ordered.sum()
    if n == 0 or total == 0:
        return 0.0
    # Sum of the running totals of the ascending values (Lorenz-curve area term).
    running_total_sum = np.cumsum(ordered).sum()
    return float(1.0 - (2.0 * running_total_sum) / (n * total) + (1.0 / n))

# --- 3. Static Table 2 Calculation (File 7) ---
def generate_static_table2(path_freq):
    """Build and print the static table of the 20 most-selected nodes.

    Reads the whitespace-separated frequency file (first line skipped, first
    two columns used), sums the frequencies per node ID and keeps the 20
    largest totals. The cumulative percentage is relative to the sum of those
    20 rows, not to the whole file. Gini and entropy of the 20 counts are
    printed together with a markdown rendering of the table.

    Returns the formatted table: ``Rank``, ``Node ID`` (first 8 characters
    followed by ``...``), ``Selection Frequency`` and
    ``Cumulative Percentage (%)``.
    """
    print("\n--- Generating Static Table 2 (Top 20 Frequencies from File 7) ---")
    raw = pd.read_csv(
        path_freq,
        sep=r'\s+',
        header=None,
        names=['node_id', 'frequency'],
        engine='python',
        usecols=[0, 1],
        skiprows=1,
    )

    # Normalise both columns; frequencies that fail to parse count as zero.
    raw["node_id"] = raw["node_id"].astype(str).str.strip()
    raw["frequency"] = pd.to_numeric(raw["frequency"], errors="coerce").fillna(0)

    # Collapse repeated node IDs, then keep the 20 largest totals.
    per_node = raw.groupby("node_id", as_index=False)["frequency"].sum()
    top = per_node.sort_values("frequency", ascending=False).head(20).copy()

    top_counts = top['frequency'].values
    top_total = top['frequency'].sum()
    running_pct = top['frequency'].cumsum() / top_total * 100

    # Assemble the manuscript-ready table directly under its final headers.
    table = pd.DataFrame(
        {
            'Rank': np.arange(1, len(top) + 1),
            'Node ID': top['node_id'].str.slice(0, 8) + '...',
            'Selection Frequency': top['frequency'],
            'Cumulative Percentage (%)': running_pct.round(2),
        },
        index=top.index,
    )

    print(f"Static Gini (G): {calculate_gini(top_counts):.4f}")
    print(f"Static Entropy (H): {calculate_entropy(top_counts):.4f}")
    print(table.to_markdown(index=False))
    return table

# --- 4. Temporal Analysis Core Logic (File 6) ---
def load_high_cap_time(path: str) -> pd.DataFrame:
    """Parse the high-capacity event log into a time-sorted DataFrame.

    A line contributes one row when it contains both an ``HH:MM:SS`` time
    surrounded by whitespace and a ``Node ID: <id>`` field whose id is 44
    characters long (43 characters plus ``=``, or 44 characters). The time is
    combined with a fixed assumed date because only the time of day is
    extracted from the line.

    Args:
        path: Path of the event log to read. Undecodable bytes are ignored.

    Returns:
        DataFrame with columns ``timestamp`` and ``node_id``, sorted by
        ``timestamp`` with a fresh 0..n-1 index. Rows whose time cannot be
        parsed (e.g. ``99:99:99``) are dropped. When no line matches, an empty
        DataFrame with the same two columns is returned.
    """
    time_re = re.compile(r'\s(\d{2}:\d{2}:\d{2})\s')
    node_id_re = re.compile(r'Node\s+ID:\s+([A-Za-z0-9\-\~\+\/]{43}=|[A-Za-z0-9\-\~\+\/]{44})')
    assumed_date = '2024-11-17'

    stamps = []
    node_ids = []
    with open(path, "r", errors="ignore") as f:
        for ln in f:
            time_match = time_re.search(ln)
            id_match = node_id_re.search(ln)
            if not (time_match and id_match):
                continue
            stamps.append(f"{assumed_date} {time_match.group(1)}")
            node_ids.append(id_match.group(1))

    # Parse all timestamps in one vectorised call; invalid times become NaT.
    events = pd.DataFrame({
        'timestamp': pd.to_datetime(stamps, format="%Y-%m-%d %H:%M:%S", errors="coerce"),
        'node_id': node_ids,
    })
    # Building the frame from named columns keeps 'timestamp' present even
    # when nothing matched, so the sort below cannot raise KeyError.
    events = events.dropna(subset=['timestamp'])
    return events.sort_values("timestamp").reset_index(drop=True)


def perform_rolling_analysis(df_temporal, df_profiles, window='2h', freq='30min'):
    """Compute concentration metrics over forward-looking time windows.

    Window starts run from the first to the last event timestamp in steps of
    ``freq``. Each window covers ``[start, start + window]`` with both ends
    included (label slicing on the timestamp index is inclusive). Windows
    holding fewer than 50 events are skipped for stability.

    Args:
        df_temporal: Event table with ``timestamp`` and ``node_id`` columns.
        df_profiles: Profile table with ``Full Node ID`` and ``Caps`` columns
            (surrounding whitespace in the headers is tolerated). The
            caller's frame is not modified.
        window: Window length, any string accepted by ``pd.Timedelta``.
        freq: Step between window starts, a pandas offset alias.

    Returns:
        DataFrame with one row per retained window and the columns
        ``timestamp``, ``entropy``, ``gini``, ``share_XfR`` and
        ``unique_nodes``. It is empty (but keeps those columns) when no
        window reaches the 50-event minimum.
    """
    result_columns = ['timestamp', 'entropy', 'gini', 'share_XfR', 'unique_nodes']

    # rename() returns a new frame, so the caller's headers stay untouched.
    profiles = df_profiles.rename(columns=str.strip).rename(columns={'Full Node ID': 'node_id'})
    # One Caps value per node: duplicate profile rows would otherwise multiply
    # events in the left merge and distort the per-node counts.
    caps_lookup = profiles[['node_id', 'Caps']].drop_duplicates(subset='node_id')

    merged = pd.merge(df_temporal, caps_lookup, on='node_id', how='left')
    # Label slicing needs a monotonic index; sorting makes that explicit.
    merged = merged.set_index('timestamp').sort_index()

    results = []
    if not merged.empty:
        span = pd.Timedelta(window)  # parsed once instead of once per window
        time_points = pd.date_range(start=merged.index.min(), end=merged.index.max(), freq=freq)

        for start_time in time_points:
            window_data = merged.loc[start_time:start_time + span]
            if len(window_data) < 50:  # Use 50 as minimum samples for stability
                continue

            freq_counts = window_data['node_id'].value_counts()

            # NOTE(review): the share is taken over the distinct Caps strings
            # seen in the window, not over distinct nodes - confirm this is
            # the intended definition of "share of XfR nodes".
            caps_data = window_data['Caps'].dropna().unique()
            has_xfr = sum('X' in s and 'f' in s and 'R' in s for s in map(str, caps_data))

            results.append({
                'timestamp': start_time,
                'entropy': calculate_entropy(freq_counts.values),
                'gini': calculate_gini(freq_counts.values),
                'share_XfR': has_xfr / max(len(caps_data), 1),
                'unique_nodes': len(freq_counts),
            })

    # Explicit columns keep the schema stable even when no window qualified.
    df_results = pd.DataFrame(results, columns=result_columns)
    print("\n--- High-Capacity Temporal Metrics (Time-Series Table) ---")
    print(f"Total Windows Computed: {len(df_results)}")
    if not df_results.empty:
        print(f"Average Gini: {df_results['gini'].mean():.4f}")
        print(df_results[['timestamp', 'gini', 'entropy', 'share_XfR']].head(10).to_markdown(index=False))
    return df_results

# --- Main Execution ---
if __name__ == "__main__":
    try:
        # Load Profiles once
        df_profiles = pd.read_csv(PROFILES_FILE)

        # 1. Generate Static Table 2
        df_table_2 = generate_static_table2(HICAP_FREQ_FILE)

        # 2. Generate Time-Series Table
        df_high_cap_time = load_high_cap_time(HICAP_TIME_FILE)
        df_time_series = perform_rolling_analysis(df_high_cap_time, df_profiles)

    except Exception as e:
        # Top-level boundary: stay best-effort (no re-raise), but show the
        # traceback so the failing step can be located, not just its message.
        import traceback

        print(f"A fatal error occurred during script execution: {e}")
        traceback.print_exc()

Mounting Google Drive...
Mounted at /content/drive

--- Generating Static Table 2 (Top 20 Frequencies from File 7) ---
Static Gini (G): 0.2731
Static Entropy (H): 4.1467
|   Rank | Node ID     |   Selection Frequency |   Cumulative Percentage (%) |
|-------:|:------------|----------------------:|----------------------------:|
|      1 | TU4AVivT... |                   687 |                       12.2  |
|      2 | hA~tqWxf... |                   585 |                       22.58 |
|      3 | N7T9mVbu... |                   425 |                       30.13 |
|      4 | x-eBfsgO... |                   417 |                       37.53 |
|      5 | aTdMtnxQ... |                   395 |                       44.54 |
|      6 | WwNGsm99... |                   309 |                       50.03 |
|      7 | dz3ON-1U... |                   301 |                       55.37 |
|      8 | ZDz09qEx... |                   270 |                       60.16 |
|      9 | Fu-8GZY8... |                

  time_points = pd.date_range(start=df_temporal_merged.index.min(), end=df_temporal_merged.index.max(), freq=freq)
  end_time = start_time + pd.Timedelta(window)
  end_time = start_time + pd.Timedelta(window)
  end_time = start_time + pd.Timedelta(window)
  end_time = start_time + pd.Timedelta(window)
  end_time = start_time + pd.Timedelta(window)
  end_time = start_time + pd.Timedelta(window)
  end_time = start_time + pd.Timedelta(window)
  end_time = start_time + pd.Timedelta(window)
  end_time = start_time + pd.Timedelta(window)
  end_time = start_time + pd.Timedelta(window)
  end_time = start_time + pd.Timedelta(window)
  end_time = start_time + pd.Timedelta(window)
  end_time = start_time + pd.Timedelta(window)
  end_time = start_time + pd.Timedelta(window)
  end_time = start_time + pd.Timedelta(window)
  end_time = start_time + pd.Timedelta(window)
  end_time = start_time + pd.Timedelta(window)
  end_time = start_time + pd.Timedelta(window)
  end_time = start_time + pd.Timedelta(


--- High-Capacity Temporal Metrics (Time-Series Table) ---
Total Windows Computed: 28
Average Gini: 0.6758
| timestamp           |     gini |   entropy |   share_XfR |
|:--------------------|---------:|----------:|------------:|
| 2024-11-17 10:00:09 | 0.671497 |   8.68676 |   0.0967742 |
| 2024-11-17 10:30:09 | 0.670402 |   8.71162 |   0.09375   |
| 2024-11-17 11:00:09 | 0.66527  |   8.74944 |   0.0909091 |
| 2024-11-17 11:30:09 | 0.667267 |   8.78537 |   0.0882353 |
| 2024-11-17 12:00:09 | 0.668076 |   8.82306 |   0.0909091 |
| 2024-11-17 12:30:09 | 0.672325 |   8.79891 |   0.0882353 |
| 2024-11-17 13:00:09 | 0.66841  |   8.78667 |   0.0909091 |
| 2024-11-17 13:30:09 | 0.669235 |   8.78671 |   0.09375   |
| 2024-11-17 14:00:09 | 0.675977 |   8.76115 |   0.0909091 |
| 2024-11-17 14:30:09 | 0.668072 |   8.78364 |   0.09375   |


  end_time = start_time + pd.Timedelta(window)
  end_time = start_time + pd.Timedelta(window)
  end_time = start_time + pd.Timedelta(window)
  end_time = start_time + pd.Timedelta(window)
  end_time = start_time + pd.Timedelta(window)


SCRIPT 2 - FINAL

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from google.colab import drive
import re

# --- 0. Configuration and Setup ---
print("Mounting Google Drive...")
# Drive is expected to be mounted already by an earlier cell, so the mount
# call is intentionally not repeated here.

# Dataset locations: every input file lives under the same Drive folder.
BASE_PATH = '/content/drive/MyDrive/DataInBrief-2025/'
HICAP_FREQ_FILE = f'{BASE_PATH}7-High-Capacity-Set-Freq.txt'
HICAP_TIME_FILE = f'{BASE_PATH}6-High-Capacity-Set.txt'
PROFILES_FILE = f'{BASE_PATH}10-Profiles-By-Country-anonymized.csv'

# --- 1. Metric Functions ---
def calculate_entropy(counts):
    """Return the Shannon entropy (in bits) of a frequency distribution.

    Non-positive entries are ignored; the remaining values are normalised to
    probabilities before the entropy is computed. An input with no positive
    entries yields 0.0.
    """
    values = np.asarray(counts, dtype=float)
    positive = values[values > 0]
    if positive.size == 0:
        return 0.0
    # The lower clip keeps log2 finite if a probability underflows to zero.
    probs = np.clip(positive / positive.sum(), 1e-300, 1.0)
    return float(-np.sum(probs * np.log2(probs)))

def calculate_gini(counts):
    """Return the Gini coefficient of a frequency distribution.

    Non-positive entries are dropped first, so the coefficient describes the
    inequality among the entries that were actually observed. An input with
    no positive entries yields 0.0.
    """
    values = np.asarray(counts, dtype=float)
    ordered = np.sort(values[values > 0])
    n = ordered.size
    total = ordered.sum()
    if n == 0 or total == 0:
        return 0.0
    # Sum of the running totals of the ascending values (Lorenz-curve area term).
    running_total_sum = np.cumsum(ordered).sum()
    return float(1.0 - (2.0 * running_total_sum) / (n * total) + (1.0 / n))

# --- 2. Static Table 2 Calculation (File 7) ---
def generate_static_table2(path_freq):
    """Build and print the static table of the 20 most-selected nodes.

    Reads the whitespace-separated frequency file (first line skipped, first
    two columns used), sums the frequencies per node ID and keeps the 20
    largest totals. The cumulative percentage is relative to the sum of those
    20 rows, not to the whole file. Gini and entropy of the 20 counts are
    printed together with a markdown rendering of the table.

    Returns the formatted table: ``Rank``, ``Node ID`` (first 8 characters
    followed by ``...``), ``Selection Frequency`` and
    ``Cumulative Percentage (%)``.
    """
    print("\n--- Generating Static Table 2 (Top 20 Frequencies from File 7) ---")
    raw = pd.read_csv(
        path_freq,
        sep=r'\s+',
        header=None,
        names=['node_id', 'frequency'],
        engine='python',
        usecols=[0, 1],
        skiprows=1,
    )

    # Normalise both columns; frequencies that fail to parse count as zero.
    raw["node_id"] = raw["node_id"].astype(str).str.strip()
    raw["frequency"] = pd.to_numeric(raw["frequency"], errors="coerce").fillna(0)

    # Collapse repeated node IDs, then keep the 20 largest totals.
    per_node = raw.groupby("node_id", as_index=False)["frequency"].sum()
    top = per_node.sort_values("frequency", ascending=False).head(20).copy()

    top_counts = top['frequency'].values
    top_total = top['frequency'].sum()
    running_pct = top['frequency'].cumsum() / top_total * 100

    # Assemble the manuscript-ready table directly under its final headers.
    table = pd.DataFrame(
        {
            'Rank': np.arange(1, len(top) + 1),
            'Node ID': top['node_id'].str.slice(0, 8) + '...',
            'Selection Frequency': top['frequency'],
            'Cumulative Percentage (%)': running_pct.round(2),
        },
        index=top.index,
    )

    print(f"Static Gini (G): {calculate_gini(top_counts):.4f}")
    print(f"Static Entropy (H): {calculate_entropy(top_counts):.4f}")
    print(table.to_markdown(index=False))
    return table

# --- 3. Temporal Analysis Core Logic (File 6) ---
def load_high_cap_time(path: str) -> pd.DataFrame:
    """Parse the high-capacity event log into a time-sorted DataFrame.

    A line contributes one row when it contains both an ``HH:MM:SS`` time
    surrounded by whitespace and a ``Node ID: <id>`` field whose id is 44
    characters long (43 characters plus ``=``, or 44 characters). The time is
    combined with a fixed assumed date because only the time of day is
    extracted from the line.

    Args:
        path: Path of the event log to read. Undecodable bytes are ignored.

    Returns:
        DataFrame with columns ``timestamp`` and ``node_id``, sorted by
        ``timestamp`` with a fresh 0..n-1 index. Rows whose time cannot be
        parsed (e.g. ``99:99:99``) are dropped. When no line matches, an empty
        DataFrame with the same two columns is returned.
    """
    time_re = re.compile(r'\s(\d{2}:\d{2}:\d{2})\s')
    node_id_re = re.compile(r'Node\s+ID:\s+([A-Za-z0-9\-\~\+\/]{43}=|[A-Za-z0-9\-\~\+\/]{44})')
    assumed_date = '2024-11-17'

    stamps = []
    node_ids = []
    with open(path, "r", errors="ignore") as f:
        for ln in f:
            time_match = time_re.search(ln)
            id_match = node_id_re.search(ln)
            if not (time_match and id_match):
                continue
            stamps.append(f"{assumed_date} {time_match.group(1)}")
            node_ids.append(id_match.group(1))

    # Parse all timestamps in one vectorised call; invalid times become NaT.
    events = pd.DataFrame({
        'timestamp': pd.to_datetime(stamps, format="%Y-%m-%d %H:%M:%S", errors="coerce"),
        'node_id': node_ids,
    })
    # Building the frame from named columns keeps 'timestamp' present even
    # when nothing matched, so the sort below cannot raise KeyError.
    events = events.dropna(subset=['timestamp'])
    return events.sort_values("timestamp").reset_index(drop=True)


def perform_rolling_analysis(df_temporal, df_profiles, window='2h', freq='30min'):
    """Compute concentration metrics over forward-looking time windows.

    Window starts run from the first to the last event timestamp in steps of
    ``freq``. Each window covers ``[start, start + window]`` with both ends
    included (label slicing on the timestamp index is inclusive). Windows
    holding fewer than 50 events are skipped for stability.

    Args:
        df_temporal: Event table with ``timestamp`` and ``node_id`` columns.
        df_profiles: Profile table with ``Full Node ID`` and ``Caps`` columns
            (surrounding whitespace in the headers is tolerated). The
            caller's frame is not modified.
        window: Window length, any string accepted by ``pd.Timedelta``.
        freq: Step between window starts, a pandas offset alias.

    Returns:
        DataFrame with one row per retained window and the columns
        ``timestamp``, ``entropy``, ``gini``, ``share_XfR`` and
        ``unique_nodes``. It is empty (but keeps those columns) when no
        window reaches the 50-event minimum.
    """
    result_columns = ['timestamp', 'entropy', 'gini', 'share_XfR', 'unique_nodes']

    # rename() returns a new frame, so the caller's headers stay untouched.
    profiles = df_profiles.rename(columns=str.strip).rename(columns={'Full Node ID': 'node_id'})
    # One Caps value per node: duplicate profile rows would otherwise multiply
    # events in the left merge and distort the per-node counts.
    caps_lookup = profiles[['node_id', 'Caps']].drop_duplicates(subset='node_id')

    merged = pd.merge(df_temporal, caps_lookup, on='node_id', how='left')
    # Label slicing needs a monotonic index; sorting makes that explicit.
    merged = merged.set_index('timestamp').sort_index()

    results = []
    if not merged.empty:
        span = pd.Timedelta(window)  # parsed once instead of once per window
        time_points = pd.date_range(start=merged.index.min(), end=merged.index.max(), freq=freq)

        for start_time in time_points:
            window_data = merged.loc[start_time:start_time + span]
            if len(window_data) < 50:  # minimum samples for stability
                continue

            freq_counts = window_data['node_id'].value_counts()

            # NOTE(review): the share is taken over the distinct Caps strings
            # seen in the window, not over distinct nodes - confirm this is
            # the intended definition of "share of XfR nodes".
            caps_data = window_data['Caps'].dropna().unique()
            has_xfr = sum('X' in s and 'f' in s and 'R' in s for s in map(str, caps_data))

            results.append({
                'timestamp': start_time,
                'entropy': calculate_entropy(freq_counts.values),
                'gini': calculate_gini(freq_counts.values),
                'share_XfR': has_xfr / max(len(caps_data), 1),
                'unique_nodes': len(freq_counts),
            })

    # Explicit columns keep the schema stable even when no window qualified.
    df_results = pd.DataFrame(results, columns=result_columns)
    print("\n--- High-Capacity Temporal Metrics (Time-Series Table) ---")
    print(f"Total Windows Computed: {len(df_results)}")
    if not df_results.empty:
        print(f"Average Gini: {df_results['gini'].mean():.4f}")
        print(df_results[['timestamp', 'gini', 'entropy', 'share_XfR']].head(10).to_markdown(index=False))
    return df_results

# --- Main Execution ---
if __name__ == "__main__":
    try:
        # Load Profiles once
        df_profiles = pd.read_csv(PROFILES_FILE)

        # 1. Generate Static Table 2
        df_table_2 = generate_static_table2(HICAP_FREQ_FILE)

        # 2. Generate Time-Series Table
        df_high_cap_time = load_high_cap_time(HICAP_TIME_FILE)
        df_time_series = perform_rolling_analysis(df_high_cap_time, df_profiles)

    except Exception as e:
        # Top-level boundary: stay best-effort (no re-raise), but show the
        # traceback so the failing step can be located, not just its message.
        import traceback

        print(f"A fatal error occurred during script execution: {e}")
        traceback.print_exc()

Mounting Google Drive...

--- Generating Static Table 2 (Top 20 Frequencies from File 7) ---
Static Gini (G): 0.2731
Static Entropy (H): 4.1467
|   Rank | Node ID     |   Selection Frequency |   Cumulative Percentage (%) |
|-------:|:------------|----------------------:|----------------------------:|
|      1 | TU4AVivT... |                   687 |                       12.2  |
|      2 | hA~tqWxf... |                   585 |                       22.58 |
|      3 | N7T9mVbu... |                   425 |                       30.13 |
|      4 | x-eBfsgO... |                   417 |                       37.53 |
|      5 | aTdMtnxQ... |                   395 |                       44.54 |
|      6 | WwNGsm99... |                   309 |                       50.03 |
|      7 | dz3ON-1U... |                   301 |                       55.37 |
|      8 | ZDz09qEx... |                   270 |                       60.16 |
|      9 | Fu-8GZY8... |                   260 |                  

Regenerated Table 2 - adds the previously missing "% of Total" column

In [None]:
import pandas as pd
import numpy as np
from google.colab import drive
import re

# --- 0. Configuration and Setup ---
# Mount Google Drive so the dataset folder becomes readable from Colab.
print("Mounting Google Drive...")
drive.mount('/content/drive')

# Dataset locations: every input file lives under the same Drive folder.
BASE_PATH = '/content/drive/MyDrive/DataInBrief-2025/'
HICAP_FREQ_FILE = f'{BASE_PATH}7-High-Capacity-Set-Freq.txt'
HICAP_TIME_FILE = f'{BASE_PATH}6-High-Capacity-Set.txt'
PROFILES_FILE = f'{BASE_PATH}10-Profiles-By-Country-anonymized.csv'

# --- 1. Metric Functions (Unchanged) ---
def calculate_entropy(counts):
    """Return the Shannon entropy (in bits) of a frequency distribution.

    Non-positive entries are ignored; the remaining values are normalised to
    probabilities before the entropy is computed. An input with no positive
    entries yields 0.0.
    """
    values = np.asarray(counts, dtype=float)
    positive = values[values > 0]
    if positive.size == 0:
        return 0.0
    # The lower clip keeps log2 finite if a probability underflows to zero.
    probs = np.clip(positive / positive.sum(), 1e-300, 1.0)
    return float(-np.sum(probs * np.log2(probs)))

def calculate_gini(counts):
    """Return the Gini coefficient of a frequency distribution.

    Non-positive entries are dropped first, so the coefficient describes the
    inequality among the entries that were actually observed. An input with
    no positive entries yields 0.0.
    """
    values = np.asarray(counts, dtype=float)
    ordered = np.sort(values[values > 0])
    n = ordered.size
    total = ordered.sum()
    if n == 0 or total == 0:
        return 0.0
    # Sum of the running totals of the ascending values (Lorenz-curve area term).
    running_total_sum = np.cumsum(ordered).sum()
    return float(1.0 - (2.0 * running_total_sum) / (n * total) + (1.0 / n))

# --- 2. Static Table 2 Calculation (File 7) - FINAL CORRECTION ---
def generate_static_table2(path_freq):
    """Build and print the static top-20 table with per-node and cumulative shares.

    Reads the whitespace-separated frequency file (first line skipped, first
    two columns used), sums the frequencies per node ID and keeps the 20
    largest totals. Both percentage columns are relative to the sum of those
    20 rows, not to the whole file. Gini and entropy of the 20 counts are
    printed together with a markdown rendering of the table.

    Returns the formatted table: ``Rank``, ``Node ID`` (first 8 characters
    followed by ``...``), ``Selection Frequency``, ``% of Total`` and
    ``Cumulative Percentage (%)``.
    """
    print("\n--- Generating Static Table 2 (Top 20 Frequencies from File 7) ---")
    raw = pd.read_csv(
        path_freq,
        sep=r'\s+',
        header=None,
        names=['node_id', 'frequency'],
        engine='python',
        usecols=[0, 1],
        skiprows=1,
    )

    # Normalise both columns; frequencies that fail to parse count as zero.
    raw["node_id"] = raw["node_id"].astype(str).str.strip()
    raw["frequency"] = pd.to_numeric(raw["frequency"], errors="coerce").fillna(0)

    # Collapse repeated node IDs, then keep the 20 largest totals.
    per_node = raw.groupby("node_id", as_index=False)["frequency"].sum()
    top = per_node.sort_values("frequency", ascending=False).head(20).copy()

    top_counts = top['frequency'].values
    top_total = top['frequency'].sum()

    # Each node's share of the top-20 subset, and the running share by rank.
    node_pct = top['frequency'] / top_total * 100
    running_pct = top['frequency'].cumsum() / top_total * 100

    # Assemble the manuscript-ready table directly under its final headers.
    table = pd.DataFrame(
        {
            'Rank': np.arange(1, len(top) + 1),
            'Node ID': top['node_id'].str.slice(0, 8) + '...',
            'Selection Frequency': top['frequency'],
            '% of Total': node_pct.round(2),
            'Cumulative Percentage (%)': running_pct.round(2),
        },
        index=top.index,
    )

    print(f"Static Gini (G): {calculate_gini(top_counts):.4f}")
    print(f"Static Entropy (H): {calculate_entropy(top_counts):.4f}")
    print(table.to_markdown(index=False))
    return table

# --- 3. Temporal Analysis Core Logic (File 6) ---
def load_high_cap_time(path: str) -> pd.DataFrame:
    """Parse the high-capacity event log into a time-sorted DataFrame.

    A line contributes one row when it contains both an ``HH:MM:SS`` time
    surrounded by whitespace and a ``Node ID: <id>`` field whose id is 44
    characters long (43 characters plus ``=``, or 44 characters). The time is
    combined with a fixed assumed date because only the time of day is
    extracted from the line.

    Args:
        path: Path of the event log to read. Undecodable bytes are ignored.

    Returns:
        DataFrame with columns ``timestamp`` and ``node_id``, sorted by
        ``timestamp`` with a fresh 0..n-1 index. Rows whose time cannot be
        parsed (e.g. ``99:99:99``) are dropped. When no line matches, an empty
        DataFrame with the same two columns is returned.
    """
    time_re = re.compile(r'\s(\d{2}:\d{2}:\d{2})\s')
    node_id_re = re.compile(r'Node\s+ID:\s+([A-Za-z0-9\-\~\+\/]{43}=|[A-Za-z0-9\-\~\+\/]{44})')
    assumed_date = '2024-11-17'

    stamps = []
    node_ids = []
    with open(path, "r", errors="ignore") as f:
        for ln in f:
            time_match = time_re.search(ln)
            id_match = node_id_re.search(ln)
            if not (time_match and id_match):
                continue
            stamps.append(f"{assumed_date} {time_match.group(1)}")
            node_ids.append(id_match.group(1))

    # Parse all timestamps in one vectorised call; invalid times become NaT.
    events = pd.DataFrame({
        'timestamp': pd.to_datetime(stamps, format="%Y-%m-%d %H:%M:%S", errors="coerce"),
        'node_id': node_ids,
    })
    # Building the frame from named columns keeps 'timestamp' present even
    # when nothing matched, so the sort below cannot raise KeyError.
    events = events.dropna(subset=['timestamp'])
    return events.sort_values("timestamp").reset_index(drop=True)


def perform_rolling_analysis(df_temporal, df_profiles, window='2h', freq='30min'):
    """Compute concentration metrics over forward-looking time windows.

    Window starts run from the first to the last event timestamp in steps of
    ``freq``. Each window covers ``[start, start + window]`` with both ends
    included (label slicing on the timestamp index is inclusive). Windows
    holding fewer than 50 events are skipped for stability.

    Args:
        df_temporal: Event table with ``timestamp`` and ``node_id`` columns.
        df_profiles: Profile table with ``Full Node ID`` and ``Caps`` columns
            (surrounding whitespace in the headers is tolerated). The
            caller's frame is not modified.
        window: Window length, any string accepted by ``pd.Timedelta``.
        freq: Step between window starts, a pandas offset alias.

    Returns:
        DataFrame with one row per retained window and the columns
        ``timestamp``, ``entropy``, ``gini``, ``share_XfR`` and
        ``unique_nodes``. It is empty (but keeps those columns) when no
        window reaches the 50-event minimum.
    """
    result_columns = ['timestamp', 'entropy', 'gini', 'share_XfR', 'unique_nodes']

    # rename() returns a new frame, so the caller's headers stay untouched.
    profiles = df_profiles.rename(columns=str.strip).rename(columns={'Full Node ID': 'node_id'})
    # One Caps value per node: duplicate profile rows would otherwise multiply
    # events in the left merge and distort the per-node counts.
    caps_lookup = profiles[['node_id', 'Caps']].drop_duplicates(subset='node_id')

    merged = pd.merge(df_temporal, caps_lookup, on='node_id', how='left')
    # Label slicing needs a monotonic index; sorting makes that explicit.
    merged = merged.set_index('timestamp').sort_index()

    results = []
    if not merged.empty:
        span = pd.Timedelta(window)  # parsed once instead of once per window
        time_points = pd.date_range(start=merged.index.min(), end=merged.index.max(), freq=freq)

        for start_time in time_points:
            window_data = merged.loc[start_time:start_time + span]
            if len(window_data) < 50:  # minimum samples for stability
                continue

            freq_counts = window_data['node_id'].value_counts()

            # NOTE(review): the share is taken over the distinct Caps strings
            # seen in the window, not over distinct nodes - confirm this is
            # the intended definition of "share of XfR nodes".
            caps_data = window_data['Caps'].dropna().unique()
            has_xfr = sum('X' in s and 'f' in s and 'R' in s for s in map(str, caps_data))

            results.append({
                'timestamp': start_time,
                'entropy': calculate_entropy(freq_counts.values),
                'gini': calculate_gini(freq_counts.values),
                'share_XfR': has_xfr / max(len(caps_data), 1),
                'unique_nodes': len(freq_counts),
            })

    # Explicit columns keep the schema stable even when no window qualified.
    df_results = pd.DataFrame(results, columns=result_columns)
    print("\n--- High-Capacity Temporal Metrics (Time-Series Table) ---")
    print(f"Total Windows Computed: {len(df_results)}")
    if not df_results.empty:
        print(f"Average Gini: {df_results['gini'].mean():.4f}")
        print(df_results[['timestamp', 'gini', 'entropy', 'share_XfR']].head(10).to_markdown(index=False))
    return df_results

# --- Main Execution ---
if __name__ == "__main__":
    try:
        # Load Profiles once
        df_profiles = pd.read_csv(PROFILES_FILE)

        # 1. Generate Static Table 2
        df_table_2 = generate_static_table2(HICAP_FREQ_FILE)

        # 2. Generate Time-Series Table
        df_high_cap_time = load_high_cap_time(HICAP_TIME_FILE)
        df_time_series = perform_rolling_analysis(df_high_cap_time, df_profiles)

    except Exception as e:
        # Top-level boundary: stay best-effort (no re-raise), but show the
        # traceback so the failing step can be located, not just its message.
        import traceback

        print(f"A fatal error occurred during script execution: {e}")
        traceback.print_exc()

Mounting Google Drive...
Mounted at /content/drive

--- Generating Static Table 2 (Top 20 Frequencies from File 7) ---
Static Gini (G): 0.2731
Static Entropy (H): 4.1467
|   Rank | Node ID     |   Selection Frequency |   % of Total |   Cumulative Percentage (%) |
|-------:|:------------|----------------------:|-------------:|----------------------------:|
|      1 | TU4AVivT... |                   687 |        12.2  |                       12.2  |
|      2 | hA~tqWxf... |                   585 |        10.39 |                       22.58 |
|      3 | N7T9mVbu... |                   425 |         7.54 |                       30.13 |
|      4 | x-eBfsgO... |                   417 |         7.4  |                       37.53 |
|      5 | aTdMtnxQ... |                   395 |         7.01 |                       44.54 |
|      6 | WwNGsm99... |                   309 |         5.49 |                       50.03 |
|      7 | dz3ON-1U... |                   301 |         5.34 |               