In [5]:
import pandas as pd
import numpy as np
import os
import re
from collections import defaultdict

class TempestAblationParser:
    def __init__(self, base_dir, batch_count=10):
        self.base_dir = base_dir
        self.batch_count = batch_count
        self.variants = ['v0', 'v1', 'v2', 'v3']
        self.results = defaultdict(list)

    def clean_numeric(self, value):
        """Helper to force string values into clean floats."""
        if value is None or pd.isna(value):
            return np.nan
        if isinstance(value, (int, float)):
            return float(value)
        clean_val = str(value).replace(',', '').strip()
        try:
            return float(clean_val)
        except ValueError:
            return np.nan

    def parse_stdout(self, file_path):
        """Extracts end-to-end throughput and walks from stdout.txt"""
        metrics = {}
        if not os.path.exists(file_path): return metrics
        with open(file_path, 'r') as f:
            content = f.read()
            # Throughput: e.g., 1.27369e+06 walks/sec
            t_match = re.search(r'Throughput:\s+([\d.e+-]+)', content)
            if t_match: metrics['throughput_ks'] = self.clean_numeric(t_match.group(1)) / 1000
            # Total walks
            w_match = re.search(r'Total walks:\s+(\d+)', content)
            if w_match: metrics['total_walks'] = self.clean_numeric(w_match.group(1))
        return metrics

    def parse_nsys_latencies(self, file_path):
        """Extracts NVTX latencies (Ingestion, Sort, Rebuild)"""
        metrics = {}
        if not os.path.exists(file_path): return metrics
        df = pd.read_csv(file_path)
        mapping = {
            ':ingestion_batch': 'ingest_ms',
            ':node_index_rebuild': 'rebuild_ms',
            ':ingestion_sort_merge': 'sort_ms'
        }
        for range_name, key in mapping.items():
            row = df[df['Range'] == range_name]
            if not row.empty:
                val = row['Avg (ns)'].values[0]
                metrics[key] = self.clean_numeric(val) / 1e6
        return metrics

    def parse_nsys_kernels(self, file_path, variant):
        """Variant-aware kernel parsing (Total Accumulated Time per Batch)"""
        metrics = {}
        if not os.path.exists(file_path): return metrics
        df = pd.read_csv(file_path)

        if 'v2' in variant: # Step-based logic
            # Sum total time of all step kernels and divide by total batches
            kernels = df[df['Name'].str.contains('pick_start_edges_kernel|pick_intermediate_edges_kernel', regex=True)]
            if not kernels.empty:
                total_gpu_ns = kernels['Total Time (ns)'].sum()
                metrics['sampling_gpu_ms'] = self.clean_numeric(total_gpu_ns / self.batch_count) / 1e6
        else: # Full-walk logic (v0, v1, v3)
            row = df[df['Name'].str.contains('generate_random_walks_kernel', na=False)]
            if not row.empty:
                # For full-walk, one launch = one batch
                metrics['sampling_gpu_ms'] = self.clean_numeric(row['Avg (ns)'].values[0]) / 1e6
        return metrics

    def parse_nsys_memory(self, file_path):
        """Extracts D2D volume for achieved bandwidth calculation"""
        if not os.path.exists(file_path): return {}
        df = pd.read_csv(file_path)
        d2d = df[df['Operation'].str.contains('Device-to-Device')]
        val = d2d['Total (MB)'].values[0] if not d2d.empty else 0
        return {'d2d_mb': self.clean_numeric(val)}

    def parse_ncu_metrics(self, file_path):
        """Extracts Occupancy and Registers"""
        if not os.path.exists(file_path): return {}
        df = pd.read_csv(file_path)
        data_df = df.apply(pd.to_numeric, errors='coerce').dropna(how='all')
        if data_df.empty: return {}

        row = data_df.iloc[0]
        return {
            'occupancy': self.clean_numeric(row.get('sm__maximum_warps_per_active_cycle_pct')),
            'registers': self.clean_numeric(row.get('launch__registers_per_thread'))
        }

    def process_all(self):
        """Walks the directory structure and collects all metrics."""
        for run_dir in sorted(os.listdir(self.base_dir)):
            run_path = os.path.join(self.base_dir, run_dir)
            if not os.path.isdir(run_path) or 'run-' not in run_dir: continue

            for var_dir in os.listdir(run_path):
                var_path = os.path.join(run_path, var_dir)
                variant_id = var_dir.split('_')[0]
                if variant_id not in self.variants: continue

                m = {}
                m.update(self.parse_stdout(os.path.join(var_path, 'stdout.txt')))
                m.update(self.parse_nsys_latencies(os.path.join(var_path, 'nsys_rebuild_latencies.csv')))
                m.update(self.parse_nsys_kernels(os.path.join(var_path, 'nsys_kernel_summary.csv'), variant_id))
                m.update(self.parse_nsys_memory(os.path.join(var_path, 'nsys_memory_transfers.csv')))
                m.update(self.parse_ncu_metrics(os.path.join(var_path, 'ncu_raw_metrics.csv')))

                # Calculate Achieved HBM Throughput
                if 'd2d_mb' in m and 'rebuild_ms' in m and m['rebuild_ms'] > 0:
                    bw_gbs = (m['d2d_mb'] / 1024) / (m['rebuild_ms'] / 1000)
                    m['achieved_hbm_util'] = (bw_gbs / 1555) * 100 # A100 Peak

                self.results[variant_id].append(m)

    def summarize(self):
        """Aggregates across runs to produce Mean Â± Std Dev."""
        summary = {}
        for var in self.variants:
            data_list = self.results[var]
            if not data_list: continue

            df = pd.DataFrame(data_list).apply(pd.to_numeric, errors='coerce')
            stats = df.agg(['mean', 'std']).T
            summary[var] = stats

            print(f"\n=== Final Results for {var} ===")
            print(stats[['mean', 'std']])
        return summary

In [6]:
parser = TempestAblationParser('/Users/ashfaqsalehin/Documents/ablation_results_parsed')
parser.process_all()
parser.summarize()


=== Final Results for v0 ===
                           mean       std
throughput_ks      1.274122e+03  4.588695
total_walks        2.050816e+07  0.000000
ingest_ms          6.290281e+01  0.359079
rebuild_ms         5.545217e+00  0.022093
sort_ms            3.204124e+00  0.023669
sampling_gpu_ms    1.046515e+02  0.036255
d2d_mb             7.126649e+03  0.000000
occupancy          3.750000e+01  0.000000
registers          7.000000e+01  0.000000
achieved_hbm_util  8.071272e+01  0.321404

=== Final Results for v1 ===
                           mean       std
throughput_ks      1.282898e+03  5.344134
total_walks        2.050816e+07  0.000000
ingest_ms          6.334607e+01  0.482846
rebuild_ms         5.581059e+00  0.063290
sort_ms            3.241443e+00  0.024114
sampling_gpu_ms    1.011157e+02  0.031938
d2d_mb             7.126649e+03  0.000000
occupancy          5.000000e+01  0.000000
registers          6.400000e+01  0.000000
achieved_hbm_util  8.020158e+01  0.906240

=== Final Resul

{'v0':                            mean       std
 throughput_ks      1.274122e+03  4.588695
 total_walks        2.050816e+07  0.000000
 ingest_ms          6.290281e+01  0.359079
 rebuild_ms         5.545217e+00  0.022093
 sort_ms            3.204124e+00  0.023669
 sampling_gpu_ms    1.046515e+02  0.036255
 d2d_mb             7.126649e+03  0.000000
 occupancy          3.750000e+01  0.000000
 registers          7.000000e+01  0.000000
 achieved_hbm_util  8.071272e+01  0.321404,
 'v1':                            mean       std
 throughput_ks      1.282898e+03  5.344134
 total_walks        2.050816e+07  0.000000
 ingest_ms          6.334607e+01  0.482846
 rebuild_ms         5.581059e+00  0.063290
 sort_ms            3.241443e+00  0.024114
 sampling_gpu_ms    1.011157e+02  0.031938
 d2d_mb             7.126649e+03  0.000000
 occupancy          5.000000e+01  0.000000
 registers          6.400000e+01  0.000000
 achieved_hbm_util  8.020158e+01  0.906240,
 'v2':                            mean  