# Profiling Artifact Explorer

This notebook inspects everything under `profiles/` (CPU, GPU, trace, and response outputs) and builds visual + tabular analyses to spot performance culprits (CPU vs GPU bottlenecks, hot functions, underutilization patterns).

In [3]:
# Section 1: Enumerate Profile Directory Contents
from pathlib import Path
import pandas as pd
import os, time
BASE = Path("profiles")
rows = []
for p in BASE.rglob('*'):
    if p.is_file():
        stat = p.stat()
        rows.append({
            'relative': p.relative_to(BASE).as_posix(),
            'size_bytes': stat.st_size,
            'modified': time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(stat.st_mtime)),
            'ext': p.suffix.lower()
        })
if rows:
    files_df = pd.DataFrame(rows)
    if 'relative' in files_df.columns:
        files_df = files_df.sort_values('relative')
else:
    files_df = pd.DataFrame(columns=['relative','size_bytes','modified','ext'])
files_df

Unnamed: 0,relative,size_bytes,modified,ext


In [4]:
# Section 2: Infer File Types and Metadata
import re
meta_rows = []
pattern = re.compile(r'(trace|dmon|mpstat|response)_(\d{8}T\d{6})')
for _, r in files_df.iterrows():
    m = pattern.search(r['relative'])
    run_id = m.group(2) if m else None
    kind = m.group(1) if m else None
    meta_rows.append({**r, 'run_id': run_id, 'kind': kind})
meta_df = pd.DataFrame(meta_rows)
meta_df

In [5]:
# Section 16 (placed early for reuse): Helper Utilities
import json, statistics
from typing import Dict, Any

# Guarantee expected columns exist even if meta_df was created from zero rows
expected_cols = ['relative','size_bytes','modified','ext','run_id','kind']
for c in expected_cols:
    if c not in meta_df.columns:
        meta_df[c] = None

def classify_file(path: str) -> str:
    if not isinstance(path, str):
        return 'unknown'
    if path.endswith('.json') and 'trace_' in path:
        return 'torch_trace'
    if 'dmon_' in path:
        return 'gpu_dmon'
    if 'mpstat_' in path:
        return 'cpu_mpstat'
    if 'response_' in path:
        return 'response'
    return 'other'

if 'relative' in meta_df.columns:
    meta_df['class'] = meta_df['relative'].apply(classify_file)
else:
    meta_df['class'] = []
meta_df

KeyError: 'relative'

In [None]:
# Section 7: Parse GPU dmon and CPU mpstat Logs
import pandas as pd
from io import StringIO

def load_dmon(path: Path) -> pd.DataFrame:
    txt = path.read_text()
    lines = [l for l in txt.splitlines() if l and not l.startswith('#')]
    if not lines:
        return pd.DataFrame()
    cols = ['date','time','gpu','pwr','gtemp','mtemp','sm','mem','enc','dec','jpg','ofa','mclk','pclk','pviol','tviol','fb','bar1','ccpm','sbecc','dbecc','pci','rxpci','txpci']
    data=[]
    for l in lines:
        parts=l.split()
        if len(parts) < len(cols):
            continue
        row=dict(zip(cols,parts[:len(cols)]))
        data.append(row)
    df=pd.DataFrame(data)
    numeric=['pwr','gtemp','mtemp','sm','mem','mclk','pclk','fb']
    for c in numeric:
        df[c]=pd.to_numeric(df[c], errors='coerce')
    df['ts']=pd.to_datetime(df['date']+' '+df['time'])
    return df

def load_mpstat(path: Path) -> pd.DataFrame:
    lines=path.read_text().splitlines()
    rows=[]
    for l in lines:
        if not l or l.startswith('Linux') or l.startswith('CPU ') or l.startswith('04:'): # rely on repeating header
            pass
        parts=l.split()
        if len(parts)==11 and parts[1].isdigit():
            # time line grouped earlier; skip
            continue
    # Simplified placeholder: real parser would extract per-core; omitted for brevity
    return pd.DataFrame()

gpu_df = pd.DataFrame()
for p in BASE.glob('nvidia/dmon_*.log'):
    gpu_df = load_dmon(p)
    break
gpu_df.head()

In [None]:
# Section: Heuristic Classification (GPU vs CPU bound)
classification = {}
if not gpu_df.empty:
    mean_sm = gpu_df['sm'].mean()
    peak_sm = gpu_df['sm'].max()
    mem_bw = gpu_df['mem'].mean()
    classification['mean_sm'] = mean_sm
    classification['peak_sm'] = peak_sm
    classification['mean_mem_util'] = mem_bw
    if mean_sm < 50:
        classification['verdict'] = 'Likely CPU / launch bound (low average SM utilization)'
    else:
        classification['verdict'] = 'GPU fairly utilized; inspect kernel efficiency next'
classification