# Visualization of time/space complexity of `tab_err`

This notebook visualizes the runtime and memory usage of various error models in the `tab_err` library as a function of the number of rows and columns in the input table.


In [1]:
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
import re
import os

In [None]:
# Load the profiling results for the numeric and string error models.
# To use the old data instead, remove "_new" from the two file names below.
num_path = "../results/numeric_times_new.csv"
str_path = "../results/string_times_new.csv"

num_data = pd.read_csv(num_path)
str_data = pd.read_csv(str_path)

# Preview the first rows of each table (numeric first, then string).
print(num_data.head(), str_data.head(), sep="\n")

   EAR-AddDelta-0.1-100-2  EAR-AddDelta-0.1-100-4  EAR-AddDelta-0.1-100-6  \
0                0.065886                0.048989                0.047907   
1                0.049014                0.047832                0.046399   
2                0.048264                0.049292                0.047410   
3                0.045544                0.047549                0.054327   
4                0.045911                0.048524                0.049987   

   EAR-AddDelta-0.1-100-8  EAR-AddDelta-0.1-100-10  EAR-AddDelta-0.1-1000-2  \
0                0.052615                 0.047336                 0.047655   
1                0.048359                 0.048132                 0.047802   
2                0.047119                 0.046614                 0.047627   
3                0.047122                 0.047341                 0.047275   
4                0.047219                 0.048712                 0.047704   

   EAR-AddDelta-0.1-1000-4  EAR-AddDelta-0.1-1000-6  EAR-AddDe

In [None]:
def plot_timing_by_model(
    df: pd.DataFrame,
    out_dir: str = "../plots",
    use_median: bool = False,
    file_prefix: str = "new_",
):
    """
    Plot runtime and memory surfaces over (n_rows, n_cols) for each error model.

    Column names are parsed with the pattern
    ``<descriptor>-<n_rows>-<n_cols>`` (runtime) or
    ``<descriptor>-<n_rows>-<n_cols>-mem_MB`` (memory), for example
    ``EAR-AddDelta-0.1-100-2``. Columns that do not match are ignored. For every
    descriptor, and for each metric that has at least one column, a 3D surface
    of the aggregated value is saved as
    ``<out_dir>/<file_prefix><descriptor>_surface_<time|mem>.pdf``.

    Args:
        df (pd.DataFrame): One column per (descriptor, n_rows, n_cols[, mem_MB])
            combination; each column's values are aggregated into one grid cell.
        out_dir (str): Directory the PDFs are written to. It is created if it
            does not exist yet.
        use_median (bool): Aggregate each column with the median instead of the
            mean.
        file_prefix (str): Prefix prepended to every output file name.

    Returns:
        None. The figures are written to disk and closed, not displayed.
    """
    # Create the target directory up front so saving does not fail on a fresh checkout.
    os.makedirs(out_dir, exist_ok=True)

    # Greedy ``(.+)`` keeps everything before the last two integer fields as the descriptor.
    pattern = re.compile(r"(.+)-(\d+)-(\d+)(?:-mem_MB)?$")
    groups = {}

    for col in df.columns:
        # Column labels are not guaranteed to be strings; match on their text form
        # but keep the original label for the DataFrame lookup later.
        name = str(col)
        match = pattern.match(name)
        if not match:
            continue
        base_desc, n_rows, n_col = match.groups()
        kind = "memory" if name.endswith("-mem_MB") else "time"
        bucket = groups.setdefault(base_desc, {"time": [], "memory": []})
        bucket[kind].append((int(n_rows), int(n_col), col))

    for base_desc, data in groups.items():
        for kind in ("time", "memory"):
            entries = sorted(data[kind])
            if not entries:
                continue

            n_rows_vals = sorted({r for r, _, _ in entries})
            n_cols_vals = sorted({c for _, c, _ in entries})
            # Position lookups via dicts instead of repeated list.index scans.
            row_pos = {r: i for i, r in enumerate(n_rows_vals)}
            col_pos = {c: j for j, c in enumerate(n_cols_vals)}

            # Grid cells without a matching column stay NaN.
            Z = np.full((len(n_rows_vals), len(n_cols_vals)), np.nan)
            for r, c, col in entries:
                series = df[col]
                Z[row_pos[r], col_pos[c]] = series.median() if use_median else series.mean()

            X, Y = np.meshgrid(n_cols_vals, n_rows_vals)

            fig = plt.figure(figsize=(10, 8))
            try:
                ax = fig.add_subplot(111, projection="3d")
                ax.plot_surface(X, Y, Z, cmap=cm.viridis, edgecolor="k")

                ax.set_xlabel("Number of Columns")
                ax.set_ylabel("Number of Rows")
                zlabel = "Memory (MB)" if kind == "memory" else "Runtime (s)"
                ax.set_zlabel(zlabel)
                ax.set_title(
                    f"{zlabel} as a Function of n_cols and n_rows \n({base_desc.replace('-', ' ')})"
                )

                safe_name = base_desc.replace("/", "_").replace(" ", "_")
                suffix = "mem" if kind == "memory" else "time"
                fig.savefig(
                    os.path.join(out_dir, f"{file_prefix}{safe_name}_surface_{suffix}.pdf")
                )
            finally:
                # Always release the figure, even if plotting or saving failed,
                # so a long loop does not accumulate open figures.
                plt.close(fig)


### Plot Numeric Error Model Profiling

In [None]:
# Save one surface PDF per numeric error model and metric, using the default output directory.
plot_timing_by_model(df=num_data)

### Plot String Error Model Profiling

In [6]:
# Save one surface PDF per string error model and metric, using the default output directory.
plot_timing_by_model(df=str_data)