In [None]:
import sys
import pandas as pd
import matplotlib.pyplot as plt

# Generate dataframe

In [None]:
# Default input file; it can be overridden with two positional command-line
# arguments (INPATH OUTPATH) when this code is run as a script.
inpath = 'data/codedqr-data-3249593.csv'
outpath = None

# Only honour exactly two *positional* arguments. A Jupyter kernel is commonly
# started as `ipykernel_launcher -f <connection-file>`, which also yields
# len(sys.argv) == 3; treating those as paths would make read_csv try to open
# '-f'. Option-style arguments (leading '-') are therefore ignored.
cli_args = sys.argv[1:]
if len(cli_args) == 2 and not any(arg.startswith('-') for arg in cli_args):
    inpath, outpath = cli_args

print(f'making plots from {inpath}')

# load csv file into a pandas dataframe
df = pd.read_csv(inpath)

# 'total' is the sum of every column from position 3 onward, minus 'recovery'.
# NOTE(review): this is positional, so it assumes the first three CSV columns
# are the configuration columns and the rest are timings -- confirm against the
# CSV header.
# A pre-existing 'total' column (e.g. from a file saved with the to_csv line
# below) is left out of the sum so it is not added into itself.
measured = df.drop(columns='total', errors='ignore')
df['total'] = measured.iloc[:, 3:].sum(axis=1) - measured['recovery']

# Keep a fixed column order: configuration first, then the timing columns.
df = df[['n', 'p', 'f', 'recovery','final solve','post-ortho','cs construct','pbmgs','total']]

# Optional: persist the trimmed frame. Note that this path is the same as the
# default input path above, so enabling it overwrites the input file.
# df.to_csv('data/codedqr-data-3249593.csv', index=False)


## Total job runtime

In [None]:

# Break the summed 'total' column (truncated to whole seconds) down into
# days, hours, minutes and seconds, peeling off one unit at a time.
seconds = int(df['total'].sum())
mins, seconds = divmod(seconds, 60)
hours, mins = divmod(mins, 60)
days, hours = divmod(hours, 24)

print(f'total execution time: {days}-{hours}:{mins}:{seconds}')


## Calculate Proportional Overhead and Sums

In [None]:

# --- Tqr: baseline time taken from the f == 0 run of the same (n, p) ---
# Every row starts with its own 'pbmgs' time; rows with f != 0 are then
# overwritten with the value recorded for f == 0 at the same n and p.
df['tqr'] = df['pbmgs']

uncoded = df['f'] == 0
coded = df['f'] != 0

# (n, p) -> tqr of the f == 0 row.
# NOTE(review): if several f == 0 rows share one (n, p), to_dict() keeps only
# the last of them -- confirm that this is the intended baseline.
baseline_by_config = df.loc[uncoded].set_index(['n', 'p'])['tqr'].to_dict()


def _baseline_tqr(row):
    """Return the f == 0 tqr for this row's (n, p), or the row's own tqr if there is none."""
    key = (row['n'], row['p'])
    return baseline_by_config.get(key, row['tqr'])


df.loc[coded, 'tqr'] = df.loc[coded].apply(_baseline_tqr, axis=1)

# --- Tcomp: 'pbmgs' time in excess of the baseline ---
df['tcomp'] = df['pbmgs'] - df['tqr']

# --- Each cost expressed as a proportion of Tqr ---
tqr = df['tqr']
df['encode'] = df['cs construct'] / tqr
df['post'] = df['post-ortho'] / tqr
# decode is additionally divided by (p + f)
df['decode'] = df['recovery'] / tqr / (df['p'] + df['f'])
df['comp'] = df['tcomp'] / tqr

# Overall overhead is the sum of comp, encode and post (decode is not included).
df['overhead'] = df['comp'] + df['encode'] + df['post']

# Show the five right-most columns
print(df.iloc[:, -5:])

## Take averages for all iterations

In [None]:
config_cols = ['n', 'f', 'p']

# Every column from position 3 onward is aggregated.
# NOTE(review): this assumes the first three columns of df are the
# configuration columns (n, p, f) -- confirm if the column order changes.
metric_cols = list(df.columns[3:])

# Group the rows (sorted by 'total') by configuration. The sort does not affect
# the mean/std below; df_sorted is kept so the name remains available.
df_sorted = df.sort_values(['total']).groupby(config_cols)

# Mean and sample standard deviation over ALL rows of each configuration.
# No rows are trimmed: the previous per-group lambda sliced with iloc[:], which
# keeps every row, so the built-in aggregations give the same result without
# passing the grouping columns through groupby.apply.
# (A median / inter-quartile-range variant was tried earlier and is not used.)
df_means = df_sorted[metric_cols].mean()
df_std = df_sorted[metric_cols].std()

print(df_means[:5])


# Absolute time and overhead over $p$

In [None]:
# Marker cycle shared by the plotting cells; indexed modulo its length.
markers = ['o', 'x', '*', '^', 'v', 'p', '>', '<', 'D', 'H']

# One two-panel figure per distinct n: 'total' (left) and 'overhead' (right)
# from df_means, with df_std as error bars, one dashed curve per f value
# plotted against the 'p' index level.
for n_val in df['n'].unique():

    means_n = df_means.query(f"n == {n_val}")
    stds_n = df_std.query(f"n == {n_val}")

    fig, (time, overhead) = plt.subplots(1, 2, figsize=(12, 4))
    fig.text(0.5,0, f'n={n_val}', size=12, ha="center")

    # (axes, column) pairs drawn for every f value, left panel first
    panels = ((time, 'total'), (overhead, 'overhead'))

    for i, f_val in enumerate(df['f'].unique()):

        means_f = means_n.query(f"f == {f_val}")
        stds_f = stds_n.query(f"f == {f_val}")

        p_vals = means_f.index.get_level_values('p')

        # Same label, line style and marker on both panels for this f value
        style = dict(label=f'f={f_val}', linestyle='--', marker=markers[i % len(markers)])

        for ax, column in panels:
            ax.errorbar(p_vals, means_f[column], yerr=stds_f[column], **style)

    time.set(title='Execution Time', xlabel='p', ylabel='Time (s)')
    time.legend()

    overhead.set(title='Overhead', xlabel='p', ylabel='Proportion of Tqr')
    overhead.legend()

    fig.show()
       

# Coding Breakdown over $p$

In [None]:
# One three-panel figure per distinct n, with a shared y axis: the 'encode',
# 'decode' and 'post' columns of df_means, one dashed curve per f value plotted
# against the 'p' index level. Uses the `markers` list defined in an earlier cell.
for n_val in df['n'].unique():

    means_n = df_means.query(f"n == {n_val}")

    fig, (enc, recov, post) = plt.subplots(1, 3, sharey=True, figsize=(15, 4))
    fig.text(0.5,-0.1, f'n={n_val}', size=12, ha="center")

    # (axes, column) pairs drawn for every f value, left to right
    panels = ((enc, 'encode'), (recov, 'decode'), (post, 'post'))

    for i, f_val in enumerate(df['f'].unique()):

        means_f = means_n.query(f"f == {f_val}")
        p_vals = means_f.index.get_level_values('p')

        # Same label, line style and marker on all three panels for this f value
        style = dict(label=f'f={f_val}', linestyle='--', marker=markers[i % len(markers)])

        for ax, column in panels:
            ax.plot(p_vals, means_f[column], **style)

    # Only the left-most panel carries the y label and the legend
    enc.set(title='Encoding Overhead', xlabel='p', ylabel='Proportion of Tqr')
    enc.legend()

    recov.set(title='Recovery Overhead', xlabel='p')

    post.set(title='Post Orthogonalization Overhead', xlabel='p')

    fig.show()


# Overhead over $f$

In [None]:
# One 2x2 figure per distinct n: the 'overhead', 'encode', 'decode' and 'post'
# columns of df_means, one dashed curve per p value plotted against the 'f'
# index level. Uses the `markers` list defined in an earlier cell.
for n_val in df['n'].unique():

    means_n = df_means.query(f"n == {n_val}")

    fig, ((overhead, enc), (recov, post)) = plt.subplots(2, 2, figsize=(12, 8))
    fig.text(0.5,0.05, f'n={n_val}', size=12, ha="center")

    # (axes, column) pairs drawn for every p value, in row-major panel order
    panels = (
        (overhead, 'overhead'),
        (enc, 'encode'),
        (recov, 'decode'),
        (post, 'post'),
    )

    for i, p_val in enumerate(df['p'].unique()):

        means_p = means_n.query(f"p == {p_val}")
        f_vals = means_p.index.get_level_values('f')

        # Same label, line style and marker on all four panels for this p value
        style = dict(label=f'p={p_val}', linestyle='--', marker=markers[i % len(markers)])

        for ax, column in panels:
            ax.plot(f_vals, means_p[column], **style)

    # Legend only on the top-left panel; y labels on the left column,
    # x labels on the bottom row.
    overhead.set(title='Overhead', ylabel='Proportion of Tqr')
    overhead.legend()

    enc.set(title='Encoding')

    recov.set(title='Recovery', xlabel='f', ylabel='Proportion of Tqr')

    post.set(title='Post Orthogonalization', xlabel='f')

    fig.show()