In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [None]:
# Load the per-model evaluation results and the statistics table in one pass;
# the order of the paths matches the order of the names on the left.
results_gsmax, results_astactic, analysis = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../evaluation/test_gs_max_int_emb/results.csv",
        "../evaluation/test_astactic/results.csv",
        "stats.csv",
    )
)

In [None]:
# Tag every row with the model that produced it, so the two result sets stay
# distinguishable once they are stacked into a single frame.
results_gsmax['model'] = 'gsmax'
results_astactic['model'] = 'astactic'

# Stack the two tagged frames row-wise with a fresh 0..n-1 index.
results = pd.concat(
    [results_gsmax, results_astactic],
    axis=0,
    ignore_index=True,
)

In [None]:
# Attach the statistics from `analysis` to each model's results; rows are
# matched on the (lib, project, proof) triple using the default inner join.
merge_keys = ["lib", "project", "proof"]
df_gsmax = results_gsmax.merge(analysis, on=merge_keys)
df_astactic = results_astactic.merge(analysis, on=merge_keys)
df_astactic.head(10)

In [None]:
# Show which columns are available after merging the astactic results with `analysis`.
df_astactic.columns

# Overall comparison by project

In [None]:
# Total number of successful rows per model: astactic first, then gsmax.
for model_frame in (df_astactic, df_gsmax):
    print(model_frame['success'].eq(True).astype(int).sum())

In [None]:
def compare_bar_charts(df, col1, col2, filename):
    """Draw two columns of ``df`` as side-by-side bars and save the figure as a PDF.

    Args:
        df: DataFrame with one row per bar group; its index supplies the
            x-axis tick labels.
        col1: Column drawn as the left bar of each group (also its legend label).
        col2: Column drawn as the right bar of each group (also its legend label).
        filename: Output path without extension; ``.pdf`` is appended.
    """
    plt.figure()
    x_axis = np.arange(len(df))
    # Each group is 0.8 wide: two 0.4-wide bars centred 0.2 either side of the tick.
    plt.bar(x_axis - 0.2, df[col1], 0.4, label=col1)
    plt.bar(x_axis + 0.2, df[col2], 0.4, label=col2)
    # Take the tick labels from the frame that was passed in. Reading them from
    # a notebook-level global would mislabel (or fail on) any other frame.
    plt.xticks(x_axis, df.index.tolist(), rotation=45, ha='right')
    plt.legend()
    plt.savefig(f"{filename}.pdf", bbox_inches='tight')

In [None]:
# Build a per-project summary: one row per project, and for each model the
# number of successful proofs, the total number of proofs and the success rate.
cols = ['project', 'success_count', 'proof_count']

# Per-row indicator columns; summing them within a project yields the counts.
for model_frame in (df_astactic, df_gsmax):
    model_frame['success_count'] = (model_frame['success'] == True).astype(int)
    model_frame['proof_count'] = 1

# Aggregate per project, derive the success rate, then prefix every column
# with the model name so the two tables can sit side by side.
df1_project = (
    df_astactic[cols]
    .groupby(by=['project'])
    .sum()
    .assign(success_pct=lambda grouped: grouped['success_count'] / grouped['proof_count'])
    .add_prefix('astactic_')
)
df2_project = (
    df_gsmax[cols]
    .groupby(by=['project'])
    .sum()
    .assign(success_pct=lambda grouped: grouped['success_count'] / grouped['proof_count'])
    .add_prefix('gsmax_')
)

# Left join keeps every project present in the astactic table; the result is
# ordered by gsmax successes and the longest project name is shortened.
df_project = (
    pd.merge(df1_project, df2_project, how='left', on='project')
    .sort_values(by='gsmax_success_count', ascending=False)
    .rename(index={'coq-library-undecidability': 'coq-library-und.'})
)
df_project

In [None]:
# One bar chart for success rates and one for raw success counts.
for left_col, right_col, out_name in (
    ('astactic_success_pct', 'gsmax_success_pct', 'compare_results_pct'),
    ('astactic_success_count', 'gsmax_success_count', 'compare_results_count'),
):
    compare_bar_charts(df_project, col1=left_col, col2=right_col, filename=out_name)

# Histogram Plots

In [None]:
def plot_hist(df, col, quantile, title, filename):
    """Plot a histogram of one column with mean/median markers and save it as a PDF.

    The mean and median are computed on the full column; only the values that
    are drawn are trimmed at the requested quantile.

    Args:
        df: DataFrame containing ``col``.
        col: Name of the numeric column to plot.
        quantile: Upper cut-off in [0, 1]; values above this quantile are not
            drawn. ``1`` keeps every value.
        title: Text placed before the column name in the plot title.
        filename: Output path without extension; ``.pdf`` is appended.
    """
    plt.figure()
    values = df[col]
    mean = values.mean()
    median = values.median()
    # Inclusive cut-off: a strict "<" would drop the maximum when quantile=1
    # and would leave nothing at all for a constant column.
    values = values[values <= values.quantile(quantile)]
    # One bin per distinct value, capped at 100; never fewer than one bin,
    # because a bin count of zero is rejected by the histogram routine.
    n_bins = max(1, min(100, values.nunique()))
    values.hist(bins=n_bins, label="count")
    plt.axvline(mean, color="red", linestyle='dashed', label=f"mean = {mean:.2f}")
    plt.axvline(median, color="orange", linestyle='dashed', label=f"median = {median:.2f}")
    plt.legend()
    plt.title(f"{title} ({col})")
    plt.savefig(f'{filename}.pdf')

In [None]:
def plot_hist_success(df, col, success, quantile):
    """Plot a histogram of ``col`` for the rows whose ``success`` equals ``success``.

    The mean marker is computed on all selected rows; only the values that are
    drawn are trimmed at the requested quantile.

    Args:
        df: DataFrame containing ``col`` and a ``success`` column.
        col: Name of the numeric column to plot.
        success: Value of the ``success`` column to select (True or False).
        quantile: Upper cut-off in [0, 1]; values above this quantile are not
            drawn. ``1`` keeps every value.
    """
    plt.figure()
    values = df.loc[df["success"] == success, col]
    mean = values.mean()
    # Inclusive cut-off: a strict "<" would drop the maximum when quantile=1
    # and would leave nothing at all for a constant column.
    values = values[values <= values.quantile(quantile)]
    # One bin per distinct value, capped at 100; never fewer than one bin,
    # because a bin count of zero is rejected by the histogram routine.
    n_bins = max(1, min(100, values.nunique()))
    values.hist(bins=n_bins, label="count")
    plt.axvline(mean, color="red", label=f"mean = {mean:.2f}")
    plt.legend()
    plt.title(f"{col} (success={success}, quantile={quantile})")

In [None]:
def compare_hist_plot(df1, df2, df1_name, df2_name, col, success, quantile):
    """Plot histograms of ``col`` for two frames side by side.

    Each panel uses only the rows whose ``success`` equals ``success``. The
    mean marker of a panel is computed on all selected rows; only the values
    that are drawn are trimmed at the requested quantile.

    Args:
        df1: DataFrame for the left panel; needs ``col`` and ``success``.
        df2: DataFrame for the right panel; needs ``col`` and ``success``.
        df1_name: Title of the left panel.
        df2_name: Title of the right panel.
        col: Name of the numeric column to plot.
        success: Value of the ``success`` column to select (True or False).
        quantile: Upper cut-off in [0, 1], applied to each frame separately;
            ``1`` keeps every value.
    """
    fig, axes = plt.subplots(1, 2)

    for ax, frame, name in zip(axes, (df1, df2), (df1_name, df2_name)):
        values = frame.loc[frame["success"] == success, col]
        mean = values.mean()
        # Inclusive cut-off: a strict "<" would drop the maximum when
        # quantile=1, contradicting the "quantile=1" shown in the title.
        values = values[values <= values.quantile(quantile)]
        # One bin per distinct value, capped at 100; never fewer than one bin,
        # because a bin count of zero is rejected by the histogram routine.
        n_bins = max(1, min(100, values.nunique()))
        values.hist(bins=n_bins, label="count", ax=ax)

        ax.set_title(name)
        ax.axvline(mean, color="red", label=f"mean = {mean:.2f}")
        ax.legend()

    fig.suptitle(f"{col} (success={success}, quantile={quantile})")

In [None]:
# Preview the first rows of the statistics table loaded from stats.csv.
analysis.head()

In [None]:
# Medians of `nodes_p99` and `n_steps` over the rows with n_steps > 0.
# Both values are collected into one Series so that the cell displays both:
# written as two bare expressions, only the last one would be shown.
# (numpy is already imported as `np` in the notebook's import cell.)
with_steps = analysis[analysis['n_steps'] > 0]
pd.Series(
    {stat_col: np.median(with_steps[stat_col]) for stat_col in ('nodes_p99', 'n_steps')},
    name='median',
)

In [None]:
# Histograms over the rows with n_steps > 0; each entry below is
# (column, quantile cut-off, title text, output file stem).
rows_with_steps = analysis[analysis['n_steps'] > 0]
for hist_col, cutoff, hist_title, out_stem in (
    ('n_steps', 0.99, 'number of proof steps', 'hist_num_proof_steps'),
    ('height_p99', 0.99, 'height of context terms', 'hist_height_of_terms'),
    ('nodes_p99', 0.98, 'number of term nodes', 'hist_num_term_nodes'),
):
    plot_hist(rows_with_steps, hist_col, cutoff, hist_title, out_stem)

In [None]:
# n_steps distributions for both models: successes then failures, first with
# quantile=0.99 and then with quantile=1.
for cutoff in (0.99, 1):
    for outcome in (True, False):
        compare_hist_plot(df_astactic, df_gsmax, "astactic", "graphsage-max", "n_steps", outcome, cutoff)

In [None]:
# nodes_p100 distributions for both models: successes then failures, first
# with quantile=0.99 and then with quantile=1.
for cutoff in (0.99, 1):
    for outcome in (True, False):
        compare_hist_plot(df_astactic, df_gsmax, "astactic", "graphsage-max", "nodes_p100", outcome, cutoff)

In [None]:
# num_tactics distributions for successful rows only, with quantile=0.99 and
# then quantile=1.
for cutoff in (0.99, 1):
    compare_hist_plot(df_astactic, df_gsmax, "astactic", "graphsage-max", "num_tactics", True, cutoff)

# Scatter Plots

In [None]:
def plot_scatter(df, xcol, ycol, quantile):
    """Scatter ``xcol`` against ``ycol``, coloured by the ``success`` column.

    Rows with ``success == False`` are drawn in red and rows with
    ``success == True`` in blue.

    Args:
        df: DataFrame containing ``xcol``, ``ycol`` and ``success``.
        xcol: Column plotted on the x-axis.
        ycol: Column plotted on the y-axis.
        quantile: Upper cut-off in [0, 1] applied first to ``xcol`` and then,
            on the remaining rows, to ``ycol``. ``1`` keeps every row.
    """
    plt.figure()
    points = df[[xcol, ycol, 'success']]
    # Inclusive cut-offs: a strict "<" would drop the largest points when
    # quantile=1, contradicting the "quantile=1" shown in the title.
    points = points[points[xcol] <= points[xcol].quantile(quantile)]
    points = points[points[ycol] <= points[ycol].quantile(quantile)]
    # Failures are drawn first so that successes end up on top.
    for outcome, colour in ((False, 'red'), (True, 'blue')):
        subset = points[points['success'] == outcome]
        plt.scatter(subset[xcol], subset[ycol], c=colour, label=f'success = {outcome}')
    plt.legend()
    plt.xlabel(xcol)
    plt.ylabel(ycol)
    plt.title(f"{xcol} vs {ycol} (quantile={quantile})")

In [None]:
def compare_scatter_plot(df1, df2, df1_name, df2_name, xcol, ycol, quantile):
    """Scatter ``xcol`` against ``ycol`` for two frames side by side.

    In each panel, rows with ``success == False`` are drawn in red and rows
    with ``success == True`` in blue.

    Args:
        df1: DataFrame for the left panel; needs ``xcol``, ``ycol``, ``success``.
        df2: DataFrame for the right panel; needs ``xcol``, ``ycol``, ``success``.
        df1_name: Title of the left panel.
        df2_name: Title of the right panel.
        xcol: Column plotted on the x-axis.
        ycol: Column plotted on the y-axis.
        quantile: Upper cut-off in [0, 1], applied per frame first to ``xcol``
            and then, on the remaining rows, to ``ycol``. ``1`` keeps every row.
    """
    fig, axes = plt.subplots(1, 2)

    for ax, frame, name in zip(axes, (df1, df2), (df1_name, df2_name)):
        points = frame[[xcol, ycol, 'success']]
        # Inclusive cut-offs: a strict "<" would drop the largest points when
        # quantile=1, contradicting the "quantile=1" shown in the title.
        points = points[points[xcol] <= points[xcol].quantile(quantile)]
        points = points[points[ycol] <= points[ycol].quantile(quantile)]

        # Failures are drawn first so that successes end up on top.
        for outcome, colour in ((False, 'red'), (True, 'blue')):
            subset = points[points['success'] == outcome]
            ax.scatter(subset[xcol], subset[ycol], c=colour, label=f'success = {outcome}')

        ax.set_title(name)
        ax.set_ylabel(ycol)
        ax.set_xlabel(xcol)
        ax.legend()

    fig.suptitle(f"{xcol} vs {ycol} (quantile={quantile})")

In [None]:
# n_steps against nodes_p100 for both models, with quantile=1 and then quantile=0.99.
for cutoff in (1, 0.99):
    compare_scatter_plot(df_astactic, df_gsmax, "astactic", "graphsage-max", "n_steps", "nodes_p100", cutoff)