## Imports

In [None]:
# All imports here!
import pandas as pd

import matplotlib.pyplot as plt
import numpy as np



## Data Import

In [None]:
# Load the three Excel result sheets, in the same order as the names they are bound to.
bva_results, results_30m, results_60m = (
    pd.read_excel(path)
    for path in ('bva_results.xlsx', '30m_results.xlsx', '60m_results.xlsx')
)

## Edusat-BVA Results

The benchmark on which the performance is tested is the SAT23 competition benchmark.
We will present graphs for two types of runs:
1. CNFs that required at most 30 minutes.
2. CNFs that required at least 30 minutes and at most 60 minutes.

In [None]:
# The 30-minute and 60-minute tables share the same columns.
# From each one, keep only the rows whose "Improvement Factor" differs from 1.
results_30m_filtered = results_30m.loc[results_30m['Improvement Factor'].ne(1)]
results_60m_filtered = results_60m.loc[results_60m['Improvement Factor'].ne(1)]

# Rows whose 'test' value appears in both filtered tables.
mutual_values_in_30m = results_30m_filtered.loc[
    results_30m_filtered['test'].isin(results_60m_filtered['test'])
]
mutual_values_in_60m = results_60m_filtered.loc[
    results_60m_filtered['test'].isin(results_30m_filtered['test'])
]

# Report how many rows of the 30-minute table are shared with the 60-minute table.
if not (mutual_values_in_30m.empty and mutual_values_in_60m.empty):
    print(f"{len(mutual_values_in_30m)} mutual values found")
else:
    print("No mutual values found")

# Stack the two filtered tables; the row count must be the sum of both parts.
results = pd.concat([results_30m_filtered, results_60m_filtered])
assert len(results) == len(results_30m_filtered) + len(results_60m_filtered)

columns_to_convert = [
    'EDUSAT-BVA_clauses_reduced',
    'EDUSAT-BVA_clauses_added',
    'EDUSAT-BVA_clauses_deleted',
    'EDUSAT-BVA_auxiliary_variables',
]

# Missing values in the count columns become 0 so the columns can be cast to int.
for col in columns_to_convert:
    results[col] = results[col].fillna(0).astype(int)


In [None]:
def survival_plot(results):
    """Plot the sorted solve times of EDUSAT and EDUSAT-BVA on shared axes.

    Each solver's times are sorted independently in ascending order and
    drawn against the positions 1..N, so the point at x = k is that
    solver's k-th smallest solve time.

    Parameters:
        results (pd.DataFrame): Must contain the 'EDUSAT_solve_time' and
            'EDUSAT-BVA_solve_time' columns.
    """
    _, axes = plt.subplots()
    axes.set(
        xlabel='Number of tests solved',
        ylabel='Time (s)',
        title='Survival plot of the Results',
    )

    # One ascending curve per solver, keyed by its legend label.
    curves = {
        'EDUSAT': np.sort(results['EDUSAT_solve_time']),
        'EDUSAT-BVA': np.sort(results['EDUSAT-BVA_solve_time']),
    }

    # Positions 1..N, sized by the EDUSAT curve.
    n_tests = len(curves['EDUSAT'])
    positions = np.arange(1, n_tests + 1)

    # Fixed tick grid: every 5 tests on x, every 600 s up to 4000 s on y.
    axes.set_xticks(np.arange(0, n_tests + 1, 5))
    axes.set_yticks(np.arange(0, 4001, 600))

    for solver_label, sorted_times in curves.items():
        axes.plot(positions, sorted_times, marker='o', label=solver_label)

    axes.legend()
    plt.show()

# Draw the survival plot for the combined, filtered results table.
survival_plot(results)

In [None]:
def scatter_plot(results, x_key='EDUSAT_solve_time', y_key='EDUSAT-BVA_solve_time', x_label=None, y_label=None, title='Scatter plot comparison', min_time_filter=None):
    """Scatter one column of `results` against another, with a y=x reference line.

    The two columns are treated as times, so a smaller value counts as the
    better result. The number of rows won by each column is printed, and the
    number of plotted points is written onto the axes.

    Parameters:
        results (pd.DataFrame): Table containing the `x_key` and `y_key` columns.
        x_key (str): Column plotted on the x axis.
        y_key (str): Column plotted on the y axis.
        x_label (str | None): X axis label; when empty or None it is derived
            from `x_key` (underscores become spaces, title-cased).
        y_label (str | None): Y axis label; derived from `y_key` the same way.
        title (str): Plot title.
        min_time_filter (float | None): When given, only rows where at least
            one of the two values is >= this threshold are kept.
    """
    fig, ax = plt.subplots()
    ax.set_xlabel(x_label if x_label else x_key.replace('_', ' ').title())
    ax.set_ylabel(y_label if y_label else y_key.replace('_', ' ').title())
    ax.set_title(title)

    x_times = results[x_key]
    y_times = results[y_key]

    # Keep a row when either side reaches the threshold; rows where both
    # values are below it are dropped.
    if min_time_filter is not None:
        mask = (x_times >= min_time_filter) | (y_times >= min_time_filter)
        x_times = x_times[mask]
        y_times = y_times[mask]

    ax.scatter(x_times, y_times, alpha=0.6)
    max_time = max(x_times.max(), y_times.max())
    ax.plot([0, max_time], [0, max_time], linestyle='dashed', color='red', label='y=x')

    # Lower time wins. A point above the y=x line has y > x, so the x column
    # was better there; a point below the line means the y column was better.
    # Ties (points on the line) are counted for neither side.
    x_better = np.sum(x_times < y_times)
    y_better = np.sum(y_times < x_times)

    # Write the number of plotted points onto the axes (data coordinates).
    ax.text(max_time * 0.8, max_time * 0.1, f'Total points: {len(x_times)}')

    # The win counts are printed rather than drawn on the graph.
    print(f'{x_key} better: {x_better}')
    print(f'{y_key} better: {y_better}')

    ax.legend()
    plt.show()

In [None]:
# EDUSAT_solve_time (x axis) against EDUSAT-BVA_solve_time (y axis).
scatter_plot(
    results,
    x_key='EDUSAT_solve_time',
    y_key='EDUSAT-BVA_solve_time',
    x_label='EDUSAT Solve Time (s)',
    y_label='EDUSAT-BVA Solve Time (s)',
    title='EDUSAT vs EDUSAT-BVA Solve',
)

In [None]:
# EDUSAT_solve_time (x axis) against EDUSAT-BVA_search_time (y axis).
scatter_plot(
    results,
    x_key='EDUSAT_solve_time',
    y_key='EDUSAT-BVA_search_time',
    x_label='EDUSAT Solve Time (s)',
    y_label='EDUSAT-BVA Search Time (s)',
    title='EDUSAT vs EDUSAT-BVA Search',
)

We'll present the same scatter plots again, this time keeping only the tests where at least one of the two compared times is 60 seconds or more.

In [None]:
# Compare EDUSAT_solve_time with EDUSAT-BVA_solve_time, keeping only the tests
# where at least one of the two times is 60 seconds or more (min_time_filter=60).
scatter_plot(results, 'EDUSAT_solve_time', 'EDUSAT-BVA_solve_time', 
             'EDUSAT Solve Time (s)', 'EDUSAT-BVA Solve Time (s)', 
             'EDUSAT vs EDUSAT-BVA Solve', min_time_filter=60)

# Compare EDUSAT_solve_time with EDUSAT-BVA_search_time, keeping only the tests
# where at least one of the two times is 60 seconds or more (min_time_filter=60).
scatter_plot(results, 'EDUSAT_solve_time', 'EDUSAT-BVA_search_time', 
             'EDUSAT Solve Time (s)', 'EDUSAT-BVA Search Time (s)', 
             'EDUSAT vs EDUSAT-BVA Search', min_time_filter=60)

We'll present below the most notable gains and regressions

In [None]:
def print_rows(df, caption="Top 10 Results"):
    """Display a centered, captioned table of the key result columns of `df`.

    The solver columns are shown under shorter display names. `df` itself is
    left untouched: the renaming is applied to a copy.

    Parameters:
        df (pd.DataFrame): Must contain 'EDUSAT_solve_time',
            'EDUSAT-BVA_solve_time', 'EDUSAT-BVA_clauses_reduced',
            'EDUSAT-BVA_auxiliary_variables' and 'Improvement Factor'.
        caption (str): Caption shown with the table. Defaults to
            "Top 10 Results"; pass a different text for other selections.
    """
    # Source column -> display name. 'Improvement Factor' maps to itself so
    # that it is still part of the selected columns below.
    column_rename_map = {
        'EDUSAT_solve_time': 'EDUSAT (s)',
        'EDUSAT-BVA_solve_time': 'EDUSAT-BVA (s)',
        'EDUSAT-BVA_clauses_reduced': 'Clauses Reduced',
        'EDUSAT-BVA_auxiliary_variables': 'Extra Vars',
        'Improvement Factor': 'Improvement Factor'
    }

    # rename() without inplace returns a new frame, so the caller's
    # DataFrame keeps its original column names.
    renamed = df.rename(columns=column_rename_map)

    # Show only the renamed columns, in the order of the map.
    selected_columns = list(column_rename_map.values())

    # NOTE(review): `display` is not imported in this file; presumably it is
    # the function the notebook (IPython) environment provides.
    display(
        renamed[selected_columns]
        .style.set_table_styles(
            [
                {"selector": "th", "props": [("text-align", "center")]},  # Center headers
                {"selector": "td", "props": [("text-align", "center")]}   # Center cell values
            ]
        )
        .set_caption(caption)
        .set_properties(**{"text-align": "center"})  # Center the cell contents as well
    )

In [None]:
# The ten rows with the largest 'Improvement Factor', copied before display.
top = results.nlargest(n=10, columns='Improvement Factor').copy()
print_rows(top)

In [None]:
# The ten rows with the smallest 'Improvement Factor', copied before display.
# NOTE(review): print_rows captions its table "Top 10 Results", so this
# bottom-10 table is shown under that caption as well.
bottom = results.nsmallest(n=10, columns='Improvement Factor').copy()
print_rows(bottom)

Finally, we'll draw a graph that represents the ratio between the preprocessing time and the total solve time for the relevant EDUSAT-BVA results.

In [None]:
def plot_preprocessing_ratio(results):
    """
    Plots the ratio of preprocessing time to total solve time for EDUSAT+BVA results.

    Rows whose ratio is not a finite number (missing values, or a solve time
    of zero) are left out of the histogram.

    Parameters:
        results (pd.DataFrame): DataFrame containing 'EDUSAT-BVA_solve_time' and 'EDUSAT-BVA_preprocessing_time' columns.
    """
    # Compute the ratio without creating an extra column
    ratios = results['EDUSAT-BVA_preprocessing_time'] / results['EDUSAT-BVA_solve_time']

    # Dividing by a zero solve time yields +/-inf (or NaN for 0/0). Such
    # values cannot be placed in a histogram bin, so drop them first.
    ratios = ratios.replace([float('inf'), float('-inf')], float('nan')).dropna()

    # Create the histogram
    plt.figure(figsize=(8, 5))
    plt.hist(ratios, bins=20, edgecolor='black', alpha=0.7)

    # Labels and title
    plt.xlabel('Preprocessing Time / Total Solve Time')
    plt.ylabel('Frequency')
    plt.title('Distribution of Preprocessing Time Ratio in EDUSAT+BVA')

    # Show the plot
    plt.show()


In [None]:
# Histogram of preprocessing time / solve time over the combined, filtered results table.
plot_preprocessing_ratio(results)