# Analysis of output files
## Prepare environment, functions etc.

In [None]:
import os
import sys
import pandas
import matplotlib
import numpy
from algorithm_tester.helpers import FilePair
import matplotlib.pyplot as plt


# Enable inline display of plots (the percent sign introduces an IPython "magic" shortcut):
%matplotlib inline

# Default directory name for tester result files.
path = 'tester_results'
# Directory prefix of the ".evo" trace files loaded by the evo plot cells.
# (Plain literal: the original f-string had no placeholders.)
evo_path = 'tester_results_evo'
# Directory prefix of the "<dataset>-opt.dat" reference solution files.
sol_path = '../data'

# Column names assigned to the first two columns of a solution file.
sol_cols = ["output_filename", "best_value"]
# Column names assigned to the columns of an ".evo" file.
evo_cols = ["output_filename", "num_of_satisfied_clauses", "weight"]

In [None]:
#pandas.set_option('display.max_rows', None)
#pandas.read_csv?

## Important functions

In [None]:
def get_file_paths_from_dir(path: str, include_sol: bool = False, include_instance: bool = True) -> (str, str, str):
    """Walk ``path`` and yield ``(dataset, root, file)`` for every matching ``.dat`` file.

    This is a generator. A file matches when its name contains ``".dat"``,
    does not contain ``"column"``, and contains either ``"_sol"`` (only when
    ``include_sol`` is true) or ``"_inst"`` (only when ``include_instance``
    is true).

    ``dataset`` is the last component of ``path`` followed by ``"_"`` and the
    sub-directory components (relative to ``path``) joined with ``"_"``.
    Files directly inside ``path`` therefore get ``"<name>_"``.

    Args:
        path: Directory to search recursively.
        include_sol: Yield files whose name contains ``"_sol"``.
        include_instance: Yield files whose name contains ``"_inst"``.

    Yields:
        Tuples ``(dataset, root, file)`` where ``root`` is the directory
        reported by ``os.walk`` and ``file`` is the bare file name.
    """
    # Normalise once so a trailing separator does not produce an empty prefix.
    base = os.path.normpath(path)
    dataset_prefix: str = os.path.basename(base)
    for root, _, files in os.walk(path):
        # relpath is separator-aware and, unlike str.replace, cannot strip
        # additional occurrences of the path text further down in `root`.
        relative = os.path.relpath(root, base)
        parts = [] if relative == os.curdir else relative.split(os.sep)
        dataset: str = dataset_prefix + "_" + "_".join(parts)
        for file in files:
            if "column" not in file and ".dat" in file:
                if ("_sol" in file and include_sol) or ("_inst" in file and include_instance):
                    yield (dataset, root, file)

def get_cols_list(path: str):
    """Return the values of the first row of a space-delimited file as a list.

    The file is read without a header row, so the first line of the file is
    the row that gets returned.
    """
    frame = pandas.read_csv(path, sep=" ", header=None, index_col=None)
    first_row = frame.iloc[0]
    return list(first_row)

def save_table(table, output_name):
    """Round ``table`` to 6 decimals and write it to ``excel/<output_name>_table.xlsx``.

    The sheet inside the workbook is named ``output_name``. Nothing is returned.
    """
    rounded = table.round(6)
    target = f"excel/{output_name}_table.xlsx"
    rounded.to_excel(target, sheet_name=output_name)

In [None]:
def save_plot(table, title: str, column_name: str, output_name: str, y_label: str = "Relative errors"):
    """Draw ``table[column_name]`` as a bar chart and save it as a PDF.

    Args:
        table: Table to plot from; ``column_name`` is selected with ``.loc``.
        title: Figure title (set with ``suptitle``).
        column_name: Column (or column group) to plot.
        output_name: File stem; the figure is written to ``excel/<output_name>.pdf``.
        y_label: Label of the y axis.

    Returns:
        The object returned by ``.plot.bar()``.
    """
    # Work on a copy of the selected column(s) so the caller's table is untouched.
    selected = table.loc[:, column_name].copy()

    bar_axes = selected.plot.bar()
    bar_axes.set_ylabel(y_label)

    parent_figure = bar_axes.get_figure()
    parent_figure.suptitle(title)
    parent_figure.savefig(f"excel/{output_name}.pdf", bbox_inches='tight')

    return bar_axes

def init_evo_plot(title: str, ylabel: str = "Satisfied clauses", figsize=(13, 3)):
    """Create a single-axes figure prepared for plotting evolution traces.

    Args:
        title: Title of the axes.
        ylabel: Label of the y axis; the x axis is always labelled ``'step'``.
        figsize: Figure size passed to ``plt.subplots``. The default is an
            immutable tuple so it cannot be altered between calls.

    Returns:
        The ``(fig, axes)`` pair returned by ``plt.subplots``.
    """
    fig, axes = plt.subplots(1, 1, sharex=True, sharey=True, figsize=figsize)
    axes.set_xlabel('step')
    axes.set_ylabel(ylabel)
    axes.set_title(title)

    return fig, axes

def get_evo_table(path: str, column: str = "num_of_satisfied_clauses"):
    """Load a space-delimited, header-less evo file and add a ``step`` column.

    The columns are named after the module-level ``evo_cols``. ``step``
    counts from 0 up to the number of non-missing entries in ``column``.
    """
    frame = pandas.read_csv(path, sep=" ", header=None, index_col=None)
    frame.columns = evo_cols
    step_count = frame[column].count()
    frame["step"] = range(step_count)
    return frame

def add_optimal_sol_plot(y_val, axes):
    """Draw a translucent grey horizontal line at ``y_val`` labelled "Optimal solution"."""
    line_style = {"color": "grey", "label": "Optimal solution", "alpha": 0.5}
    axes.axhline(y=y_val, **line_style)
    
def add_evo_plot(path: str, yaxis_label: str, axes, column: str = "num_of_satisfied_clauses"):
    """Plot one column of an evo file against its step index on ``axes``.

    Args:
        path: Evo file to load with ``get_evo_table``.
        yaxis_label: Legend label of the plotted line.
        axes: Object whose ``plot`` method draws the line.
        column: Column of the evo table to plot.

    Returns:
        The loaded evo table, including its ``step`` column.
    """
    evo_table = get_evo_table(path)

    ordered = evo_table.sort_values(by="step")
    by_step = ordered.set_index(["step"])
    series = by_step.loc[:, column]

    axes.plot(series, label=f'{yaxis_label}', alpha=0.8)
    return evo_table

def create_num_of_valid_table(table, column_name: str, table_name: str = "unknown"):
    """Compute the share of valid rows per ``column_name`` value and algorithm.

    For every ``(column_name, algorithm_name)`` group that has at least one
    row with ``is_valid == True``, the number of valid rows is divided by the
    total number of rows in that group. The result is stored in the
    ``"% valid"`` column; note that it is a fraction, not multiplied by 100.

    The two counts are joined on the group keys. Dividing the two count
    columns directly would pair rows by position, which goes wrong as soon as
    some group has no valid row and the two frames differ in length.

    Args:
        table: Results table with ``is_valid``, ``algorithm_name`` and
            ``column_name`` columns.
        column_name: Parameter column to group by.
        table_name: Used for the output file name and the sheet name.

    Returns:
        Table indexed by ``column_name`` with one ``"% valid"`` column per
        algorithm, rounded to 6 decimals. A copy rounded to 3 decimals is
        written to ``excel/<table_name>_num_of_valid.xlsx``.
    """
    keys = [column_name, "algorithm_name"]

    valid_counts = (
        table.query("is_valid == True")
        .groupby(keys)["is_valid"]
        .count()
        .rename("valid_count")
        .reset_index()
    )
    total_counts = (
        table.groupby(keys)["is_valid"]
        .count()
        .rename("total_count")
        .reset_index()
    )

    # Left join keeps exactly the groups that have valid rows, each paired
    # with its own total.
    t = valid_counts.merge(total_counts, on=keys, how="left")
    t["% valid"] = t["valid_count"] / t["total_count"]
    t = t.drop(columns=["valid_count", "total_count"])
    t = t.round(6)

    # One column per algorithm, one row per parameter value.
    t = t.set_index(["algorithm_name", column_name]).unstack("algorithm_name")

    t.round(3).to_excel(f"excel/{table_name}_num_of_valid.xlsx", sheet_name=table_name)

    return t

def create_avg_time_table(table, name: str, column_name: str = "item_count"):
    """Average ``elapsed_time`` and ``elapsed_configs`` per algorithm and ``column_name``.

    Args:
        table: Results table with ``algorithm_name``, ``elapsed_time``,
            ``elapsed_configs`` and ``column_name`` columns.
        name: Used for the output file name and the sheet name.
        column_name: Parameter column to group by.

    Returns:
        Table indexed by ``column_name`` with the two averages unstacked into
        one column per algorithm. The same table is written to
        ``excel/<name>_avg_times.xlsx``.
    """
    keys = ["algorithm_name", column_name]
    grouped = table.groupby(keys)

    mean_time = grouped['elapsed_time'].mean().reset_index()
    mean_configs = grouped['elapsed_configs'].mean().reset_index()

    avg_times = mean_time.merge(mean_configs, on=keys)

    # NOTE(review): ad-hoc attribute on the intermediate frame; the frame
    # returned below is built by set_index/unstack and presumably does not
    # carry it - confirm whether anything relies on it.
    avg_times.name = f"Avg #configs per {column_name}"
    ordered = avg_times.sort_values(by=column_name)

    # Spread the algorithms into separate columns.
    wide = ordered.set_index(keys).unstack("algorithm_name")

    # Persist the table as an Excel workbook.
    wide.to_excel(f'excel/{name}_avg_times.xlsx', sheet_name=name)

    return wide

def create_avg_error_table_by_best_value(table, column_name: str, table_name: str = "unknown"):
    """Maximum and mean ``relative_mistake_weights`` per ``column_name`` and algorithm.

    Only rows with ``has_best_value == True`` are used, and missing values in
    those rows are replaced by 0 before aggregating.

    Args:
        table: Results table.
        column_name: Parameter column to group by.
        table_name: Used for the output file name and the sheet name.

    Returns:
        Table indexed by ``column_name`` with ``max_relative_error`` and
        ``avg_relative_error`` unstacked into one column per algorithm. The
        same table is written to ``excel/<table_name>_avg_error_bestvalue.xlsx``.
    """
    keys = [column_name, "algorithm_name"]
    source = "relative_mistake_weights"

    usable = table.query("has_best_value == True").copy().fillna(0)
    grouped = usable.groupby(keys)[source]

    aggregates = {
        'max_relative_error': grouped.max(),
        'avg_relative_error': grouped.mean(),
    }
    error_max, error_avg = [
        values.reset_index().rename(columns={source: label})
        for label, values in aggregates.items()
    ]

    avg_error = pandas.merge(error_max, error_avg, on=keys)
    # NOTE(review): ad-hoc attribute on the intermediate frame; presumably
    # not carried over to the unstacked frame returned below - confirm.
    avg_error.name = f"Avg & max relative error per {column_name}"
    wide = avg_error.set_index(["algorithm_name", column_name]).unstack("algorithm_name")

    wide.to_excel(f"excel/{table_name}_avg_error_bestvalue.xlsx", sheet_name=table_name)

    return wide

def create_avg_error_table_by_clauses(table, column_name: str, table_name: str = "unknown"):
    """Maximum and mean ``relative_mistake_clauses`` per ``column_name`` and algorithm.

    Only rows with ``has_best_value == True`` are used, and missing values in
    those rows are replaced by 0 before aggregating. Nothing is written to
    disk; ``table_name`` is accepted but not used.

    Returns:
        Table indexed by ``column_name`` with ``max_relative_error`` and
        ``avg_relative_error`` unstacked into one column per algorithm.
    """
    keys = [column_name, "algorithm_name"]

    usable = table.query("has_best_value == True").copy()
    usable = usable.fillna(0)
    grouped = usable.groupby(keys)["relative_mistake_clauses"]

    # Name each aggregate before turning the group keys back into columns.
    worst = grouped.max().rename('max_relative_error').reset_index()
    mean = grouped.mean().rename('avg_relative_error').reset_index()

    avg_error = pandas.merge(worst, mean, on=keys)
    # NOTE(review): ad-hoc attribute on the intermediate frame; presumably
    # not carried over to the unstacked frame returned below - confirm.
    avg_error.name = f"Avg & max relative error per {column_name}"

    return avg_error.set_index(["algorithm_name", column_name]).unstack("algorithm_name")

def get_full_table_for_dataset(instance_path: str, sol_path: str, instance_cols, sol_cols):
    """Join one results file with its reference-solution file and add error columns.

    Both files are read as space-delimited text without a header row.

    Args:
        instance_path: Results file; its first two and last twelve columns
            are kept and named ``instance_cols``.
        sol_path: Solution file; its first two columns are kept and named
            ``sol_cols``. Only the first row per ``output_filename`` is used.
        instance_cols: Names for the 14 kept result columns.
        sol_cols: Names for the 2 kept solution columns.

    Returns:
        The outer join of both tables on ``output_filename`` with the added
        columns ``relative_mistake_weights``, ``relative_mistake_clauses`` and
        ``has_best_value``. Missing values are replaced by 0 and
        ``best_value`` is cast to ``int64``.
    """
    # First two columns plus the last twelve.
    kept_positions = [0, 1] + list(range(-12, 0))
    raw_results = pandas.read_csv(instance_path, sep=" ", header=None, index_col=None)
    results = raw_results.iloc[:, kept_positions]
    results.columns = instance_cols

    raw_solutions = pandas.read_csv(sol_path, sep=" ", header=None, index_col=None)
    solutions = raw_solutions.iloc[:, [0, 1]]
    solutions.columns = sol_cols
    solutions = solutions.drop_duplicates(subset="output_filename")

    merged = pandas.merge(solutions, results, on=["output_filename"], how="outer")
    merged = merged.astype({'found_value': 'int64'})

    best = merged["best_value"]
    merged["relative_mistake_weights"] = numpy.abs(best - merged["found_value"]) / best

    satisfied = merged["num_of_satisfied_clauses"]
    merged["relative_mistake_clauses"] = (merged["num_of_clauses"] - satisfied) / satisfied

    # Must be derived before the fillna below erases the missing markers.
    merged["has_best_value"] = ~numpy.isnan(best)

    return merged.fillna(0.0).astype({'best_value': 'int64'})

def get_all_tables_of_results(base: str):
    """Load and concatenate the result tables of every algorithm/dataset pair under ``base``.

    Column names come from the first row of ``column_description.dat`` with
    ``"vars_output"`` removed. For each of the three ``SA_SAT_V*`` algorithms
    and each listed dataset, ``<base>/<dataset>_<algorithm>.dat`` is joined
    with ``<sol_path>/<dataset>-opt.dat`` via ``get_full_table_for_dataset``.

    Returns:
        A single table with all loaded tables stacked, algorithm by algorithm.
    """
    cols = get_cols_list("column_description.dat")
    cols.remove("vars_output")

    algorithms = ("SA_SAT_V1", "SA_SAT_V2", "SA_SAT_V3")
    datasets = (
        "wuf-A/wuf20-88-A",
        "wuf-A/wuf20-91-A",
        "wuf-M/wuf20-78-M",
        "wuf-M/wuf50-201-M",
        "wuf-N/wuf20-78-N",
        "wuf-N/wuf50-201-N",
        "wuf-Q/wuf20-78-Q",
        "wuf-Q/wuf50-201-Q",
        "wuf-R/wuf20-78-R",
        "wuf-R/wuf50-201-R",
    )

    # Algorithms form the outer loop, datasets the inner one.
    frames = [
        get_full_table_for_dataset(
            f'{base}/{dataset}_{algorithm}.dat',
            f'{sol_path}/{dataset}-opt.dat',
            cols,
            sol_cols,
        )
        for algorithm in algorithms
        for dataset in datasets
    ]

    return pandas.concat(frames)

# Load all tables

In [None]:
# Load the combined result table of every experiment directory
# (one get_all_tables_of_results call per tester_results_* folder).
v1_simple = get_all_tables_of_results("tester_results_V1_Simple")
v2_moreCooling = get_all_tables_of_results("tester_results_V2_MoreCooling")
v3_higherTemp = get_all_tables_of_results("tester_results_V3_HigherTemp")
v4_moreCycles = get_all_tables_of_results("tester_results_V4_MoreCycles")
v5_lowerTemp = get_all_tables_of_results("tester_results_V5_LowerTemp")
v6_higherTemp = get_all_tables_of_results("tester_results_V6_HigherTemp")



In [None]:
# Percentage of SA_SAT_V1 rows in v1_simple whose is_valid flag is False.
print(100*v1_simple.query("algorithm_name == 'SA_SAT_V1'").query("is_valid == False")["algorithm_name"].count() / v1_simple.query("algorithm_name == 'SA_SAT_V1'")["algorithm_name"].count())

# Number of SA_SAT_V3 rows with relative_mistake_weights above 0.6
# (the value is neither printed nor stored).
v1_simple.query("algorithm_name == 'SA_SAT_V3'").query("relative_mistake_weights > 0.6")["algorithm_name"].count()

# Bare expression at the end of the cell: shows the table in a notebook.
v1_simple

# Base data

## Init temperature

In [None]:
# Stack the V1_Simple results with the V3 and V6 "HigherTemp" result sets.
# pandas.concat replaces DataFrame.append, which was removed in pandas 2.0.
full_table = pandas.concat([v1_simple, v3_higherTemp, v6_higherTemp])

# Max/avg relative weight error per init_temperature, plotted as bars.
error_init_temp = create_avg_error_table_by_best_value(full_table, "init_temperature")
save_plot(error_init_temp, "Avg error - init temperatures", "avg_relative_error", "init_temp_avg_error")


# Average elapsed time per init_temperature, plotted as bars.
time_init_temp = create_avg_time_table(full_table, "init_temperature", "init_temperature")
save_plot(time_init_temp, "Avg speed - init temperatures", "elapsed_time", "init_temp_time_ms", "Time[ms]")

# Share of valid results per init_temperature (last expression of the cell).
create_num_of_valid_table(full_table, "init_temperature", "init_temperature")


## Cycles

In [None]:
# Stack the V1_Simple results with the V4 "MoreCycles" result set.
# pandas.concat replaces DataFrame.append, which was removed in pandas 2.0.
full_table = pandas.concat([v1_simple, v4_moreCycles])

# Max/avg relative weight error per cycles value, plotted as bars.
error_cycles = create_avg_error_table_by_best_value(full_table, "cycles")
save_plot(error_cycles, "Avg error - cycles", "avg_relative_error", "cycles_avg_error")


# Average elapsed time per cycles value, plotted as bars.
time_cycles = create_avg_time_table(full_table, "cycles", "cycles")
save_plot(time_cycles, "Avg speed - cycles", "elapsed_time", "cycles_time_ms", "Time[ms]")


# Share of valid results per cycles value (last expression of the cell).
create_num_of_valid_table(full_table, "cycles", "cycles")



## Cooling coefficient

In [None]:
# Stack the V1_Simple results with the V2 "MoreCooling" result set.
# pandas.concat replaces DataFrame.append, which was removed in pandas 2.0.
full_table = pandas.concat([v1_simple, v2_moreCooling])

# Max/avg relative weight error per cooling value, plotted as bars.
error_cooling = create_avg_error_table_by_best_value(full_table, "cooling")
save_plot(error_cooling, "Avg error - cooling coefficients", "avg_relative_error", "cooling_avg_error")

# Average elapsed time per cooling value, plotted as bars.
time_cooling = create_avg_time_table(full_table, "cooling", "cooling")
save_plot(time_cooling, "Avg speed - cooling coefficients", "elapsed_time", "cooling_time_ms", "Time[ms]")

# Share of valid results per cooling value (last expression of the cell).
create_num_of_valid_table(full_table, "cooling", "cooling")


## Min temperature

In [None]:
# Stack the V1_Simple results with the V5 "LowerTemp" result set.
# pandas.concat replaces DataFrame.append, which was removed in pandas 2.0.
full_table = pandas.concat([v1_simple, v5_lowerTemp])

# Max/avg relative weight error per min_temperature, plotted as bars.
error_min_temp = create_avg_error_table_by_best_value(full_table, "min_temperature")
save_plot(error_min_temp, "Avg error - minimum temperature", "avg_relative_error", "min_temp_avg_error")

# Average elapsed time per min_temperature, plotted as bars.
time_min_temp = create_avg_time_table(full_table, "min_temperature", "min_temperature")
save_plot(time_min_temp, "Avg speed - minimum temperature", "elapsed_time", "min_temp_time_ms", "Time[ms]")

# Share of valid results per min_temperature (last expression of the cell).
create_num_of_valid_table(full_table, "min_temperature", "min_temperature")



# Evo files

In [None]:
# Reference values drawn as the horizontal "Optimal solution" lines.
num_of_clauses = 201
best_weight = 13570

# One trace per algorithm version, all taken from the "simple" evo directory.
base_versions = ("V1", "V2", "V3")

# Figure 1: number of satisfied clauses per step.
fig, axes = init_evo_plot("Base evo files for different algorithm versions")

for version in base_versions:
    add_evo_plot(f"{evo_path}/simple/wuf50-0987_SA_SAT_{version}_sol.evo", version, axes)
add_optimal_sol_plot(num_of_clauses, axes)
axes.legend()
fig.savefig(f"excel/algorithm_comparison_evo_clauses.pdf")

# Figure 2: summed weight per step, same traces.
fig, axes = init_evo_plot("Base evo files for different algorithm versions", ylabel="Sum weight")

for version in base_versions:
    add_evo_plot(f"{evo_path}/simple/wuf50-0987_SA_SAT_{version}_sol.evo", version, axes, "weight")
add_optimal_sol_plot(best_weight, axes)
axes.legend()
fig.savefig(f"excel/algorithm_comparison_evo_weight.pdf")

## Temperature

In [None]:
# (evo file, legend label) pairs for the V3 runs compared in both figures.
init_temp_runs = [
    (f"{evo_path}/simple/wuf50-0987_SA_SAT_V3_sol.evo", "InitTemp_1000"),
    (f"{evo_path}/init_temp/5000/wuf50-0987_SA_SAT_V3_sol.evo", "InitTemp_5000"),
    (f"{evo_path}/init_temp/10000/wuf50-0987_SA_SAT_V3_sol.evo", "InitTemp_10000"),
]

# Figure 1: summed weight per step.
fig, axes = init_evo_plot("Initial temperature comparison for V3", ylabel="Sum weight")

for evo_file, run_label in init_temp_runs:
    add_evo_plot(evo_file, run_label, axes, "weight")
add_optimal_sol_plot(best_weight, axes)
axes.legend()
fig.savefig(f"excel/init_temp_comparison_evo_weight.pdf")

# Figure 2: number of satisfied clauses per step.
fig, axes = init_evo_plot("Initial temperature comparison for V3")

for evo_file, run_label in init_temp_runs:
    add_evo_plot(evo_file, run_label, axes)
add_optimal_sol_plot(num_of_clauses, axes)
axes.legend()
fig.savefig(f"excel/init_temp_comparison_evo_clauses.pdf")