# Analysis of output files
## Prepare environment, functions etc.

In [1]:
import os
import sys

# Add local src directory to the path. Then we are able to import our files.
# module_path = os.path.abspath(os.path.join('algorithm_tester'))
# if module_path not in sys.path:
#    sys.path.append(module_path)

In [2]:
import pandas
# import matplotlib
import numpy
from algorithm_tester.helpers import get_analysis_files, getFiles, FilePair
from algorithm_tester.mydataclasses import AnalysisFile

# Enable inline display of plots (the percent sign introduces an IPython "magic" command):
# %matplotlib inline

# Root folder of the tester results; the cells below build their input paths from it.
path = 'tester_results'

In [3]:
#pandas.set_option('display.max_rows', None)
#pandas.read_csv?

In [4]:
# Important functions

def remove_bag_results(table):
    """Return the columns of ``table`` that precede the ``"|"`` delimiter.

    The delimiter is searched for in the first row only; every column to the
    left of its first occurrence is kept, for all rows.

    Raises:
        ValueError: If the first row contains no ``"|"`` cell.
    """
    first_row = list(table.iloc[0])
    cut = first_row.index("|")
    return table.iloc[:, :cut]

def get_cols_list(path: str):
    """Read the column names stored in the first row of a space-delimited file.

    The file is parsed without a header row; cells from the ``"|"`` delimiter
    onwards are dropped by ``remove_bag_results`` before the first row is
    returned as a plain list.
    """
    raw = pandas.read_csv(path, index_col=None, delimiter=" ", header=None)
    header_row = remove_bag_results(raw).iloc[0]
    return list(header_row)

def load_analysis_files(folder_path: str, column_list):
    """Load every analysis file found in ``folder_path`` into one table.

    Each file is read as a space-delimited, headerless CSV, cut at the ``"|"``
    delimiter by ``remove_bag_results`` and labelled with ``column_list``.
    Two columns, ``instance_info`` and ``dataset``, are added from the file's
    metadata.

    Args:
        folder_path: Folder handed to ``get_analysis_files``.
        column_list: Names for the columns left of the ``"|"`` delimiter. It
            has to provide ``strategy``, ``id`` and ``item_count``, because
            those are used for the index below and are not added here.

    Returns:
        A DataFrame indexed by ``(strategy, dataset, id, item_count)`` and
        sorted by strategy, dataset, item_count and id.

    Raises:
        ValueError: If ``get_analysis_files`` yields no files.
    """
    frames = []
    for file in get_analysis_files(folder_path):
        curr_table = pandas.read_csv(file.full_path, index_col=None, delimiter=" ", header=None)
        # Copy the slice so the column assignments below act on an
        # independent frame rather than on a view of the raw table.
        curr_table = remove_bag_results(curr_table).copy()
        curr_table.columns = column_list
        curr_table["instance_info"] = file.instance_info
        curr_table["dataset"] = file.dataset
        frames.append(curr_table)

    if not frames:
        raise ValueError(f"No analysis files found in {folder_path!r}")

    # A single concat replaces the repeated DataFrame.append calls, which were
    # removed in pandas 2.0 and copied the accumulated table on every step.
    output_table = pandas.concat(frames, ignore_index=True)
    output_table = output_table.set_index(['strategy', 'dataset', 'id', "item_count"])
    return output_table.sort_values(by=["strategy", "dataset", "item_count", "id"])

def construct_table_from(filePair: FilePair):
    """Build a per-instance info table from a solution file and its data file.

    Both files are read as space-delimited CSVs without a header row.

    Returns:
        A DataFrame with the columns ``id``, ``best_value``, ``max_cost`` and
        ``item_count``:

        * ``id`` and ``best_value`` are columns 0 and 2 of the solution file,
          after rows with a repeated column-0 value have been dropped.
        * ``max_cost`` is the row-wise maximum over every second data-file
          column starting at position 4 (presumably the item costs - confirm
          against the data file format).
        * ``item_count`` is the value in row 0, column 1 of the data file,
          applied to every row.
    """
    csv_options = {"header": None, "index_col": None, "delimiter": " "}
    solutions = pandas.read_csv(filePair.solutionFile, **csv_options)
    instances = pandas.read_csv(filePair.dataFile, **csv_options)

    item_count = instances.iloc[0, 1]

    # reset_index() puts the old row labels in front as an extra column, so the
    # original columns 0 and 2 sit at positions 1 and 3 afterwards.
    unique_solutions = solutions.drop_duplicates(subset=[0], keep='first').reset_index()

    every_second = instances.iloc[:, 4:].iloc[:, ::2]

    # NOTE(review): concat aligns the three series on their row labels.
    info_table = pandas.concat(
        [
            unique_solutions.iloc[:, 1],
            unique_solutions.iloc[:, 3],
            every_second.max(axis=1),
        ],
        axis=1,
    )
    info_table.columns = ["id", "best_value", "max_cost"]
    info_table["item_count"] = item_count
    return info_table

def get_info_from_datafiles(path: str):
    """Collect the info tables of all file pairs found under ``path``.

    Every pair returned by ``getFiles(path)`` is turned into a table by
    ``construct_table_from``, tagged with the dataset name and indexed by
    ``(dataset, item_count, id)``.

    Args:
        path: Dataset folder, written with ``/`` separators. Its last
            component is used as the dataset name.

    Returns:
        The concatenated DataFrame, or ``None`` when ``getFiles`` yields no
        file pairs.
    """
    # Strip trailing slashes so "data/NK/" yields "NK" rather than "".
    dataset = path.rstrip("/").split("/")[-1]

    frames = []
    for file_pair in getFiles(path):
        curr_table = construct_table_from(file_pair)
        curr_table["dataset"] = dataset
        frames.append(curr_table.set_index(["dataset", "item_count", "id"]))

    if not frames:
        return None

    # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
    return pandas.concat(frames)

def create_avg_time(table, name: str, column: str = "item_count"):
    """Average ``time[#configs]`` per strategy and ``column`` and export it.

    Args:
        table: DataFrame with ``strategy`` and ``column`` available as columns
            or index levels, plus a ``time[#configs]`` column.
        name: Suffix of the output file ``excel/avg_times_{name}.xlsx``.
        column: Second grouping key; it becomes the row index of the result.

    Returns:
        A DataFrame with one row per ``column`` value and one column per
        strategy, holding the mean time rounded to two decimals. Combinations
        that do not occur in ``table`` are NaN.
    """
    # Mean time per (strategy, column), with the strategies spread out into
    # separate columns.
    avg_times = (
        table.groupby(["strategy", column])['time[#configs]']
        .mean()
        .round(2)
        .unstack("strategy")
    )

    # Save the table to an Excel workbook; create the output folder first so a
    # fresh checkout does not fail on the write.
    os.makedirs("excel", exist_ok=True)
    avg_times.to_excel(f'excel/avg_times_{name}.xlsx', header=True)

    return avg_times

## Put data from all analysis files into tables

In [5]:
# Read the list of column names (first row of the column description file,
# up to the "|" delimiter) that is applied to every loaded analysis table.

cols = get_cols_list(f'{path}/column_description.dat')

In [6]:
# Load one result table per experiment folder. Where the per-file
# instance_info value is used later it is renamed to the parameter it holds;
# otherwise the column is dropped.
# balance_table = load_analysis_files(f'{path}/Balance', cols)
# correlation_table = load_analysis_files(f'{path}/Correlation', cols)
# granularity_heavy_table = load_analysis_files(f'{path}/GranularityHeavy', cols)
# granularity_light_table = load_analysis_files(f'{path}/GranularityLight', cols)
maxcost_table = (
    load_analysis_files(f'{path}/MaxCost', cols)
    .rename(columns={'instance_info':'maxcost'})
)
maxweight_table = (
    load_analysis_files(f'{path}/MaxWeight', cols)
    .rename(columns={'instance_info':'maxweight'})
)
robust_table = (
    load_analysis_files(f'{path}/Robust', cols)
    .drop(columns="instance_info")
)
things_table = (
    load_analysis_files(f'{path}/Things', cols)
    .drop(columns="instance_info")
)
# NOTE(review): the folder name is spelled 'WeightCapRation' - confirm it
# matches the directory on disk.
weight_cap_ratio_table = (
    load_analysis_files(f'{path}/WeightCapRation', cols)
    .rename(columns={'instance_info':'ratio'})
)

# Display the last table as the cell output.
weight_cap_ratio_table

EmptyDataError: No columns to parse from file

## Things analysis

In [None]:
# Average 'time[#configs]' per strategy and item_count; the table is also
# written to excel/avg_times_things.xlsx by create_avg_time.
create_avg_time(things_table, "things")

# Split the table by strategy: the Greedy rows (their 'maximum_sum' column is
# renamed to 'found_sum') and the DP rows, both without the time column.
greedy_table = things_table.iloc[things_table.index.get_level_values('strategy') == "Greedy"] \
    .rename(columns={'maximum_sum':'found_sum'}) \
    .drop(columns="time[#configs]")
dp_table = things_table.iloc[things_table.index.get_level_values('strategy') == "DP"] \
    .drop(columns="time[#configs]")

# Pair every Greedy row with the DP row of the same instance and compute
# relative_error = |maximum_sum - found_sum| / maximum_sum.
# NOTE(review): merge() receives both `on` and `right_index=True`; whether
# pandas accepts this combination depends on the version - verify.
# NOTE(review): .iloc[:, [1, 0]] picks two columns by position and assumes the
# merged frame's column order - TODO confirm which columns these are.
greedy_table = pandas.merge(greedy_table, dp_table, on=['id', 'item_count', 'dataset'], right_index=True) \
    .iloc[:, [1, 0]]
greedy_table["relative_error"] = numpy.abs(greedy_table["maximum_sum"] - greedy_table["found_sum"])/greedy_table["maximum_sum"]

# Maximum and average relative_error per strategy and item_count.
error_group = greedy_table.groupby(["strategy", "item_count"])["relative_error"]

error_max = error_group.max().reset_index().set_index(["strategy", "item_count"]).rename(columns={'relative_error':'max_relative_error'})
error_avg = error_group.mean().reset_index().set_index(["strategy", "item_count"]).rename(columns={'relative_error':'avg_relative_error'})

# Join both statistics, round to 6 decimals and move 'strategy' into the columns.
# The two-name column assignment only fits if a single strategy is left.
avg_mistake = error_max.join(error_avg).round(6).unstack("strategy")
avg_mistake.columns = ["max_relative_error", "avg_relative_error"]

# Export the summary and show it as the cell output.
avg_mistake.to_excel("excel/avg_mistake_things.xlsx")

avg_mistake


## Robust analysis

In [None]:
# Keep one row per distinct (strategy, time[#configs]) pair, then export the
# 'BB' rows and the rows of all other strategies to separate workbooks.
robust_analysis = (
    robust_table
    .reset_index()
    .drop_duplicates(subset=["strategy", "time[#configs]"])
)
robust_analysis[robust_analysis["strategy"] == "BB"].to_excel("excel/not_robust.xlsx")
robust_analysis[~(robust_analysis["strategy"] == "BB")].to_excel("excel/is_robust.xlsx")

## MaxCost analysis

In [None]:
# Average 'time[#configs]' per strategy and maxcost value;
# the table is also written to excel/avg_times_maxcost.xlsx.
create_avg_time(maxcost_table, "maxcost", column = "maxcost")

## MaxWeight analysis

In [None]:
# Average 'time[#configs]' per strategy and maxweight value;
# the table is also written to excel/avg_times_maxweight.xlsx.
create_avg_time(maxweight_table, "maxweight", column = "maxweight")

## WeightCapRatio analysis

In [None]:
# Average 'time[#configs]' per strategy and ratio value;
# the table is also written to excel/avg_times_weight_cap_ratio.xlsx.
create_avg_time(weight_cap_ratio_table, "weight_cap_ratio", column = "ratio")