# Analysis of output files
## Prepare environment, functions etc.

In [1]:
import os
import sys

# Add local src directory to the path. Then we are able to import our files.
# module_path = os.path.abspath(os.path.join('algorithm_tester'))
# if module_path not in sys.path:
#    sys.path.append(module_path)

In [2]:
import pandas
# import matplotlib
import numpy
from algorithm_tester.helpers import get_analysis_files, getFiles, FilePair
from algorithm_tester.mydataclasses import AnalysisFile

# Enable inline display of plots (the percent sign introduces an IPython "magic" shortcut):
# %matplotlib inline

path = 'tester_results'

In [3]:
#pandas.set_option('display.max_rows', None)
#pandas.read_csv?

In [4]:
# Important functions

def remove_bag_results(table):
    """Return only the columns that precede the "|" delimiter column.

    The delimiter is located by searching the first row of ``table`` for the
    literal string "|"; every column from that position onwards is discarded.

    Raises:
        ValueError: if the first row contains no "|" entry.
    """
    header_cells = list(table.iloc[0])
    cut = header_cells.index("|")
    return table.iloc[:, :cut]

def get_cols_list(path: str):
    """Read the column-description file at ``path`` and return its column names.

    The file is parsed as space-delimited text without a header row. The
    entries of its first row that come before the "|" delimiter are returned
    as a plain list.
    """
    description = pandas.read_csv(path, index_col=None, delimiter=" ", header=None)
    first_row = remove_bag_results(description).iloc[0]
    return list(first_row)

def load_analysis_files(folder_path: str, column_list):
    """Load every analysis file found under ``folder_path`` into one table.

    Each file reported by ``get_analysis_files`` is read as space-delimited
    text without a header, trimmed to the columns before the "|" delimiter,
    labelled with ``column_list`` and extended with the file's
    ``instance_info`` and ``dataset`` attributes.

    Args:
        folder_path: Directory handed to ``get_analysis_files``.
        column_list: Column names assigned to every loaded table. It must
            supply the 'strategy', 'id' and 'item_count' columns used for the
            index below, since only 'instance_info' and 'dataset' are added here.

    Returns:
        A DataFrame indexed by (strategy, dataset, id, item_count) and sorted
        by strategy, dataset, item_count, id.

    Raises:
        ValueError: if ``get_analysis_files`` yields no files.
    """
    tables = []
    for file in get_analysis_files(folder_path):
        curr_table = pandas.read_csv(file.full_path, index_col=None, delimiter=" ", header=None)
        curr_table = remove_bag_results(curr_table)
        curr_table.columns = column_list
        curr_table["instance_info"] = file.instance_info
        curr_table["dataset"] = file.dataset
        tables.append(curr_table)

    if not tables:
        raise ValueError(f"No analysis files found in {folder_path!r}")

    # DataFrame.append was removed in pandas 2.0; a single concat gives the
    # same rows and avoids re-copying the accumulated table on every file.
    output_table = pandas.concat(tables, ignore_index=True)

    output_table = output_table.set_index(['strategy', 'dataset', 'id', "item_count"])
    output_table.sort_values(by=["strategy", "dataset", "item_count", "id"], inplace=True)
    return output_table

def construct_table_from(filePair: FilePair):
    """Build a per-instance info table from one solution/data file pair.

    Both files are read as space-delimited text without a header row.

    Returns:
        A DataFrame with the columns "id", "best_value", "max_cost" and
        "item_count".
    """
    read_options = dict(header=None, index_col=None, delimiter=" ")
    solutions = pandas.read_csv(filePair.solutionFile, **read_options)
    instances = pandas.read_csv(filePair.dataFile, **read_options)

    # Taken from the first data row, second column, and applied to every row
    # of the result.
    item_count = instances.iloc[0, 1]

    # Keep the first solution row per value of column 0. reset_index() inserts
    # the old index as a new leading column, which shifts the original columns
    # one position to the right.
    solutions = solutions.drop_duplicates(subset=[0], keep='first').reset_index()

    # Skip the first four data columns, then take every second remaining one.
    # NOTE(review): presumably these are the cost entries of weight/cost
    # pairs - confirm against the data file format.
    every_other = instances.iloc[:, 4::2]

    info_table = pandas.concat(
        [
            solutions.iloc[:, 1],  # original solution column 0
            solutions.iloc[:, 3],  # original solution column 2
            every_other.max(axis=1),
        ],
        axis=1,
    )
    info_table.columns = ["id", "best_value", "max_cost"]
    info_table["item_count"] = item_count
    return info_table

def get_info_from_datafiles(path: str):
    """Collect the info tables of every file pair found under ``path``.

    Args:
        path: "/"-separated directory path passed to ``getFiles``. Its last
            component is used as the dataset name.

    Returns:
        A DataFrame indexed by (dataset, item_count, id) holding the rows
        produced by ``construct_table_from`` for each file pair, or ``None``
        when ``getFiles`` yields no pairs.
    """
    # Strip trailing slashes so that "results/Things/" still yields "Things"
    # instead of an empty dataset name.
    dataset = path.rstrip("/").split("/")[-1]

    tables = []
    for filePair in getFiles(path):
        curr_table = construct_table_from(filePair)
        curr_table["dataset"] = dataset
        tables.append(curr_table.set_index(["dataset", "item_count", "id"]))

    if not tables:
        return None

    # DataFrame.append was removed in pandas 2.0; concat keeps the same
    # row order and preserves the index of each table.
    return pandas.concat(tables)

def create_avg_time(table, name: str, column: str = "item_count"):
    """Average the 'time[#configs]' values per strategy and ``column`` value.

    Args:
        table: DataFrame in which "strategy" and ``column`` can be used as
            group keys and which has a 'time[#configs]' column.
        name: Suffix of the output file name.
        column: Second grouping key; its values become the rows of the result.

    Returns:
        A DataFrame with one row per ``column`` value and one column per
        strategy, holding the mean rounded to two decimals.

    Side effects:
        Writes the result to ``excel/avg_times_<name>.xlsx``.
    """
    group_keys = ["strategy", column]

    # Mean time for every (strategy, column value) combination.
    mean_times = table.groupby(group_keys)['time[#configs]'].mean()
    avg_times = mean_times.reset_index().set_index(group_keys).round(2)

    # Pivot the strategies into columns and drop the now redundant
    # 'time[#configs]' level from the column labels.
    avg_times = avg_times.unstack("strategy")
    avg_times.columns = avg_times.columns.droplevel()
    # Missing combinations stay NaN; a fillna("-") step was disabled here.

    # Persist the table as an Excel workbook.
    avg_times.to_excel(f'excel/avg_times_{name}.xlsx', header=True)

    return avg_times

## Put data from all analysis files into tables

In [5]:
# Read the column names shared by all analysis files: the entries of the
# first row of column_description.dat that come before the "|" delimiter.

cols = get_cols_list(f'{path}/column_description.dat')

In [6]:
# Load one result table per experiment folder. The generic 'instance_info'
# column is renamed per experiment, or dropped for Robust and Things.
balance_table = (
    load_analysis_files(f'{path}/Balance', cols)
    .rename(columns={'instance_info': 'balance'})
)
correlation_table = (
    load_analysis_files(f'{path}/Correlation', cols)
    .rename(columns={'instance_info': 'correlation'})
)
granularity_heavy_table = (
    load_analysis_files(f'{path}/GranularityHeavy', cols)
    .rename(columns={'instance_info': 'constant'})
)
granularity_light_table = (
    load_analysis_files(f'{path}/GranularityLight', cols)
    .rename(columns={'instance_info': 'constant'})
)
maxcost_table = (
    load_analysis_files(f'{path}/MaxCost', cols)
    .rename(columns={'instance_info': 'maxcost'})
)
maxweight_table = (
    load_analysis_files(f'{path}/MaxWeight', cols)
    .rename(columns={'instance_info': 'maxweight'})
)
robust_table = (
    load_analysis_files(f'{path}/Robust', cols)
    .drop(columns="instance_info")
)
things_table = (
    load_analysis_files(f'{path}/Things', cols)
    .drop(columns="instance_info")
)
# NOTE(review): the folder name is spelled 'WeightCapRation' - confirm it
# matches the directory on disk before "fixing" it.
weight_cap_ratio_table = (
    load_analysis_files(f'{path}/WeightCapRation', cols)
    .rename(columns={'instance_info': 'ratio'})
)

# Display one of the loaded tables as a sanity check.
granularity_heavy_table

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,maximum_sum,time[#configs],constant
strategy,dataset,id,item_count,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
BB,GranularityHeavy,1,10,1369,13,25
BB,GranularityHeavy,1,10,1231,24,10
BB,GranularityHeavy,1,10,1170,24,15
BB,GranularityHeavy,1,10,1256,24,20
BB,GranularityHeavy,1,10,1508,39,05
...,...,...,...,...,...,...
SBB,GranularityHeavy,100,10,1426,45,25
SBB,GranularityHeavy,100,10,979,13,15
SBB,GranularityHeavy,100,10,1458,12,05
SBB,GranularityHeavy,100,10,988,12,10


## Things analysis

In [7]:
# Average 'time[#configs]' per strategy and item_count; the call also writes
# excel/avg_times_things.xlsx.
create_avg_time(things_table, "things")

# Split the Things results by strategy: the Greedy rows keep their value as
# 'found_sum', the DP rows keep theirs as 'maximum_sum'. The time column is
# dropped from both.
greedy_table = things_table.iloc[things_table.index.get_level_values('strategy') == "Greedy"] \
    .rename(columns={'maximum_sum':'found_sum'}) \
    .drop(columns="time[#configs]")
dp_table = things_table.iloc[things_table.index.get_level_values('strategy') == "DP"] \
    .drop(columns="time[#configs]")

# Pair each Greedy row with the DP row of the same id, item_count and dataset,
# then reorder the two value columns by position.
# NOTE(review): combining `on=` with `right_index=True` is rejected with a
# MergeError by recent pandas releases - confirm against the pandas version
# this notebook is run with before re-executing.
greedy_table = pandas.merge(greedy_table, dp_table, on=['id', 'item_count', 'dataset'], right_index=True) \
    .iloc[:, [1, 0]]
# relative_error = |maximum_sum - found_sum| / maximum_sum
greedy_table["relative_error"] = numpy.abs(greedy_table["maximum_sum"] - greedy_table["found_sum"])/greedy_table["maximum_sum"]

# Compute the maximum and the average relative_error per strategy and item_count.
error_group = greedy_table.groupby(["strategy", "item_count"])["relative_error"]

error_max = error_group.max().reset_index().set_index(["strategy", "item_count"]).rename(columns={'relative_error':'max_relative_error'})
error_avg = error_group.mean().reset_index().set_index(["strategy", "item_count"]).rename(columns={'relative_error':'avg_relative_error'})

# Join both statistics, round to six decimals and move the strategy level into
# the columns. The flat two-name relabelling below only fits when exactly one
# strategy is present in greedy_table.
avg_mistake = error_max.join(error_avg).round(6).unstack("strategy")
avg_mistake.columns = ["max_relative_error", "avg_relative_error"]

avg_mistake.to_excel("excel/avg_mistake_things.xlsx")

avg_mistake


Unnamed: 0_level_0,max_relative_error,avg_relative_error
item_count,Unnamed: 1_level_1,Unnamed: 2_level_1
5,0.197938,0.009992
10,0.058071,0.007162
15,0.053123,0.002479
20,0.029691,0.00215
25,0.013593,0.001485
30,0.01412,0.001421


## Robust analysis

In [8]:
# Keep one row per distinct (strategy, time[#configs]) combination.
robust_analysis = (
    robust_table
    .reset_index()
    .drop_duplicates(subset=["strategy", "time[#configs]"])
)

# BB rows are exported to not_robust.xlsx, all other strategies to is_robust.xlsx.
robust_analysis.query("strategy == 'BB'").to_excel("excel/not_robust.xlsx")
robust_analysis.query("strategy != 'BB'").to_excel("excel/is_robust.xlsx")

## MaxCost analysis

In [9]:
# Create a table of average times per strategy for each value of the maxcost
# column; the call also writes excel/avg_times_maxcost.xlsx.
create_avg_time(maxcost_table, "maxcost", column = "maxcost")

strategy,BB,Brute,DP,DPWeight,Greedy,SBB
maxcost,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
100,60.28,1016.49,5049.9,10382.1,8.09,26.02
200,60.36,1016.42,10049.4,10382.1,8.09,25.89
300,60.13,1016.15,15047.3,10382.1,8.09,26.04
400,60.41,1016.47,20047.3,10382.1,8.09,26.03
500,60.41,1016.11,25046.4,10382.1,8.09,25.98


## MaxWeight analysis

In [10]:
# Create a table of average times per strategy for each value of the maxweight
# column; the call also writes excel/avg_times_maxweight.xlsx.
create_avg_time(maxweight_table, "maxweight", column = "maxweight")

strategy,BB,Brute,DP,DPWeight,Greedy,SBB
maxweight,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
100,60.27,1016.34,12547.6,4175.7,8.09,26.01
200,60.18,1016.32,12547.6,8314.5,8.1,26.13
300,59.94,1016.31,12547.6,12454.7,8.12,26.01
400,60.24,1016.29,12547.6,16591.5,8.11,26.13
500,59.96,1016.28,12547.6,20729.0,8.12,26.11


## WeightCapRatio analysis

In [11]:
# Create a table of average times per strategy for each value of the ratio
# column; the call also writes excel/avg_times_weight_cap_ratio.xlsx.
create_avg_time(weight_cap_ratio_table, "weight_cap_ratio", column = "ratio")

strategy,BB,Brute,DP,DPWeight,Greedy,SBB
ratio,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,45.2,65.27,12547.6,1293.9,2.42,64.51
2,129.72,185.65,12547.6,2592.6,3.53,167.81
3,242.97,370.03,12547.6,3890.2,4.52,284.48
4,330.19,583.92,12547.6,5189.2,5.34,338.48
5,332.37,779.13,12547.6,6489.4,6.06,286.72
6,240.63,917.38,12547.6,7785.5,6.86,170.39
7,134.41,989.07,12547.6,9083.5,7.49,72.13
8,60.51,1016.28,12547.6,10382.1,8.09,25.97
9,24.58,1022.36,12547.6,11680.2,8.98,12.3
10,10.0,1023.0,12547.6,12983.2,10.0,10.0


## Correlation analysis

In [12]:
# Create a table of average times per strategy for each value of the
# correlation column; the call also writes excel/avg_times_correlation.xlsx.
create_avg_time(correlation_table, "correlation", column = "correlation")

strategy,BB,Brute,DP,DPWeight,Greedy,SBB
correlation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Corr,174.83,998.64,12875.4,10261.9,7.62,136.44
Strong,186.83,1021.32,14008.7,10220.7,8.23,520.33
Uni,60.51,1016.28,12547.6,10382.1,8.09,25.97


## Balance analysis

In [13]:
# Create a table of average times per strategy for each value of the balance
# column; the call also writes excel/avg_times_balance.xlsx.
create_avg_time(balance_table, "balance", column = "balance")

strategy,BB,Brute,DP,DPWeight,Greedy,SBB
balance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bal,60.51,1016.28,12547.6,10382.1,8.09,25.97
Heavy,56.87,1015.28,12621.0,6671.1,8.47,28.01
Light,53.31,1017.74,12621.0,13401.4,7.97,20.47


## Granularity analysis

In [14]:
# Create a table of average times per strategy for each value of the constant
# column of the GranularityHeavy results; the call also writes
# excel/avg_times_granularity_heavy.xlsx.
create_avg_time(granularity_heavy_table, "granularity_heavy", column = "constant")

strategy,BB,Brute,DP,DPWeight,Greedy,SBB
constant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
5,49.77,1016.95,12507.3,11984.4,8.04,23.3
10,53.31,1017.74,12621.0,13401.4,7.97,20.47
15,58.59,1017.74,12770.7,14403.5,7.96,18.76
20,59.52,1017.9,12678.1,15161.3,7.91,19.26
25,60.69,1017.95,12830.3,15635.6,7.92,20.23


In [15]:
# Create a table of average times per strategy for each value of the constant
# column of the GranularityLight results; the call also writes
# excel/avg_times_granularity_light.xlsx.
create_avg_time(granularity_light_table, "granularity_light", column = "constant")

strategy,BB,Brute,DP,DPWeight,Greedy,SBB
constant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
5,49.77,1016.95,12507.3,11984.4,8.04,23.3
10,53.31,1017.74,12621.0,13401.4,7.97,20.47
15,58.59,1017.74,12770.7,14403.5,7.96,18.76
20,59.52,1017.9,12678.1,15161.3,7.91,19.26
25,60.69,1017.95,12830.3,15635.6,7.92,20.23
