# Analysis of output files
## Prepare environment, functions etc.

In [1]:
import os
import sys

# Add local src directory to the path. Then we are able to import our files.
# module_path = os.path.abspath(os.path.join('algorithm_tester'))
# if module_path not in sys.path:
#    sys.path.append(module_path)

In [2]:
import pandas
# import matplotlib
import numpy
from algorithm_tester.helpers import get_analysis_files, getFiles, FilePair
from algorithm_tester.mydataclasses import AnalysisFile

# Enable inline plot display (the percent sign introduces an IPython "magic" command):
# %matplotlib inline

# Root folder with the result files; later cells build their file paths from it.
path = 'tester_results'

In [3]:
#pandas.set_option('display.max_rows', None)
#pandas.read_csv?

In [4]:
# Important functions

def remove_bag_results(table):
    """Keep only the columns that precede the ``"|"`` separator.

    The separator position is looked up in the first row of ``table``; that
    column and every column after it are dropped.

    Args:
        table: DataFrame whose first row contains a ``"|"`` cell.

    Returns:
        The positional column slice of ``table`` before the separator.

    Raises:
        ValueError: If the first row contains no ``"|"`` cell.
    """
    first_row_cells = list(table.iloc[0])
    separator_position = first_row_cells.index("|")
    leading_columns = slice(0, separator_position)
    return table.iloc[:, leading_columns]

def get_cols_list(path: str):
    """Read the column names stored in the first row of a description file.

    The file is parsed as headerless, space-delimited text and trimmed with
    ``remove_bag_results``, so only the cells before the ``"|"`` separator
    are returned.

    Args:
        path: Location of the description file.

    Returns:
        The first-row values before the separator, as a list.
    """
    description = pandas.read_csv(path, header=None, index_col=None, delimiter=" ")
    trimmed = remove_bag_results(description)
    first_row = trimmed.iloc[0]
    return list(first_row)

def load_analysis_files(folder_path: str, column_list):
    """Load every analysis file of a folder into one indexed table.

    Each file reported by ``get_analysis_files`` is read as headerless,
    space-delimited text, trimmed at the ``"|"`` separator, labelled with
    ``column_list`` and tagged with the file's ``dataset`` attribute.

    Args:
        folder_path: Folder handed to ``get_analysis_files``.
        column_list: Column names for the trimmed tables. It must provide the
            ``strategy``, ``id`` and ``item_count`` columns used for the index.

    Returns:
        A DataFrame indexed by ``(strategy, dataset, id, item_count)`` and
        sorted by strategy, dataset, item_count and id.

    Raises:
        ValueError: If ``get_analysis_files`` yields no files.
    """
    tables = []
    for file in get_analysis_files(folder_path):
        curr_table = pandas.read_csv(file.full_path, index_col=None, delimiter=" ", header=None)
        # Copy the trimmed slice so the assignments below act on an
        # independent frame rather than on a view of the raw table.
        curr_table = remove_bag_results(curr_table).copy()
        curr_table.columns = column_list
        curr_table["dataset"] = file.dataset
        tables.append(curr_table)

    if not tables:
        raise ValueError(f"No analysis files found in {folder_path!r}")

    # DataFrame.append was removed in pandas 2.0; a single concat also avoids
    # re-copying the accumulated rows on every iteration.
    output_table = pandas.concat(tables, ignore_index=True)
    output_table = output_table.set_index(['strategy', 'dataset', 'id', "item_count"])
    return output_table.sort_values(by=["strategy", "dataset", "item_count", "id"])

def construct_table_from(filePair: FilePair):
    """Build a per-row summary table from a solution/data file pair.

    Both files are read as headerless, space-delimited text.

    Args:
        filePair: Object exposing the ``solutionFile`` and ``dataFile`` paths.

    Returns:
        A DataFrame with the columns ``id`` and ``best_value`` (first and
        third column of the solution file, one row per distinct first-column
        value), ``max_cost`` (row-wise maximum over every second data-file
        column starting at the fifth) and ``item_count`` (second field of the
        first data-file row, repeated on every row).
    """
    csv_options = {"header": None, "index_col": None, "delimiter": " "}
    solutions = pandas.read_csv(filePair.solutionFile, **csv_options)
    instances = pandas.read_csv(filePair.dataFile, **csv_options)

    # reset_index() inserts the old index as column 0, which shifts the
    # solution columns one position to the right.
    unique_solutions = solutions.drop_duplicates(subset=[0], keep='first').reset_index()
    ids = unique_solutions.iloc[:, 1]
    best_values = unique_solutions.iloc[:, 3]

    # Every second column from position 4 onwards.
    row_maxima = instances.iloc[:, 4::2].max(axis=1)

    info_table = pandas.concat([ids, best_values, row_maxima], axis=1)
    info_table.columns = ["id", "best_value", "max_cost"]
    info_table["item_count"] = instances.iloc[0, 1]
    return info_table

def get_info_from_datafiles(path: str):
    """Collect the summary tables of all file pairs found under ``path``.

    Every pair yielded by ``getFiles`` is turned into a table with
    ``construct_table_from`` and tagged with the dataset name, which is the
    last component of ``path``.

    Args:
        path: "/"-separated folder path; a trailing "/" is ignored.

    Returns:
        A DataFrame indexed by ``(dataset, item_count, id)``, or ``None`` when
        ``getFiles`` yields no pairs.
    """
    # Strip trailing separators first so "data/Things/" still names "Things"
    # instead of producing an empty dataset name.
    dataset = path.rstrip("/").split("/")[-1]

    tables = []
    for filePair in getFiles(path):
        curr_table = construct_table_from(filePair)
        curr_table["dataset"] = dataset
        tables.append(curr_table.set_index(["dataset", "item_count", "id"]))

    if not tables:
        return None

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    return pandas.concat(tables)

## Put data from all analysis files into tables

In [5]:
# Column names for the analysis tables, read from the first row of the
# column description file.

cols = get_cols_list(f'{path}/column_description.dat')

In [46]:
# Load the analysis table of the "Things" dataset folder.
# The loads for the other dataset folders are kept commented out; uncomment a
# line to load that folder as well.
# balance_table = load_analysis_files(f'{path}/Balance', cols)
# correlation_table = load_analysis_files(f'{path}/Correlation', cols)
# granularity_heavy_table = load_analysis_files(f'{path}/GranularityHeavy', cols)
# granularity_light_table = load_analysis_files(f'{path}/GranularityLight', cols)
# maxcost_table = load_analysis_files(f'{path}/MaxCost', cols)
# maxweight_table = load_analysis_files(f'{path}/MaxWeight', cols)
# robust_table = load_analysis_files(f'{path}/Robust', cols)
things_table = load_analysis_files(f'{path}/Things', cols)
# weight_cap_ratio_table = load_analysis_files(f'{path}/WeightCapRatio', cols)

# Example filter (disabled): keep only the rows of a single strategy.
# things_table.iloc[things_table.index.get_level_values('strategy') == "Greedy"]
things_table

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,maximum_sum,time[#configs]
strategy,dataset,id,item_count,Unnamed: 4_level_1,Unnamed: 5_level_1
BB,Things,1,5,485,9
BB,Things,2,5,559,14
BB,Things,3,5,626,7
BB,Things,4,5,523,9
BB,Things,5,5,589,14
...,...,...,...,...,...
SBB,Things,96,25,3176,37
SBB,Things,97,25,2843,2454
SBB,Things,98,25,3027,266
SBB,Things,99,25,2885,1382


## Get average time values for all strategies

In [49]:
# Start from the loaded analysis table.
avg_times = things_table

# Mean of the time[#configs] column for every (strategy, item_count) pair,
# rounded to two decimals.
avg_times = avg_times.groupby(["strategy", "item_count"])['time[#configs]'].mean().round(2)

# Pivot the strategy index level into columns: one row per item_count,
# one column per strategy.
avg_times = avg_times.unstack("strategy")

# Save the table as an Excel workbook. The target folder is created first so
# the export does not fail when it is missing.
os.makedirs('excel', exist_ok=True)
avg_times.to_excel('excel/avg_times.xlsx', header=True)

avg_times

strategy,BB,Brute,DP,DPWeight,Greedy,SBB
item_count,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
5,9.37,30.74,3183.8,2563.35,3.98,6.64
10,60.51,1016.28,12547.6,10382.1,8.09,25.97
15,391.11,32648.02,28080.6,22912.5,12.56,153.05
20,2724.37,1046948.22,49603.2,40826.0,16.81,400.47
25,29475.44,33531731.33,77524.5,63255.25,21.21,5568.65
