# Analysis of output files
## Prepare environment, functions etc.

In [1]:
import os
import sys

# Make the modules in the local "src" directory importable from this notebook.
# The directory is appended only once, so re-running the cell leaves sys.path unchanged.
module_path = os.path.abspath('src')
if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:
import pandas
import matplotlib
from helpers import get_analysis_files, getFiles, FilePair
from myDataClasses import AnalysisFile

# Root folder of the analysis results; the cells below read one sub-folder per
# strategy (DP, DPWeight, Greedy, GreedyOne, FPTAS) from it.
path = 'analysisOutput/results'

In [3]:
#pandas.set_option('display.max_rows', None)
#pandas.read_csv?

In [4]:
# Important functions

def remove_bag_results(table):
    """Return the columns of ``table`` that lie left of the "|" marker.

    The marker is looked up in the first row only. The column holding it and
    every column to its right are dropped. A ``ValueError`` is raised when the
    first row contains no "|" value.
    """
    first_row = list(table.iloc[0])
    marker_position = first_row.index("|")
    return table.iloc[:, :marker_position]

def get_cols_list(path: str):
    """Read the column names stored in a column-description file.

    The file at ``path`` is parsed as a space-delimited table without a header
    row. Its first row, cut off before the "|" marker by
    ``remove_bag_results``, is returned as a list.
    """
    description = pandas.read_csv(path, index_col=None, delimiter=" ", header=None)
    names_row = remove_bag_results(description).iloc[0]
    return list(names_row)

def load_analysis_files(folder_path: str, column_list):
    """Load every analysis file of one folder into a single indexed table.

    Each file returned by ``get_analysis_files(folder_path)`` is read as a
    space-delimited table without a header, cut off before the "|" marker,
    labelled with ``column_list`` and extended by the file's ``dataset`` and
    ``strategy`` values.

    Args:
        folder_path: Folder passed on to ``get_analysis_files``.
        column_list: Column names for the trimmed tables; must contain "id"
            and "item_count" because both become index levels.

    Returns:
        One DataFrame with the rows of all files, indexed by
        (strategy, dataset, id, item_count).

    Raises:
        ValueError: If ``get_analysis_files`` yields no file.
    """
    tables = []

    for file in get_analysis_files(folder_path):
        curr_table = pandas.read_csv(file.full_path, index_col=None, delimiter=" ", header=None)
        # Copy the slice so the column assignments below modify an independent frame.
        curr_table = remove_bag_results(curr_table).copy()
        curr_table.columns = column_list
        curr_table["dataset"] = file.dataset
        curr_table["strategy"] = file.strategy
        tables.append(curr_table)

    if not tables:
        raise ValueError(f"No analysis files found in {folder_path!r}")

    # A single concat replaces the per-file DataFrame.append (removed in
    # pandas 2.0), which also copied the whole accumulated table each time.
    output_table = pandas.concat(tables, ignore_index=True)
    return output_table.set_index(['strategy', 'dataset', 'id', "item_count"])

def construct_table_from(filePair: FilePair):
    """Build the per-instance info table for one solution/data file pair.

    Both files are read as space-delimited tables without a header. The
    result has the columns "id", "best_value", "max_cost" and "item_count":

    * "id" and "best_value" are columns 0 and 2 of the solution file, keeping
      only the first row for every value of column 0;
    * "max_cost" is the row-wise maximum over every second column of the data
      file, starting at position 4;
    * "item_count" is the value in the first row, second column of the data
      file, repeated for every row.
    """
    solutions = pandas.read_csv(filePair.solutionFile, header=None, index_col=None, delimiter=" ")
    instances = pandas.read_csv(filePair.dataFile, header=None, index_col=None, delimiter=" ")

    # reset_index() keeps the old row labels as a new leading column, so the
    # file's columns 0 and 2 end up at positions 1 and 3.
    unique_solutions = solutions.drop_duplicates(subset=[0], keep='first').reset_index()
    ids = unique_solutions.iloc[:, 1]
    best_values = unique_solutions.iloc[:, 3]

    max_costs = instances.iloc[:, 4::2].max(axis=1)

    info_table = pandas.concat([ids, best_values, max_costs], axis=1)
    info_table.columns = ["id", "best_value", "max_cost"]
    info_table["item_count"] = instances.iloc[0, 1]
    return info_table

def get_info_from_datafiles(path: str):
    """Collect the info tables of every file pair found for one dataset.

    Args:
        path: "/"-separated dataset folder, passed on to ``getFiles``. Its
            last component is used as the dataset name.

    Returns:
        A DataFrame indexed by (dataset, item_count, id) holding the rows
        produced by ``construct_table_from`` for each file pair, or ``None``
        when ``getFiles`` yields no pair.
    """
    # Ignore a trailing "/" so that "data/NK/" still gives the name "NK"
    # instead of an empty string.
    dataset = path.rstrip("/").split("/")[-1]

    tables = []
    for filePair in getFiles(path):
        curr_table = construct_table_from(filePair)
        curr_table["dataset"] = dataset
        tables.append(curr_table.set_index(["dataset", "item_count", "id"]))

    if not tables:
        return None

    # pandas.concat replaces DataFrame.append, which was removed in pandas 2.0.
    return pandas.concat(tables)

## Put data from all analysis files into tables

In [5]:
# Column names of every strategy, read from its column-description file
dp_cols, dpweight_cols, greedy_cols, greedyone_cols, fptas_cols = [
    get_cols_list(description_file)
    for description_file in (
        f'{path}/DP/column_description_DP.dat',
        f'{path}/DPWeight/column_description_DPWeight.dat',
        f'{path}/Greedy/column_description_Greedy.dat',
        f'{path}/GreedyOne/column_description_GreedyOne.dat',
        f'{path}/FPTAS/column_description_FPTAS.dat',
    )
]

In [6]:
# One result table per strategy, each labelled with that strategy's column names
dp_table, dpweight_table, greedy_table, greedyone_table, fptas_table = [
    load_analysis_files(results_folder, strategy_cols)
    for results_folder, strategy_cols in (
        (f'{path}/DP', dp_cols),
        (f'{path}/DPWeight', dpweight_cols),
        (f'{path}/Greedy', greedy_cols),
        (f'{path}/GreedyOne', greedyone_cols),
        (f'{path}/FPTAS', fptas_cols),
    )
]

## Get average time values for all strategies

In [7]:
# Stack the rows of all strategies into one table. The "relative_error" column
# is dropped from the FPTAS table before stacking.
# pandas.concat replaces the chained DataFrame.append calls (removed in pandas 2.0).
avg_times = pandas.concat([
    fptas_table.drop(["relative_error"], axis=1),
    dp_table,
    dpweight_table,
    greedy_table,
    greedyone_table,
])

# Average time for every (strategy, item_count) combination, rounded to 4 decimals
avg_times = avg_times.groupby(["strategy", "item_count"])["time"].mean().round(4)

# Move all values of the strategy level into separate columns
avg_times = avg_times.unstack("strategy")

# Save the dataframe to csv
avg_times.to_csv('analysisOutput/avg_times.csv', header=True)

avg_times

strategy,DP,DPWeight,FPTAS,Greedy,GreedyOne
item_count,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
4,93.6982,1.4304,0.8519,0.0105,0.0079
10,456.8244,7.7131,10.227,0.016,0.0099
15,1106.8275,21.3662,32.6015,0.0204,0.0119
20,1950.3325,37.899,83.623,0.0269,0.0133
22,1876.8124,51.397,116.6782,0.0317,0.0171
25,3221.8843,61.1677,175.6165,0.0305,0.0163
27,2785.1095,72.0347,222.2353,0.0372,0.0168
30,3349.9526,85.181,292.5677,0.037,0.0186
32,4461.4599,103.9543,331.5438,0.0387,0.0179
35,7109.8726,119.1089,400.9741,0.044,0.0194


## FPTAS error/item_count analysis

In [19]:
# Create info_table which contains best values and highest cost for all instances
nk_info = get_info_from_datafiles("data/NK")
zkc_info = get_info_from_datafiles("data/ZKC")
zkw_info = get_info_from_datafiles("data/ZKW")

# pandas.concat replaces DataFrame.append, which was removed in pandas 2.0
info_table = pandas.concat([nk_info, zkc_info, zkw_info])

In [20]:
# Show the combined info table of the NK, ZKC and ZKW datasets
info_table

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,best_value,max_cost
dataset,item_count,id,Unnamed: 3_level_1,Unnamed: 4_level_1
NK,4,1,1129,2084
NK,4,2,2966,1503
NK,4,3,0,2253
NK,4,4,70,1826
NK,4,5,2758,1755
...,...,...,...,...
ZKW,40,1729,5048,4725
ZKW,40,1846,7956,7956
ZKW,40,1935,7609,7609
ZKW,40,1987,8712,8712


In [27]:
# Build the FPTAS analysis table: drop the strategy level, re-index the FPTAS
# rows by dataset, item count, relative error and id, then attach the
# info_table columns to every row.
fptas_analysis_table = (
    fptas_table
    .reset_index()
    .drop(columns="strategy")
    .set_index(["dataset", "item_count", "relative_error", "id"])
    .join(info_table)
)
fptas_analysis_table

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,maximum_sum,time,best_value,max_cost
dataset,item_count,id,relative_error,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
NK,4,1,0.2,1129,0.880371,1129,2084
NK,4,1,0.4,1129,0.491997,1129,2084
NK,4,1,0.3,1129,0.832692,1129,2084
NK,4,1,0.1,1129,0.871974,1129,2084
NK,4,1,0.5,1129,0.419170,1129,2084
...,...,...,...,...,...,...,...
ZKW,40,1991,0.2,4660,325.573507,4660,4660
ZKW,40,1991,0.4,4660,211.777098,4660,4660
ZKW,40,1991,0.1,4660,650.592674,4660,4660
ZKW,40,1991,0.3,4660,181.874641,4660,4660


In [28]:
# Show the joined FPTAS analysis table once more
fptas_analysis_table

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,maximum_sum,time,best_value,max_cost
dataset,item_count,id,relative_error,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
NK,4,1,0.2,1129,0.880371,1129,2084
NK,4,1,0.4,1129,0.491997,1129,2084
NK,4,1,0.3,1129,0.832692,1129,2084
NK,4,1,0.1,1129,0.871974,1129,2084
NK,4,1,0.5,1129,0.419170,1129,2084
...,...,...,...,...,...,...,...
ZKW,40,1991,0.2,4660,325.573507,4660,4660
ZKW,40,1991,0.4,4660,211.777098,4660,4660
ZKW,40,1991,0.1,4660,650.592674,4660,4660
ZKW,40,1991,0.3,4660,181.874641,4660,4660
