# Analysis of output files
## Prepare environment, functions, etc.

In [None]:
import os
import sys

# Make the modules in the local ``src`` directory importable from here.
# The entry is added only once, so re-running this cell does not grow sys.path.
module_path = os.path.abspath('src')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [None]:
import pandas
import matplotlib
from helpers import get_analysis_files, getFiles, FilePair
from myDataClasses import AnalysisFile

# Root folder of the analysis results; the strategy sub-folders
# (DP, DPWeight, Greedy, GreedyOne, FPTAS) are addressed relative to it.
path = 'analysisOutput/results'

In [None]:
#pandas.read_csv?

In [None]:
# Important functions

def remove_bag_results(table):
    """Return only the columns located left of the "|" delimiter column.

    The delimiter is searched for in the first row of ``table``; every column
    from the delimiter onwards is discarded.

    Args:
        table: DataFrame whose first row contains a "|" cell.

    Returns:
        A positional slice of ``table`` with the columns before the delimiter.

    Raises:
        ValueError: If the first row contains no "|" cell.
    """
    first_row_cells = list(table.iloc[0])
    cutoff = first_row_cells.index("|")
    return table.iloc[:, :cutoff]

def get_cols_list(path: str):
    """Read the column names from a column-description file.

    The file is parsed as a space-delimited, header-less CSV. Everything from
    the "|" delimiter column onwards is dropped (see ``remove_bag_results``)
    and the remaining cells of the first row are returned.

    Args:
        path: Location of the column-description file.

    Returns:
        A list with the first-row values that precede the "|" delimiter.
    """
    description = remove_bag_results(
        pandas.read_csv(path, index_col=None, delimiter=" ", header=None)
    )
    return list(description.iloc[0])

def load_analysis_files(folder_path: str, column_list):
    """Load every analysis file of a folder into one indexed table.

    Each file returned by ``get_analysis_files(folder_path)`` is read as a
    space-delimited, header-less CSV, cut at the "|" delimiter column (see
    ``remove_bag_results``), labelled with ``column_list`` and tagged with
    the ``dataset`` and ``strategy`` attributes of the file object.

    Args:
        folder_path: Folder handed to ``get_analysis_files``.
        column_list: Names for the columns kept from each file. It has to
            contain "id" and "item_count", because both become index levels.

    Returns:
        A DataFrame holding the rows of all files, indexed by
        ``['strategy', 'dataset', 'id', 'item_count']``.

    Raises:
        ValueError: If ``get_analysis_files`` yields no files for the folder.
    """
    tables = []
    for file in get_analysis_files(folder_path):
        curr_table = pandas.read_csv(file.full_path, index_col=None, delimiter=" ", header=None)
        curr_table = remove_bag_results(curr_table)
        curr_table.columns = column_list
        # assign() returns a new frame, so the tag columns are never written
        # into a slice of the table that was read from disk.
        tables.append(curr_table.assign(dataset=file.dataset, strategy=file.strategy))

    if not tables:
        raise ValueError(f"No analysis files found in '{folder_path}'")

    # One concat instead of repeated DataFrame.append (removed in pandas 2.0),
    # which also avoids re-copying the accumulated rows for every file.
    output_table = pandas.concat(tables, ignore_index=True)
    return output_table.set_index(['strategy', 'dataset', 'id', "item_count"])

def construct_table_from(filePair: FilePair):
    """Build a per-row summary table from a solution file and its data file.

    Both files are read as space-delimited, header-less CSVs.

    Args:
        filePair: Object exposing ``solutionFile`` and ``dataFile`` paths.

    Returns:
        A DataFrame with the columns "best_value", "max_cost" and
        "item_count". "item_count" is the value in the first row, second
        column of the data file and is repeated on every row.
    """
    solutions = pandas.read_csv(filePair.solutionFile, header=None, index_col=None, delimiter=" ")
    instances = pandas.read_csv(filePair.dataFile, header=None, index_col=None, delimiter=" ")

    n_items = instances.iloc[0, 1]

    # Keep the first solution row per value of column 0. reset_index() keeps
    # the old index as an extra leading column, so positional column 3 below
    # is column 2 of the solution file.
    unique_solutions = solutions.drop_duplicates(subset=[0], keep='first').reset_index()
    best_values = unique_solutions.iloc[:, 3]

    # Every second column starting at position 4; the row-wise maximum of
    # these is reported as "max_cost" (presumably the item costs -- confirm
    # against the data file format).
    row_maxima = instances.iloc[:, 4::2].max(axis=1)

    summary = pandas.concat([best_values, row_maxima], axis=1)
    summary.columns = ["best_value", "max_cost"]
    summary["item_count"] = n_items
    return summary

def get_info_from_datafiles(path: str):
    """Collect the summary tables of all file pairs found under ``path``.

    Every pair yielded by ``getFiles(path)`` is converted with
    ``construct_table_from`` and tagged with the dataset name, which is the
    last "/"-separated component of ``path``.

    Args:
        path: Dataset folder, e.g. "data/NK". A trailing "/" is ignored.

    Returns:
        A DataFrame indexed by ``["dataset", "item_count"]`` holding the rows
        of all pairs, or ``None`` when ``getFiles`` yields no pairs.
    """
    # Strip a trailing separator first, otherwise the dataset name is "".
    dataset = path.rstrip("/").split("/")[-1]

    tables = []
    for filePair in getFiles(path):
        curr_table = construct_table_from(filePair)
        curr_table["dataset"] = dataset
        tables.append(curr_table.set_index(["dataset", "item_count"]))

    if not tables:
        return None

    # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
    return pandas.concat(tables)

## Put data from all analysis files into tables

In [None]:
# Read the column names of every strategy from its column-description file

dp_cols, dpweight_cols, greedy_cols, greedyone_cols, fptas_cols = map(
    get_cols_list,
    (
        f'{path}/DP/column_description_DP.dat',
        f'{path}/DPWeight/column_description_DPWeight.dat',
        f'{path}/Greedy/column_description_Greedy.dat',
        f'{path}/GreedyOne/column_description_GreedyOne.dat',
        f'{path}/FPTAS/column_description_FPTAS.dat',
    ),
)

In [None]:
# Build one result table per strategy from its folder and column names
dp_table, dpweight_table, greedy_table, greedyone_table, fptas_table = (
    load_analysis_files(folder, columns)
    for folder, columns in (
        (f'{path}/DP', dp_cols),
        (f'{path}/DPWeight', dpweight_cols),
        (f'{path}/Greedy', greedy_cols),
        (f'{path}/GreedyOne', greedyone_cols),
        (f'{path}/FPTAS', fptas_cols),
    )
)

## Get average time values for all strategies

In [None]:
# Stack the rows of all strategy tables into one table. Only the FPTAS table
# has its "relative_error" column dropped first. pandas.concat replaces the
# chained DataFrame.append calls, which were removed in pandas 2.0.
avg_times = pandas.concat([
    fptas_table.drop(["relative_error"], axis=1),
    dp_table,
    dpweight_table,
    greedy_table,
    greedyone_table,
])

# Average the "time" column per (strategy, item_count), rounded to 4 decimals
avg_times = avg_times.groupby(["strategy", "item_count"])["time"].mean().round(4)

# Move the strategies into separate columns; rows stay indexed by item_count
avg_times = avg_times.unstack("strategy")

# Save the dataframe to csv
avg_times.to_csv('analysisOutput/avg_times.csv', header=True)

avg_times

## FPTAS error/item_count analysis

In [None]:
# Summary tables of the NK, ZKC and ZKW dataset folders
nk_info, zkc_info, zkw_info = map(
    get_info_from_datafiles,
    ("data/NK", "data/ZKC", "data/ZKW"),
)

nk_info